예제 #1
0
def test_kmp_precomputed_dictionary():
    """Check KMPClassifier with ``metric="precomputed"`` on one shuffle split.

    A single split of ``mult_dense`` / ``mult_target`` with
    ``test_fraction=0.2`` is drawn, dictionary components are selected from
    the training part, and the classifier is fitted on the kernel computed
    between the training samples and those components.  The test requires an
    accuracy of at least 0.75 on the training kernel and at least 0.63 on the
    held-out kernel.
    """
    splitter = ShuffleSplit(mult_dense.shape[0],
                            n_iterations=1,
                            test_fraction=0.2,
                            random_state=0)
    # Only one iteration is requested, so take the first (and only) split.
    train_idx, test_idx = list(splitter)[0]

    X_fit, y_fit = mult_dense[train_idx], mult_target[train_idx]
    X_held, y_held = mult_dense[test_idx], mult_target[test_idx]

    dictionary = select_components(X_fit, y_fit,
                                   n_components=0.3,
                                   random_state=0)

    # With a precomputed metric the classifier is fed kernel values between
    # the samples and the selected components rather than raw features.
    gram_fit = pairwise_kernels(X_fit, dictionary)
    clf = KMPClassifier(metric="precomputed")
    clf.fit(gram_fit, y_fit)

    def accuracy(kernel, expected):
        # Fraction of predictions that match the expected labels.
        return np.mean(clf.predict(kernel) == expected)

    assert_true(accuracy(gram_fit, y_fit) >= 0.75)

    gram_held = pairwise_kernels(X_held, dictionary)
    assert_true(accuracy(gram_held, y_held) >= 0.63)
예제 #2
0
def fit_kmp(X_train, y_train, X_test, y_test, class_distrib, opts, random_state):
    """Build a KMPClassifier from ``opts``, fit it on the training data, return it.

    The initial components come from ``select_components`` applied to the
    training data with the given ``class_distrib`` and ``random_state``.
    ``X_test`` / ``y_test`` are passed to the classifier as its validation
    data (``X_val`` / ``y_val``).  Every other hyper-parameter is read from
    the corresponding attribute of ``opts``.
    """
    init_components = select_components(X_train, y_train,
                                        n_components=opts.n_components,
                                        class_distrib=class_distrib,
                                        random_state=random_state)

    # Collected in one place so the constructor call stays short.
    # NOTE: score_func is not passed (an f1_score override was disabled in
    # the original code), so the classifier uses its own default.
    settings = dict(
        n_nonzero_coefs=opts.n_nonzero_coefs,
        init_components=init_components,
        n_refit=opts.n_refit,
        estimator=Ridge(alpha=opts.alpha),
        X_val=X_test,
        y_val=y_test,
        metric=opts.metric,
        gamma=opts.gamma,
        degree=opts.degree,
        coef0=opts.coef0,
        scale=opts.scale,
        n_validate=opts.n_validate,
        epsilon=opts.epsilon,
        verbose=1,
        random_state=random_state,
        n_jobs=-1,
    )

    model = KMPClassifier(**settings)
    model.fit(X_train, y_train)
    return model
예제 #3
0
                                                    proportion_train=0.75,
                                                    random_state=random_state)
except KeyError:
    raise ValueError("Wrong dataset name!")

# Report the shapes of the train / test matrices.
print "X_train", X_train.shape
print "X_test", X_test.shape

# PCA view: project both sets with a 300-component RandomizedPCA.  The
# projection is fitted on the training set only and then applied unchanged
# to the test set.
print "Computing PCA..."
pca = RandomizedPCA(n_components=300)
X_train_pca = pca.fit_transform(X_train)
X_test_pca = pca.transform(X_test)

# Dictionary components for the PCA view, selected from the projected
# training data with class_distrib="balanced".
components_pca = select_components(X_train_pca, y_train,
                                   n_components=opts.n_components,
                                   class_distrib="balanced")

# RBF kernel (gamma fixed at 0.1) between every projected sample and the
# PCA-view components; train and test use the same components.
print "Computing kernels (PCA view)..."
K_pca_train = pairwise_kernels(X_train_pca, components_pca, metric="rbf",
                               gamma=0.1)
K_pca_test = pairwise_kernels(X_test_pca, components_pca, metric="rbf",
                              gamma=0.1)

# Regular view: same procedure on the original (un-projected) features.
components = select_components(X_train, y_train,
                               n_components=opts.n_components,
                               class_distrib="balanced")

# RBF kernel (gamma fixed at 0.1) between the training samples and the
# regular-view components.
print "Computing kernels (regular view)..."
K_train = pairwise_kernels(X_train, components, metric="rbf", gamma=0.1)
예제 #4
0
# One list of fitted classifiers per component-construction strategy.
# clf_s collects the "selected components" models and clf_kg the "global
# k-means" models; clf_kb / clf_ks are not filled in the visible part of the
# loop (presumably balanced / stratified k-means -- confirm further down).
clf_s = []
clf_kg = []
clf_kb = []
clf_ks = []

# Fold counter; it is printed and also used as the random_state of each fold.
# NOTE(review): no increment is visible in this part of the loop -- presumably
# it happens at the end of the loop body; confirm.
j = 0
for X_tr, y_tr, X_te, y_te in split_data(X_train, y_train,
                                         X_test, y_test,
                                         opts.n_folds,
                                         opts.cvtype,
                                         opts.force_cv):
    print "Fold", j

    # selected from datasets
    print "Selected components"
    components = select_components(X_tr, y_tr, opts.n_components,
                                   class_distrib=class_distrib, random_state=j)
    clf_s.append(fit_kmp(X_tr, y_tr, X_te, y_te, components, opt_dict,
                         opts.regression, random_state=j))

    # k-means global
    print "Global k-means"
    components = create_kmeans_comp(X_tr, y_tr,
                                    n_components=opts.n_components,
                                    class_distrib="global",
                                    random_state=j)
    clf_kg.append(fit_kmp(X_tr, y_tr, X_te, y_te, components, opt_dict,
                          opts.regression, random_state=j))

    # The remaining strategies are only run when opts.regression is false.
    if not opts.regression:
        # k-means balanced
        print "Balanced k-means"
예제 #5
0
def test_kmp_select_components_stratified():
    """Check how many components a stratified selection returns.

    With ``n_components=0.5`` and ``class_distrib="stratified"`` the test
    expects one component fewer than half the number of rows of
    ``mult_dense``.
    """
    components = select_components(mult_dense, mult_target,
                                   n_components=0.5,
                                   class_distrib="stratified",
                                   random_state=0)
    # Floor division keeps the expected count an integer even under true
    # division (Python 3 or ``from __future__ import division``); with plain
    # ``/`` an odd sample count would give a fractional expectation.
    # NOTE(review): the "- 1" presumably reflects per-class rounding inside
    # select_components -- confirm against its implementation.
    expected_count = mult_dense.shape[0] // 2 - 1
    assert_equal(components.shape[0], expected_count)
# parse_kmp() supplies the dataset name, the options object and the random
# state used below (presumably parsed from the command line -- confirm).
dataset, opts, random_state = parse_kmp()

# Load the train / test split.  A KeyError raised by load_dataset is
# re-raised as a ValueError about the dataset name.
try:
    X_train, y_train, X_test, y_test = load_dataset(dataset,
                                                    proportion_train=0.75,
                                                    random_state=random_state)
except KeyError:
    raise ValueError("Wrong dataset name!")

# Report the shapes of the train / test matrices.
print "X_train", X_train.shape
print "X_test", X_test.shape

# Component selection uses "random" for regression and "balanced" otherwise.
class_distrib = "random" if opts.regression else "balanced"

# Dictionary components shared by all kernels computed below.
components = select_components(X_train, y_train,
                               n_components=opts.n_components,
                               class_distrib=class_distrib)

# Each kernel is evaluated between the samples and the selected components,
# for the training set and then for the test set.
print "Computing linear kernels..."
linear_train = pairwise_kernels(X_train, components, metric="linear")
linear_test = pairwise_kernels(X_test, components, metric="linear")

# RBF kernel; gamma comes from the options.
print "Computing rbf kernels..."
rbf_train = pairwise_kernels(X_train, components, metric="rbf",
                             gamma=opts.gamma)
rbf_test = pairwise_kernels(X_test, components, metric="rbf",
                            gamma=opts.gamma)

# Polynomial kernel; degree comes from the options.
print "Computing polynomial kernels..."
poly_train = pairwise_kernels(X_train, components, metric="poly",
                              degree=opts.degree)