Example #1
0
    start = time.time()
    y_pred = clf.predict(X_test)
    return np.mean(y_test == y_pred), time.time() - start

# Command-line usage: <script> [dataset [kernel]]; both arguments are optional.
# Only a missing argument (IndexError) triggers the fallback, so unrelated
# failures are no longer silently swallowed.
try:
    dataset = sys.argv[1]
except IndexError:
    dataset = "usps0"

try:
    kernel = sys.argv[2]
except IndexError:
    kernel = "rbf"

# load_dataset signals an unknown dataset name with KeyError; re-raise it as a
# ValueError carrying a user-facing message.
try:
    X_train, y_train, X_test, y_test = load_dataset(dataset)
except KeyError:
    raise ValueError("Wrong dataset name!")

# Grid of 10 evenly spaced nu values between 0.01 and 0.15.
Nu = np.linspace(0.01, 0.15, 10)
# Fit one model per nu value; each fit_nusvc result is a pair that is split
# below (presumably (fitted classifier, training time) -- confirm against
# fit_nusvc).
res = [fit_nusvc(X_train, y_train, nu=nu, kernel=kernel) for nu in Nu]

# Transpose the list of pairs into two parallel tuples.
clfs, train_times = zip(*res)

# Evaluate every fitted classifier on the test set; each predict result is a
# pair that is split into accuracies and test_times.
res = [predict(clf, X_test, y_test) for clf in clfs]
accuracies, test_times = zip(*res)

# Size of the first axis of X_train (not used in the visible part of the
# script).
n_samples = X_train.shape[0]
# Plot, for each nu, the mean of the classifier's n_support_ attribute.
pl.figure()
pl.plot(Nu, [np.mean(clf.n_support_) for clf in clfs])
pl.xlabel("nu")
Example #2
0
def parse_kmp(n_nonzero_coefs=200,
              n_components=0.5,
              metric="rbf",
              gamma=0.1,
              degree=4,
              coef0=1.0,
              epsilon=0.0,
              n_validate=5,
              n_refit=5,
              scale=False,
              scale_y=False,
              check_duplicates=False,
              force_cv=False,
              cvtype="random",
              bars=False,
              savefig=""):
    op = OptionParser()
    op.add_option("--n_folds", action="store", default=5,
                  dest="n_folds", type="int")
    op.add_option("-n", action="store", default=n_nonzero_coefs,
                  dest="n_nonzero_coefs", type="float")
    op.add_option("--n_components", action="store", default=n_components,
                  dest="n_components", type="float")
    op.add_option("--metric", action="store", default=metric, dest="metric",
                  type="str")
    op.add_option("--gamma", action="store", default=gamma, dest="gamma",
                  type="float")
    op.add_option("--degree", action="store", default=degree, dest="degree",
                  type="int")
    op.add_option("--coef0", action="store", default=coef0, dest="coef0",
                  type="float")
    op.add_option("--epsilon", action="store", default=epsilon, dest="epsilon",
                  type="float")
    op.add_option("--n_validate", action="store", default=n_validate,
                  dest="n_validate", type="int")
    op.add_option("--n_refit", action="store", default=n_refit, dest="n_refit",
                  type="int")
    op.add_option("--scale", action="store_true", default=scale, dest="scale")
    op.add_option("--scale_y", action="store_true", default=scale_y,
                  dest="scale_y")
    op.add_option("--check_duplicates", action="store_true",
                  default=check_duplicates, dest="check_duplicates")
    op.add_option("--regression", action="store_true", default=scale,
                  dest="regression")
    op.add_option("--force_cv", action="store_true", default=force_cv,
                  dest="force_cv")
    op.add_option("--cvtype", action="store", default=cvtype, dest="cvtype",
                  type="str")
    op.add_option("--bars", action="store_true", default=bars,
                  dest="bars")
    op.add_option("--savefig", action="store", default=savefig, dest="savefig",
                  type="str")

    (opts, args) = op.parse_args()

    try:
        dataset = args[0]
    except:
        dataset = "usps"
    try:
        X_train, y_train, X_test, y_test = load_dataset(dataset)

        print "X_train", X_train.shape
        if X_test is not None: print "X_test", X_test.shape

        return X_train, y_train, X_test, y_test, opts, args
    except KeyError:
        raise ValueError("Wrong dataset name!")
                        scale=True,
                        scale_y=opts.scale_y,
                        check_duplicates=opts.check_duplicates,
                        n_validate=opts.n_validate,
                        epsilon=opts.epsilon,
                        verbose=1,
                        random_state=random_state,
                        n_jobs=-1)
    clf.fit(K_train, y_train)
    return clf

# Parse command-line options.
# NOTE(review): this call site unpacks three values, while the parse_kmp
# definition visible earlier in this listing returns six -- presumably this
# script targets a different version of parse_kmp; confirm.
dataset, opts, random_state = parse_kmp()

# Load the dataset with proportion_train=0.75 (presumably the fraction of
# samples assigned to the training split -- confirm against load_dataset).
# A KeyError from load_dataset is reported as an invalid dataset name.
try:
    X_train, y_train, X_test, y_test = load_dataset(dataset,
                                                    proportion_train=0.75,
                                                    random_state=random_state)
except KeyError:
    raise ValueError("Wrong dataset name!")

print "X_train", X_train.shape
print "X_test", X_test.shape

# PCA view: fit a 300-component RandomizedPCA on the training data and apply
# the same fitted transform to the test data.
print "Computing PCA..."
pca = RandomizedPCA(n_components=300)
X_train_pca = pca.fit_transform(X_train)
X_test_pca = pca.transform(X_test)

components_pca = select_components(X_train_pca, y_train,
                                   n_components=opts.n_components,