metric=opts.metric, gamma=opts.gamma, degree=opts.degree, coef0=opts.coef0, scale=opts.scale, n_validate=opts.n_validate, epsilon=opts.epsilon, #score_func=f1_score, verbose=1, random_state=random_state, n_jobs=-1) clf.fit(X_train, y_train) return clf X_train, y_train, X_test, y_test, opts, args = parse_kmp() X_tr, y_tr, X_te, y_te = X_train, y_train, X_test, y_test clf_r = [] clf_b = [] clf_s = [] for X_tr, y_tr, X_te, y_te in split_data(X_train, y_train, X_test, y_test, opts.n_folds, not opts.regression): clf_r.append(fit_kmp(X_tr, y_tr, X_te, y_te, "random", opts, random_state=0)) clf_b.append(fit_kmp(X_tr, y_tr, X_te, y_te, "balanced", opts, random_state=0))
gamma=opts.gamma, degree=opts.degree, coef0=opts.coef0, scale=opts.scale,
scale_y=opts.scale_y, check_duplicates=opts.check_duplicates,
n_validate=opts.n_validate, epsilon=opts.epsilon,
X_val=X_test, y_val=y_test,
verbose=1, random_state=random_state, n_jobs=-1)
    # NOTE(review): the keyword arguments above close a call that was opened
    # before this excerpt; the enclosing function header is not visible here.
    # The function fits the object it built on the training data and returns it.
    clf.fit(X_train, y_train)
    return clf

# Obtain the train/test data and the parsed options.  The keyword arguments
# are presumably option defaults for this script -- TODO confirm in parse_kmp.
# `args` is not used in the visible part of the script.
X_train, y_train, X_test, y_test, opts, args = parse_kmp(n_components=1.0, check_duplicates=True)

# Bind the per-fold names to the full data.
X_tr, y_tr, X_te, y_te = X_train, y_train, X_test, y_test

# Reject values of n_nonzero_coefs below 1 before doing any work.
if opts.n_nonzero_coefs < 1:
    raise ValueError("n_nonzero_coefs must be a positive integer")

# Materialise the splits as a list so that len(cv) is available below.
cv = list(split_data(X_train, y_train, X_test, y_test, opts.n_folds, not opts.regression))

# Ten evenly spaced values from 0.1 to 1.0 (alternative grids kept below).
amounts = np.linspace(0.1, 1.0, 10)
#amounts = (0.25, 0.5, 0.75, 1.0)
#amounts = np.linspace(0.1, 0.5, 5)

# Result matrices with one row per amount and one column per split.
# NOTE(review): the names suggest supervised vs. semi-supervised accuracy;
# confirm against the code that fills them.
acc_sup = np.zeros((len(amounts), len(cv)), dtype=np.float64)
acc_semi = np.zeros((len(amounts), len(cv)), dtype=np.float64)
n_refit=opts.n_refit, estimator=Ridge(alpha=opts.alpha),
X_val=K_test, y_val=y_test,
metric="precomputed", scale=True, scale_y=opts.scale_y,
check_duplicates=opts.check_duplicates, n_validate=opts.n_validate,
epsilon=opts.epsilon,
verbose=1, random_state=random_state, n_jobs=-1)
    # NOTE(review): the keyword arguments above close a call that was opened
    # before this excerpt; the enclosing function header is not visible here.
    # The function fits the object it built on K_train / y_train and returns it.
    clf.fit(K_train, y_train)
    return clf

# Dataset name, parsed options and random state for this run.
dataset, opts, random_state = parse_kmp()

# Load the train/test data (proportion_train=0.75).  A KeyError raised by
# load_dataset is reported to the user as a ValueError.
try:
    X_train, y_train, X_test, y_test = load_dataset(dataset, proportion_train=0.75, random_state=random_state)
except KeyError:
    raise ValueError("Wrong dataset name!")

# Show the shapes of the loaded feature matrices.
print "X_train", X_train.shape
print "X_test", X_test.shape

# PCA view
print "Computing PCA..."
# Fit a 300-component RandomizedPCA on the training data and project it.
pca = RandomizedPCA(n_components=300)
X_train_pca = pca.fit_transform(X_train)
@memory.cache def fit_kmp(X_train, y_train, X_test, y_test, components, opt_dict, regression, random_state): klass = KMPRegressor if regression else KMPClassifier clf = klass(init_components=components, estimator=Ridge(alpha=0.1), X_val=X_test, y_val=y_test, verbose=1, random_state=random_state, n_jobs=-1, **opt_dict) clf.fit(X_train, y_train) return clf X_train, y_train, X_test, y_test, opts, args = parse_kmp(check_duplicates=True) opt_dict = options_to_dict(opts) class_distrib = "random" if opts.regression else "balanced" clf_s = [] clf_kg = [] clf_kb = [] clf_ks = [] j = 0 for X_tr, y_tr, X_te, y_te in split_data(X_train, y_train, X_test, y_test, opts.n_folds, opts.cvtype, opts.force_cv):