metric=opts.metric,
                        gamma=opts.gamma,
                        degree=opts.degree,
                        coef0=opts.coef0,
                        scale=opts.scale,
                        n_validate=opts.n_validate,
                        epsilon=opts.epsilon,
                        #score_func=f1_score,
                        verbose=1,
                        random_state=random_state,
                        n_jobs=-1)
    clf.fit(X_train, y_train)
    return clf


# Load the data and options. parse_kmp() is defined elsewhere; it returns the
# six values unpacked here.
X_train, y_train, X_test, y_test, opts, args = parse_kmp()
# Pre-bind the per-split names to the full data; the loop below rebinds them
# on every iteration.
X_tr, y_tr, X_te, y_te = X_train, y_train, X_test, y_test

# Fitted models, one entry appended per split.
# NOTE(review): only clf_r and clf_b are appended to in the visible part of
# the loop; clf_s is initialised here but its use is not visible -- confirm
# against the full file.
clf_r = []
clf_b = []
clf_s = []

# Iterate over the (train, test) splits yielded by split_data(), which is
# defined elsewhere. It receives the full data, opts.n_folds and the negation
# of opts.regression.
# NOTE(review): the last argument presumably switches on classification-style
# splitting -- confirm against split_data.
for X_tr, y_tr, X_te, y_te in split_data(X_train, y_train,
                                         X_test, y_test,
                                         opts.n_folds,
                                         not opts.regression):

    # Fit one model per split with "random" as fit_kmp's fifth argument;
    # the seed is fixed at 0.
    # NOTE(review): that argument is presumably the component-selection
    # strategy -- confirm against fit_kmp.
    clf_r.append(fit_kmp(X_tr, y_tr, X_te, y_te, "random", opts,
                         random_state=0))
    # Same split and seed, with "balanced" as the fifth argument.
    clf_b.append(fit_kmp(X_tr, y_tr, X_te, y_te, "balanced", opts,
                    random_state=0))
                gamma=opts.gamma,
                degree=opts.degree,
                coef0=opts.coef0,
                scale=opts.scale,
                scale_y=opts.scale_y,
                check_duplicates=opts.check_duplicates,
                n_validate=opts.n_validate,
                epsilon=opts.epsilon,
                X_val=X_test, y_val=y_test,
                verbose=1,
                random_state=random_state,
                n_jobs=-1)
    clf.fit(X_train, y_train)
    return clf

# Load the data and options. parse_kmp() is defined elsewhere and is called
# here with n_components=1.0 and check_duplicates=True.
# NOTE(review): these keyword arguments presumably override parse_kmp's
# default option values -- confirm against parse_kmp.
X_train, y_train, X_test, y_test, opts, args = parse_kmp(n_components=1.0,
                                                         check_duplicates=True)
# Bind the per-split names to the full data up front.
X_tr, y_tr, X_te, y_te = X_train, y_train, X_test, y_test

# Fail early when the n_nonzero_coefs option is below 1.
if opts.n_nonzero_coefs < 1:
    raise ValueError("n_nonzero_coefs must be a positive integer")

# Materialise the splits into a list so that len(cv) is available for sizing
# the result arrays below.
cv = list(split_data(X_train, y_train,
                     X_test, y_test,
                     opts.n_folds,
                     not opts.regression))

# Ten evenly spaced values from 0.1 to 1.0 inclusive.
amounts = np.linspace(0.1, 1.0, 10)
# Alternative grids, currently disabled:
#amounts = (0.25, 0.5, 0.75, 1.0)
#amounts = np.linspace(0.1, 0.5, 5)
# Zero-initialised float64 result tables: one row per entry of `amounts`, one
# column per split in `cv`.
# NOTE(review): the names suggest scores for supervised vs. semi-supervised
# runs -- confirm where they are filled.
acc_sup = np.zeros((len(amounts), len(cv)), dtype=np.float64)
acc_semi = np.zeros((len(amounts), len(cv)), dtype=np.float64)
                        n_refit=opts.n_refit,
                        estimator=Ridge(alpha=opts.alpha),
                        X_val=K_test, y_val=y_test,
                        metric="precomputed",
                        scale=True,
                        scale_y=opts.scale_y,
                        check_duplicates=opts.check_duplicates,
                        n_validate=opts.n_validate,
                        epsilon=opts.epsilon,
                        verbose=1,
                        random_state=random_state,
                        n_jobs=-1)
    clf.fit(K_train, y_train)
    return clf

# parse_kmp() is defined elsewhere; here it returns the three values unpacked
# below (dataset name, options, random state).
dataset, opts, random_state = parse_kmp()

# Load the dataset, passing proportion_train=0.75 and the parsed random state.
# A KeyError from load_dataset is reported to the user as a ValueError about
# the dataset name.
try:
    X_train, y_train, X_test, y_test = load_dataset(dataset,
                                                    proportion_train=0.75,
                                                    random_state=random_state)
except KeyError:
    raise ValueError("Wrong dataset name!")

# Report the shapes of the two splits (Python 2 print statements).
print "X_train", X_train.shape
print "X_test", X_test.shape

# PCA view: fit a 300-component RandomizedPCA on the training data and keep
# the transformed training set.
print "Computing PCA..."
pca = RandomizedPCA(n_components=300)
X_train_pca = pca.fit_transform(X_train)
@memory.cache
def fit_kmp(X_train, y_train, X_test, y_test, components, opt_dict, regression,
            random_state):
    """Build a KMP model, fit it on the training data and return it.

    A ``KMPRegressor`` is used when ``regression`` is true, a
    ``KMPClassifier`` otherwise.  The model is constructed with:

    * ``init_components=components``
    * a ``Ridge(alpha=0.1)`` estimator
    * ``(X_test, y_test)`` passed as ``X_val`` / ``y_val``
    * ``verbose=1``, ``n_jobs=-1`` and the given ``random_state``
    * every entry of ``opt_dict`` as an additional keyword argument

    The call is wrapped by ``memory.cache`` (``memory`` is defined elsewhere
    in the file).
    """
    # Pick the model class first, exactly as the single-expression form did.
    if regression:
        model_cls = KMPRegressor
    else:
        model_cls = KMPClassifier

    ridge = Ridge(alpha=0.1)

    # opt_dict is still expanded in the call itself so that a key clashing
    # with one of the explicit keywords raises TypeError as before.
    model = model_cls(
        init_components=components,
        estimator=ridge,
        X_val=X_test,
        y_val=y_test,
        verbose=1,
        random_state=random_state,
        n_jobs=-1,
        **opt_dict
    )
    model.fit(X_train, y_train)
    return model

# Load the data and options. parse_kmp() is defined elsewhere and is called
# here with check_duplicates=True.
# NOTE(review): presumably this overrides the default of that option --
# confirm against parse_kmp.
X_train, y_train, X_test, y_test, opts, args = parse_kmp(check_duplicates=True)
# Convert the parsed options with options_to_dict() (defined elsewhere).
# NOTE(review): presumably the result is what gets passed on as fit_kmp's
# opt_dict -- confirm at the call sites.
opt_dict = options_to_dict(opts)

# "random" for regression runs, "balanced" otherwise.
class_distrib = "random" if opts.regression else "balanced"

# Result accumulators.
# NOTE(review): they are filled outside the visible code -- confirm what each
# list holds where it is appended to.
clf_s = []
clf_kg = []
clf_kb = []
clf_ks = []

# Counter initialised before the split loop.
# NOTE(review): presumably a fold index; its use is not visible here.
j = 0
for X_tr, y_tr, X_te, y_te in split_data(X_train, y_train,
                                         X_test, y_test,
                                         opts.n_folds,
                                         opts.cvtype,
                                         opts.force_cv):