# Example #1
# 0
def cv(x, y, d, gamma_features, gamma_kernel_values, k, C_values, gamma_times=None, n_folds=3, random_state=None):
    """Grid-search SVM hyper-parameters by cross-validated accuracy.

    For every ``(gamma_f, gamma_t)`` pair, centroids and weights are computed
    with ``compute_features`` and turned into an un-exponentiated Gram matrix.
    For every ``gamma_kernel`` the kernel ``exp(-gamma_kernel * gram_noexp)``
    is built over all samples, and each fold / ``C`` combination is scored
    with a precomputed-kernel ``svm.SVC``.

    Parameters
    ----------
    x : array
        Input data; ``x.shape[1] / d`` is taken as the number of items per set.
    y : array
        Labels; indexed with the train/test index arrays of each fold.
    d : int
        Divisor of ``x.shape[1]``, also forwarded to ``compute_features``.
    gamma_features : iterable
        Candidate values forwarded to ``preprocess.gamma2scale`` and
        ``gram_matrix_fun_sym_noexp``.
    gamma_kernel_values : iterable
        Candidate multipliers used inside ``numpy.exp``.
    k : int
        Forwarded to ``compute_features``; must not exceed ``x.shape[1] / d``.
    C_values : iterable
        Candidate values for the ``C`` parameter of ``svm.SVC``.
    gamma_times : iterable, optional
        Candidate values forwarded to ``preprocess.gamma2scale``; defaults to
        ``[None]``.
    n_folds, random_state
        Forwarded to ``sklearn_fe.custom_skfold``.

    Returns
    -------
    The result of ``dictionary.argmax_doubleval`` applied to the mapping
    ``(gamma_f, gamma_kernel, gamma_t, C) -> (mean accuracy, d2ideal)``
    (presumably the best key -- confirm against ``argmax_doubleval``).
    """
    if gamma_times is None:
        gamma_times = [None]
    fold_scores = {}
    d2ideal = {}
    # Materialise the folds once: they are re-used for every hyper-parameter
    # combination below, which would silently yield nothing after the first
    # pass if custom_skfold handed back a one-shot iterator.
    folds = list(sklearn_fe.custom_skfold(y=y, n_folds=n_folds, random_state=random_state))
    n_per_set = x.shape[1] / d
    assert k <= n_per_set, "k must not exceed x.shape[1] / d"
    for gamma_f in gamma_features:
        for gamma_t in gamma_times:
            scale_t = preprocess.gamma2scale(gamma_t, gamma_f)
            centroids, weights = compute_features(x, k, scale_t=scale_t, d=d)
            kernel_fun_noexp = gram_matrix_fun_sym_noexp(gamma_f)
            gram_noexp = kernel_fun_noexp(centroids, weights)
            for gamma_kernel in gamma_kernel_values:
                gram = numpy.exp(-gamma_kernel * gram_noexp)
                d2ideal[gamma_f, gamma_kernel, gamma_t] = kernel.dist_ideal(gram, y)
                for train_index, test_index in folds:
                    # Rows select the samples to evaluate, columns always
                    # select the training samples (precomputed-kernel layout).
                    gram_train = gram[train_index, :][:, train_index]
                    gram_test = gram[test_index, :][:, train_index]
                    # Label slices do not depend on C, so compute them once per fold.
                    y_fold_train = y[train_index]
                    y_fold_test = y[test_index]
                    for C in C_values:
                        clf = svm.SVC(C=C, kernel="precomputed")
                        clf.fit(gram_train, y_fold_train)
                        y_pred = clf.predict(gram_test)
                        acc = sklearn_fe.custom_accuracy_score(y_pred=y_pred, y_true=y_fold_test)
                        fold_scores.setdefault((gamma_f, gamma_kernel, gamma_t, C), []).append(acc)
                print(gamma_f, gamma_t, gamma_kernel)
    # key[:3] is (gamma_f, gamma_kernel, gamma_t), the key used for d2ideal.
    perfs = {
        key: (numpy.mean(values), d2ideal[key[:3]])
        for key, values in fold_scores.items()
    }
    return dictionary.argmax_doubleval(perfs)
# Example #2
# 0
def cv(x,
       y,
       d,
       gamma_features,
       gamma_kernel_values,
       n_coeff,
       C_values,
       gamma_times=None,
       normalize=False,
       n_folds=3,
       random_state=None):
    """Grid-search SVM hyper-parameters by cross-validated accuracy.

    For every ``gamma_f`` an ``RBFSampler`` with ``n_coeff`` components is
    fitted on ``x.reshape((-1, d))``. For every ``gamma_t`` the features are
    computed with ``compute_features``, and for every ``gamma_kernel`` a Gram
    matrix over all samples is built with ``gram_matrix``. Each fold / ``C``
    combination is then scored with a precomputed-kernel ``svm.SVC``.

    Parameters
    ----------
    x : array
        Input data; ``x.shape[1] / d`` is taken as the number of items per set.
    y : array
        Labels; indexed with the train/test index arrays of each fold.
    d : int
        Column count used when reshaping ``x`` for the sampler, also forwarded
        to ``compute_features``.
    gamma_features : iterable
        Candidate ``gamma`` values for ``RBFSampler`` (also forwarded to
        ``preprocess.gamma2scale``).
    gamma_kernel_values : iterable
        Candidate values forwarded to ``gram_matrix``.
    n_coeff : int
        ``n_components`` of the ``RBFSampler``.
    C_values : iterable
        Candidate values for the ``C`` parameter of ``svm.SVC``.
    gamma_times : iterable, optional
        Candidate values forwarded to ``preprocess.gamma2scale`` and
        ``preprocess.gamma2norm``; defaults to ``[None]``.
    normalize : bool
        Forwarded to ``compute_features``.
    n_folds, random_state
        Forwarded to ``sklearn_fe.custom_skfold``.

    Returns
    -------
    The result of ``dictionary.argmax_doubleval`` applied to the mapping
    ``(gamma_f, gamma_kernel, gamma_t, C) -> (mean accuracy, d2ideal)``
    (presumably the best key -- confirm against ``argmax_doubleval``).
    """
    if gamma_times is None:
        gamma_times = [None]
    fold_scores = {}
    d2ideal = {}
    n_per_set = x.shape[1] / d
    # Materialise the folds once: they are re-used for every hyper-parameter
    # combination below, which would silently yield nothing after the first
    # pass if custom_skfold handed back a one-shot iterator.
    folds = list(sklearn_fe.custom_skfold(y=y,
                                          n_folds=n_folds,
                                          random_state=random_state))
    for gamma_f in gamma_features:
        rbf_feature = RBFSampler(gamma=gamma_f,
                                 n_components=n_coeff).fit(x.reshape((-1, d)))
        for gamma_t in gamma_times:
            scale_t = preprocess.gamma2scale(gamma_t, gamma_f)
            norm_t = preprocess.gamma2norm(gamma_t, n_per_set=n_per_set)
            train_rff = compute_features(x,
                                         rbf_feature,
                                         scale_t=scale_t,
                                         d=d,
                                         normalize=normalize,
                                         norm_t=norm_t)
            for gamma_kernel in gamma_kernel_values:
                gram = gram_matrix(train_rff, train_rff, gamma_kernel)
                d2ideal[gamma_f, gamma_kernel,
                        gamma_t] = kernel.dist_ideal(gram, y)
                for train_index, test_index in folds:
                    # Rows select the samples to evaluate, columns always
                    # select the training samples (precomputed-kernel layout).
                    gram_train = gram[train_index, :][:, train_index]
                    gram_test = gram[test_index, :][:, train_index]
                    # Label slices do not depend on C, so compute them once
                    # per fold.
                    y_fold_train = y[train_index]
                    y_fold_test = y[test_index]
                    for C in C_values:
                        clf = svm.SVC(C=C, kernel="precomputed")
                        clf.fit(gram_train, y_fold_train)
                        y_pred = clf.predict(gram_test)
                        acc = metrics.accuracy_score(y_pred=y_pred,
                                                     y_true=y_fold_test)
                        fold_scores.setdefault(
                            (gamma_f, gamma_kernel, gamma_t, C),
                            []).append(acc)
    # key[:3] is (gamma_f, gamma_kernel, gamma_t), the key used for d2ideal.
    perfs = {
        key: (numpy.mean(values), d2ideal[key[:3]])
        for key, values in fold_scores.items()
    }
    return dictionary.argmax_doubleval(perfs)
# Hyper-parameter grids explored by the cross-validation.
Cs = numpy.logspace(0, 6, 5)
gamma_features = numpy.logspace(0, 6, 5)
gamma_kernel_values = numpy.logspace(-1, 5, 5)
# Time gammas: 0 plus every feature gamma.
gamma_times = [0.] + list(gamma_features)
normalize_features = False
use_time_info = True

# Command line: <dataset name> <number of RBFSampler components>.
target_dataset = sys.argv[1]
n_coeff = int(sys.argv[2])

if not use_time_info:
    gamma_times = [None]
path = "datasets/ucr_t/"
for ds_name in UCRreader.list_datasets(path=path):
    # NOTE(review): target_dataset comes from sys.argv[1], so the None case
    # never triggers here -- confirm whether "run all datasets" is intended.
    if target_dataset is not None and target_dataset != ds_name:
        continue

    x_train, x_test, y_train, y_test, n_per_set, d = UCRreader.read_dataset_with_time(
        ds_name,
        path=path,
        normalize_features=normalize_features,
        use_time_info=use_time_info)

    # Select the hyper-parameters by cross-validation on the training split.
    gamma_f, gamma_kernel, gamma_t, C = sqfd_fourier.cv(
        x_train, y_train, d, gamma_features, gamma_kernel_values,
        n_coeff, Cs, gamma_times=gamma_times, normalize=False)

    # Re-fit the sampler with the selected gamma and embed both splits.
    rbf_feature = RBFSampler(gamma=gamma_f, n_components=n_coeff)
    rbf_feature = rbf_feature.fit(x_train.reshape((-1, d)))
    scale_t = preprocess.gamma2scale(gamma_t, gamma_f)
    norm_t = preprocess.gamma2norm(gamma_t, n_per_set=n_per_set)
    phi_train = sqfd_fourier.compute_features(
        x_train, rbf_feature, scale_t=scale_t, d=d, normalize=False, norm_t=norm_t)
    phi_test = sqfd_fourier.compute_features(
        x_test, rbf_feature, scale_t=scale_t, d=d, normalize=False, norm_t=norm_t)

    # Train the final classifier and report the test error rate (1 - accuracy).
    clf = svm.SVC(C=C, kernel="rbf", gamma=gamma_kernel)
    clf.fit(phi_train, y_train)
    y_pred = clf.predict(phi_test)
    acc = metrics.accuracy_score(y_pred=y_pred, y_true=y_test)
    report = "SQFD-RFF_d%d;%s;%f;%f;%s;%f;%f" % (
        n_coeff, ds_name, gamma_f, gamma_kernel, str(gamma_t), C, 1 - acc)
    print(report)