def cv(x, y, d, gamma_features, gamma_kernel_values, k, C_values, gamma_times=None, n_folds=3, random_state=None):
    """Grid-search kernel and SVM hyper-parameters by cross-validation.

    For every ``(gamma_f, gamma_t)`` pair, ``compute_features`` is called on
    ``x`` and its result (centroids, weights) is turned into an
    un-exponentiated Gram matrix.  For every ``gamma_kernel`` that matrix is
    mapped through ``exp(-gamma_kernel * gram_noexp)`` and an ``svm.SVC`` with
    a precomputed kernel is scored on each fold for every ``C``.

    Args:
        x: Data array; ``x.shape[1] / d`` is used as the number of points per
            set (assumes a 2-D layout -- confirm against the caller).
        y: Labels, indexable by the fold index arrays.
        d: Divisor of ``x.shape[1]``, forwarded to ``compute_features``.
        gamma_features: Iterable of candidate ``gamma_f`` values.
        gamma_kernel_values: Iterable of candidate ``gamma_kernel`` values.
        k: Forwarded to ``compute_features``; must not exceed
            ``x.shape[1] / d``.
        C_values: Iterable of candidate SVM ``C`` values.
        gamma_times: Iterable of candidate ``gamma_t`` values; ``None`` is
            treated as the single candidate ``[None]``.
        n_folds: Forwarded to ``sklearn_fe.custom_skfold``.
        random_state: Forwarded to ``sklearn_fe.custom_skfold``.

    Returns:
        The result of ``dictionary.argmax_doubleval`` applied to a mapping
        ``(gamma_f, gamma_kernel, gamma_t, C) -> (mean accuracy, d2ideal)``
        (presumably the best-scoring key -- confirm against that helper).

    Raises:
        AssertionError: If ``k`` is larger than ``x.shape[1] / d``.
    """
    if gamma_times is None:
        gamma_times = [None]
    perfs = {}
    d2ideal = {}
    # Materialise the folds once: they are walked again for every
    # (gamma_f, gamma_t, gamma_kernel) combination below, so a one-shot
    # iterator would be exhausted after the first combination and every
    # later one would silently be left without any score.
    k_fold = list(sklearn_fe.custom_skfold(y=y, n_folds=n_folds, random_state=random_state))
    n_per_set = x.shape[1] / d
    assert k <= n_per_set
    for gamma_f in gamma_features:
        for gamma_t in gamma_times:
            scale_t = preprocess.gamma2scale(gamma_t, gamma_f)
            centroids, weights = compute_features(x, k, scale_t=scale_t, d=d)
            kernel_fun_noexp = gram_matrix_fun_sym_noexp(gamma_f)
            # Computed once per (gamma_f, gamma_t); only the exponentiation
            # depends on gamma_kernel.
            gram_noexp = kernel_fun_noexp(centroids, weights)
            for gamma_kernel in gamma_kernel_values:
                gram = numpy.exp(-gamma_kernel * gram_noexp)
                d2ideal[gamma_f, gamma_kernel, gamma_t] = kernel.dist_ideal(gram, y)
                for train_index, test_index in k_fold:
                    # Rows select the samples to evaluate, columns are always
                    # restricted to the training samples (precomputed kernel).
                    gram_train = gram[train_index, :][:, train_index]
                    gram_test = gram[test_index, :][:, train_index]
                    # Label slices do not depend on C, so take them once per fold.
                    y_fold_train = y[train_index]
                    y_fold_test = y[test_index]
                    for C in C_values:
                        clf = svm.SVC(C=C, kernel="precomputed")
                        clf.fit(gram_train, y_fold_train)
                        y_pred = clf.predict(gram_test)
                        acc = sklearn_fe.custom_accuracy_score(y_pred=y_pred, y_true=y_fold_test)
                        perfs.setdefault((gamma_f, gamma_kernel, gamma_t, C), []).append(acc)
                # Progress trace, one line per kernel configuration.
                print(gamma_f, gamma_t, gamma_kernel)
    # Collapse the per-fold accuracies into (mean accuracy, d2ideal) pairs.
    scored = {}
    for (gamma_f, gamma_kernel, gamma_t, C), values in perfs.items():
        scored[gamma_f, gamma_kernel, gamma_t, C] = (numpy.mean(values), d2ideal[gamma_f, gamma_kernel, gamma_t])
    return dictionary.argmax_doubleval(scored)
def cv(x, y, d, gamma_features, gamma_kernel_values, n_coeff, C_values, gamma_times=None, normalize=False, n_folds=3, random_state=None):
    """Grid-search kernel and SVM hyper-parameters by cross-validation.

    For every ``gamma_f`` an ``RBFSampler`` with ``n_coeff`` components is
    fitted on ``x.reshape((-1, d))``.  For every ``gamma_t`` the features
    returned by ``compute_features`` are computed, and for every
    ``gamma_kernel`` a Gram matrix is built with ``gram_matrix``.  An
    ``svm.SVC`` with a precomputed kernel is then scored on each fold for
    every ``C``.

    Args:
        x: Data array; ``x.shape[1] / d`` is used as the number of points per
            set (assumes a 2-D layout -- confirm against the caller).
        y: Labels, indexable by the fold index arrays.
        d: Width used to reshape ``x`` before fitting the sampler; also
            forwarded to ``compute_features``.
        gamma_features: Iterable of candidate ``gamma_f`` values.
        gamma_kernel_values: Iterable of candidate ``gamma_kernel`` values.
        n_coeff: ``n_components`` of the ``RBFSampler``.
        C_values: Iterable of candidate SVM ``C`` values.
        gamma_times: Iterable of candidate ``gamma_t`` values; ``None`` is
            treated as the single candidate ``[None]``.
        normalize: Forwarded to ``compute_features``.
        n_folds: Forwarded to ``sklearn_fe.custom_skfold``.
        random_state: Forwarded to ``sklearn_fe.custom_skfold`` (it is not
            passed to the ``RBFSampler``).

    Returns:
        The result of ``dictionary.argmax_doubleval`` applied to a mapping
        ``(gamma_f, gamma_kernel, gamma_t, C) -> (mean accuracy, d2ideal)``
        (presumably the best-scoring key -- confirm against that helper).
    """
    if gamma_times is None:
        gamma_times = [None]
    perfs = {}
    d2ideal = {}
    n_per_set = x.shape[1] / d
    # Materialise the folds once: they are walked again for every
    # (gamma_f, gamma_t, gamma_kernel) combination below, so a one-shot
    # iterator would be exhausted after the first combination and every
    # later one would silently be left without any score.
    k_fold = list(sklearn_fe.custom_skfold(y=y, n_folds=n_folds, random_state=random_state))
    for gamma_f in gamma_features:
        rbf_feature = RBFSampler(gamma=gamma_f, n_components=n_coeff).fit(x.reshape((-1, d)))
        for gamma_t in gamma_times:
            scale_t = preprocess.gamma2scale(gamma_t, gamma_f)
            norm_t = preprocess.gamma2norm(gamma_t, n_per_set=n_per_set)
            train_rff = compute_features(x, rbf_feature, scale_t=scale_t, d=d, normalize=normalize, norm_t=norm_t)
            for gamma_kernel in gamma_kernel_values:
                gram = gram_matrix(train_rff, train_rff, gamma_kernel)
                d2ideal[gamma_f, gamma_kernel, gamma_t] = kernel.dist_ideal(gram, y)
                for train_index, test_index in k_fold:
                    # Rows select the samples to evaluate, columns are always
                    # restricted to the training samples (precomputed kernel).
                    gram_train = gram[train_index, :][:, train_index]
                    gram_test = gram[test_index, :][:, train_index]
                    # Label slices do not depend on C, so take them once per fold.
                    y_fold_train = y[train_index]
                    y_fold_test = y[test_index]
                    for C in C_values:
                        clf = svm.SVC(C=C, kernel="precomputed")
                        clf.fit(gram_train, y_fold_train)
                        y_pred = clf.predict(gram_test)
                        acc = metrics.accuracy_score(y_pred=y_pred, y_true=y_fold_test)
                        perfs.setdefault((gamma_f, gamma_kernel, gamma_t, C), []).append(acc)
    # Collapse the per-fold accuracies into (mean accuracy, d2ideal) pairs.
    scored = {}
    for (gamma_f, gamma_kernel, gamma_t, C), values in perfs.items():
        scored[gamma_f, gamma_kernel, gamma_t, C] = (numpy.mean(values), d2ideal[gamma_f, gamma_kernel, gamma_t])
    return dictionary.argmax_doubleval(scored)
# Experiment driver: for each selected dataset, pick hyper-parameters with
# sqfd_fourier.cv on the training split, then fit an RBF-kernel SVC on the
# training features and report the error rate on the test split.

# Candidate hyper-parameter grids (five log-spaced values each).
Cs = numpy.logspace(0, 6, 5)
gamma_features = numpy.logspace(0, 6, 5)
gamma_kernel_values = numpy.logspace(-1, 5, 5)
# Time bandwidth candidates: 0 plus every feature bandwidth.
gamma_times = [0.] + list(gamma_features)

normalize_features = False
use_time_info = True

# Command line: <dataset name> <number of RBFSampler components>.
target_dataset = sys.argv[1]
n_coeff = int(sys.argv[2])

if not use_time_info:
    gamma_times = [None]

path = "datasets/ucr_t/"
for ds_name in UCRreader.list_datasets(path=path):
    # Skip every dataset except the requested one.  target_dataset is read
    # from sys.argv[1] above, so the None case cannot occur on this path.
    if target_dataset is not None and target_dataset != ds_name:
        continue

    x_train, x_test, y_train, y_test, n_per_set, d = UCRreader.read_dataset_with_time(
        ds_name,
        path=path,
        normalize_features=normalize_features,
        use_time_info=use_time_info,
    )

    # Hyper-parameter selection on the training split only.
    gamma_f, gamma_kernel, gamma_t, C = sqfd_fourier.cv(
        x_train,
        y_train,
        d,
        gamma_features,
        gamma_kernel_values,
        n_coeff,
        Cs,
        gamma_times=gamma_times,
        normalize=False,
    )

    # Fit a fresh sampler with the selected gamma_f, then compute the
    # train and test features with identical settings.
    rbf_feature = RBFSampler(gamma=gamma_f, n_components=n_coeff).fit(x_train.reshape((-1, d)))
    scale_t = preprocess.gamma2scale(gamma_t, gamma_f)
    norm_t = preprocess.gamma2norm(gamma_t, n_per_set=n_per_set)
    feature_kwargs = dict(scale_t=scale_t, d=d, normalize=False, norm_t=norm_t)
    phi_train = sqfd_fourier.compute_features(x_train, rbf_feature, **feature_kwargs)
    phi_test = sqfd_fourier.compute_features(x_test, rbf_feature, **feature_kwargs)

    clf = svm.SVC(C=C, kernel="rbf", gamma=gamma_kernel)
    clf.fit(phi_train, y_train)
    y_pred = clf.predict(phi_test)
    acc = metrics.accuracy_score(y_pred=y_pred, y_true=y_test)

    # One ';'-separated result line per dataset; the last field is 1 - accuracy.
    print("SQFD-RFF_d%d;%s;%f;%f;%s;%f;%f" % (n_coeff, ds_name, gamma_f, gamma_kernel, str(gamma_t), C, 1 - acc))