# Shared imports for the examples below. The sklearn imports are standard; the
# sef_dr module paths are an assumption based on the public PySEF layout, so
# adjust them if your local package differs.
import numpy as np
import sklearn
from sklearn import svm
from sklearn.decomposition import PCA
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.kernel_ridge import KernelRidge
from sklearn.linear_model import LinearRegression
from sklearn.manifold import Isomap
from sklearn.model_selection import GridSearchCV
from sklearn.preprocessing import LabelEncoder, StandardScaler

from sef_dr.linear import LinearSEF
from sef_dr.kernel import KernelSEF
from sef_dr.targets import sim_target_svm_precomputed, generate_svm_similarity_matrix
from sef_dr.similarity import mean_data_distance
from sef_dr.classification import evaluate_svm, evaluate_ncc
from sef_dr.datasets import load_mnist, dataset_loader


def outofsample_extensions(method='linear-regression'):
    # Load the data and init seeds
    train_data, train_labels, test_data, test_labels = load_mnist(dataset_path='data')
    np.random.seed(1)
    sklearn.utils.check_random_state(1)
    n_train_samples = 5000

    # Learn a new space using Isomap
    isomap = Isomap(n_components=10, n_neighbors=20)
    train_data_isomap = np.float32(isomap.fit_transform(train_data[:n_train_samples, :]))

    if method == 'linear-regression':
        # Use linear regression to provide baseline out-of-sample extensions
        proj = LinearRegression()
        proj.fit(np.float64(train_data[:n_train_samples, :]), np.float64(train_data_isomap))
        acc = evaluate_svm(proj.predict(train_data[:n_train_samples, :]), train_labels[:n_train_samples],
                           proj.predict(test_data), test_labels)
    elif method == 'c-ISOMAP-10d' or method == 'c-ISOMAP-20d':
        # Use the SEF to provide out-of-sample extensions
        if method == 'c-ISOMAP-10d':
            proj = LinearSEF(train_data.shape[1], output_dimensionality=10)
        else:
            proj = LinearSEF(train_data.shape[1], output_dimensionality=20)
        proj.cuda()
        loss = proj.fit(data=train_data[:n_train_samples, :], target_data=train_data_isomap,
                        target='copy', epochs=50, batch_size=128, verbose=True,
                        learning_rate=0.001, regularizer_weight=1)
        acc = evaluate_svm(proj.transform(train_data[:n_train_samples, :]), train_labels[:n_train_samples],
                           proj.transform(test_data), test_labels)

    print("Method: ", method, " Test accuracy: ", 100 * acc, " %")
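# Usage sketch (not part of the original example): run the regression baseline
# and both linear SEF variants back to back. Assumes the MNIST files can be
# found under 'data/'.
if __name__ == '__main__':
    for m in ['linear-regression', 'c-ISOMAP-10d', 'c-ISOMAP-20d']:
        outofsample_extensions(method=m)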
def unsupervised_approximation(method='pca'):
    # Load the data and init seeds
    train_data, train_labels, test_data, test_labels = load_mnist(dataset_path='data')
    np.random.seed(1)
    sklearn.utils.check_random_state(1)
    n_train_samples = 5000

    if method == 'pca':
        # Learn a baseline pca projection
        proj = PCA(n_components=10)
        proj.fit(train_data[:n_train_samples, :])
    elif method == 's-pca':
        # Learn a high dimensional projection
        proj_to_copy = PCA(n_components=50)
        proj_to_copy.fit(train_data[:n_train_samples, :])
        target_data = np.float32(proj_to_copy.transform(train_data[:n_train_samples, :]))

        # Approximate it using the SEF and 10 dimensions
        proj = LinearSEF(train_data.shape[1], output_dimensionality=10)
        proj.cuda()
        loss = proj.fit(data=train_data[:n_train_samples, :], target_data=target_data,
                        target='copy', epochs=50, batch_size=128, verbose=True,
                        learning_rate=0.001, regularizer_weight=1)

    # Evaluate the method
    acc = evaluate_svm(proj.transform(train_data[:n_train_samples, :]), train_labels[:n_train_samples],
                       proj.transform(test_data), test_labels)
    print("Method: ", method, " Test accuracy: ", 100 * acc, " %")
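# Usage sketch (assumption): compare the plain 10d PCA baseline against the
# SEF approximation of a 50d PCA target on MNIST.
if __name__ == '__main__':
    for m in ['pca', 's-pca']:
        unsupervised_approximation(method=m)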
def unsupervised_approximation(method=None, dataset=None):
    np.random.seed(1)
    sklearn.utils.check_random_state(1)
    dataset_path = 'data'
    train_data, train_labels, test_data, test_labels = dataset_loader(dataset_path, dataset, seed=1)

    if method == 'pca':
        # Learn a baseline pca projection
        proj = PCA(n_components=10)
        proj.fit(train_data)
    elif method == 's-pca':
        # Learn a high dimensional projection
        proj_to_copy = PCA(n_components=50)
        proj_to_copy.fit(train_data)
        target_data = np.float32(proj_to_copy.transform(train_data))

        # Approximate it using the SEF and 10 dimensions
        proj = LinearSEF(train_data.shape[1], output_dimensionality=10)
        proj.cuda()
        loss = proj.fit(data=train_data, target_data=target_data, target='copy',
                        epochs=50, batch_size=1024, verbose=False,
                        learning_rate=0.001, regularizer_weight=1)

    # Evaluate the method
    acc = evaluate_svm(proj.transform(train_data), train_labels,
                       proj.transform(test_data), test_labels)
    print("Method: ", method, " Test accuracy: ", 100 * acc, " %")
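# Usage sketch (assumption): any dataset name recognized by dataset_loader
# works here; 'yale' is borrowed from the supervised example below purely for
# illustration.
if __name__ == '__main__':
    for m in ['pca', 's-pca']:
        unsupervised_approximation(method=m, dataset='yale')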
def svm_approximation(method=None, dataset=None):
    np.random.seed(1)
    sklearn.utils.check_random_state(1)
    dataset_path = 'data'
    train_data, train_labels, test_data, test_labels = dataset_loader(dataset_path, dataset, seed=1)

    # Encode the labels as consecutive integers
    lab = LabelEncoder()
    train_labels = lab.fit_transform(train_labels)
    test_labels = lab.transform(test_labels)

    if method == 'svm':
        acc = evaluate_svm(train_data, train_labels, test_data, test_labels)
    elif method == 'ncc':
        acc = evaluate_ncc(train_data, train_labels, test_data, test_labels)
    elif method == 'S-SVM-A-10d' or method == 'S-SVM-A-20d':
        # Learn an SVM (GridSearchCV from sklearn.model_selection replaces the
        # deprecated sklearn.grid_search module used in the original code)
        parameters = {'kernel': ['linear'],
                      'C': [0.001, 0.01, 0.1, 1, 10, 100, 1000, 10000]}
        model = GridSearchCV(svm.SVC(max_iter=10000, decision_function_shape='ovo'),
                             parameters, n_jobs=-1, cv=3)
        model.fit(train_data, train_labels)

        # Precompute the similarity matrix implied by the SVM's decisions
        # params = {'model': model, 'n_labels': np.unique(train_labels).shape[0], 'scaler': None}
        Gt = generate_svm_similarity_matrix(train_data, train_labels,
                                            len(np.unique(train_labels)), model, None)
        params = {'Gt': Gt}

        # Learn a similarity embedding
        if method == 'S-SVM-A-10d':
            dims = len(np.unique(train_labels))
        else:
            dims = 2 * len(np.unique(train_labels))

        proj = LinearSEF(train_data.shape[1], output_dimensionality=dims)
        proj.cuda()
        loss = proj.fit(data=train_data, target_data=train_data, target_labels=train_labels,
                        target=sim_target_svm_precomputed, target_params=params,
                        epochs=100, learning_rate=0.001, batch_size=256,
                        verbose=True, regularizer_weight=0.001)
        acc = evaluate_ncc(proj.transform(train_data), train_labels,
                           proj.transform(test_data), test_labels)

    print("Method: ", method, " Test accuracy: ", 100 * acc, " %")
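# Usage sketch (assumption): evaluate the SVM and NCC baselines, then both
# SVM-based similarity embeddings, on a dataset_loader-compatible dataset.
if __name__ == '__main__':
    for m in ['svm', 'ncc', 'S-SVM-A-10d', 'S-SVM-A-20d']:
        svm_approximation(method=m, dataset='yale')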
def supervised_reduction(method=None, dataset=None):
    np.random.seed(1)
    sklearn.utils.check_random_state(1)
    dataset_path = 'data'
    train_data, train_labels, test_data, test_labels = dataset_loader(dataset_path, dataset, seed=1)

    # Standardize the data
    scaler = StandardScaler()
    train_data = scaler.fit_transform(train_data)
    test_data = scaler.transform(test_data)

    # Use a smaller regularizer weight for the yale dataset
    if dataset == 'yale':
        regularizer_weight = 0.0001
    else:
        regularizer_weight = 1

    n_classes = len(np.unique(train_labels))

    if method == 'lda':
        proj = LinearDiscriminantAnalysis(n_components=n_classes - 1)
        proj.fit(train_data, train_labels)
    elif method == 's-lda':
        proj = LinearSEF(train_data.shape[1], output_dimensionality=(n_classes - 1))
        proj.cuda()
        loss = proj.fit(data=train_data, target_labels=train_labels, epochs=100,
                        target='supervised', batch_size=256,
                        regularizer_weight=regularizer_weight,
                        learning_rate=0.001, verbose=False)
    elif method == 's-lda-2x':
        # SEF output dimensions are not limited
        proj = LinearSEF(train_data.shape[1], output_dimensionality=2 * (n_classes - 1))
        proj.cuda()
        loss = proj.fit(data=train_data, target_labels=train_labels, epochs=100,
                        target='supervised', batch_size=256,
                        regularizer_weight=regularizer_weight,
                        learning_rate=0.001, verbose=False)

    acc = evaluate_svm(proj.transform(train_data), train_labels,
                       proj.transform(test_data), test_labels)
    print("Method: ", method, " Test accuracy: ", 100 * acc, " %")
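# Usage sketch (assumption): 'yale' also triggers the smaller regularizer
# weight selected inside the function.
if __name__ == '__main__':
    for m in ['lda', 's-lda', 's-lda-2x']:
        supervised_reduction(method=m, dataset='yale')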
def outofsample_extensions(method=None, dataset=None):
    np.random.seed(1)
    sklearn.utils.check_random_state(1)
    dataset_path = 'data'
    train_data, train_labels, test_data, test_labels = dataset_loader(dataset_path, dataset, seed=1)

    # Learn a new space using Isomap
    isomap = Isomap(n_components=10, n_neighbors=20)
    train_data_isomap = np.float32(isomap.fit_transform(train_data))
    sigma = mean_data_distance(np.float32(train_data))

    if method == 'kernel-regression':
        # Use kernel regression to provide baseline out-of-sample extensions
        proj = KernelRidge(kernel='rbf', gamma=(1.0 / sigma ** 2))
        proj.fit(np.float64(train_data), np.float64(train_data_isomap))
        acc = evaluate_svm(proj.predict(train_data), train_labels,
                           proj.predict(test_data), test_labels)
    elif method == 'cK-ISOMAP-10d' or method == 'cK-ISOMAP-20d':
        # Use the SEF to provide out-of-sample extensions
        if method == 'cK-ISOMAP-10d':
            dims = 10
        else:
            dims = 20
        proj = KernelSEF(train_data, train_data.shape[1], output_dimensionality=dims)
        proj.cuda()
        loss = proj.fit(data=train_data, target_data=train_data_isomap, target='copy',
                        epochs=100, batch_size=256, verbose=True,
                        learning_rate=0.00001, regularizer_weight=0.001)
        acc = evaluate_svm(proj.transform(train_data), train_labels,
                           proj.transform(test_data), test_labels)

    print("Method: ", method, " Test accuracy: ", 100 * acc, " %")
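# Usage sketch (assumption): compare kernel ridge regression against the
# kernel SEF for out-of-sample Isomap extensions.
if __name__ == '__main__':
    for m in ['kernel-regression', 'cK-ISOMAP-10d', 'cK-ISOMAP-20d']:
        outofsample_extensions(method=m, dataset='yale')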
def outofsample_extensions(method=None, dataset=None):
    np.random.seed(1)
    sklearn.utils.check_random_state(1)
    dataset_path = 'data'
    train_data, train_labels, test_data, test_labels = dataset_loader(dataset_path, dataset, seed=1)

    # Learn a new space using Isomap
    isomap = Isomap(n_components=10, n_neighbors=20)
    train_data_isomap = np.float32(isomap.fit_transform(train_data))

    if method == 'linear-regression':
        # Standardize the data before fitting the regressor
        std = StandardScaler()
        train_data = std.fit_transform(train_data)
        test_data = std.transform(test_data)

        # Use linear regression to provide baseline out-of-sample extensions
        proj = LinearRegression()
        proj.fit(np.float64(train_data), np.float64(train_data_isomap))
        acc = evaluate_svm(proj.predict(train_data), train_labels,
                           proj.predict(test_data), test_labels)
    elif method == 'c-ISOMAP-10d' or method == 'c-ISOMAP-20d':
        # Use the SEF to provide out-of-sample extensions
        if method == 'c-ISOMAP-10d':
            proj = LinearSEF(train_data.shape[1], output_dimensionality=10)
        else:
            proj = LinearSEF(train_data.shape[1], output_dimensionality=20)
        proj.cuda()
        loss = proj.fit(data=train_data, target_data=train_data_isomap, target='copy',
                        epochs=50, batch_size=1024, verbose=False,
                        learning_rate=0.001, regularizer_weight=1)
        acc = evaluate_svm(proj.transform(train_data), train_labels,
                           proj.transform(test_data), test_labels)

    print("Method: ", method, " Test accuracy: ", 100 * acc, " %")
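# Usage sketch (assumption): the linear counterpart of the kernel experiment
# above, again on a dataset_loader-compatible dataset.
if __name__ == '__main__':
    for m in ['linear-regression', 'c-ISOMAP-10d', 'c-ISOMAP-20d']:
        outofsample_extensions(method=m, dataset='yale')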
def svm_approximation(method=None):
    # Load the data and init seeds
    train_data, train_labels, test_data, test_labels = load_mnist(dataset_path='data')
    np.random.seed(1)
    sklearn.utils.check_random_state(1)
    n_train = 5000

    if method == 'svm':
        acc = evaluate_svm(train_data[:n_train, :], train_labels[:n_train], test_data, test_labels)
    elif method == 'ncc':
        acc = evaluate_ncc(train_data[:n_train, :], train_labels[:n_train], test_data, test_labels)
    elif method == 'S-SVM-A-10d' or method == 'S-SVM-A-20d':
        # Learn an SVM
        parameters = {'kernel': ['linear'],
                      'C': [0.0001, 0.001, 0.01, 0.1, 1, 10, 100, 1000, 10000, 100000]}
        model = GridSearchCV(svm.SVC(max_iter=10000, decision_function_shape='ovo'),
                             parameters, n_jobs=-1, cv=3)
        model.fit(train_data[:n_train], train_labels[:n_train])

        # Learn a similarity embedding
        if method == 'S-SVM-A-10d':
            dims = 10
        else:
            dims = 20
        proj = LinearSEF(train_data.shape[1], output_dimensionality=dims)
        proj.cuda()

        # Precompute the similarity matrix
        Gt = generate_svm_similarity_matrix(train_data[:n_train], train_labels[:n_train],
                                            len(np.unique(train_labels)), model, None)
        params = {'Gt': Gt}
        # otherwise, we can simply set target='svm' and use the following target
        # params = {'model': model, 'n_labels': np.unique(train_labels).shape[0], 'scaler': scaler}

        loss = proj.fit(data=train_data[:n_train, :], target_data=train_data[:n_train, :],
                        target_labels=train_labels[:n_train], target=sim_target_svm_precomputed,
                        target_params=params, epochs=50, learning_rate=0.001,
                        batch_size=128, verbose=True, regularizer_weight=0.001)
        acc = evaluate_ncc(proj.transform(train_data[:n_train, :]), train_labels[:n_train],
                           proj.transform(test_data), test_labels)

    print("Method: ", method, " Test accuracy: ", 100 * acc, " %")
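# Usage sketch (assumption): the MNIST variant of the SVM approximation
# experiment, using the first 5000 training samples.
if __name__ == '__main__':
    for m in ['svm', 'ncc', 'S-SVM-A-10d', 'S-SVM-A-20d']:
        svm_approximation(method=m)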