def process_nca(self, **option):
    """Metric Learning algorithm: NCA.

    Fits an ``NCA`` model, built from the keyword arguments in ``option``,
    on ``self.GeneExp_train`` / ``self.Label_train`` and stores the value
    returned by the model's ``transformer()`` under ``self.Trans['NCA']``.
    """
    features, labels = self.GeneExp_train, self.Label_train
    model = NCA(**option)
    model.fit(features, labels)
    self.Trans['NCA'] = model.transformer()
def runNCA(X_train, X_test, y_train, y_test):
    """Fit NCA on the training split and project both splits.

    The projected arrays are written with ``np.save`` to ``X_train_NCA`` and
    ``X_test_NCA`` and returned as ``(X_train_proj, X_test_proj)``.
    ``y_test`` is accepted but not used.
    """
    model = NCA(max_iter=100, verbose=True)
    model.fit(X_train, y_train)

    # Project both splits first, then persist them in the same order.
    projected = [model.transform(split) for split in (X_train, X_test)]
    for filename, array in zip(('X_train_NCA', 'X_test_NCA'), projected):
        np.save(filename, array)

    train_proj, test_proj = projected
    return train_proj, test_proj
def test_nca(self):
    """``fit`` followed by ``transform`` must agree with ``fit_transform``."""
    # The iteration budget shrinks as the number of samples grows.
    iteration_budget = 100000 // self.X.shape[0]

    two_step = NCA(max_iter=iteration_budget)
    two_step.fit(self.X, self.y)
    from_two_steps = two_step.transform(self.X)

    one_step = NCA(max_iter=iteration_budget)
    from_one_step = one_step.fit_transform(self.X, self.y)

    assert_array_almost_equal(from_two_steps, from_one_step)
def test_one_class(self):
    """With a single class, fitting must leave the initial matrix untouched."""
    # if there is only one class the gradient is null, so the final matrix
    # must stay like the initialization
    first_class = self.iris_labels == 0
    X = self.iris_points[first_class]
    y = self.iris_labels[first_class]
    n_features = X.shape[1]
    # ``random_state`` is keyword-only in current scikit-learn releases;
    # passing it positionally raises a TypeError there.
    A = make_spd_matrix(n_features, random_state=n_features)
    nca = NCA(init=A, max_iter=30, n_components=n_features)
    nca.fit(X, y)
    assert_array_equal(nca.components_, A)
def NCA(self):
    """Replace ``self.features`` with their NCA projection, then evaluate.

    After the projection, ``self.prepare_for_testing()`` is called and the
    result is scored through ``self.nearest_neighbors("NCA + KNN")``.
    """
    # Single-argument call form prints identically on Python 2 and 3.
    print("Warning the features will be transformed")
    # Fit and transform on the *instance*. The original created an instance
    # but then called ``fit``/``transform`` on the class itself, which
    # passes ``self.features`` as the estimator.
    nca = NCA()
    # NOTE(review): ``targets`` is not defined in this method; presumably a
    # module-level name (or meant to be an attribute of self) -- confirm.
    nca.fit(self.features, targets)
    self.features = nca.transform(self.features)
    self.prepare_for_testing()
    # Evaluate with nn
    self.nearest_neighbors("NCA + KNN")
def test_nca(self):
    """Check that ``fit_transform`` matches ``fit`` followed by ``transform``."""
    n_samples = self.X.shape[0]
    max_iter = 100000 // n_samples

    # Reference result: fit first, transform afterwards.
    reference_model = NCA(max_iter=max_iter)
    reference_model.fit(self.X, self.y)
    reference = reference_model.transform(self.X)

    # Candidate result: a fresh estimator doing both steps at once.
    candidate = NCA(max_iter=max_iter).fit_transform(self.X, self.y)

    assert_array_almost_equal(reference, candidate)
def test_one_class(self):
    """With a single class, ``transformer_`` must equal the expected scaling."""
    # if there is only one class the gradient is null, so the final matrix
    # must stay like the initialization
    only_class_zero = self.iris_labels == 0
    X = self.iris_points[only_class_zero]
    y = self.iris_labels[only_class_zero]
    n_features = X.shape[1]

    # Expected matrix: diagonal of inverse per-feature ranges, with the
    # range clamped to machine epsilon to avoid dividing by zero.
    eps = np.finfo(float).eps
    feature_range = X.max(axis=0) - X.min(axis=0)
    expected = np.zeros((n_features, n_features))
    np.fill_diagonal(expected, 1. / (np.maximum(feature_range, eps)))

    model = NCA(max_iter=30, num_dims=n_features)
    model.fit(X, y)
    assert_array_equal(model.transformer_, expected)
def test_iris(self):
    """Compare the matrix learned on Iris with a published reference."""
    n_samples = self.iris_points.shape[0]
    model = NCA(max_iter=(100000 // n_samples), learning_rate=0.01)
    model.fit(self.iris_points, self.iris_labels)

    # Result copied from Iris example at
    # https://github.com/vomjom/nca/blob/master/README.mkd
    reference = [
        [-0.09935, -0.2215, 0.3383, 0.443],
        [+0.2532, 0.5835, -0.8461, -0.8915],
        [-0.729, -0.6386, 1.767, 1.832],
        [-0.9405, -0.8461, 2.281, 2.794],
    ]
    learned = model.transformer()
    assert_array_almost_equal(reference, learned, decimal=3)
def test_iris(self):
    """The transformer fitted on Iris must match the reference to 3 decimals."""
    points, labels = self.iris_points, self.iris_labels
    max_iter = 100000 // points.shape[0]

    nca = NCA(max_iter=max_iter, learning_rate=0.01)
    nca.fit(points, labels)

    # Result copied from Iris example at
    # https://github.com/vomjom/nca/blob/master/README.mkd
    expected_rows = (
        [-0.09935, -0.2215, 0.3383, 0.443],
        [+0.2532, 0.5835, -0.8461, -0.8915],
        [-0.729, -0.6386, 1.767, 1.832],
        [-0.9405, -0.8461, 2.281, 2.794],
    )
    assert_array_almost_equal(list(expected_rows), nca.transformer(),
                              decimal=3)
def test_iris(self):
    """Check class separation on Iris, with and without reducing dimensions."""
    max_iter = 100000 // self.iris_points.shape[0]

    # Without dimension reduction
    full_rank = NCA(max_iter=max_iter)
    full_rank.fit(self.iris_points, self.iris_labels)
    projected = full_rank.transform(self.iris_points)
    self.assertLess(class_separation(projected, self.iris_labels), 0.15)

    # With dimension reduction
    reduced = NCA(max_iter=max_iter, num_dims=2)
    reduced.fit(self.iris_points, self.iris_labels)
    projected = reduced.transform(self.iris_points)
    self.assertLess(class_separation(projected, self.iris_labels), 0.20)
def test_iris(self):
    """Class separation after NCA must stay below a per-scenario bound."""
    max_iter = 100000 // self.iris_points.shape[0]

    scenarios = (
        # Without dimension reduction
        ({}, 0.15),
        # With dimension reduction
        ({'num_dims': 2, 'tol': 1e-9}, 0.20),
    )
    for extra_kwargs, max_separation in scenarios:
        nca = NCA(max_iter=max_iter, **extra_kwargs)
        nca.fit(self.iris_points, self.iris_labels)
        # ``transform()`` is called without arguments, as in the original.
        csep = class_separation(nca.transform(), self.iris_labels)
        self.assertLess(csep, max_separation)
def test_simple_example(self):
    """Test on a simple example.

    Puts four points in the input space where the opposite labels points are
    next to each other. After transform the same labels points should be next
    to each other.
    """
    points = np.array([[0, 0], [0, 1], [2, 0], [2, 1]])
    labels = np.array([1, 0, 1, 0])

    model = NCA(num_dims=2,)
    model.fit(points, labels)
    transformed = model.transform(points)

    # Column 1 of the argsort is each point's nearest neighbour other than
    # itself (column 0 is the point's own zero distance).
    nearest = pairwise_distances(transformed).argsort()[:, 1]
    np.testing.assert_equal(nearest, np.array([2, 3, 0, 1]))
def test_simple_example(self):
    """Test on a simple example.

    Puts four points in the input space where the opposite labels points are
    next to each other. After transform the same labels points should be next
    to each other.
    """
    X = np.array([[0, 0], [0, 1], [2, 0], [2, 1]])
    y = np.array([1, 0, 1, 0])
    expected_neighbours = np.array([2, 3, 0, 1])

    nca = NCA(n_components=2,)
    nca.fit(X, y)
    X_transformed = nca.transform(X)

    distance_order = pairwise_distances(X_transformed).argsort()
    # Index 1 skips the zero self-distance in column 0.
    np.testing.assert_equal(distance_order[:, 1], expected_neighbours)
class NCA:
    """Nearest-centroid probability estimator on top of an ``NCA_ml`` model."""

    def __init__(self):
        self.metric_model = NCA_ml()
        # Training data as passed to ``fit``; kept for ``predict_proba``.
        self.X_tr = None
        self.y_train = None
        self.X_te = None

    def fit(self, X_tr, y_train):
        """Fits the model to the prescribed data."""
        self.X_tr = X_tr
        self.y_train = y_train
        return self.metric_model.fit(X_tr, y_train)

    def transform(self, X):
        """Transforms the test data according to the model"""
        return self.metric_model.transform(X)

    def predict_proba(self, X_te):
        """Predicts the probabilities of each of the test samples.

        Returns an array with one row per test sample and one column per
        class centroid.
        """
        test_samples = X_te.shape[0]
        # Keep the projection local. The original overwrote ``self.X_tr``,
        # so a second call projected already-projected data and the stored
        # training set no longer matched what ``fit`` received.
        projected_train = self.transform(self.X_tr)
        clf = NearestCentroid()
        clf.fit(projected_train, self.y_train)
        centroids = clf.centroids_
        probabilities = np.zeros((test_samples, centroids.shape[0]))
        # NOTE(review): the centroids live in the projected space while
        # ``X_te`` is used as given -- presumably callers pass data already
        # run through ``transform``; confirm.
        for sample in range(test_samples):
            probabilities[sample] = sk_nearest_neighbour_proba(
                centroids, X_te[sample, :])
        return probabilities
def nca_fit(X_train, Y_train, X_test, Y_test, color_map):
    """Fit NCA on the training split and project both splits.

    When a split has exactly two input columns, its projection is also
    drawn as a scatter plot (coloured through ``color_map``) and saved as a
    PNG. Returns ``(X_train_transformed, X_test_transformed)``.
    """
    model = NCA(init='pca', max_iter=5000)
    model.fit(X_train, Y_train)

    def project_and_plot(raw, labels, png_name):
        # Project one split; plot it only when the *input* is 2-D.
        projected = model.transform(raw)
        if raw.shape[1] == 2:
            plt.figure()
            plt.scatter(projected[:, 0], projected[:, 1],
                        c=color_map[labels], s=2)
            plt.savefig(png_name, dpi=300)
        return projected

    train_projected = project_and_plot(
        X_train, Y_train, "after_nca_transform_train.png")
    test_projected = project_and_plot(
        X_test, Y_test, "after_nca_transform_test.png")
    return (train_projected, test_projected)
def test_singleton_class(self):
    """Fitting must cope with singleton classes in several configurations."""
    X = self.iris_points
    # Work on a copy: the labels are rewritten in place below, and mutating
    # ``self.iris_labels`` would leak relabelled data into other tests that
    # share the fixture.
    y = self.iris_labels.copy()

    # one singleton class: test fitting works
    singleton_class = 1
    ind_singleton, = np.where(y == singleton_class)
    y[ind_singleton] = 2
    y[ind_singleton[0]] = singleton_class

    nca = NCA(max_iter=30)
    nca.fit(X, y)

    # One non-singleton class: test fitting works
    ind_1, = np.where(y == 1)
    ind_2, = np.where(y == 2)
    y[ind_1] = 0
    y[ind_1[0]] = 1
    y[ind_2] = 0
    y[ind_2[0]] = 2

    nca = NCA(max_iter=30)
    nca.fit(X, y)

    # Only singleton classes: test fitting does nothing (the gradient
    # must be null in this case, so the final matrix must stay like
    # the initialization)
    ind_0, = np.where(y == 0)
    ind_1, = np.where(y == 1)
    ind_2, = np.where(y == 2)
    keep = [ind_0[0], ind_1[0], ind_2[0]]
    X = X[keep]
    y = y[keep]

    # Expected matrix: diagonal of inverse per-feature ranges, with the
    # range clamped to machine epsilon to avoid dividing by zero.
    EPS = np.finfo(float).eps
    A = np.zeros((X.shape[1], X.shape[1]))
    np.fill_diagonal(A,
                     1. / (np.maximum(X.max(axis=0) - X.min(axis=0), EPS)))
    nca = NCA(max_iter=30, num_dims=X.shape[1])
    nca.fit(X, y)
    assert_array_equal(nca.transformer_, A)
def test_singleton_class(self):
    """Fitting must cope with singleton classes in several configurations."""
    X = self.iris_points
    # Work on a copy: the labels are rewritten in place below, and mutating
    # ``self.iris_labels`` would leak relabelled data into other tests that
    # share the fixture.
    y = self.iris_labels.copy()

    # one singleton class: test fitting works
    singleton_class = 1
    ind_singleton, = np.where(y == singleton_class)
    y[ind_singleton] = 2
    y[ind_singleton[0]] = singleton_class

    nca = NCA(max_iter=30)
    nca.fit(X, y)

    # One non-singleton class: test fitting works
    ind_1, = np.where(y == 1)
    ind_2, = np.where(y == 2)
    y[ind_1] = 0
    y[ind_1[0]] = 1
    y[ind_2] = 0
    y[ind_2[0]] = 2

    nca = NCA(max_iter=30)
    nca.fit(X, y)

    # Only singleton classes: test fitting does nothing (the gradient
    # must be null in this case, so the final matrix must stay like
    # the initialization)
    ind_0, = np.where(y == 0)
    ind_1, = np.where(y == 1)
    ind_2, = np.where(y == 2)
    keep = [ind_0[0], ind_1[0], ind_2[0]]
    X = X[keep]
    y = y[keep]

    n_features = X.shape[1]
    # ``random_state`` is keyword-only in current scikit-learn releases;
    # passing it positionally raises a TypeError there.
    A = make_spd_matrix(n_features, random_state=n_features)
    nca = NCA(init=A, max_iter=30, n_components=n_features)
    nca.fit(X, y)
    assert_array_equal(nca.components_, A)
# One row of scores per value in M_array; the columns hold rank-1, rank-10
# and mAP in that order.
# NOTE(review): the table has 5 rows -- assumes M_array has at most 5
# entries; confirm.
scores_NCA_A = np.zeros([5,3])
row = 0
for M in M_array:
    # Keep the first M columns of the eigenvector matrix and project the
    # training (A) and testing (A_testing) data onto them.
    U = norm_eigenvec_ld[:, 0:M]
    W = np.matmul(U.T, A)
    W_testing = np.matmul(U.T, A_testing)
    # The labels are taken from the first row of the index arrays.
    index_training_array = np.array(index_training[0,:])
    index_testing_array = np.array(index_testing[0,:])
    # Learn a metric on the projected training data (samples as rows) and
    # hand it to the KNN evaluation of the projected testing data.
    nca = NCA()
    nca.fit(W.T, index_training_array)
    new_indices, rank_1, rank_10 = KNN(W_testing.T, index_testing_array, nca.get_metric())
    mAP = mAP_calculation(new_indices, index_testing_array)
    scores_NCA_A[row,0] = rank_1
    scores_NCA_A[row,1] = rank_10
    scores_NCA_A[row,2] = mAP
    row = row+1
# NCA B_testing
# Centre B_training by subtracting its mean along axis 1, then compute the
# eigenvectors for the B experiment.
mean_training_B = B_training.mean(axis=1, keepdims=True)
B = B_training - mean_training_B
norm_eigenvec_ld_B = PCA_eigenvec(B)
rf_params = {'criterion':('gini', 'entropy')}
# NOTE(review): the result stored under "random_forest" is computed from
# ``lr`` / ``lr_params`` while ``rf_params`` is never used -- looks like a
# copy/paste slip; confirm which classifier was intended.
classifier_stats["random_forest"] = test_classifier(lr, lr_params)
# Checkpoint after each classifier; ``with`` guarantees the file is flushed
# and closed even if pickling fails.
with open("/data/ml2/vishakh/mimic_out/pred_statscheckpoint.pk", 'wb') as checkpoint:
    pickle.dump(classifier_stats, checkpoint)

print("Nearest Neighbors")
nn = KNeighborsClassifier()
# ``//`` keeps the upper bound an integer on Python 3 as well; ``range``
# rejects the float produced by ``/``.
nn_params = {"n_neighbors" : range(5, max(6, len(data) // 10)), 'leaf_size':range(30,100)}
classifier_stats["nearest_neighbors"] = test_classifier(nn, nn_params)
with open("/data/ml2/vishakh/mimic_out/pred_statscheckpoint.pk", 'wb') as checkpoint:
    pickle.dump(classifier_stats, checkpoint)

print("Metric learning")
nca1 = NCA(max_iter=1000, learning_rate=0.01)
# Fixed: the original had a stray ``]`` here (``x_train]``), a SyntaxError.
nca1.fit(x_train, y_train)
t_x_train = nca1.transform()
# NOTE(review): a second NCA is fitted on the test split using the test
# labels, so the test features are produced with knowledge of their own
# labels -- this looks like label leakage; confirm whether
# ``nca1.transform(x_test)`` was intended.
nca2 = NCA(max_iter=1000, learning_rate=0.01)
nca2.fit(x_test, np.array(y_test))
t_x_test = nca2.transform()
dat = [t_x_train, t_x_test]
nn_metric = KNeighborsClassifier()
nn_metric_params = {"n_neighbors" : range(5, max(6, len(data) // 10)), 'leaf_size':range(30,100)}
classifier_stats['nn_metric'] = test_classifier(nn_metric, nn_metric_params, dat)
# NOTE: despite the ``.csv`` suffix this file holds a pickle. ``out`` is kept
# for compatibility; ``pickle.dump`` returns None.
with open("/data/ml2/vishakh/mimic_out/predacc_raw_umls.csv", 'wb') as results_file:
    out = pickle.dump(classifier_stats, results_file)
if __name__ == '__main__':
    # Command-line entry point: fit NCA on the training split and save the
    # projected features together with the labels of both splits.
    parser = argparse.ArgumentParser("NCA")
    parser.add_argument('--data-root', default='./data/raw_split')
    parser.add_argument('--n-components', type=int, default=2)
    parser.add_argument('--max-iter', type=int, default=100)
    args = parser.parse_args()

    # The output folder is named after the two hyper-parameters.
    name = f"{args.n_components}_{args.max_iter}"
    data_save_folder = f"./data/NCA/{name}"
    makedirs(data_save_folder)

    X_train, X_test, y_train, y_test = load_split(args)
    print(X_train.shape)

    t = time.time()
    nca = NCA(
        n_components=args.n_components,
        max_iter=args.max_iter,
        verbose=1,
    )
    nca.fit(X_train, y_train)
    print(" # NCA fit done.")

    # Save in the original order: train features, train labels, test
    # features, test labels.
    train_features = nca.transform(X_train)
    np.save(osp.join(data_save_folder, "feature_train.npy"), train_features)
    np.save(osp.join(data_save_folder, "label_train.npy"), y_train)
    test_features = nca.transform(X_test)
    np.save(osp.join(data_save_folder, "feature_test.npy"), test_features)
    np.save(osp.join(data_save_folder, "label_test.npy"), y_test)
def main(params):
    """Compute a PSD matrix with the configured algorithm and save it.

    ``params`` is read through ``get`` / ``getint`` / ``getfloat`` /
    ``getboolean``. The matrix is written to ``psd_matrix.txt`` inside the
    configured results directory.

    Raises:
        Exception: if ``params.get('algorithm')`` is not a supported name.
    """
    results_dir = params.get('results_dir')
    initialize_results_dir(results_dir)
    backup_params(params, results_dir)

    print('>>> loading data...')
    loader = LoaderFactory().create(
        name=params.get('dataset'),
        root=params.get('dataset_dir'),
        random=True,
        seed=params.getint('split_seed'))
    X_train, y_train, X_test, y_test = loader()
    print('<<< data loaded')

    def fit_mahalanobis(model):
        # Shared tail of every metric-learn style branch below.
        model.fit(X_train, y_train)
        return model.get_mahalanobis_matrix()

    print('>>> computing psd matrix...')
    algorithm = params.get('algorithm')
    if algorithm == 'identity':
        psd_matrix = np.identity(X_train.shape[1], dtype=X_train.dtype)
    elif algorithm == 'nca':
        psd_matrix = fit_mahalanobis(
            NCA(init='auto', verbose=True,
                random_state=params.getint('algorithm_seed')))
    elif algorithm == 'lmnn':
        psd_matrix = fit_mahalanobis(
            LMNN(init='auto', verbose=True,
                 random_state=params.getint('algorithm_seed')))
    elif algorithm == 'itml':
        psd_matrix = fit_mahalanobis(
            ITML_Supervised(verbose=True,
                            random_state=params.getint('algorithm_seed')))
    elif algorithm == 'lfda':
        psd_matrix = fit_mahalanobis(LFDA())
    elif algorithm == 'arml':
        optimizer_params = {
            'lr': params.getfloat('lr'),
            'momentum': params.getfloat('momentum'),
            'weight_decay': params.getfloat('weight_decay'),
        }
        criterion_params = {'calibration': params.getfloat('calibration')}
        learner = TripleLearner(
            optimizer=params.get('optimizer'),
            optimizer_params=optimizer_params,
            criterion=params.get('criterion'),
            criterion_params=criterion_params,
            n_epochs=params.getint('n_epochs'),
            batch_size=params.getint('batch_size'),
            random_initialization=params.getboolean(
                'random_initialization', fallback=False),
            update_triple=params.getboolean('update_triple', fallback=False),
            device=params.get('device'),
            seed=params.getint('learner_seed'))
        psd_matrix = learner(
            X_train, y_train,
            n_candidate_mins=params.getint('n_candidate_mins', fallback=1))
    else:
        raise Exception('unsupported algorithm')
    print('<<< psd matrix got')

    np.savetxt(os.path.join(results_dir, 'psd_matrix.txt'), psd_matrix)
print("Method: LFDA", '\n') lfda = LFDA(k=4, dim=1) x = lfda.fit(FSTrainData, TrainLabels) TFSTestData = x.transform(FSTestData) print('Transformation Done', '\n') elif Method == 'NCA': print("Method: NCA", '\n') #print('Max', TrainData.max(axis=0)) #print('sssssssss', len(TrainData[0])) #print('sssssssss', len(TrainData.max(axis=0))) #print('Min', TrainData.min(axis=0)) nca = NCA(max_iter=500, learning_rate=0.01) # print('ssssssss', TrainData) x = nca.fit(FSTrainData, TrainLabels) TFSTestData = x.transform(FSTestData) print('Transformation Done', '\n') elif Method == 'SDML': print("Method: SDML", '\n') sdml = SDML_Supervised(num_constraints=200) x = sdml.fit(FSTrainData, TrainLabels) TFSTestData = x.transform(FSTestData) print('Transformation Done', '\n') elif Method == 'RCA': print("Method: RCA", '\n') rca = RCA_Supervised(num_chunks=2, chunk_size=1) x = rca.fit(FSTrainData, TrainLabels)
def nca_mnist_experiment(trial, train_percentage=0.1, test_percentage=0.1):
    """Compare k-NN accuracy on MNIST encodings before and after NCA.

    A random subset of the pickled train and test encodings is drawn. k-NN
    is evaluated for k in (1, 3, 5, 7), averaging ``trial`` runs per k,
    first on the raw encodings and then on their NCA projection. The mean
    accuracies are printed; nothing is returned.

    Args:
        trial: number of evaluation runs averaged for each k.
        train_percentage: fraction of the training encodings to sample.
        test_percentage: fraction of the test encodings to sample.
    """
    encoding_train_imgs_path = './data/MNIST_encoding/tf_train.encoding'
    encoding_test_imgs_path = './data/MNIST_encoding/tf_test.encoding'
    train_labels_path = './data/MNIST_encoding/tf_train.labels'
    test_labels_path = './data/MNIST_encoding/tf_test.labels'

    def _load(path):
        # NOTE: pickle must only be used on trusted files. ``with`` closes
        # the handle, which the original left open.
        with open(path, 'rb') as handle:
            return pickle.load(handle)

    def _subsample(encodings, labels, fraction):
        # Draw ``fraction`` of the rows without replacement.
        total = len(encodings)
        sel = random.sample(range(total), int(total * fraction))
        # Builtin ``float`` replaces ``np.float``, an alias of it that was
        # removed in NumPy 1.24.
        return encodings.astype(float)[sel], labels[sel]

    def _report_knn(knn, train_x, train_y, test_x, test_y):
        # Print the mean accuracy over ``trial`` runs for each k.
        for k in (1, 3, 5, 7):
            knn.k = k
            acc_list = [knn.evaluate(train_x, train_y, test_x, test_y)
                        for _ in range(trial)]
            print(np.mean(np.array(acc_list)))

    encoding_train = _load(encoding_train_imgs_path)
    encoding_test = _load(encoding_test_imgs_path)
    print(encoding_train.shape)
    train_labels = _load(train_labels_path)
    test_labels = _load(test_labels_path)
    print(train_labels.shape)

    X, y = _subsample(encoding_train, train_labels, train_percentage)
    print(X.shape)
    print(y.shape)
    X_test, y_test = _subsample(encoding_test, test_labels, test_percentage)
    print(X_test.shape)
    print(y_test.shape)

    # Baseline: k-NN on the raw encodings.
    knn = kNN()
    _report_knn(knn, X, y, X_test, y_test)

    # Same evaluation after projecting both splits with NCA.
    nca = NCA(max_iter=100, learning_rate=0.01)
    nca.fit(X, y)
    x_train = nca.transform()
    x_test = nca.transform(X_test)
    _report_knn(knn, x_train, y, x_test, y_test)
def train_NCA(gtdata, vid_paths):
    """Train an NCA model on colour-histogram features of tracked boxes.

    Features are read from ``out_features.pkl`` when that file exists.
    Otherwise up to ``num_track`` boxes per track id are sampled from the
    videos, described with ``histogram_multires`` and cached to that file.
    The fitted model is pickled to ``multires.pkl`` and returned.

    Args:
        gtdata: mapping with parallel ``'id'``, ``'frame'``, ``'box'`` and
            ``'cam'`` sequences (one entry per annotated track segment).
        vid_paths: video path lookup indexed by camera.

    Returns:
        The fitted ``NCA`` instance.
    """
    # set number of images to get from each track
    num_track = 20
    cache_path = 'out_features.pkl'

    if os.path.isfile(cache_path):
        # NOTE: pickle must only be used on trusted files.
        with open(cache_path, 'rb') as infile:
            feat_dict = pickle.load(infile)
        new_features = feat_dict['features']
        new_labels = feat_dict['labels']
    else:
        new_features = []
        new_labels = []
        uniq_tracks = np.unique(gtdata['id'])
        for id_tr in trange(len(uniq_tracks), desc="get gt data"):
            track_id = uniq_tracks[id_tr]
            # Get all the indices that have the same tracking number
            indices = [i for i, x in enumerate(gtdata['id']) if x == track_id]
            frames = [gtdata['frame'][i] for i in indices]
            bboxes = [gtdata['box'][i] for i in indices]
            cameras = [gtdata['cam'][i] for i in indices]
            # Repeat each camera once per frame of its segment so the
            # flattened lists below stay aligned.
            cam_frames = [[cam] * len(segment)
                          for cam, segment in zip(cameras, frames)]

            # flatten lists
            frames = [item for sublist in frames for item in sublist]
            bboxes = [item for sublist in bboxes for item in sublist]
            cam_frames = [item for sublist in cam_frames for item in sublist]

            if len(frames) > num_track:
                indices = random.sample(range(len(frames)), num_track)
                frames = [frames[i] for i in indices]
                bboxes = [bboxes[i] for i in indices]
                cam_frames = [cam_frames[i] for i in indices]

            for frame_no, bb, cam in zip(frames, bboxes, cam_frames):
                # Get bbox from image
                cap = cv2.VideoCapture(vid_paths[cam])
                try:
                    # Seek to the frame just before ``frame_no``; the named
                    # constant replaces the magic property id 1.
                    cap.set(cv2.CAP_PROP_POS_FRAMES, frame_no - 1)
                    ret, vid_frame = cap.read()
                finally:
                    # Release the capture even if seeking/reading raises.
                    cap.release()
                # The box is indexed as (x, y, width, height).
                crop = vid_frame[bb[1]:bb[1] + bb[3], bb[0]:bb[0] + bb[2], :]
                bbox_img = cv2.cvtColor(crop, cv2.COLOR_BGR2RGB)
                box_features = histogram_multires(bbox_img)
                new_features.append(box_features)
                new_labels.append(track_id)

        with open(cache_path, 'wb') as outfile:
            pickle.dump({'features': new_features, 'labels': new_labels},
                        outfile)

    X = np.array(new_features)
    Y = np.array(new_labels)
    nca = NCA(init='pca', n_components=400, max_iter=1500, verbose=True)
    nca.fit(X, Y)

    with open('multires.pkl', 'wb') as outfile:
        pickle.dump(nca, outfile)
    return nca
return { 'cv_score': top_score, 'accuracy': accuracy, 'roc': roc, 'majority_accuracy': majority_accuracy, 'majority_roc': majority_roc } for d in range(len(dataset_collection)): print("Metric learning") nca1 = NCA(max_iter=1000, learning_rate=0.01) nca1.fit(x_train, y_train) t_x_train = nca1.transform() nca2 = NCA(max_iter=1000, learning_rate=0.01) nca2.fit(x_test, np.array(y_test)) t_x_test = nca2.transform() dat = [t_x_train, t_x_test] nn_metric = KNeighborsClassifier() nn_metric_params = { "n_neighbors": range(5, max(6, len(data) / 10)), 'leaf_size': range(30, 100) } classifier_stats['nn_metric'] = test_classifier(nn_metric, nn_metric_params, dat)
def test_nca(self):
    """``get_mahalanobis_matrix`` must equal ``L.T.dot(L)`` for the fitted L."""
    n_samples = self.X.shape[0]
    model = NCA(max_iter=(100000 // n_samples))
    model.fit(self.X, self.y)

    linear_map = model.transformer_
    expected_metric = linear_map.T.dot(linear_map)
    assert_array_almost_equal(expected_metric,
                              model.get_mahalanobis_matrix())
def test_nca(self):
    """The Mahalanobis matrix must be ``components_.T.dot(components_)``."""
    max_iter = 100000 // self.X.shape[0]

    estimator = NCA(max_iter=max_iter)
    estimator.fit(self.X, self.y)

    components = estimator.components_
    gram = components.T.dot(components)
    assert_array_almost_equal(gram, estimator.get_mahalanobis_matrix())
convergence_tol=1e-6, learn_rate=1e-6, verbose=True) start_time = time.time() lmnn.fit(pca.train_sample_projection, original_train_labels) end_time = time.time() print("Learning time: %s" % (end_time - start_time)) transformed_query_features = lmnn.transform(pca_query_features) transformed_gallery_features = lmnn.transform(pca_gallery_features) compute_k_mean(num_of_clusters, transformed_query_features, transformed_gallery_features, gallery_labels) # Compute NCA (Neighbourhood Components Analysis) Learning print("\n-----NCA-----") nca = NCA(max_iter=20, verbose=True) nca.fit(original_train_features, original_train_labels) transformed_query_features = nca.transform(query_features) transformed_gallery_features = nca.transform(gallery_features) compute_k_mean(num_of_clusters, transformed_query_features, transformed_gallery_features, gallery_labels) # Compute PCA_NCA Learning print("\n-----PCA_NCA-----") nca = NCA(max_iter=20, verbose=True) start_time = time.time() nca.fit(pca.train_sample_projection, original_train_labels) end_time = time.time() print("Learning time: %s" % (end_time - start_time)) transformed_query_features = nca.transform(pca_query_features) transformed_gallery_features = nca.transform(pca_gallery_features) compute_k_mean(num_of_clusters, transformed_query_features,
# http://contrib.scikit-learn.org/metric-learn/generated/metric_learn.NCA.html
from metric_learn import NCA
from sklearn.datasets import make_classification
from sklearn.neighbors import KNeighborsClassifier

# Learn a metric with NCA on a synthetic classification problem.
nca = NCA()
X, y = make_classification()
nca.fit(X, y)

# Use the learned metric as the distance function of a k-NN classifier.
knn = KNeighborsClassifier(metric=nca.get_metric())
knn.fit(X, y)

# Predictions for the first two samples, followed by their true labels.
print(knn.predict(X[:2, :]))
print(y[:2])
def test_nca(self):
    """``metric()`` must equal ``L.T.dot(L)`` for the fitted ``transformer_``."""
    n_samples = self.X.shape[0]
    model = NCA(max_iter=(100000 // n_samples))
    model.fit(self.X, self.y)

    learned = model.transformer_
    expected_metric = learned.T.dot(learned)
    assert_array_almost_equal(expected_metric, model.metric())
def test_nca(self):
    """``metric()`` must equal ``L.T.dot(L)`` with ``L = transformer()``."""
    max_iter = 100000 // self.X.shape[0]

    estimator = NCA(max_iter=max_iter, learning_rate=0.01)
    estimator.fit(self.X, self.y)

    matrix = estimator.transformer()
    gram = matrix.T.dot(matrix)
    assert_array_almost_equal(gram, estimator.metric())
def nca(data, label, dim):
    """Fit NCA on ``data`` / ``label`` and return ``data`` projected by it.

    ``dim`` is forwarded to the estimator as ``num_dims``.
    """
    model = NCA(num_dims=dim, max_iter=1000, learning_rate=0.01)
    model.fit(data, label)
    return model.transform(data)