class LFDA:
    """Wrapper that pairs an ``LFDA_ml`` metric learner with a nearest-centroid
    probability estimate computed in the learned space."""

    def __init__(self):
        """Initializes the LFDA model"""
        self.metric_model = LFDA_ml()
        self.X_tr = None
        self.y_train = None
        self.X_te = None

    def fit(self, X_tr, y_train):
        """Fits the model to the prescribed data.

        The training samples and labels are kept on the instance because
        ``predict_proba`` needs them to build class centroids.
        Returns whatever ``metric_model.fit`` returns.
        """
        self.X_tr = X_tr
        self.y_train = y_train
        return self.metric_model.fit(X_tr, y_train)

    def transform(self, X):
        """Transforms the given data according to the fitted metric model."""
        return self.metric_model.transform(X)

    def predict_proba(self, X_te):
        """Predicts the probabilities of each of the test samples.

        Class centroids are computed from the transformed training data and
        each row of ``X_te`` is scored against them with
        ``sk_nearest_neighbour_proba``.

        NOTE(review): ``X_te`` is used as passed in (it is not transformed
        here) -- presumably callers hand over already-transformed samples;
        confirm against the call sites.

        Returns an array of shape (number of test samples, number of centroids).
        """
        test_samples = X_te.shape[0]
        # Keep the projection in a local: storing it back on self.X_tr would
        # make every later call transform already-transformed training data.
        X_tr_projected = self.transform(self.X_tr)
        clf = NearestCentroid()
        clf.fit(X_tr_projected, self.y_train)
        centroids = clf.centroids_
        probabilities = np.zeros((test_samples, centroids.shape[0]))
        # range (not the Python-2-only xrange) so the loop runs on Python 3.
        for sample in range(test_samples):
            probabilities[sample] = sk_nearest_neighbour_proba(
                centroids, X_te[sample, :])
        return probabilities
def process_lfda(self, **option):
    """Metric Learning algorithm: LFDA.

    Fits an ``LFDA`` built from ``option`` on the training expression data
    and labels held on the instance, then stores the learned transformer
    under ``self.Trans['LFDA']``.
    """
    features, labels = self.GeneExp_train, self.Label_train
    model = LFDA(**option)
    model.fit(features, labels)
    learned = model.transformer()
    self.Trans['LFDA'] = learned
def test_iris(self):
    """LFDA on iris: class-separation score below 0.15 and expected matrix shapes."""
    model = LFDA(k=2, num_dims=2)
    model.fit(self.iris_points, self.iris_labels)

    embedded = model.transform(self.iris_points)
    separation = class_separation(embedded, self.iris_labels)
    self.assertLess(separation, 0.15)

    # Sanity checks for learned matrices.
    mahalanobis_shape = model.get_mahalanobis_matrix().shape
    self.assertEqual(mahalanobis_shape, (4, 4))
    transformer_shape = model.transformer_.shape
    self.assertEqual(transformer_shape, (2, 4))
def get_model(config):
    """Build an estimator (or pipeline) from a nested configuration mapping.

    ``config['model']`` selects the estimator type and the optional
    ``config['model_kwargs']`` mapping is forwarded to its constructor.
    Ensemble types build their members recursively from
    ``config['sub_model']`` (plus ``config['final_model']`` for stacking).
    Metric learners are wrapped in a two-step ``Pipeline`` whose last step is
    built from ``config['final_model']`` or defaults to ``KNeighborsClassifier``.

    Returns:
        The constructed model, or ``None`` when ``config`` is ``None``.

    Raises:
        AttributeError: if ``config['model']`` is not a recognised name.
    """
    if config is None:
        return None

    kwargs = config['model_kwargs'] if 'model_kwargs' in config else dict()
    kind = config['model']

    # --- plain estimators -------------------------------------------------
    if kind == 'svm':
        return svm.SVC(**kwargs)
    if kind == 'rdf':
        return RandomForestClassifier(**kwargs)
    if kind == 'gradient_boost':
        return GradientBoostingClassifier(**kwargs)
    if kind == 'gaussion_bayes':
        return naive_bayes.GaussianNB(**kwargs)
    if kind == 'mlp':
        return MLPClassifier(**kwargs)
    if kind == 'k_neighbors':
        return KNeighborsClassifier(**kwargs)
    if kind == 'decision_tree':
        return DecisionTreeClassifier(**kwargs)

    # --- ensembles built from recursively-configured members --------------
    if kind == 'adaboost':
        boosted = get_model(config['sub_model']) if 'sub_model' in config else None
        return AdaBoostClassifier(base_estimator=boosted, **kwargs)
    if kind == 'voting':
        voters = [[member['model_name'], get_model(member)]
                  for member in config['sub_model']]
        return VotingClassifier(voters, **kwargs)
    if kind == 'stacking':
        meta_estimator = get_model(config.get('final_model', None))
        members = [(member['model_name'], get_model(member))
                   for member in config['sub_model']]
        return StackingClassifier(estimators=members,
                                  final_estimator=meta_estimator,
                                  **kwargs)
    if kind == 'bagging':
        bagged = get_model(config.get('sub_model', None))
        return BaggingClassifier(base_estimator=bagged, **kwargs)

    # --- metric learner followed by a final classifier --------------------
    if kind in ['lfda', 'lmnn', 'mmc']:
        if kind == 'lfda':
            metric_learner = LFDA(**kwargs)
        elif kind == 'lmnn':
            metric_learner = LMNN(**kwargs)
        else:
            metric_learner = MMC_Supervised(**kwargs)
        if 'final_model' in config:
            last_step = get_model(config['final_model'])
        else:
            last_step = KNeighborsClassifier()
        return Pipeline([('metric', metric_learner), ('final', last_step)])

    raise AttributeError('unrecognized model %s' % kind)
def test_lfda(self):
    """``fit`` + ``transform`` and ``fit_transform`` must agree up to sign."""
    two_step = LFDA(k=2, n_components=2)
    two_step.fit(self.X, self.y)
    res_1 = two_step.transform(self.X)

    one_step = LFDA(k=2, n_components=2)
    res_2 = one_step.fit_transform(self.X, self.y)

    # signs may be flipped, that's okay
    magnitudes_1 = abs(res_1)
    magnitudes_2 = abs(res_2)
    assert_array_almost_equal(magnitudes_1, magnitudes_2)
def test_lfda(self):
    """``fit`` + ``transform`` and ``fit_transform`` must agree up to sign."""
    lfda = LFDA(k=2, num_dims=2)
    lfda.fit(self.X, self.y)
    res_1 = lfda.transform()

    lfda = LFDA(k=2, num_dims=2)
    res_2 = lfda.fit_transform(self.X, self.y)

    # Signs may be flipped, and each learned component can flip on its own,
    # so compare magnitudes instead of negating the whole result based on the
    # sign of a single entry.
    assert_array_almost_equal(np.abs(res_1), np.abs(res_2))
def test_iris(self):
    """LFDA on iris: class-separation score below 0.15 and expected matrix shapes."""
    model = LFDA(k=2, num_dims=2)
    model.fit(self.iris_points, self.iris_labels)

    embedded = model.transform()
    separation = class_separation(embedded, self.iris_labels)
    self.assertLess(separation, 0.15)

    # Sanity checks for learned matrices.
    metric_shape = model.metric().shape
    self.assertEqual(metric_shape, (4, 4))
    transformer_shape = model.transformer().shape
    self.assertEqual(transformer_shape, (2, 4))
def fisher_discriminant(X, Y):
    """Fit a default ``LFDA`` on ``(X, Y)``.

    Returns a ``(transformed X, learned metric)`` pair.
    """
    learner = LFDA()
    learner.fit(X, Y)
    projected = learner.transform(X)
    learned_metric = learner.metric()
    return projected, learned_metric
def runLFDA(X_train, X_test, y_train, y_test):
    """Fit a default ``LFDA`` on the training split and project both splits.

    The projections are also written with ``np.save`` under the names
    ``X_train_LFDA`` and ``X_test_LFDA``. ``y_test`` is accepted but not used.

    Returns the ``(projected train, projected test)`` pair.
    """
    model = LFDA()
    model.fit(X_train, y_train)

    X_train_proj = model.transform(X_train)
    X_test_proj = model.transform(X_test)

    outputs = (('X_train_LFDA', X_train_proj), ('X_test_LFDA', X_test_proj))
    for file_stem, projection in outputs:
        np.save(file_stem, projection)

    return X_train_proj, X_test_proj
def test_iris(self):
    """LFDA on iris: class-separation score below 0.15 and expected matrix shapes."""
    model = LFDA(k=2, n_components=2)
    model.fit(self.iris_points, self.iris_labels)

    embedded = model.transform(self.iris_points)
    separation = class_separation(embedded, self.iris_labels)
    self.assertLess(separation, 0.15)

    # Sanity checks for learned matrices.
    mahalanobis_shape = model.get_mahalanobis_matrix().shape
    self.assertEqual(mahalanobis_shape, (4, 4))
    components_shape = model.components_.shape
    self.assertEqual(components_shape, (2, 4))
def test_lfda(self):
    """``fit`` + ``transform`` and ``fit_transform`` must agree up to sign."""
    lfda = LFDA(k=2, num_dims=2)
    lfda.fit(self.X, self.y)
    res_1 = lfda.transform(self.X)

    lfda = LFDA(k=2, num_dims=2)
    res_2 = lfda.fit_transform(self.X, self.y)

    # Signs may be flipped, and each learned component can flip on its own,
    # so compare magnitudes instead of negating the whole result based on the
    # sign of a single entry.
    assert_array_almost_equal(np.abs(res_1), np.abs(res_2))
def get_dist_func(
        data: Array[np.float64],
        target: Array[np.float64]
) -> Callable[[Callable[[np.float64, np.float64], np.float64], int, int], np.float64]:
    """
    Get function that returns distances between examples in learned space.

    The data is standardised with ``StandardScaler`` and then passed through
    ``LFDA().fit_transform`` once; the returned function only indexes into
    that transformed copy.

    Args:
        data : Array[np.float64] - training data
        target : Array[np.float64] - target variable values (classes of
            training examples)

    Returns:
        Callable[[Callable[[np.float64, np.float64], np.float64], int, int], np.float64]
        -- function that takes a metric function and two indices of training
        examples and returns the distance between those examples in the
        learned metric space.
    """

    # Get transformed data.
    data_trans: Array[np.float64] = LFDA().fit_transform(
        StandardScaler().fit_transform(data), target)

    # Computing distance.
    # Indices are annotated with the builtin int: the np.int alias was removed
    # from NumPy and would fail as soon as the annotation is evaluated.
    def dist_func_res(metric: Callable[[np.float64, np.float64], np.float64],
                      i1: int, i2: int) -> np.float64:
        """
        distance function that takes indices of examples in training set
        and returns distance in learned space using specified distance metric.

        Args:
            metric : callable applied to the two transformed examples
            i1 : int - index of first training example
            i2 : int - index of second training example

        Returns:
            np.float64 - distance in learned metric space using specified
            metric between specified training examples.
        """

        # Compute distance in learned metric space using specified metric.
        return metric(data_trans[i1, :], data_trans[i2, :])

    return dist_func_res  # Return distance function.
X_train = np.load(osp.join(args.data_root, 'feature_train.npy')) y_train = np.load(osp.join(args.data_root, 'label_train.npy')) X_test = np.load(osp.join(args.data_root, 'feature_test.npy')) y_test = np.load(osp.join(args.data_root, 'label_test.npy')) return X_train, X_test, y_train, y_test if __name__ == '__main__': parser = argparse.ArgumentParser("LFDA") parser.add_argument('--data-root', default='./data/raw_split') parser.add_argument('--n-components', type=int, default=2) args = parser.parse_args() name = f"{args.n_components}" data_save_folder = f"./data/LFDA/{name}" makedirs(data_save_folder) X_train, X_test, y_train, y_test = load_split(args) print(X_train.shape) t = time.time() lfda = LFDA(n_components=args.n_components) lfda.fit(X_train, y_train) np.save(osp.join(data_save_folder, "feature_train.npy"), lfda.transform(X_train)) np.save(osp.join(data_save_folder, "label_train.npy"), y_train) np.save(osp.join(data_save_folder, "feature_test.npy"), lfda.transform(X_test)) np.save(osp.join(data_save_folder, "label_test.npy"), y_test)
def test_preprocessor_supervised(preprocessor, points, y_points):
    """Check that a supervised learner (LFDA) can be fitted with the given
    ``preprocessor`` argument, which may be an array, a class callable, or a
    function callable.
    """
    LFDA(preprocessor=preprocessor).fit(points, y_points)
def main(params):
    """Load the configured dataset, compute ``psd_matrix`` with the configured
    algorithm, and write it to ``psd_matrix.txt`` in the results directory.

    ``params`` supplies every setting through ``get`` / ``getint`` /
    ``getfloat`` / ``getboolean``.

    Raises:
        Exception: if ``params.get('algorithm')`` is not a supported name.
    """
    results_dir = params.get('results_dir')
    initialize_results_dir(results_dir)
    backup_params(params, results_dir)

    print('>>> loading data...')
    load_split = LoaderFactory().create(
        name=params.get('dataset'),
        root=params.get('dataset_dir'),
        random=True,
        seed=params.getint('split_seed'))
    X_train, y_train, X_test, y_test = load_split()
    print('<<< data loaded')

    print('>>> computing psd matrix...')
    algorithm = params.get('algorithm')
    # Branches that use a fit/get_mahalanobis_matrix estimator only build it
    # here; the shared fitting step runs once after the chain.
    metric_learner = None
    if algorithm == 'identity':
        psd_matrix = np.identity(X_train.shape[1], dtype=X_train.dtype)
    elif algorithm == 'nca':
        metric_learner = NCA(init='auto', verbose=True,
                             random_state=params.getint('algorithm_seed'))
    elif algorithm == 'lmnn':
        metric_learner = LMNN(init='auto', verbose=True,
                              random_state=params.getint('algorithm_seed'))
    elif algorithm == 'itml':
        metric_learner = ITML_Supervised(
            verbose=True, random_state=params.getint('algorithm_seed'))
    elif algorithm == 'lfda':
        metric_learner = LFDA()
    elif algorithm == 'arml':
        learner_kwargs = {
            'optimizer': params.get('optimizer'),
            'optimizer_params': {
                'lr': params.getfloat('lr'),
                'momentum': params.getfloat('momentum'),
                'weight_decay': params.getfloat('weight_decay'),
            },
            'criterion': params.get('criterion'),
            'criterion_params': {'calibration': params.getfloat('calibration')},
            'n_epochs': params.getint('n_epochs'),
            'batch_size': params.getint('batch_size'),
            'random_initialization': params.getboolean('random_initialization',
                                                       fallback=False),
            'update_triple': params.getboolean('update_triple', fallback=False),
            'device': params.get('device'),
            'seed': params.getint('learner_seed'),
        }
        learner = TripleLearner(**learner_kwargs)
        psd_matrix = learner(
            X_train, y_train,
            n_candidate_mins=params.getint('n_candidate_mins', fallback=1))
    else:
        raise Exception('unsupported algorithm')

    if metric_learner is not None:
        metric_learner.fit(X_train, y_train)
        psd_matrix = metric_learner.get_mahalanobis_matrix()
    print('<<< psd matrix got')

    output_path = os.path.join(params.get('results_dir'), 'psd_matrix.txt')
    np.savetxt(output_path, psd_matrix)
def test_lfda(self):
    """Run ``check_estimator`` on a default-constructed ``LFDA``."""
    estimator = LFDA()
    check_estimator(estimator)
elif Method == 'COV': print("Method: COV", '\n') cov = Covariance().fit(FSTrainData) TFSTestData = cov.transform(FSTestData) print('Transformation Done', '\n') elif Method == 'ITML': print("Method: ITML", '\n') itml = ITML_Supervised(num_constraints=200, A0=None) x = itml.fit(FSTrainData, TrainLabels) TFSTestData = x.transform(FSTestData) print('Transformation Done', '\n') elif Method == 'LFDA': print("Method: LFDA", '\n') lfda = LFDA(k=4, dim=1) x = lfda.fit(FSTrainData, TrainLabels) TFSTestData = x.transform(FSTestData) print('Transformation Done', '\n') elif Method == 'NCA': print("Method: NCA", '\n') #print('Max', TrainData.max(axis=0)) #print('sssssssss', len(TrainData[0])) #print('sssssssss', len(TrainData.max(axis=0))) #print('Min', TrainData.min(axis=0)) nca = NCA(max_iter=500, learning_rate=0.01) # print('ssssssss', TrainData) x = nca.fit(FSTrainData, TrainLabels)
def __init__(self):
    """Initializes the LFDA model.

    Creates the ``LFDA_ml`` metric learner and sets ``X_tr``, ``y_train``
    and ``X_te`` to ``None``.
    """
    self.metric_model = LFDA_ml()
    self.X_tr = self.y_train = self.X_te = None
# Each list holds (learner instance, dataset-building function) pairs; the
# matching ``ids_*`` list holds the learners' class names in the same order.
quadruplets_learners = [(LSML(), build_quadruplets)]
ids_quadruplets_learners = [
    learner.__class__.__name__ for learner, _ in quadruplets_learners
]

pairs_learners = [
    (ITML(), build_pairs),
    (MMC(max_iter=2), build_pairs),  # max_iter=2 for faster
    (SDML(), build_pairs),
]
ids_pairs_learners = [
    learner.__class__.__name__ for learner, _ in pairs_learners
]

classifiers = [
    (Covariance(), build_classification),
    (LFDA(), build_classification),
    (LMNN(), build_classification),
    (NCA(), build_classification),
    (RCA(), build_classification),
    (ITML_Supervised(max_iter=5), build_classification),
    (LSML_Supervised(), build_classification),
    (MMC_Supervised(max_iter=5), build_classification),
    (RCA_Supervised(num_chunks=10), build_classification),
    (SDML_Supervised(), build_classification),
]
ids_classifiers = [
    learner.__class__.__name__ for learner, _ in classifiers
]

regressors = [(MLKR(), build_regression)]
ids_regressors = [
    learner.__class__.__name__ for learner, _ in regressors
]
t0 = time() pca = RandomizedPCA(n_components=n_components, whiten=True).fit(X_train) print("done in %0.3fs" % (time() - t0)) eigenfaces = pca.components_.reshape((n_components, h, w)) print("Projecting the input data on the eigenfaces orthonormal basis") t0 = time() X_train_pca = pca.transform(X_train) X_test_pca = pca.transform(X_test) print("done in %0.3fs" % (time() - t0)) # Try LMNN here. print("Trying LFDA") param_grid = {''} lfda = LFDA() lfda = lfda.fit(X_train_pca, y_train) X_tr = lfda.transform(X_train_pca) X_te = lfda.transform(X_test_pca) acc, y_pred = classifier.sk_nearest_neighbour(X_tr, y_train, X_te, y_test) print("accuracy = %s", acc) print(classification_report(y_test, y_pred, target_names=target_names)) print(confusion_matrix(y_test, y_pred, labels=range(n_classes))) ############################################################################### # Train a SVM classification model print("Fitting the classifier to the training set") t0 = time() param_grid = {
def test_lfda(self):
    """The learned metric must equal ``L.T.dot(L)`` for the learned transformer ``L``."""
    model = LFDA(k=2, num_dims=2)
    model.fit(self.X, self.y)

    L = model.transformer_
    reconstructed = L.T.dot(L)
    assert_array_almost_equal(reconstructed, model.metric())
def lfda(data, label, dim):
    """Fit ``LFDA(k=2, num_dims=dim)`` on ``(data, label)`` and return the
    transformed ``data``."""
    model = LFDA(k=2, num_dims=dim)
    model.fit(data, label)
    return model.transform(data)
def test_lfda(self):
    """The Mahalanobis matrix must equal ``L.T.dot(L)`` for the learned transformer ``L``."""
    model = LFDA(k=2, num_dims=2)
    model.fit(self.X, self.y)

    L = model.transformer_
    reconstructed = L.T.dot(L)
    assert_array_almost_equal(reconstructed, model.get_mahalanobis_matrix())
def test_iris(self):
    """LFDA on iris must give a class-separation score below 0.15."""
    model = LFDA(k=2, dim=2)
    model.fit(self.iris_points, self.iris_labels)

    embedded = model.transform()
    separation = class_separation(embedded, self.iris_labels)
    self.assertLess(separation, 0.15)
def test_lfda(self):
    """The Mahalanobis matrix must equal ``L.T.dot(L)`` for the learned components ``L``."""
    model = LFDA(k=2, n_components=2)
    model.fit(self.X, self.y)

    L = model.components_
    reconstructed = L.T.dot(L)
    assert_array_almost_equal(reconstructed, model.get_mahalanobis_matrix())
def test_lfda(self):
    """The learned metric must equal ``L.T.dot(L)`` for the learned transformer ``L``."""
    model = LFDA(k=2, num_dims=2)
    model.fit(self.X, self.y)

    L = model.transformer()
    reconstructed = L.T.dot(L)
    assert_array_almost_equal(reconstructed, model.metric())