def MetricLearning(Data, mapping, remapping, Truth):
    """Learn an LMNN metric on the color features of Data.

    Parameters:
        Data: sequence of feature tuples; element [0] of each is the color
            feature vector (per the indexing visible below).
        mapping: unused here; kept for interface compatibility with callers.
        remapping: maps a Data index to its key in Truth.
        Truth: dict mapping keys to ground-truth labels.

    Returns the fitted LMNN model for the color features.
    (Python 2 code: uses `print >>` statements.)
    """
    # Dense integer label space built from the distinct ground-truth labels.
    Truth_Label = list(set(Truth.values()))
    X_color = np.array([data[0] for data in Data])
    Y_color = np.array([Truth_Label.index(Truth[remapping[i]])
                        for i, data in enumerate(Data)])
    print >> sys.stderr, "X_color", X_color.shape, Y_color.shape
    s = time.time()
    lmnn_color = LMNN(k=5, min_iter=0, max_iter=400, learn_rate=1e-6)
    lmnn_color.fit(X_color, Y_color, verbose=True)
    print >> sys.stderr, time.time() - s, "color learning done"
    # NOTE: the disabled shape-feature learning branch (dead commented-out
    # code returning (lmnn_color, lmnn_shape)) was removed; callers receive
    # only the color model, as before.
    return lmnn_color
def test_iris(self):
    # Fit LMNN on the iris data and require good class separation in the
    # learned metric space.
    model = LMNN(k=5, learn_rate=1e-6, verbose=False)
    model.fit(self.iris_points, self.iris_labels)
    transformed = model.transform(self.iris_points)
    separation = class_separation(transformed, self.iris_labels)
    self.assertLess(separation, 0.25)
def constructSimilartyMatrixLMNN(self, ks):
    """Build a k-sparse similarity matrix from LMNN-transformed data.

    Fits LMNN on the PCA-reduced training data, saves the learned
    transformer, transforms train/test/all sets into the learned metric
    space, keeps only each sample's `ks` nearest neighbours in the
    pairwise-distance matrix, fills the degree matrix D, and runs label
    propagation.
    """
    print('now doing LMNN for k= ', ks)
    self.y_train = self.y_train.reshape(-1, )
    lmnn = LMNN(k=ks, learn_rate=1e-7, max_iter=1000)
    lmnn.fit(self.trainVectorsPCA, self.y_train)
    self.L_lmnn = lmnn.transformer()
    name = ('lmnn/LMNN transformer matrix with dataset shape ' +
            str(self.trainVectorsPCA.shape))
    np.save(name, self.L_lmnn)
    print('L.shape is ', self.L_lmnn.shape, '\n\n')
    # Input data transformed to the metric space by X*L.T
    self.transformedTrainLMNN = copy(lmnn.transform(self.trainVectorsPCA))
    self.transformedTestLMNN = copy(lmnn.transform(self.testVectorsPCA))
    # we compute the pairwise distance on this now
    self.transformedAllLMNN = copy(lmnn.transform(self.allDataPCA))
    # NOTE(review): the original computed a TSNE projection here whose
    # result was never used (the plotting code had been removed); that
    # dead work is dropped.
    self.pwdis = copy(
        pairwise_distances(self.transformedAllLMNN, metric='euclidean'))
    self.D = np.zeros(self.pwdis.shape)
    for i in range(0, self.pwdis.shape[0]):
        row = self.pwdis[i].tolist()
        # Indices of all samples ordered by distance from sample i.
        # (The original lambda shadowed the loop index `i`; renamed to `j`.)
        nearest = sorted(range(len(row)), key=lambda j: row[j])
        # Zero out every weight except the ks nearest neighbours.
        self.pwdis[i, nearest[ks:]] = 0
        self.D[i, i] = sum(self.pwdis[i])
    print('accuracy for LMNN for k= ', ks, '\n')
    self.labelPropogation()
def constructSimilartyMatrixLMNN(self,ks):
    # Build a k-sparse similarity matrix in the LMNN-learned metric space,
    # plot a TSNE projection of the transformed data, then run label
    # propagation. (Python 2 code: uses print statements.)
    print 'now doing LMNN for k= ',ks
    self.y_train=self.y_train.reshape(-1,)
    lmnn=LMNN(k=ks, learn_rate=1e-7,max_iter=3000)
    lmnn.fit(self.trainVectorsPCA, self.y_train, verbose=False)
    self.L_lmnn = lmnn.transformer()
    name='lmnn/LMNN transformer matrix with dataset shape '+str(self.trainVectorsPCA.shape)
    np.save(name,self.L_lmnn)
    print 'L.shape is ',self.L_lmnn.shape,'\n\n'
    # Input data transformed to the metric space by X*L.T
    self.transformedTrainLMNN=copy(lmnn.transform(self.trainVectorsPCA))
    self.transformedTestLMNN=copy(lmnn.transform(self.testVectorsPCA))
    self.transformedAllLMNN=copy(lmnn.transform(self.allDataPCA)) #we compute the pairwise distance on this now
    projectedDigits = TSNE(random_state=randomState).fit_transform(self.transformedAllLMNN)
    plt.scatter(projectedDigits[:,0],projectedDigits[:,1],c=self.labels)
    plt.title('LMNN Transformed ALL set projected to 2 Dimensions by TSNE with k='+str(ks))
    # `pp` is a module-level figure sink (presumably a PdfPages handle
    # opened by the caller) -- TODO confirm.
    plt.savefig(pp,format='pdf')
    self.pwdis=copy(pairwise_distances(self.transformedAllLMNN,metric='euclidean'))
    self.D=np.zeros(self.pwdis.shape)
    for i in range(0,self.pwdis.shape[0]):
        l1=self.pwdis[i].tolist()
        #print 'l1 is ',l1,'\n\n'
        # NOTE(review): the lambda parameter `i` shadows the loop index `i`;
        # harmless here but confusing.
        allnearestNeighbours=sorted(range(len(l1)),key=lambda i : l1[i])
        #now set the all the weights except for k+1 to 0
        self.pwdis[i,allnearestNeighbours[ks:]]=0
        self.D[i,i]=sum(self.pwdis[i])
    print 'accuracy for LMNN for k= ',ks,'\n'
    self.labelPropogation()
def LMNN(self):
    # Learn an LMNN metric on self.features, replace the features with
    # their transformed versions (destructive!), then run the k-NN
    # evaluation pipeline. (Python 2 code.)
    # NOTE(review): `targets` is a free name here -- presumably this should
    # be self.targets or a module-level variable; verify against the caller.
    # Also note the method name shadows the LMNN class only at class scope;
    # the call below still resolves to the imported class.
    print "Warning, the features will be transformed"
    lmnn = LMNN(k=5, learn_rate = 1e-6)
    lmnn.fit(self.features, targets)
    self.features = lmnn.transform(self.features)
    self.prepare_for_testing()
    self.nearest_neighbors("LMNN + KNN")
def runLMNN(X_train, X_test, y_train, t_test, k):
    """Fit LMNN on the training split, project both splits into the learned
    metric space, cache the projections as .npy files keyed by k, and
    return (train_projection, test_projection). `t_test` is unused; kept
    for a uniform call signature."""
    metric = LMNN(k=k, learn_rate=1e-6, convergence_tol=0.1, verbose=True)
    metric.fit(X_train, y_train)
    train_proj = metric.transform(X_train)
    test_proj = metric.transform(X_test)
    np.save('X_train_LMNN_' + str(k), train_proj)
    np.save('X_test_LMNN_' + str(k), test_proj)
    return train_proj, test_proj
def test_convergence_simple_example(capsys):
    # Regression test for metric-learn issue #88: LMNN must converge on a
    # simple synthetic classification problem and say so on stdout.
    X, y = make_classification(random_state=0)
    model = LMNN(verbose=True)
    model.fit(X, y)
    captured, _ = capsys.readouterr()
    assert "LMNN converged with objective" in captured
def process_lmnn(self, **option):
    '''Metric Learning algorithm: LMNN.

    Fits LMNN (with the given keyword options) on the training expression
    data and stores the learned linear transformation under 'LMNN'.
    '''
    features = self.GeneExp_train
    labels = self.Label_train
    model = LMNN(**option)
    model.fit(features, labels)
    self.Trans['LMNN'] = model.transformer()
def train_test(self, x_train, y_train, x_test=None, cuisines=None, k=15):
    # Train/evaluate a k-NN classifier on embedded recipes.
    #
    # Two modes:
    #   * x_test given: compute train->test distances, predict with the
    #     supplied k, and write an id->cuisine submission CSV.
    #   * x_test None: shuffle, split x_train at index 35000 into
    #     train/validation, and report validation accuracy for a sweep of
    #     k values -- optionally after learning an LMNN Mahalanobis metric
    #     (use_DML flag below, hard-coded False).
    torch.cuda.empty_cache()
    x_train = np.array(x_train)
    testing = x_test is not None
    if testing:
        x_tr = torch.tensor(x_train)
        y_tr = torch.tensor(y_train)
        x_val = torch.tensor(x_test)
        dist = self.get_dist(x_tr, x_val)
        y_pred = self.predict(dist, y_tr, k)
        ids = [cuisine.id for cuisine in cuisines]
        pred_cuisines = [
            self.dataset.id2cuisine[label] for label in y_pred
        ]
        self._write2csv(ids, pred_cuisines)
    else:
        shuffle_idx = torch.randperm(x_train.shape[0])
        x_train = torch.tensor(x_train).float()
        y_train = torch.tensor(y_train)
        x_train = x_train[shuffle_idx]
        y_train = y_train[shuffle_idx]
        x_val = x_train[35000:]
        x_tr = x_train[:35000]
        y_val = y_train[35000:]
        y_tr = y_train[:35000]
        use_DML = False
        if use_DML:
            # Metric-learning path on smaller subsets (LMNN is expensive).
            # NOTE(review): x_tr takes 5000 rows but y_tr takes 20000
            # labels -- looks inconsistent; confirm before enabling use_DML.
            x_val = x_train[5000:6000]
            x_tr = x_train[:5000]
            y_val = y_train[5000:6000]
            y_tr = y_train[:20000]
            x_tr, x_val = self.PCA(x_tr, x_val, 64)
            lmnn = LMNN(k=15, learn_rate=1e-6, min_iter=50, max_iter=100)
            lmnn.fit(x_tr.numpy(), y_tr.numpy())
            M = lmnn.get_mahalanobis_matrix()
            M = torch.tensor(M).float()
            # Pairwise Mahalanobis distances (x0-x1)^T M (x0-x1) for every
            # train/validation pair, reshaped to (m_train, n_val).
            n, d = x_val.shape
            m = x_tr.shape[0]
            x0 = x_tr.unsqueeze(1).expand(-1, n, -1).contiguous().view(-1, d)
            x1 = x_val.unsqueeze(0).expand(m, -1, -1).contiguous().view(-1, d)
            x = x0 - x1
            dist0 = torch.mm(M, x.t().contiguous())
            dists = dist0.t().contiguous() * x
            dist = dists.sum(1).view(m, n)
        else:
            x_tr, x_val = self.PCA(x_tr, x_val, 500)
            dist = self.get_dist(x_tr, x_val).cpu()
        # Sweep k and report validation accuracy for each value.
        for k in [1, 3, 5, 8, 10, 15, 20, 25, 30]:
            y_pred = self.predict(dist, y_tr, k)
            acc = (y_pred == y_val).sum().float().numpy() / y_val.shape[0]
            print("K=", k, " acc=", acc)
    torch.cuda.empty_cache()
def test_lmnn(self):
    # fit() followed by transform() must match fit_transform() on an
    # identically-configured estimator.
    first = LMNN(k=5, learn_rate=1e-6, verbose=False)
    first.fit(self.X, self.y)
    res_1 = first.transform(self.X)
    second = LMNN(k=5, learn_rate=1e-6, verbose=False)
    res_2 = second.fit_transform(self.X, self.y)
    assert_array_almost_equal(res_1, res_2)
def test_lmnn(self):
    # fit() followed by transform() must match fit_transform() on an
    # identically-configured estimator (n_neighbors API variant).
    first = LMNN(n_neighbors=5, learn_rate=1e-6, verbose=False)
    first.fit(self.X, self.y)
    res_1 = first.transform(self.X)
    second = LMNN(n_neighbors=5, learn_rate=1e-6, verbose=False)
    res_2 = second.fit_transform(self.X, self.y)
    assert_array_almost_equal(res_1, res_2)
def LMNN_Metric(datamatrix, datalabel):
    """Learn an LMNN metric and return the full pairwise distance matrix.

    Parameters:
        datamatrix: (n_samples, n_features) array of points.
        datalabel: length-n_samples label sequence supervising LMNN.

    Returns an (n, n) array of learned-metric distances between all pairs.
    """
    n = len(datalabel)
    Dis_Matrix = np.zeros((n, n))
    lmnn = LMNN(k=5, learn_rate=1e-6)
    lmnn.fit(datamatrix, datalabel)
    metric_func = lmnn.get_metric()
    # The learned (Mahalanobis) metric is symmetric with zero diagonal, so
    # compute only the upper triangle and mirror it -- halves the O(n^2)
    # metric calls of the original double loop.
    for i in range(n):
        for j in range(i + 1, n):
            d = metric_func(datamatrix[i], datamatrix[j])
            Dis_Matrix[i, j] = d
            Dis_Matrix[j, i] = d
    return Dis_Matrix
def draw_knn_with_lmnn(k, metric):
    """Transform the 2-D colour data with LMNN, fit a k-NN classifier with
    the given distance metric, report the test error, and plot the decision
    regions over the transformed space."""
    names = ['x', 'y', 'color']
    df = pd.DataFrame(mapped_colors, columns=names)
    # Fix: .ix was removed from pandas; use positional .iloc indexing.
    X = np.array(df.iloc[:, 0:2])
    y = np.array(df['color'])
    lmnn = LMNN(k=5, learn_rate=1e-6)
    lmnn.fit(X, y)
    # Fix: transform() requires the data to transform in current
    # metric-learn (the no-argument form was removed).
    X = lmnn.transform(X)
    X_train, X_test, y_train, y_test = train_test_split(X, y,
                                                        test_size=0.33,
                                                        random_state=42)
    if metric == 'mahalanobis':
        # Mahalanobis k-NN needs the covariance of the (transformed) data.
        knn = KNeighborsClassifier(
            n_neighbors=k,
            metric=metric,
            metric_params={'V': np.cov(np.transpose(X))})
    else:
        knn = KNeighborsClassifier(n_neighbors=k, metric=metric)
    knn.fit(X_train, y_train)
    pred = knn.predict(X_test)
    err = 1 - accuracy_score(y_test, pred)
    print('\nThe error is ' + str(err * 100))
    # Plot decision regions on a mesh over the transformed feature space.
    h = .02  # mesh step size
    cmap_light = ListedColormap(['#FFAAAA', '#AAFFAA', '#AAAAFF'])
    cmap_bold = ListedColormap(['#FF0000', '#00FF00', '#0000FF'])
    x_min, x_max = X[:, 0].min() - 1, X[:, 0].max() + 1
    y_min, y_max = X[:, 1].min() - 1, X[:, 1].max() + 1
    xx, yy = np.meshgrid(np.arange(x_min, x_max, h),
                         np.arange(y_min, y_max, h))
    Z = knn.predict(np.c_[xx.ravel(), yy.ravel()])
    Z = Z.reshape(xx.shape)
    plt.figure()
    plt.pcolormesh(xx, yy, Z, cmap=cmap_light)
    plt.scatter(X[:, 0], X[:, 1], c=y, cmap=cmap_bold, edgecolor='k', s=20)
    plt.xlim(xx.min(), xx.max())
    plt.ylim(yy.min(), yy.max())
    plt.title("3-Class classification (k = %i)" % k)
class GeoLMNN(neighbors.KNeighborsClassifier):
    """k-NN classifier that first maps inputs through an LMNN-learned
    linear transformation."""

    def __init__(self, n_neighbors=3):
        super(GeoLMNN, self).__init__(n_neighbors=n_neighbors)
        # NOTE(review): LMNN is given n_neighbors positionally; which
        # parameter this binds to depends on the metric-learn version.
        self.lmnn = LMNN(n_neighbors)

    def fit(self, X, y):
        # Learn the metric, then fit the underlying k-NN on the
        # transformed training data.
        self.lmnn.fit(X, y)
        super(GeoLMNN, self).fit(self.lmnn.transform(X), y)

    def predict(self, X):
        transformed = self.lmnn.transform(X)
        return super(GeoLMNN, self).predict(transformed)
class LMNNClassifier(BaseEstimator, ClassifierMixin):
    # k-NN classifier over an LMNN-learned Mahalanobis metric, with an
    # optional PCA pre-projection step.

    def __init__(self, k=3, pca=None, train=True, mu=0.5):
        # k: number of neighbours. pca: optional n_components for a PCA
        # pre-step. train: if False, skip metric learning and use plain
        # Euclidean k-NN. mu: LMNN regularization weight.
        self.k = k
        self.train = train
        self.pca = pca
        self.pca_trasform = None  # (sic) set to the fitted PCA's transform
        self.mu = mu
        self.lmnn = LMNN(k=k, use_pca=False, max_iter=10000, regularization=mu)

    def fit(self, x, y=None):
        n, d = x.shape
        if self.pca is not None:
            pca = PCA(n_components=self.pca)
            pca.fit(x)
            self.pca_trasform = pca.transform
            x = pca.transform(x)
        if self.train:
            # Learn the metric and hand its matrix to sklearn's k-NN as the
            # Mahalanobis VI parameter.
            # NOTE(review): lmnn.metric() is the pre-0.5 metric-learn API;
            # newer releases use get_mahalanobis_matrix().
            self.lmnn.fit(x, y)
            self.knn = KNeighborsClassifier(
                n_neighbors=self.k,
                metric='mahalanobis',
                metric_params=dict(VI=self.lmnn.metric()),
                n_jobs=-1)
        else:
            self.knn = KNeighborsClassifier(n_neighbors=self.k)
        self.knn.fit(x, y)
        return self

    def predict(self, x, y=None):
        if self.pca_trasform is not None:
            x = self.pca_trasform(x)
        # NOTE(review): sklearn's KNeighborsClassifier.predict takes only X;
        # passing y here would raise -- confirm the intended API.
        return self.knn.predict(x, y)

    def score(self, x, y=None):
        if self.pca_trasform is not None:
            x = self.pca_trasform(x)
        return self.knn.score(x, y)

    def set_params(self, **parameters):
        # Mirror `mu` onto the wrapped LMNN's regularization parameter so
        # grid search over mu reconfigures the metric learner too.
        for parameter, value in parameters.items():
            setattr(self, parameter, value)
            if parameter == 'mu':
                setattr(self.lmnn, 'regularization', value)
        return self
class LP:
    # Label-spreading wrapper with optional LMNN metric learning.
    #
    # Label convention in the caller's data: 0 = unlabeled, -1 / 1 = the two
    # classes. They are remapped in fit() (0 -> -1, -1 -> 2) because
    # LabelSpreading treats -1 as its unlabeled marker, and remapped back
    # in predict().

    def __init__(self, lmnn=False, max_iter=1000, lm_num=200):
        # self.clf = LabelPropagation(kernel='knn',max_iter=1000,n_jobs=10,n_neighbors=25)
        self.clf = LabelSpreading(kernel='knn', n_neighbors=25,
                                  max_iter=max_iter, alpha=0.2, n_jobs=-1)
        self.lmnn = lmnn      # whether to learn an LMNN metric first
        self.lm_num = lm_num  # number of labeled samples used to fit LMNN
        if lmnn:
            self.ml = LMNN(use_pca=False, max_iter=2000)

    def fit(self, X, y):
        if self.lmnn:
            # Fit LMNN on a random subsample of the labeled points only,
            # then transform the whole dataset into the learned space.
            nonzero_index = np.nonzero(y)
            index = random.sample(list(nonzero_index[0]), self.lm_num)
            X_ = X[index]
            y_ = y[index]
            print('ml fitting')
            self.ml.fit(X_, y_)
            print('transform')
            X = self.ml.transform(X)
        print('lp fitting')
        # Remap labels for LabelSpreading (see class comment).
        # NOTE(review): this mutates the caller's `y` array in place.
        zero_index = np.nonzero(y == 0)
        negetive_index = np.nonzero(y == -1)
        positive_index = np.nonzero(y == 1)
        y[zero_index] = -1
        y[negetive_index] = 2
        print(zero_index[0].shape, negetive_index[0].shape,
              positive_index[0].shape)
        self.clf.fit(X, y)

    def predict(self, X):
        print('lp predict')
        if self.lmnn:
            X = self.ml.transform(X)
        y_pred = self.clf.predict(X)
        # Undo the fit-time remapping: -1 -> 0 (unlabeled), 2 -> -1.
        negative_index = np.nonzero(y_pred == -1)
        two_index = np.nonzero(y_pred == 2)
        y_pred[negative_index] = 0
        y_pred[two_index] = -1
        return y_pred
def test_no_twice_same_objective(capsys):
    # test that the objective function never has twice the same value
    # see https://github.com/scikit-learn-contrib/metric-learn/issues/88
    X, y = make_classification(random_state=0)
    lmnn = LMNN(verbose=True)
    lmnn.fit(X, y)
    out, _ = capsys.readouterr()
    lines = re.split("\n+", out)
    # we get only objectives from each line:
    # the regexp matches a float that follows an integer (the iteration
    # number), and which is followed by a (signed) float (delta obj). It
    # matches for instance:
    # 3 **1113.7665747189938** -3.182774197440267 46431.0200999999999998e-06
    objectives = [re.search(r"\d* (?:(\d*.\d*))[ | -]\d*.\d*", s)
                  for s in lines]
    # Keep only the lines where the pattern matched (iteration lines).
    objectives = [match.group(1) for match in objectives if match is not None]
    # we remove the last element because it can be equal to the penultimate
    # if the last gradient update is null
    # Uniqueness check: a set collapses duplicates, so equal lengths mean
    # no objective value repeated.
    assert len(objectives[:-1]) == len(set(objectives[:-1]))
def baseline_model(X_train,y_train,X_test,y_test):
    # Baseline pipeline: L1 linear-SVC feature selection -> LMNN metric
    # learning -> 4-NN classification in the learned space. Returns the
    # predicted labels for X_test. (y_test is unused; kept for a uniform
    # model interface.)
    #dimension reduction
    feature_selection = LinearSVC(C=1, penalty="l1", dual=False)
    # NOTE(review): calling fit_transform on LinearSVC for feature selection
    # relies on an old sklearn behaviour; newer versions require wrapping
    # the estimator in SelectFromModel -- confirm the pinned version.
    X_train_reduced = feature_selection.fit_transform(X_train, y_train)
    X_test_reduced = feature_selection.transform(X_test)
    #metrics learning
    ml = LMNN(k=4,min_iter=50,max_iter=1000, learn_rate=1e-7)
    ml.fit(X_train_reduced,y_train)
    X_train_new = ml.transform(X_train_reduced)
    X_test_new = ml.transform(X_test_reduced)
    neigh = KNeighborsClassifier(n_neighbors=4)
    neigh.fit(X_train_new, y_train)
    predicted = neigh.predict(X_test_new)
    #pickle.dump(ml, open('dist_metrics', 'w'))
    return predicted
def baseline_model(X_train, y_train, X_test, y_test):
    """Feature-select with an L1 linear SVC, learn an LMNN metric, then
    classify the test set with 4-nearest-neighbours in the learned space.

    Returns the predicted labels for X_test (y_test is unused; kept for a
    uniform model interface).
    """
    # dimension reduction
    selector = LinearSVC(C=1, penalty="l1", dual=False)
    train_reduced = selector.fit_transform(X_train, y_train)
    test_reduced = selector.transform(X_test)
    # metrics learning
    metric = LMNN(k=4, min_iter=50, max_iter=1000, learn_rate=1e-7)
    metric.fit(train_reduced, y_train)
    train_projected = metric.transform(train_reduced)
    test_projected = metric.transform(test_reduced)
    # 4-NN in the projected space
    classifier = KNeighborsClassifier(n_neighbors=4)
    classifier.fit(train_projected, y_train)
    #pickle.dump(ml, open('dist_metrics', 'w'))
    return classifier.predict(test_projected)
class KNNClassifier(BaseEstimator, ClassifierMixin):
    # k-NN classifier in an LMNN-learned metric space: LMNN transforms the
    # data so that plain squared-Euclidean distance in the transformed
    # space corresponds to the learned metric.

    def __init__(self, k=1):
        self.k = k
        self.distanceEstimator = LMNN(k=k)

    def fit(self, X, y):
        #TODO msati3: Ideally, LMNN should expose fit_transform.
        self.distanceEstimator.fit(X, y)
        # Cache the transformed training data and labels for prediction.
        self.modelData = self.distanceEstimator.transform(X)
        self.modelLabels = y
        return self

    def transform(self, X):
        return self.distanceEstimator.transform(X)

    def predict(self, D):
        X = self.transform(D)  #Pretransform so that euclidean metric suffices
        distances = distance.cdist(X, self.modelData,'sqeuclidean')
        # Partial sort: indices of the k smallest distances per row.
        # NOTE(review): bn.argpartsort is the old bottleneck API; newer
        # releases renamed it to argpartition -- confirm the pinned version.
        topKIndexes = bn.argpartsort(distances, self.k)[:,:self.k]
        predictions = self.modelLabels[topKIndexes]
        # Majority vote among the k neighbours.
        return stats.mode(predictions, axis=1)[0]

    def score(self, X, y, fNormalize=True):
        # NOTE(review): accuracy_score expects (y_true, y_pred) and, in
        # recent sklearn, `normalize` is keyword-only, so the positional
        # fNormalize would raise there; confirm the pinned version.
        return accuracy_score(self.predict(X), y, fNormalize)
def lmnn_fit(X_train, Y_train, X_test, Y_test, color_map):
    """Fit LMNN (PCA init) on the training data and return the transformed
    (train, test) pair. When the inputs are 2-D, also save a scatter plot
    of each transformed split."""
    model = LMNN(init='pca', k=3, learn_rate=5e-4, max_iter=500000,
                 regularization=0.2)
    model.fit(X_train, Y_train)
    train_transformed = model.transform(X_train)
    if X_train.shape[1] == 2:
        plt.figure()
        plt.scatter(train_transformed[:, 0], train_transformed[:, 1],
                    c=color_map[Y_train], s=2)
        plt.savefig("after_lmnn_transform_train.png", dpi=300)
    test_transformed = model.transform(X_test)
    if X_test.shape[1] == 2:
        plt.figure()
        plt.scatter(test_transformed[:, 0], test_transformed[:, 1],
                    c=color_map[Y_test], s=2)
        plt.savefig("after_lmnn_transform_test.png", dpi=300)
    return (train_transformed, test_transformed)
# Baseline: project query/gallery features with PCA (M=500 components,
# per this project's PCA wrapper) and cluster them.
pca = PCA(original_train_features, M=500)
pca.fit()
pca_query_features = pca.project(query_features)
pca_gallery_features = pca.project(gallery_features)
compute_k_mean(num_of_clusters, pca_query_features, pca_gallery_features,
               gallery_labels)

# Compute LMNN (Large Margin Nearest Neighbour) Learning
print("\n-----LMNN------")
lmnn = LMNN(k=5, max_iter=20, use_pca=False, convergence_tol=1e-6,
            learn_rate=1e-6, verbose=True)
lmnn.fit(original_train_features, original_train_labels)
transformed_query_features = lmnn.transform(query_features)
transformed_gallery_features = lmnn.transform(gallery_features)
compute_k_mean(num_of_clusters, transformed_query_features,
               transformed_gallery_features, gallery_labels)

# Compute PCA_LMNN Learning: same LMNN configuration, but fitted on the
# PCA-projected training sample (timed from here).
print("\n-----PCA_LMNN-----")
lmnn = LMNN(k=5, max_iter=20, use_pca=False, convergence_tol=1e-6,
            learn_rate=1e-6, verbose=True)
start_time = time.time()
lmnn.fit(pca.train_sample_projection, original_train_labels)
def lmnn(x_train, y_train, x_test):
    """Learn an LMNN metric on the training data and return the test set
    projected into the learned space."""
    model = LMNN(max_iter=50, k=9, verbose=True)
    print("It is")
    model.fit(x_train, y_train)
    print("done")
    return model.transform(x_test)
def main(params):
    # End-to-end driver: load a dataset split, then learn (or construct)
    # a PSD matrix with the configured algorithm.
    # `params` is a configparser-style section (get / getint / getfloat /
    # getboolean accessors).
    initialize_results_dir(params.get('results_dir'))
    backup_params(params, params.get('results_dir'))
    print('>>> loading data...')
    X_train, y_train, X_test, y_test = LoaderFactory().create(
        name=params.get('dataset'),
        root=params.get('dataset_dir'),
        random=True,
        seed=params.getint('split_seed'))()
    print('<<< data loaded')
    print('>>> computing psd matrix...')
    # Dispatch on the configured algorithm; each branch produces a PSD
    # (Mahalanobis) matrix over the training features.
    if params.get('algorithm') == 'identity':
        # Baseline: identity matrix, i.e. plain Euclidean distance.
        psd_matrix = np.identity(X_train.shape[1], dtype=X_train.dtype)
    elif params.get('algorithm') == 'nca':
        nca = NCA(init='auto', verbose=True,
                  random_state=params.getint('algorithm_seed'))
        nca.fit(X_train, y_train)
        psd_matrix = nca.get_mahalanobis_matrix()
    elif params.get('algorithm') == 'lmnn':
        lmnn = LMNN(init='auto', verbose=True,
                    random_state=params.getint('algorithm_seed'))
        lmnn.fit(X_train, y_train)
        psd_matrix = lmnn.get_mahalanobis_matrix()
    elif params.get('algorithm') == 'itml':
        itml = ITML_Supervised(verbose=True,
                               random_state=params.getint('algorithm_seed'))
        itml.fit(X_train, y_train)
        psd_matrix = itml.get_mahalanobis_matrix()
    elif params.get('algorithm') == 'lfda':
        lfda = LFDA()
        lfda.fit(X_train, y_train)
        psd_matrix = lfda.get_mahalanobis_matrix()
    elif params.get('algorithm') == 'arml':
        # 'arml': the project's TripleLearner (semantics defined elsewhere);
        # configured entirely from `params`.
        learner = TripleLearner(
            optimizer=params.get('optimizer'),
            optimizer_params={
                'lr': params.getfloat('lr'),
                'momentum': params.getfloat('momentum'),
                'weight_decay': params.getfloat('weight_decay'),
            },
            criterion=params.get('criterion'),
            criterion_params={'calibration': params.getfloat('calibration')},
            n_epochs=params.getint('n_epochs'),
            batch_size=params.getint('batch_size'),
            random_initialization=params.getboolean('random_initialization',
                                                    fallback=False),
            update_triple=params.getboolean('update_triple', fallback=False),
            device=params.get('device'),
            seed=params.getint('learner_seed'))
        psd_matrix = learner(X_train, y_train,
                             n_candidate_mins=params.getint('n_candidate_mins',
                                                            fallback=1))
    else:
        raise Exception('unsupported algorithm')
    print('<<< psd matrix got')
# Persist the learned PSD matrix alongside the other results.
np.savetxt(os.path.join(params.get('results_dir'), 'psd_matrix.txt'),
           psd_matrix)
print('Data Preparation Done', '\n') #print(FSTrainData.max(axis=0) - FSTrainData.min(axis=0)) #print(len(FSTrainData[0])) #print(len(FSTestData[0])) #print(len(FSTestData)) #print(len(TestData)) #print(TrainData) #print(type(TrainData)) #print(TrainLabels) #print(type(TrainLabels)) if Method == 'LMNN': print("Method: LMNN", '\n') lmnn = LMNN(k=3, learn_rate=1e-6, verbose=False) x = lmnn.fit(FSTrainData, TrainLabels) TFSTestData = x.transform(FSTestData) print('Transformation Done', '\n') elif Method == 'COV': print("Method: COV", '\n') cov = Covariance().fit(FSTrainData) TFSTestData = cov.transform(FSTestData) print('Transformation Done', '\n') elif Method == 'ITML': print("Method: ITML", '\n') itml = ITML_Supervised(num_constraints=200, A0=None) x = itml.fit(FSTrainData, TrainLabels) TFSTestData = x.transform(FSTestData) print('Transformation Done', '\n')
# Accuracy summary tables: two rows per dataset (row 2i: LMNN-transformed
# features, row 2i+1: raw features), one column per classifier.
Result_of_acc_ave = np.zeros([len(datasets) * 2, len(classifiers)])
Result_of_acc_std = np.zeros([len(datasets) * 2, len(classifiers)])
for i in range(len(datasets)):
    print(datasets[i])
    new_path = os.path.join('.\data', datasets[i])
    Data_Origi, DataLabel, n_samples, n_attr, n_class = PF.Load_Data(new_path)
    # Min-max normalization
    scaler = MinMaxScaler()
    scaler.fit(Data_Origi)
    Data_Origi = scaler.transform(Data_Origi)
    for l in range(2):
        if l == 0:
            # Metric learning: transform the data with LMNN first.
            lmnn = LMNN(k=5, learn_rate=1e-6)
            lmnn.fit(Data_Origi, DataLabel)
            Data_trans = lmnn.transform(Data_Origi)
        else:
            # Second pass: the raw (un-transformed) data as a baseline.
            Data_trans = Data_Origi
        # Homogeneity fusion: distance matrix -> noise-label comparison ->
        # modified affinity propagation (project helpers in PF).
        Dis_Matrix = PF.Calcu_Dis(Data_trans)
        CompareMatrix = PF.CompareNoiseLabel(Dis_Matrix, DataLabel)
        Cluster_Checked = PF.Affinity_propagatio_Modify(CompareMatrix)
        lap_ratio = PF.Count(Cluster_Checked, set_vlaue, n_samples)
        Result_of_Upper[i, l] = 1 - lap_ratio
        # Cross-validated accuracy of every classifier on this variant.
        for j in range(len(classifiers)):
            print(classifiers[j])
            clf = classifiers[j]
            scores = cross_val_score(clf, Data_trans, DataLabel, cv=cv)
            Result_of_acc_ave[2 * i + l, j] = scores.mean()
# NOTE(review): the shogun imports below are unused in this snippet --
# presumably left over from a benchmark comparison; confirm before removing.
from modshogun import LMNN as shogun_LMNN
from modshogun import RealFeatures, MulticlassLabels
import numpy as np
from metric_learn import LMNN
from sklearn.datasets import load_iris

# Fit metric-learn's LMNN on the iris dataset.
iris_data = load_iris()
X = iris_data['data']
Y = iris_data['target']
lmnn = LMNN(k=5, learn_rate=1e-6)
# NOTE(review): passing verbose to fit() is the pre-0.5 metric-learn API.
lmnn.fit(X, Y, verbose=False)
# Vectorize the newsgroup documents and densify the sparse matrix so the
# metric learners can consume it. (Python 2 code.)
vectors = vectorizer.transform(data)
print 'vectorizer is ' ,vectors[0].todense()
itml=ITML()
arr2=copy(vectors.todense())
# Element-by-element copy of the dense matrix into a plain ndarray.
arr=np.zeros((vectors.shape[0],vectors.shape[1]))
for i in range(0,vectors.shape[0]):
    for j in range(0,vectors.shape[1]):
        arr[i,j]=arr2[i,j]
print 'arr .shape is ',arr.shape
target=newsgroups_train.target
# Copy the labels into a fresh array.
lab=[]
for i in target:
    lab.append(i)
lab=np.asarray(lab)
print 'lab is ',(lab)
print 'target is ',type(arr)
#C=itml.prepare_constraints(target,vectors.shape[0],200)
#itml.fit(arr,C,verbose=False)
# Learn an LMNN metric on the densified vectors and save the transformer.
lmnn = LMNN(k=20, learn_rate=1e-3,use_pca=True)
lmnn.fit(arr,target,verbose=False)
print 'Now doing LMNN'
l=lmnn.transformer()
np.save('LMNN transformer',l)
# Shift features to be non-negative for MultinomialNB. (Python 2 code.)
nonneg_train_x = train_x - train_x.min()
nonneg_test_x = test_x - test_x.min()
mnb.fit(nonneg_train_x, train_y)
mnbrec.append( float((mnb.predict(nonneg_test_x) == test_y).sum())/ len(test_y) )
bnb.fit(train_x, train_y)
bnbrec.append( float((bnb.predict(test_x) == test_y).sum())/ len(test_y) )
svm.fit(train_x, train_y)
svmrec.append( float((svm.predict(test_x) == test_y).sum())/ len(test_y) )
# Reduce to 20 PCA components before LMNN.
_ = PCA(n_components=20).fit(train_x)
train_x = _.transform(train_x)
test_x = _.transform(test_x)
print train_x.shape
# L is the learned linear transformation (.L attribute of the old API);
# evaluate k-NN (K=5) in the projected space.
L = lmnn.fit(train_x, train_y, verbose=True).L
lmnnrec.append( knn(np.dot(train_x, L), train_y, np.dot(test_x, L), test_y, K=5) )
print '\tSVM accuracy: {} = {}'.format(svmrec, np.mean(svmrec))
print '\tLMNN accuracy: {} = {}'.format(lmnnrec, np.mean(lmnnrec))
print '\tGaussianNB accuracy: {} = {}'.format(gnbrec, np.mean(gnbrec))
print '\tMultinomiaNB accuracy: {} = {}'.format(mnbrec, np.mean(mnbrec))
print '\tBernoulliNB accuracy: {} = {}'.format(bnbrec, np.mean(bnbrec))
# lmnnavr.append(np.mean(lmnnrec))
# gnbavr.append(np.mean(gnbrec))
# svmavr.append(np.mean(svmrec))
# svmavr = []
# lmnnavr = []
model.val_indices, batch_size=256, num_workers=16) tembeds = torch.from_numpy(tembeds).cuda() tlabels = torch.from_numpy(tlabels).cuda() vembeds = torch.from_numpy(vembeds).cuda() vlabels = torch.from_numpy(vlabels).cuda() # run LMNN if n > 1: lmnn = LMNN(k=get_k(tlabels.cpu().numpy()), learn_rate=1e-4, verbose=True, max_iter=5000) lmnn.fit(tembeds.cpu().numpy(), tlabels.cpu().numpy()) W_cuda = torch.from_numpy(lmnn.components_.T).cuda().float() #top1 knn top1_knn_before, top3_knn_before = run_simulation( tembeds, tlabels, vembeds, vlabels) if n > 1: # transform into LMNN found space tembeds = torch.matmul(tembeds, W_cuda) vembeds = torch.matmul(vembeds, W_cuda) # top1 lmnn top1_lmnn_before, top3_lmnn_before = run_simulation( tembeds, tlabels, vembeds, vlabels) else: top1_lmnn_before, top3_lmnn_before = 0, 0
# Build degree-4 cross-term statistics of the modes and learn a 2-D LMNN
# embedding that separates the three 16-sample groups.
stat = Identity(degree=4, cross=True)
mode = stat.statistics([[mode[i, :]] for i in range(mode.shape[0])])
print(mode.shape)
# Three classes of 16 consecutive samples each.
label = [1 for i in range(16)] + [2 for i in range(16)
                                  ] + [3 for i in range(16)]
print(label)
from metric_learn import LMNN
metric = LMNN(init='auto', k=6, min_iter=10000, max_iter=50000,
              convergence_tol=1e-6, learn_rate=1e-10, regularization=.5,
              n_components=2)
metric.fit(mode, label)
L = metric.components_
# Persist the learned projection, reload it, and embed the modes.
np.savez('L_all_3_cross_parameters.npz', L=L)
L = np.load('L_all_3_cross_parameters.npz')['L']
mode_lmnn = mode.dot(L.T)
print(mode_lmnn.shape)
import pylab as plt
plt.figure()
plt.plot(mode_lmnn[:16, 0], mode_lmnn[:16, 1], 'k*',
         label='Patients with COPD')
plt.plot(mode_lmnn[16:32, 0], mode_lmnn[16:32, 1],
X_gallery_pca, camId_gallery, y_gallery, metric ='mahalanobis', parameters = M) rank_accuracies_l_2.append(rank_accuracies) mAP_l_2.append(mAP) metric_l_2.append('Learnt Mahalanobis (Red. Set)') # In[24]: from metric_learn import LMNN lmnn = LMNN(k=3, learn_rate=1e-6, max_iter=50) lmnn.fit(X_train_pca, y_train) M = lmnn.metric() print ('Metric learnt') rank_accuracies, mAP = evaluate_metric(X_query_pca, camId_query, y_query, X_gallery_pca, camId_gallery, y_gallery, metric ='mahalanobis', parameters = M) rank_accuracies_l_2.append(rank_accuracies) mAP_l_2.append(mAP)
import numpy as np

# Materialize the splits as arrays for metric-learn.
X_train = np.array(X_train)
Y_train = np.array(Y_train)
X_test = np.array(X_test)
Y_test = np.array(Y_test)
## tuning here ...
scores = []
#for i in range(1,5):
#print("current k is ",i)
# Learn an LMNN metric, project both splits, and evaluate 40-NN accuracy.
lmnn2 = LMNN(k=5, learn_rate=1e-6)
#.fit(X_train,Y_train)
print("here2")
print(lmnn2)
lmnn2 = lmnn2.fit(X_train, Y_train)
print("hi")
X_train2 = lmnn2.transform(X_train)
X_test2 = lmnn2.transform(X_test)
kn2 = KNeighborsClassifier(n_neighbors=40).fit(X_train2, Y_train)
predict = kn2.predict(X_test2)
lmnn_acc = accuracy_score(Y_test, predict)
print("lmnn accuracy is ", lmnn_acc)
#scores.append(lmnn_acc)
#print("the scores are ",scores)
#k=np.argmax(scores)+1
#%%using kernal pca
from scipy.spatial.distance import pdist, squareform
from scipy import exp
from scipy.linalg import eigh
def test_lmnn(self):
    # The Mahalanobis matrix must equal L^T L for the learned transformer L.
    model = LMNN(k=5, learn_rate=1e-6, verbose=False)
    model.fit(self.X, self.y)
    transformer = model.transformer_
    assert_array_almost_equal(transformer.T.dot(transformer),
                              model.get_mahalanobis_matrix())
# Embed the whole training set with the trained encoder, fit LMNN on the
# embeddings, and pickle the fitted model under checkpoints/lmnn/.
loader = DataLoader(dataset=trainset,
                    batch_size=args.batch_size,
                    shuffle=False,
                    num_workers=args.num_workers,
                    pin_memory=True)
embs = []
labels = []
# Inference only: no gradients needed while embedding.
with torch.no_grad():
    for i, batch in tqdm(enumerate(loader, 1), total=len(loader),
                         desc='embedding'):
        if torch.cuda.is_available():
            data, label = batch[0].cuda(), batch[1]
        else:
            data, label = batch
        data_emb = model.encoder(data)
        embs.append(data_emb)
        labels.append(label)
# Concatenate the per-batch results and move them to host numpy arrays.
embs = torch.cat(embs).cpu().numpy()
labels = torch.cat(labels).numpy()
lmnn = LMNN(verbose=True)
print('fitting data....')
lmnn.fit(embs, labels)
print('fitting data finished.')
directory = 'checkpoints/lmnn/'
if not osp.exists(directory):
    os.makedirs(directory)
joblib.dump(lmnn, osp.join(directory, '%s.pkl' % args.filename))
c=_tango_color(prototype_label), marker='.') p.axis('equal') y = [] x = [] with open('segmentation.data') as f: for line in f: v = line.split(',') y.append(v[0]) x.append(v[1:]) x = np.asarray(x, dtype='float64') y = np.asarray(y) lmnn = LMNN(k=5, learn_rate=1e-6) lmnn.fit(x, y) x_t = lmnn.transform(x) p1 = plt.subplot(231) p1.scatter(x_t[:, 0], x_t[:, 1], c=_to_tango_colors(y, 0)) p1.axis('equal') p1.set_title('LMNN') # GLVQ glvq = GlvqModel() glvq.fit(x, y) p2 = plt.subplot(232) p2.set_title('GLVQ') plot(PCA().fit_transform(x), y, glvq.predict(x), glvq.w_, glvq.c_w_, p2) # GRLVQ
def test_lmnn(self):
    # components_ holds L; the Mahalanobis matrix must equal L^T L.
    model = LMNN(k=5, learn_rate=1e-6, verbose=False)
    model.fit(self.X, self.y)
    components = model.components_
    assert_array_almost_equal(components.T.dot(components),
                              model.get_mahalanobis_matrix())
# Earlier shogun-based variant of this experiment, kept for reference:
# from modshogun import LMNN as shogun_LMNN
# from modshogun import RealFeatures, MulticlassLabels
# import numpy as np
from metric_learn import LMNN
from sklearn.datasets import load_iris

# Fit metric-learn's LMNN on the iris dataset (old API: verbose on fit()).
iris_data = load_iris()
X, Y = iris_data['data'], iris_data['target']
lmnn = LMNN(k=5, learn_rate=1e-6)
lmnn.fit(X, Y, verbose=False)
def test_lmnn(self):
    # Old metric-learn API: metric() must equal L^T L where
    # L = transformer().
    model = LMNN(k=5, learn_rate=1e-6, verbose=False)
    model.fit(self.X, self.y)
    transformer = model.transformer()
    assert_array_almost_equal(transformer.T.dot(transformer),
                              model.metric())
def test_lmnn(self):
    # Mixed-era API: metric() must equal L^T L where L = transformer_.
    model = LMNN(k=5, learn_rate=1e-6, verbose=False)
    model.fit(self.X, self.y)
    transformer = model.transformer_
    assert_array_almost_equal(transformer.T.dot(transformer),
                              model.metric())