def sandwich_demo():
    """Plot the sandwich data in input space and after four metric learners.

    The top row of a 3x2 subplot grid shows the raw data together with the
    neighbourhood graph obtained from ``nearest_neighbors(x, k=2)``.  The four
    cells below show the same plot after fitting LMNN, ITML, SDML and LSML
    (in that order) and transforming the data.  Ends with ``pyplot.show()``.
    """
    x, y = sandwich_data()
    knn = nearest_neighbors(x, k=2)

    ax = pyplot.subplot(3, 1, 1)  # take the whole top row
    plot_sandwich_data(x, y, ax)
    plot_neighborhood_graph(x, knn, y, ax)
    ax.set_title('input space')
    ax.set_aspect('equal')
    ax.set_xticks([])
    ax.set_yticks([])

    num_constraints = 60
    # Each entry pairs a learner with the positional arguments of its fit().
    mls = [
        (LMNN(), (x, y)),
        (ITML(), (x, ITML.prepare_constraints(y, len(x), num_constraints))),
        (SDML(), (x, SDML.prepare_constraints(y, len(x), num_constraints))),
        (LSML(), (x, LSML.prepare_constraints(y, num_constraints))),
    ]

    # Cells 3..6 of the 3x2 grid are the two rows below the input-space plot.
    # enumerate(mls, 3) numbers them exactly like zip(xrange(3, 7), mls) did,
    # but does not depend on the Python-2-only xrange builtin.
    for ax_num, (ml, args) in enumerate(mls, 3):
        ml.fit(*args)
        tx = ml.transform()
        ml_knn = nearest_neighbors(tx, k=2)
        ax = pyplot.subplot(3, 2, ax_num)
        plot_sandwich_data(tx, y, ax)
        plot_neighborhood_graph(tx, ml_knn, y, ax)
        ax.set_title('%s space' % ml.__class__.__name__)
        ax.set_xticks([])
        ax.set_yticks([])

    pyplot.show()
def test_preprocessor_weakly_supervised(preprocessor, tuples, y_tuples):
    """Fit a weakly supervised learner (ITML) using the given preprocessor.

    Exercises the different ways of supplying the ``preprocessor`` argument
    (an array, a class callable, a function callable).  The test succeeds when
    construction and ``fit`` complete without raising.
    """
    learner = ITML(preprocessor=preprocessor)
    learner.fit(tuples, y_tuples)
def test_iris(self):
    """ITML fitted on the iris fixture must give class separation < 0.4."""
    n_points = self.iris_points.shape[0]
    n_constraints = 200
    constraints = ITML.prepare_constraints(
        self.iris_labels, n_points, n_constraints)

    # fit() returns the estimator that is then used for transform().
    fitted = ITML().fit(self.iris_points, constraints, verbose=False)
    transformed = fitted.transform()

    csep = class_separation(transformed, self.iris_labels)
    self.assertLess(csep, 0.4)  # it's not great
def doClustering(X = None, y = None, initial = False, silent = True, numClusters = 4):
    """Cluster the transformed data, optionally after learning an ITML metric.

    Parameters
    ----------
    X, y : ignored
        Both are overwritten by ``rd.readTransformedData()`` before use.
    initial : bool
        When it compares equal to ``False``, feedback pairs are read with
        ``rd.readFeedbackData()``, an ITML metric is fitted on them and the
        feature matrix is transformed before clustering.
    silent : bool
        When falsy, progress markers are printed.
    numClusters : int
        Number of clusters passed to KMeans.

    Returns
    -------
    int
        Always ``1``; the result is handed to ``writeClusteringResult``.
    """
    # Algorithm switches: KMeans is hard-wired; OPTICS and DBSCAN are kept as
    # fallbacks that only run if these flags are edited.
    use_kmeans = True
    use_optics = False

    if not silent:
        print("- doClustering")

    X, y = rd.readTransformedData()
    features = X.iloc[:, 0:].values

    # metric learning
    # NOTE: kept as an equality test so that e.g. initial=None still skips
    # this branch, exactly as before.
    if initial == False:
        votesX, votesY = rd.readFeedbackData()
        pairs = [
            (features[row["id_punkt1"]], features[row["id_punkt2"]])
            for _, row in votesX.iterrows()
        ]
        metric = ITML()
        metric.fit(pairs, votesY)
        if not silent:
            print("Transform")
        features = metric.transform(features)

    if use_kmeans:
        model = KMeans(n_clusters=numClusters, random_state=0).fit(features)
        labels = model.labels_
        core_samples_mask = [0] * len(y)
    elif use_optics:
        model = OPTICS(min_samples=30, xi=.05)
        model.fit(features)
        labels = model.labels_
        core_samples_mask = [0] * len(y)
    else:
        model = DBSCAN(eps=0.6, min_samples=5).fit(features)
        core_samples_mask = np.zeros_like(model.labels_, dtype=bool)
        core_samples_mask[model.core_sample_indices_] = True
        labels = model.labels_

    # y is passed on as the "true" labels in every branch.
    writeClusteringResult(features, labels, y, core_samples_mask)

    if not silent:
        print("+ doClustering")
    return 1
def test_bounds_parameters_valid(bounds):
    """Check that ``bounds`` is accepted by ITML and ITML_Supervised.

    Any array-like of two elements should be usable as ``bounds``; the test
    succeeds when neither ``fit`` call raises.

    NOTE(review): nothing here inspects the fitted bounds attribute; add an
    assertion if its type (numpy array) is meant to be checked.
    """
    # Weakly supervised learner: two pairs, one similar and one dissimilar.
    y_pairs = [1, -1]
    pairs = np.array([[[-10., 0.], [10., 0.]],
                      [[0., 50.], [0., -60]]])
    ITML().fit(pairs, y_pairs, bounds=bounds)

    # Supervised learner: four labelled points.
    y = np.array([1, 0, 1, 0])
    X = np.array([[0, 0], [0, 1], [2, 0], [2, 1]])
    ITML_Supervised().fit(X, y, bounds=bounds)
def test_bounds_parameters_invalid(bounds):
    """Check that an invalid ``bounds`` makes both learners raise.

    ``bounds`` is expected to be something that is not array-like, or an
    array-like whose length differs from 2.  Both the weakly supervised and
    the supervised ``fit`` must raise some ``Exception``.
    """
    # Weakly supervised learner: two pairs, one similar and one dissimilar.
    y_pairs = [1, -1]
    pairs = np.array([[[-10., 0.], [10., 0.]],
                      [[0., 50.], [0., -60]]])
    pair_learner = ITML()
    with pytest.raises(Exception):
        pair_learner.fit(pairs, y_pairs, bounds=bounds)

    # Supervised learner: four labelled points.
    y = np.array([1, 0, 1, 0])
    X = np.array([[0, 0], [0, 1], [2, 0], [2, 1]])
    supervised_learner = ITML_Supervised()
    with pytest.raises(Exception):
        supervised_learner.fit(X, y, bounds=bounds)
def sandwich_demo():
    """Draw the sandwich dataset before and after metric learning.

    A 3x2 subplot grid is used: the first row (spanning both columns) holds
    the untransformed data with the neighbourhood graph computed by
    ``nearest_neighbors(x, k=2)``; the remaining four cells hold the data
    transformed by LMNN, ITML, SDML and LSML respectively.  The figure is
    displayed with ``pyplot.show()``.
    """
    x, y = sandwich_data()
    knn = nearest_neighbors(x, k=2)

    ax = pyplot.subplot(3, 1, 1)  # take the whole top row
    plot_sandwich_data(x, y, ax)
    plot_neighborhood_graph(x, knn, y, ax)
    ax.set_title('input space')
    ax.set_aspect('equal')
    ax.set_xticks([])
    ax.set_yticks([])

    num_constraints = 60
    # (learner, fit-arguments) pairs, plotted in this order.
    mls = [
        (LMNN(), (x, y)),
        (ITML(), (x, ITML.prepare_constraints(y, len(x), num_constraints))),
        (SDML(), (x, SDML.prepare_constraints(y, len(x), num_constraints))),
        (LSML(), (x, LSML.prepare_constraints(y, num_constraints))),
    ]

    # Subplot indices start at 3 because indices 1-2 are covered by the top
    # row.  enumerate replaces zip(xrange(3, 7), mls): same indices, and it
    # works on Python 3 where xrange is undefined.
    for ax_num, (ml, args) in enumerate(mls, 3):
        ml.fit(*args)
        tx = ml.transform()
        ml_knn = nearest_neighbors(tx, k=2)
        ax = pyplot.subplot(3, 2, ax_num)
        plot_sandwich_data(tx, y, ax)
        plot_neighborhood_graph(tx, ml_knn, y, ax)
        ax.set_title('%s space' % ml.__class__.__name__)
        ax.set_xticks([])
        ax.set_yticks([])

    pyplot.show()
def constructSimilartyMatrixITML(self, k=5):
    """Learn an ITML metric and build a k-nearest truncated distance matrix.

    Steps performed:

    1. Fit ITML on ``self.trainVectorsPCA`` with 100 constraints built by
       ``ITML.prepare_constraints`` from ``self.y_train``; store the
       transformer matrix in ``self.L_itml`` and save it with ``np.save``.
    2. Transform the train, test and full data sets.
    3. Scatter-plot a 2-D TSNE projection of the transformed train set and
       save it to ``pp`` as PDF.
    4. Compute pairwise euclidean distances on the transformed full set
       (``self.pwdis``), keep only the ``k`` smallest entries of every row,
       and store each row's sum on the diagonal of ``self.D``.
    5. Call ``self.labelPropogation()``.

    Parameters
    ----------
    k : int
        Number of smallest distances kept per row.
    """
    # print() is called with a single pre-formatted string so the output is
    # byte-identical on Python 2 and Python 3.
    print('Now doing itml')
    num_constraints = 100
    itml = ITML()
    C = ITML.prepare_constraints(
        self.y_train, self.trainVectorsPCA.shape[0], num_constraints)
    itml.fit(self.trainVectorsPCA, C, verbose=True)
    self.L_itml = itml.transformer()
    name = ('itml/ITML transformer matrix with dataset shape '
            + str(self.trainVectorsPCA.shape))
    print('L itml shape is  ' + str(self.L_itml.shape))
    np.save(name, self.L_itml)

    # Input data transformed to the metric space by X*L.T
    self.transformedTrainITML = copy(itml.transform(self.trainVectorsPCA))
    self.transformedTestITML = copy(itml.transform(self.testVectorsPCA))
    self.transformedAllITML = copy(itml.transform(self.allDataPCA))
    # From here on plain euclidean distances on the transformed data are used.

    # Visualizing the dataset by TSNE
    projectedDigits = TSNE(random_state=randomState).fit_transform(
        self.transformedTrainITML)
    plt.scatter(projectedDigits[:, 0], projectedDigits[:, 1], c=self.y_train)
    plt.title(
        'ITML Transformed Train set projected to 2 Dimensions by TSNE with k='
        + str(k) + ' and num_constraints = ' + str(num_constraints))
    plt.savefig(pp, format='pdf')

    self.pwdis = copy(
        pairwise_distances(self.transformedAllITML, metric='euclidean'))
    self.D = np.zeros(self.pwdis.shape)
    for i in range(self.pwdis.shape[0]):
        # Column indices ordered by increasing distance.  A stable sort keeps
        # ties in index order, matching sorted(range(n), key=row.__getitem__).
        by_distance = np.argsort(self.pwdis[i], kind='mergesort')
        # Keep the k smallest entries of the row and zero every other one.
        # NOTE(review): the row's own zero self-distance is normally one of
        # the k kept entries - confirm that this is intended.
        self.pwdis[i, by_distance[k:]] = 0
        self.D[i, i] = sum(self.pwdis[i])

    print('accuracy for ITML\n')
    self.labelPropogation()
def constructSimilartyMatrixITML(self, k=5):
    """Fit ITML, transform the data and build a truncated distance matrix.

    The method
    - fits ITML on ``self.trainVectorsPCA`` using 100 constraints produced by
      ``ITML.prepare_constraints`` from ``self.y_train``;
    - stores the transformer matrix in ``self.L_itml`` and writes it to disk
      with ``np.save``;
    - stores the transformed train / test / full data sets on ``self``;
    - saves a scatter plot of the TSNE projection of the transformed train
      set to ``pp`` (PDF);
    - fills ``self.pwdis`` with pairwise euclidean distances of the
      transformed full set, zeroing all but the ``k`` smallest entries of
      each row, and ``self.D`` with the row sums on its diagonal;
    - finally calls ``self.labelPropogation()``.

    Parameters
    ----------
    k : int
        How many of the smallest distances are kept in each row.
    """
    # Every print() receives one already formatted string, which yields the
    # same text under Python 2 and Python 3.
    print('Now doing itml')
    num_constraints = 100
    itml = ITML()
    n_train = self.trainVectorsPCA.shape[0]
    C = ITML.prepare_constraints(self.y_train, n_train, num_constraints)
    itml.fit(self.trainVectorsPCA, C, verbose=True)
    self.L_itml = itml.transformer()
    name = ('itml/ITML transformer matrix with dataset shape '
            + str(self.trainVectorsPCA.shape))
    print('L itml shape is  ' + str(self.L_itml.shape))
    np.save(name, self.L_itml)

    # Input data transformed to the metric space by X*L.T
    self.transformedTrainITML = copy(itml.transform(self.trainVectorsPCA))
    self.transformedTestITML = copy(itml.transform(self.testVectorsPCA))
    self.transformedAllITML = copy(itml.transform(self.allDataPCA))
    # Euclidean distances are computed on the transformed data below.

    # Visualizing the dataset by TSNE
    projectedDigits = TSNE(random_state=randomState).fit_transform(
        self.transformedTrainITML)
    plt.scatter(projectedDigits[:, 0], projectedDigits[:, 1], c=self.y_train)
    plt.title(
        'ITML Transformed Train set projected to 2 Dimensions by TSNE with k='
        + str(k) + ' and num_constraints = ' + str(num_constraints))
    plt.savefig(pp, format='pdf')

    self.pwdis = copy(
        pairwise_distances(self.transformedAllITML, metric='euclidean'))
    self.D = np.zeros(self.pwdis.shape)
    n_rows = self.pwdis.shape[0]
    for i in range(n_rows):
        # Stable argsort: ties stay in index order, the same ordering the
        # former sorted(range(n), key=...) call produced.
        order = np.argsort(self.pwdis[i], kind='mergesort')
        # Everything beyond the k smallest distances of this row is zeroed.
        # NOTE(review): the zero self-distance usually occupies one of the k
        # kept slots - verify that this is the desired neighbour count.
        self.pwdis[i, order[k:]] = 0
        self.D[i, i] = sum(self.pwdis[i])

    print('accuracy for ITML\n')
    self.labelPropogation()
def constructSimilartyMatrixITML(self):
    """Run the ITML + truncated-distance pipeline over a grid of settings.

    For every ``k`` in a fixed list and every ``num_constraints`` value
    (currently only 100) the method

    1. fits a fresh ITML on ``self.trainVectorsPCA`` with constraints built
       by ``ITML.prepare_constraints`` from the flattened ``self.y_train``;
    2. stores a copy of the transformer matrix in ``self.L_itml`` and saves
       it with ``np.save`` under a name containing the data shape, ``k`` and
       ``num_constraints``;
    3. transforms the train, test and full data sets;
    4. scatter-plots a 2-D TSNE projection of the transformed full set
       (coloured by ``self.labels``) into a new figure, saves it to ``pp``
       as PDF and closes the figure;
    5. computes pairwise euclidean distances on the transformed full set
       (``self.pwdis``), keeps only the ``k`` smallest entries per row and
       writes each row's sum to the diagonal of ``self.D``;
    6. calls ``self.labelPropogation()``.
    """
    # print() is always given a single pre-formatted string so that the
    # output is byte-identical on Python 2 and Python 3.
    print('Now doing itml')
    ks = [
        3, 5, 7, 10, 12, 15, 20, 22, 25, 27, 30, 33, 35, 37, 40, 43, 45, 47,
        50, 53, 55, 57, 60, 65
    ]
    # Only 100 constraints are evaluated at present; 80, 120, 150, 180 and
    # 200 were the other candidate values.
    constraints = [100]
    for k in ks:
        for num_constraints in constraints:
            itml = ITML()
            self.y_train = self.y_train.reshape(-1, )
            C = ITML.prepare_constraints(self.y_train,
                                         self.trainVectorsPCA.shape[0],
                                         num_constraints)
            itml.fit(self.trainVectorsPCA, C, verbose=True)
            self.L_itml = copy(itml.transformer())
            name = ('itml/ITML transformer matrix with dataset shape '
                    + str(self.trainVectorsPCA.shape)
                    + ' and k=' + str(k)
                    + ' and num_constraints=' + str(num_constraints))
            np.save(name, self.L_itml)

            # Input data transformed to the metric space by X*L.T
            self.transformedTrainITML = copy(
                itml.transform(self.trainVectorsPCA))
            self.transformedTestITML = copy(
                itml.transform(self.testVectorsPCA))
            self.transformedAllITML = copy(itml.transform(self.allDataPCA))
            # Euclidean distances are computed on the transformed data below.

            # Visualizing the dataset by TSNE
            projectedDigits = TSNE(random_state=randomState).fit_transform(
                self.transformedAllITML)
            print('projectedDigits is  ' + str(projectedDigits.shape))
            plt.figure()
            plt.scatter(projectedDigits[:, 0], projectedDigits[:, 1],
                        c=self.labels)
            # The title now carries the "k=" label; previously the value of
            # k was glued directly onto the word "with".
            plt.title(
                'ITML Transformed ALL set projected to 2 Dimensions by TSNE'
                ' with k=' + str(k)
                + ' and num_constraints=' + str(num_constraints))
            plt.savefig(pp, format='pdf')
            plt.close()

            self.pwdis = copy(
                pairwise_distances(self.transformedAllITML,
                                   metric='euclidean'))
            self.D = np.zeros(self.pwdis.shape)
            for i in range(self.pwdis.shape[0]):
                # Column indices by increasing distance; the stable sort
                # keeps ties in index order like the former sorted() call.
                order = np.argsort(self.pwdis[i], kind='mergesort')
                # These are raw distances (no exponentiation is applied), so
                # the k smallest entries are kept and all others zeroed.
                self.pwdis[i, order[k:]] = 0
                self.D[i, i] = sum(self.pwdis[i])

            print('accuracy for ITML for k=  %s  and num_constraints=%s \n'
                  % (k, num_constraints))
            self.labelPropogation()
def fit(self, X, y):
    """Fit on labelled data by first turning the labels into constraints.

    ``ITML.prepare_constraints`` is called with ``y``, ``len(X)`` and the
    ``NUM_CONSTRAINTS`` constant (defined outside this method); the result is
    forwarded to the parent class' ``fit``, whose return value is returned.
    """
    constraints = ITML.prepare_constraints(y, len(X), NUM_CONSTRAINTS)
    parent = super(ITML_sk, self)
    return parent.fit(X, constraints)
# anyways c, target = shuffle(c, target, random_state=SEED) if with_preprocessor: # if preprocessor, we build a 2D array of quadruplets of indices return Dataset(c, target, X, c[:, 0]) else: # if not, we build a 3D array of quadruplets of samples return Dataset(X[c], target, None, X[c[:, 0]]) quadruplets_learners = [(LSML(), build_quadruplets)] ids_quadruplets_learners = list(map(lambda x: x.__class__.__name__, [learner for (learner, _) in quadruplets_learners])) pairs_learners = [(ITML(max_iter=2), build_pairs), # max_iter=2 to be faster (MMC(max_iter=2), build_pairs), # max_iter=2 to be faster (SDML(prior='identity', balance_param=1e-5), build_pairs)] ids_pairs_learners = list(map(lambda x: x.__class__.__name__, [learner for (learner, _) in pairs_learners])) classifiers = [(Covariance(), build_classification), (LFDA(), build_classification), (LMNN(), build_classification), (NCA(), build_classification), (RCA(), build_classification), (ITML_Supervised(max_iter=5), build_classification), (LSML_Supervised(), build_classification), (MMC_Supervised(max_iter=5), build_classification), (RCA_Supervised(num_chunks=5), build_classification),
c, target = shuffle(c, target, random_state=SEED) if with_preprocessor: # if preprocessor, we build a 2D array of quadruplets of indices return Dataset(c, target, X, c[:, 0]) else: # if not, we build a 3D array of quadruplets of samples return Dataset(X[c], target, None, X[c[:, 0]]) quadruplets_learners = [(LSML(), build_quadruplets)] ids_quadruplets_learners = list( map(lambda x: x.__class__.__name__, [learner for (learner, _) in quadruplets_learners])) pairs_learners = [ (ITML(), build_pairs), (MMC(max_iter=2), build_pairs), # max_iter=2 for faster (SDML(), build_pairs), ] ids_pairs_learners = list( map(lambda x: x.__class__.__name__, [learner for (learner, _) in pairs_learners])) classifiers = [(Covariance(), build_classification), (LFDA(), build_classification), (LMNN(), build_classification), (NCA(), build_classification), (RCA(), build_classification), (ITML_Supervised(max_iter=5), build_classification), (LSML_Supervised(), build_classification), (MMC_Supervised(max_iter=5), build_classification), (RCA_Supervised(num_chunks=10), build_classification), (SDML_Supervised(), build_classification)]
c, target = shuffle(c, target, random_state=SEED) if with_preprocessor: # if preprocessor, we build a 2D array of quadruplets of indices return Dataset(c, target, X, c[:, 0]) else: # if not, we build a 3D array of quadruplets of samples return Dataset(X[c], target, None, X[c[:, 0]]) quadruplets_learners = [(LSML(), build_quadruplets)] ids_quadruplets_learners = list( map(lambda x: x.__class__.__name__, [learner for (learner, _) in quadruplets_learners])) pairs_learners = [ (ITML(max_iter=2), build_pairs), # max_iter=2 to be faster (MMC(max_iter=2), build_pairs), # max_iter=2 to be faster (SDML(prior='identity', balance_param=1e-5), build_pairs) ] ids_pairs_learners = list( map(lambda x: x.__class__.__name__, [learner for (learner, _) in pairs_learners])) classifiers = [(Covariance(), build_classification), (LFDA(), build_classification), (LMNN(), build_classification), (NCA(), build_classification), (RCA(), build_classification), (ITML_Supervised(max_iter=5), build_classification), (LSML_Supervised(), build_classification), (MMC_Supervised(max_iter=5), build_classification), (RCA_Supervised(num_chunks=5), build_classification), (SDML_Supervised(prior='identity',
import numpy as np
from metric_learn import ITML
from sklearn.datasets import load_iris
from scipy.sparse import rand

# Smoke-test script: fit ITML on a random 10x10 matrix whose diagonal is
# forced to 112.0 and print the transformed result.
#
# Each print() gets one pre-formatted string so the output is byte-identical
# on Python 2 and Python 3.

x = rand(10, 10)
print('S is  ' + str(x.todense()))
x = x.todense()

# Copy of the dense matrix with every diagonal entry replaced by 112.0
# (same result as filling a zero matrix element by element).
mat1 = np.array(x, dtype=float)
np.fill_diagonal(mat1, 112.0)
print('mat1 is  ' + str(mat1))

# Labels: first five rows are class 1, last five are class 0.
y = np.ones((10, ))
y[5:] = 0

itml = ITML()
print('X is  %s  y is  %s' % (mat1.shape, y.shape))
num_constraints = 5
C = ITML.prepare_constraints(y, mat1.shape[0], num_constraints)
itml.fit(mat1, C, verbose=False)
xl = itml.transform(mat1)
print('xl is  ' + str(xl))
t0 = time() pca = RandomizedPCA(n_components=n_components, whiten=True).fit(X_train) print("done in %0.3fs" % (time() - t0)) eigenfaces = pca.components_.reshape((n_components, h, w)) print("Projecting the input data on the eigenfaces orthonormal basis") t0 = time() X_train_pca = pca.transform(X_train) X_test_pca = pca.transform(X_test) print("done in %0.3fs" % (time() - t0)) # Try LMNN here. print("Trying ITML") param_grid = {''} itml = ITML(num_constraints=200) X_tr = itml.fit(X_train_pca, y_train).transform(X_train_pca) X_te = itml.transform(X_test_pca) acc, y_pred = classifier.sk_nearest_neighbour(X_tr, y_train, X_te, y_test) print("accuracy = %s",acc) print(classification_report(y_test, y_pred, target_names=target_names)) print(confusion_matrix(y_test, y_pred, labels=range(n_classes))) ############################################################################### # Train a SVM classification model print("Fitting the classifier to the training set") t0 = time() param_grid = {'C': [1e3, 5e3, 1e4, 5e4, 1e5],
y_lims = (y_center - max_diff / 2 - margin, y_center + max_diff / 2 + margin) x_lims = (x_center - max_diff / 2 - margin, x_center + max_diff / 2 + margin) plt.figure() for i, edge in enumerate(pairs): plt.plot(edge[:, 0], edge[:, 1], c='green' if y_pairs[i] == 1 else 'red', alpha=0.3) plt.scatter(pairs[:, 0, 0], pairs[:, 0, 1], c='b') plt.scatter(pairs[:, 1, 0], pairs[:, 1, 1], c='b') plt.xlim(*x_lims) plt.ylim(*y_lims) plt.axis('equal') plt.savefig(name) plot_points(pairs, y_pairs, 'pairs_without_metric') mmc = ITML() mmc.fit(pairs, y_pairs) X_e = mmc.transform(X) pairs = X_e[c].copy() plot_points(pairs, y_pairs, 'pairs_with_metric')
import numpy as np
from metric_learn import ITML
from sklearn.datasets import load_iris

# Demo script: fit ITML on the iris data set and print the transformed data
# twice - once via itml.transform() and once as X . L^T computed by hand.
#
# Each print() gets one pre-formatted string so the output is byte-identical
# on Python 2 and Python 3.

iris_data = load_iris()
X = iris_data['data']
Y = iris_data['target']
print('Y is  ' + str(Y.shape))
print('X.shape is  ' + str(X.shape))

itml = ITML()
num_constraints = 200
C = ITML.prepare_constraints(Y, X.shape[0], num_constraints)
itml.fit(X, C, verbose=False)

x2 = itml.transform(X)
print('x2 is  ' + str(x2))

# l is the learned transformer matrix.
l = itml.transformer()
print('\n\n\nafter transforming is  ' + str(np.dot(X, l.T)))
# anyways c, target = shuffle(c, target, random_state=SEED) if with_preprocessor: # if preprocessor, we build a 2D array of quadruplets of indices return Dataset(c, target, X, c[:, 0]) else: # if not, we build a 3D array of quadruplets of samples return Dataset(X[c], target, None, X[c[:, 0]]) quadruplets_learners = [(LSML(), build_quadruplets)] ids_quadruplets_learners = list(map(lambda x: x.__class__.__name__, [learner for (learner, _) in quadruplets_learners])) pairs_learners = [(ITML(), build_pairs), (MMC(max_iter=2), build_pairs), # max_iter=2 for faster (SDML(use_cov=False, balance_param=1e-5), build_pairs)] ids_pairs_learners = list(map(lambda x: x.__class__.__name__, [learner for (learner, _) in pairs_learners])) classifiers = [(Covariance(), build_classification), (LFDA(), build_classification), (LMNN(), build_classification), (NCA(), build_classification), (RCA(), build_classification), (ITML_Supervised(max_iter=5), build_classification), (LSML_Supervised(), build_classification), (MMC_Supervised(max_iter=5), build_classification), (RCA_Supervised(num_chunks=10), build_classification),
import numpy as np
from metric_learn import ITML
from sklearn.datasets import load_iris

# Demo script: learn an ITML metric on iris, then print the data transformed
# by itml.transform() and, separately, the manual product X . L^T.
#
# print() is always called with a single formatted string, which produces
# the same text on Python 2 and Python 3.

iris_data = load_iris()
X = iris_data['data']
Y = iris_data['target']
print('Y is  ' + str(Y.shape))
print('X.shape is  ' + str(X.shape))

itml = ITML()
num_constraints = 200
C = ITML.prepare_constraints(Y, X.shape[0], num_constraints)
itml.fit(X, C, verbose=False)

x2 = itml.transform(X)
print('x2 is  ' + str(x2))

# l holds the transformer matrix returned by the fitted model.
l = itml.transformer()
print('\n\n\nafter transforming is  ' + str(np.dot(X, l.T)))