def sandwich_demo():
    """Plot the sandwich data in input space and after four metric learners.

    The top row of a 3x2 grid shows the raw data with its 2-nearest-neighbor
    graph.  Grid cells 3..6 show the same data after fitting LMNN, ITML,
    SDML and LSML, each with the 2-nearest-neighbor graph recomputed on the
    transformed points.  Finishes by calling ``pyplot.show()``.
    """
    points, labels = sandwich_data()
    input_graph = nearest_neighbors(points, k=2)

    top_axes = pyplot.subplot(3, 1, 1)  # take the whole top row
    plot_sandwich_data(points, labels, top_axes)
    plot_neighborhood_graph(points, input_graph, labels, top_axes)
    top_axes.set_title('input space')
    top_axes.set_aspect('equal')
    top_axes.set_xticks([])
    top_axes.set_yticks([])

    num_constraints = 60
    n_points = len(points)
    # Each entry pairs a learner with the positional arguments for its fit().
    learners = [
        (LMNN(), (points, labels)),
        (ITML(), (points,
                  ITML.prepare_constraints(labels, n_points, num_constraints))),
        (SDML(), (points,
                  SDML.prepare_constraints(labels, n_points, num_constraints))),
        (LSML(), (points,
                  LSML.prepare_constraints(labels, num_constraints))),
    ]

    # Cells 1 and 2 of the 3x2 grid are covered by the top-row axes, so the
    # learners occupy cells 3 through 6.
    for cell, (learner, fit_args) in enumerate(learners, 3):
        learner.fit(*fit_args)
        transformed = learner.transform()
        transformed_graph = nearest_neighbors(transformed, k=2)
        cell_axes = pyplot.subplot(3, 2, cell)
        plot_sandwich_data(transformed, labels, cell_axes)
        plot_neighborhood_graph(transformed, transformed_graph, labels,
                                cell_axes)
        cell_axes.set_title('%s space' % learner.__class__.__name__)
        cell_axes.set_xticks([])
        cell_axes.set_yticks([])
    pyplot.show()
def sandwich_demo():
    """Show the sandwich data before and after metric learning.

    Draws the input data and its k=2 neighbor graph across the top row of a
    3x2 grid, then fits LMNN, ITML, SDML and LSML in turn and draws each
    transformed data set (with a freshly computed k=2 neighbor graph) in
    grid positions 3 to 6 before displaying the figure.
    """
    data, target = sandwich_data()
    neighbors = nearest_neighbors(data, k=2)

    # Position 1 of a 3x1 grid: spans the whole top row.
    axes = pyplot.subplot(3, 1, 1)
    plot_sandwich_data(data, target, axes)
    plot_neighborhood_graph(data, neighbors, target, axes)
    axes.set_title('input space')
    axes.set_aspect('equal')
    axes.set_xticks([])
    axes.set_yticks([])

    num_constraints = 60
    # (learner, fit-arguments) pairs; constraints are prepared up front, in
    # this order, before any learner is fitted.
    lmnn_job = (LMNN(), (data, target))
    itml_job = (ITML(), (data, ITML.prepare_constraints(
        target, len(data), num_constraints)))
    sdml_job = (SDML(), (data, SDML.prepare_constraints(
        target, len(data), num_constraints)))
    lsml_job = (LSML(), (data, LSML.prepare_constraints(
        target, num_constraints)))
    jobs = [lmnn_job, itml_job, sdml_job, lsml_job]

    for position, job in enumerate(jobs, start=3):
        metric_learner, fit_args = job
        metric_learner.fit(*fit_args)
        embedded = metric_learner.transform()
        embedded_neighbors = nearest_neighbors(embedded, k=2)
        axes = pyplot.subplot(3, 2, position)
        plot_sandwich_data(embedded, target, axes)
        plot_neighborhood_graph(embedded, embedded_neighbors, target, axes)
        axes.set_title('%s space' % metric_learner.__class__.__name__)
        axes.set_xticks([])
        axes.set_yticks([])
    pyplot.show()
def test_iris(self):
    """Fit ITML on the iris fixture and bound the resulting class separation."""
    constraint_count = 200
    point_count = self.iris_points.shape[0]
    constraints = ITML.prepare_constraints(
        self.iris_labels, point_count, constraint_count)

    learner = ITML().fit(self.iris_points, constraints, verbose=False)
    separation = class_separation(learner.transform(), self.iris_labels)

    # The bound is deliberately loose: it's not great.
    self.assertLess(separation, 0.4)
def test_iris(self):
    """Check that ITML trained on iris yields class separation below 0.4.

    Uses 200 constraints prepared from ``self.iris_labels`` and fits on
    ``self.iris_points`` with verbose output disabled.
    """
    num_constraints = 200
    iris_constraints = ITML.prepare_constraints(
        self.iris_labels, self.iris_points.shape[0], num_constraints)
    fitted = ITML().fit(self.iris_points, iris_constraints, verbose=False)
    transformed_points = fitted.transform()
    self.assertLess(
        class_separation(transformed_points, self.iris_labels),
        0.4)  # it's not great
def constructSimilartyMatrixITML(self, k=5): print 'Now doing itml' num_constraints = 100 itml = ITML() C = ITML.prepare_constraints(self.y_train, self.trainVectorsPCA.shape[0], num_constraints) itml.fit(self.trainVectorsPCA, C, verbose=True) self.L_itml = itml.transformer() name = 'itml/ITML transformer matrix with dataset shape ' + str( self.trainVectorsPCA.shape) print 'L itml shape is ', self.L_itml.shape np.save(name, self.L_itml) # Input data transformed to the metric space by X*L.T self.transformedTrainITML = copy(itml.transform(self.trainVectorsPCA)) self.transformedTestITML = copy(itml.transform(self.testVectorsPCA)) self.transformedAllITML = copy(itml.transform(self.allDataPCA)) # now we can simply calculate the eucledian distances on the above transformed dataset #Visualizing the dataset by TSNE projectedDigits = TSNE(random_state=randomState).fit_transform( self.transformedTrainITML) plt.scatter(projectedDigits[:, 0], projectedDigits[:, 1], c=self.y_train) plt.title( 'ITML Transformed Train set projected to 2 Dimensions by TSNE with k=' + str(k) + ' and num_constraints = ' + str(num_constraints)) plt.savefig(pp, format='pdf') self.pwdis = copy( pairwise_distances(self.transformedAllITML, metric='euclidean')) self.D = np.zeros(self.pwdis.shape) for i in range(0, self.pwdis.shape[0]): l1 = self.pwdis[i].tolist() #print 'l1 is ',l1,'\n\n' allnearestNeighbours = sorted(range(len(l1)), key=lambda i: l1[i]) #now set the all the weights except for k+1 to 0 self.pwdis[i, allnearestNeighbours[k:]] = 0 self.D[i, i] = sum(self.pwdis[i]) print 'accuracy for ITML\n' self.labelPropogation()
def constructSimilartyMatrixITML(self,k=5): print 'Now doing itml' num_constraints=100 itml=ITML() C = ITML.prepare_constraints(self.y_train, self.trainVectorsPCA.shape[0], num_constraints) itml.fit(self.trainVectorsPCA, C, verbose=True) self.L_itml=itml.transformer() name='itml/ITML transformer matrix with dataset shape '+str(self.trainVectorsPCA.shape) print 'L itml shape is ',self.L_itml.shape np.save(name,self.L_itml) # Input data transformed to the metric space by X*L.T self.transformedTrainITML=copy(itml.transform(self.trainVectorsPCA)) self.transformedTestITML=copy(itml.transform(self.testVectorsPCA)) self.transformedAllITML=copy(itml.transform(self.allDataPCA)) # now we can simply calculate the eucledian distances on the above transformed dataset #Visualizing the dataset by TSNE projectedDigits = TSNE(random_state=randomState).fit_transform(self.transformedTrainITML) plt.scatter(projectedDigits[:,0],projectedDigits[:,1],c=self.y_train) plt.title('ITML Transformed Train set projected to 2 Dimensions by TSNE with k='+str(k)+' and num_constraints = '+str(num_constraints)) plt.savefig(pp,format='pdf') self.pwdis=copy(pairwise_distances(self.transformedAllITML,metric='euclidean')) self.D=np.zeros(self.pwdis.shape) for i in range(0,self.pwdis.shape[0]): l1=self.pwdis[i].tolist() #print 'l1 is ',l1,'\n\n' allnearestNeighbours=sorted(range(len(l1)),key=lambda i : l1[i]) #now set the all the weights except for k+1 to 0 self.pwdis[i,allnearestNeighbours[k:]]=0 self.D[i,i]=sum(self.pwdis[i]) print 'accuracy for ITML\n' self.labelPropogation()
def constructSimilartyMatrixITML(self): print 'Now doing itml' counter = 1 ks = [ 3, 5, 7, 10, 12, 15, 20, 22, 25, 27, 30, 33, 35, 37, 40, 43, 45, 47, 50, 53, 55, 57, 60, 65 ] constraints = [80, 100, 120, 150, 180, 200] constraints = [100] for k in ks: for num_constraints in constraints: itml = ITML() self.y_train = self.y_train.reshape(-1, ) C = ITML.prepare_constraints(self.y_train, self.trainVectorsPCA.shape[0], num_constraints) itml.fit(self.trainVectorsPCA, C, verbose=True) self.L_itml = copy(itml.transformer()) name = 'itml/ITML transformer matrix with dataset shape ' + str( self.trainVectorsPCA.shape) + ' and k=' + str( k) + ' and num_constraints=' + str(num_constraints) #print 'L itml shape is ',self.L_itml.shape np.save(name, self.L_itml) # Input data transformed to the metric space by X*L.T self.transformedTrainITML = copy( itml.transform(self.trainVectorsPCA)) self.transformedTestITML = copy( itml.transform(self.testVectorsPCA)) self.transformedAllITML = copy(itml.transform(self.allDataPCA)) # now we can simply calculate the eucledian distances on the above transformed dataset #Visualizing the dataset by TSNE projectedDigits = TSNE(random_state=randomState).fit_transform( self.transformedAllITML) print 'projectedDigits is ', projectedDigits.shape plt.figure() plt.scatter(projectedDigits[:, 0], projectedDigits[:, 1], c=self.labels) plt.title( 'ITML Transformed ALL set projected to 2 Dimensions by TSNE with' + str(k) + ' and num_constraints=' + str(num_constraints)) plt.savefig(pp, format='pdf') #plt.show() plt.close() self.pwdis = copy( pairwise_distances(self.transformedAllITML, metric='euclidean')) #sigmas=[1,2.5,2,2.5,3,3.5,4,4.5,5] #for sigma in sigmas: self.D = np.zeros(self.pwdis.shape) for i in range(0, self.pwdis.shape[0]): l1 = self.pwdis[i].tolist() #print 'l1 is ',l1,'\n\n' allnearestNeighbours = sorted(range(len(l1)), key=lambda i: l1[i]) #now set the all the weights except for k+1 to 0 #since we exponentiated the distances with the minus sign 
we need to set the lowest weights to 0, so everything except for the last k go to 0 self.pwdis[i, allnearestNeighbours[k:]] = 0 self.D[i, i] = sum(self.pwdis[i]) print 'accuracy for ITML for k= ', k, ' and num_constraints=' + str( num_constraints), '\n' self.labelPropogation()
import numpy as np from metric_learn import ITML from sklearn.datasets import load_iris iris_data = load_iris() X = iris_data['data'] Y = iris_data['target'] print 'Y is ',Y.shape print 'X.shape is ',X.shape itml = ITML() num_constraints = 200 C = ITML.prepare_constraints(Y, X.shape[0], num_constraints) itml.fit(X, C, verbose=False) x2=itml.transform(X) print 'x2 is ',x2 l=itml.transformer() print '\n\n\nafter transforming is ',np.dot(X,l.T)
import numpy as np from metric_learn import ITML from sklearn.datasets import load_iris from scipy.sparse import rand x = rand(10, 10) print 'S is ', x.todense() x = x.todense() mat1 = np.zeros(x.shape) for i in range(0, mat1.shape[0]): mat1[i, i] = 112.0 for j in range(0, mat1.shape[1]): if i == j: continue mat1[i, j] = x[i, j] print 'mat1 is ', mat1 y = np.ones((10, )) y[5:] = 0 itml = ITML() print 'X is ', mat1.shape, ' y is ', y.shape num_constraints = 5 C = ITML.prepare_constraints(y, mat1.shape[0], num_constraints) itml.fit(mat1, C, verbose=False) xl = itml.transform(mat1) print 'xl is ', xl
def fit(self, X, y):
    """Fit from labels by preparing ITML constraints first.

    Prepares ``NUM_CONSTRAINTS`` constraints from the labels ``y`` for the
    ``len(X)`` samples and passes them, with ``X``, to the parent class's
    ``fit``.  Returns whatever the parent ``fit`` returns.
    """
    sample_count = len(X)
    pair_constraints = ITML.prepare_constraints(
        y, sample_count, NUM_CONSTRAINTS)
    return super(ITML_sk, self).fit(X, pair_constraints)
import numpy as np from metric_learn import ITML from sklearn.datasets import load_iris iris_data = load_iris() X = iris_data['data'] Y = iris_data['target'] print 'Y is ', Y.shape print 'X.shape is ', X.shape itml = ITML() num_constraints = 200 C = ITML.prepare_constraints(Y, X.shape[0], num_constraints) itml.fit(X, C, verbose=False) x2 = itml.transform(X) print 'x2 is ', x2 l = itml.transformer() print '\n\n\nafter transforming is ', np.dot(X, l.T)