예제 #1
0
def sandwich_demo():
  """Plot the sandwich data before and after each metric learner.

  The top row of the figure shows the input points together with their
  2-nearest-neighbor graph.  Slots 3 to 6 of a 3x2 grid then show the same
  plot for the data transformed by each fitted learner (LMNN, ITML, SDML,
  LSML).  The figure is displayed with ``pyplot.show()``.
  """
  x, y = sandwich_data()
  knn = nearest_neighbors(x, k=2)
  ax = pyplot.subplot(3, 1, 1)  # take the whole top row
  plot_sandwich_data(x, y, ax)
  plot_neighborhood_graph(x, knn, y, ax)
  ax.set_title('input space')
  ax.set_aspect('equal')
  ax.set_xticks([])
  ax.set_yticks([])

  num_constraints = 60
  # Each entry pairs a learner with the positional arguments for its fit().
  mls = [
      (LMNN(), (x, y)),
      (ITML(), (x, ITML.prepare_constraints(y, len(x), num_constraints))),
      (SDML(), (x, SDML.prepare_constraints(y, len(x), num_constraints))),
      (LSML(), (x, LSML.prepare_constraints(y, num_constraints)))
  ]

  # The top row occupies the first row of the grid, so learners start at
  # slot 3.  enumerate() replaces the Python-2-only xrange() and yields the
  # same slot numbers (3..6) for the four learners.
  for ax_num, (ml, args) in enumerate(mls, 3):
    ml.fit(*args)
    tx = ml.transform()
    ml_knn = nearest_neighbors(tx, k=2)
    ax = pyplot.subplot(3, 2, ax_num)
    plot_sandwich_data(tx, y, ax)
    plot_neighborhood_graph(tx, ml_knn, y, ax)
    ax.set_title('%s space' % ml.__class__.__name__)
    ax.set_xticks([])
    ax.set_yticks([])
  pyplot.show()
예제 #2
0
def sandwich_demo():
    """Show the sandwich data set in input space and in each learned metric space.

    Draws the raw points and their 2-nearest-neighbor graph across the top
    row, then fits LMNN, ITML, SDML and LSML in turn and draws the
    transformed points (with a freshly computed neighbor graph) in slots
    3 to 6 of a 3x2 subplot grid before calling ``pyplot.show()``.
    """
    x, y = sandwich_data()
    knn = nearest_neighbors(x, k=2)
    ax = pyplot.subplot(3, 1, 1)  # take the whole top row
    plot_sandwich_data(x, y, ax)
    plot_neighborhood_graph(x, knn, y, ax)
    ax.set_title('input space')
    ax.set_aspect('equal')
    ax.set_xticks([])
    ax.set_yticks([])

    num_constraints = 60
    # (learner, fit-arguments) pairs, plotted in this order.
    mls = [(LMNN(), (x, y)),
           (ITML(), (x, ITML.prepare_constraints(y, len(x), num_constraints))),
           (SDML(), (x, SDML.prepare_constraints(y, len(x), num_constraints))),
           (LSML(), (x, LSML.prepare_constraints(y, num_constraints)))]

    # Subplot slots are numbered from 3 because the input-space plot already
    # fills the first row.  enumerate(..., 3) gives the same numbering as the
    # old zip(xrange(3, 7), mls) and also runs on Python 3.
    for ax_num, (ml, args) in enumerate(mls, 3):
        ml.fit(*args)
        tx = ml.transform()
        ml_knn = nearest_neighbors(tx, k=2)
        ax = pyplot.subplot(3, 2, ax_num)
        plot_sandwich_data(tx, y, ax)
        plot_neighborhood_graph(tx, ml_knn, y, ax)
        ax.set_title('%s space' % ml.__class__.__name__)
        ax.set_xticks([])
        ax.set_yticks([])
    pyplot.show()
예제 #3
0
  def test_iris(self):
    """ITML fitted on 200 iris constraints yields class separation below 0.4."""
    num_constraints = 200
    points = self.iris_points
    labels = self.iris_labels

    constraints = ITML.prepare_constraints(labels, points.shape[0],
                                           num_constraints)
    fitted = ITML().fit(points, constraints, verbose=False)

    # Loose bound: the separation achieved here is not great.
    separation = class_separation(fitted.transform(), labels)
    self.assertLess(separation, 0.4)
예제 #4
0
  def test_iris(self):
    """Check that ITML on the iris data gives a class separation under 0.4."""
    n_points = self.iris_points.shape[0]
    pair_constraints = ITML.prepare_constraints(self.iris_labels, n_points, 200)

    learner = ITML().fit(self.iris_points, pair_constraints, verbose=False)
    transformed = learner.transform()

    # The threshold is deliberately loose; the result is not great.
    self.assertLess(class_separation(transformed, self.iris_labels), 0.4)
예제 #5
0
    def constructSimilartyMatrixITML(self, k=5):
        print 'Now doing itml'
        num_constraints = 100
        itml = ITML()
        C = ITML.prepare_constraints(self.y_train,
                                     self.trainVectorsPCA.shape[0],
                                     num_constraints)
        itml.fit(self.trainVectorsPCA, C, verbose=True)
        self.L_itml = itml.transformer()

        name = 'itml/ITML transformer matrix with dataset shape ' + str(
            self.trainVectorsPCA.shape)
        print 'L itml shape  is ', self.L_itml.shape
        np.save(name, self.L_itml)

        # Input data transformed to the metric space by X*L.T

        self.transformedTrainITML = copy(itml.transform(self.trainVectorsPCA))
        self.transformedTestITML = copy(itml.transform(self.testVectorsPCA))
        self.transformedAllITML = copy(itml.transform(self.allDataPCA))
        # now we can simply calculate the eucledian distances on the above transformed dataset

        #Visualizing the dataset by TSNE
        projectedDigits = TSNE(random_state=randomState).fit_transform(
            self.transformedTrainITML)

        plt.scatter(projectedDigits[:, 0],
                    projectedDigits[:, 1],
                    c=self.y_train)
        plt.title(
            'ITML Transformed Train set projected to 2 Dimensions by TSNE with k='
            + str(k) + ' and num_constraints = ' + str(num_constraints))
        plt.savefig(pp, format='pdf')
        self.pwdis = copy(
            pairwise_distances(self.transformedAllITML, metric='euclidean'))
        self.D = np.zeros(self.pwdis.shape)
        for i in range(0, self.pwdis.shape[0]):
            l1 = self.pwdis[i].tolist()
            #print 'l1 is ',l1,'\n\n'
            allnearestNeighbours = sorted(range(len(l1)), key=lambda i: l1[i])
            #now set the all the weights except for k+1 to 0
            self.pwdis[i, allnearestNeighbours[k:]] = 0
            self.D[i, i] = sum(self.pwdis[i])

        print 'accuracy for ITML\n'
        self.labelPropogation()
 def constructSimilartyMatrixITML(self,k=5):
     print 'Now doing itml'
     num_constraints=100
     itml=ITML()
     C = ITML.prepare_constraints(self.y_train, self.trainVectorsPCA.shape[0], num_constraints)
     itml.fit(self.trainVectorsPCA, C, verbose=True)
     self.L_itml=itml.transformer()
     
     name='itml/ITML transformer matrix with dataset shape '+str(self.trainVectorsPCA.shape)
     print 'L itml shape  is ',self.L_itml.shape
     np.save(name,self.L_itml)
     
     # Input data transformed to the metric space by X*L.T
     
     self.transformedTrainITML=copy(itml.transform(self.trainVectorsPCA))
     self.transformedTestITML=copy(itml.transform(self.testVectorsPCA))
     self.transformedAllITML=copy(itml.transform(self.allDataPCA))
     # now we can simply calculate the eucledian distances on the above transformed dataset
     
     #Visualizing the dataset by TSNE
     projectedDigits = TSNE(random_state=randomState).fit_transform(self.transformedTrainITML)
     
     plt.scatter(projectedDigits[:,0],projectedDigits[:,1],c=self.y_train)
     plt.title('ITML Transformed Train set projected to 2 Dimensions by TSNE with k='+str(k)+' and num_constraints = '+str(num_constraints))
     plt.savefig(pp,format='pdf')
     self.pwdis=copy(pairwise_distances(self.transformedAllITML,metric='euclidean'))
     self.D=np.zeros(self.pwdis.shape)
     for i in range(0,self.pwdis.shape[0]):
         l1=self.pwdis[i].tolist()
         #print 'l1 is ',l1,'\n\n'
         allnearestNeighbours=sorted(range(len(l1)),key=lambda i : l1[i])
         #now set the all the weights except for k+1 to 0
         self.pwdis[i,allnearestNeighbours[k:]]=0
         self.D[i,i]=sum(self.pwdis[i])
     
     print 'accuracy for ITML\n'
     self.labelPropogation()
예제 #7
0
    def constructSimilartyMatrixITML(self):

        print 'Now doing itml'
        counter = 1
        ks = [
            3, 5, 7, 10, 12, 15, 20, 22, 25, 27, 30, 33, 35, 37, 40, 43, 45,
            47, 50, 53, 55, 57, 60, 65
        ]

        constraints = [80, 100, 120, 150, 180, 200]
        constraints = [100]
        for k in ks:
            for num_constraints in constraints:

                itml = ITML()
                self.y_train = self.y_train.reshape(-1, )
                C = ITML.prepare_constraints(self.y_train,
                                             self.trainVectorsPCA.shape[0],
                                             num_constraints)
                itml.fit(self.trainVectorsPCA, C, verbose=True)
                self.L_itml = copy(itml.transformer())

                name = 'itml/ITML transformer matrix with dataset shape ' + str(
                    self.trainVectorsPCA.shape) + ' and k=' + str(
                        k) + ' and num_constraints=' + str(num_constraints)
                #print 'L itml shape  is ',self.L_itml.shape
                np.save(name, self.L_itml)

                # Input data transformed to the metric space by X*L.T

                self.transformedTrainITML = copy(
                    itml.transform(self.trainVectorsPCA))
                self.transformedTestITML = copy(
                    itml.transform(self.testVectorsPCA))
                self.transformedAllITML = copy(itml.transform(self.allDataPCA))
                # now we can simply calculate the eucledian distances on the above transformed dataset

                #Visualizing the dataset by TSNE
                projectedDigits = TSNE(random_state=randomState).fit_transform(
                    self.transformedAllITML)

                print 'projectedDigits is ', projectedDigits.shape
                plt.figure()
                plt.scatter(projectedDigits[:, 0],
                            projectedDigits[:, 1],
                            c=self.labels)
                plt.title(
                    'ITML Transformed ALL set projected to 2 Dimensions by TSNE with'
                    + str(k) + ' and num_constraints=' + str(num_constraints))
                plt.savefig(pp, format='pdf')
                #plt.show()
                plt.close()

                self.pwdis = copy(
                    pairwise_distances(self.transformedAllITML,
                                       metric='euclidean'))
                #sigmas=[1,2.5,2,2.5,3,3.5,4,4.5,5]
                #for sigma in sigmas:

                self.D = np.zeros(self.pwdis.shape)
                for i in range(0, self.pwdis.shape[0]):
                    l1 = self.pwdis[i].tolist()
                    #print 'l1 is ',l1,'\n\n'
                    allnearestNeighbours = sorted(range(len(l1)),
                                                  key=lambda i: l1[i])
                    #now set the all the weights except for k+1 to 0
                    #since we exponentiated the distances with the minus sign we need to set the lowest weights to 0, so everything except for the last k go to 0
                    self.pwdis[i, allnearestNeighbours[k:]] = 0
                    self.D[i, i] = sum(self.pwdis[i])

                print 'accuracy for ITML for k= ', k, ' and num_constraints=' + str(
                    num_constraints), '\n'
                self.labelPropogation()
import numpy as np
from metric_learn import ITML
from sklearn.datasets import load_iris

iris_data = load_iris()
X = iris_data['data']
Y = iris_data['target']

print 'Y is ',Y.shape
print 'X.shape is ',X.shape
itml = ITML()

num_constraints = 200
C = ITML.prepare_constraints(Y, X.shape[0], num_constraints)
itml.fit(X, C, verbose=False)
x2=itml.transform(X)

print 'x2 is ',x2
l=itml.transformer()
print '\n\n\nafter transforming is ',np.dot(X,l.T)
import numpy as np
from metric_learn import ITML
from sklearn.datasets import load_iris
from scipy.sparse import rand

x = rand(10, 10)

print 'S is ', x.todense()

x = x.todense()

mat1 = np.zeros(x.shape)
for i in range(0, mat1.shape[0]):
    mat1[i, i] = 112.0
    for j in range(0, mat1.shape[1]):
        if i == j:
            continue
        mat1[i, j] = x[i, j]

print 'mat1 is ', mat1
y = np.ones((10, ))
y[5:] = 0

itml = ITML()
print 'X is ', mat1.shape, ' y is ', y.shape
num_constraints = 5
C = ITML.prepare_constraints(y, mat1.shape[0], num_constraints)
itml.fit(mat1, C, verbose=False)
xl = itml.transform(mat1)
print 'xl is ', xl
예제 #10
0
 def fit(self, X, y):
     """Fit from labels by building ITML constraints and delegating to the parent.

     ``NUM_CONSTRAINTS`` constraints are built from ``y`` with
     ``ITML.prepare_constraints`` and passed, together with ``X``, to the
     parent class ``fit``; that call's return value is returned unchanged.
     """
     pair_constraints = ITML.prepare_constraints(y, len(X), NUM_CONSTRAINTS)
     return super(ITML_sk, self).fit(X, pair_constraints)
import numpy as np
from metric_learn import ITML
from sklearn.datasets import load_iris

iris_data = load_iris()
X = iris_data['data']
Y = iris_data['target']

print 'Y is ', Y.shape
print 'X.shape is ', X.shape
itml = ITML()

num_constraints = 200
C = ITML.prepare_constraints(Y, X.shape[0], num_constraints)
itml.fit(X, C, verbose=False)
x2 = itml.transform(X)

print 'x2 is ', x2
l = itml.transformer()
print '\n\n\nafter transforming is ', np.dot(X, l.T)