Ejemplo n.º 1
0
def test_preprocessor_weakly_supervised(preprocessor, tuples, y_tuples):
    """Smoke-test fitting ITML, a weakly supervised learner, with a preprocessor.

    The ``preprocessor`` argument is forwarded unchanged to the ``ITML``
    constructor; it is meant to cover an array, a class callable and a
    function callable. The test passes when neither construction nor
    ``fit(tuples, y_tuples)`` raises.
    """
    ITML(preprocessor=preprocessor).fit(tuples, y_tuples)
Ejemplo n.º 2
0
def test_preprocessor_weakly_supervised(preprocessor, tuples, y_tuples):
  """Fit a weakly supervised algorithm (ITML) using the given preprocessor.

  Covers the different accepted forms of ``preprocessor``: an array, a
  class callable, or a function callable. There is no explicit assertion;
  the test only requires that building the estimator and fitting it on
  ``tuples`` / ``y_tuples`` completes without an exception.
  """
  weak_learner = ITML(preprocessor=preprocessor)
  weak_learner.fit(tuples, y_tuples)
Ejemplo n.º 3
0
def doClustering(X = None, y = None, initial = False, silent = True, numClusters = 4):
    """Cluster the transformed data, optionally after ITML metric learning.

    Steps:
      1. Load the data with ``rd.readTransformedData()``.
      2. When ``initial == False``, read the feedback votes with
         ``rd.readFeedbackData()``, turn every vote row into a pair of data
         rows (columns ``id_punkt1`` / ``id_punkt2``), fit ``ITML`` on those
         pairs and transform the data with the fitted model.
      3. Cluster the (possibly transformed) data and pass the outcome to
         ``writeClusteringResult``.

    Args:
        X: Replaced by the loaded data before it is ever read.
        y: Replaced by the second value returned by the loader before it
            is ever read.
        initial: The metric-learning step runs only when this compares
            equal to ``False``.
        silent: When falsy, progress markers are printed.
        numClusters: Number of clusters requested from KMeans.

    Returns:
        Always ``1``.
    """
    # Hard-coded algorithm switches: with these values KMeans is always used
    # and the OPTICS / DBSCAN branches below are never reached.
    use_kmeans = True
    use_optics = False

    if not silent:
        print("- doClustering")

    # NOTE(review): the X and y arguments are overwritten here, so whatever
    # the caller passed in is ignored.
    X, y = rd.readTransformedData()
    features = X.iloc[:, 0:].values

    # Deliberately `== False` rather than `not initial`: a value such as None
    # does not compare equal to False and therefore skips metric learning.
    if initial == False:
        votesX, votesY = rd.readFeedbackData()
        vote_pairs = [
            (features[vote["id_punkt1"]], features[vote["id_punkt2"]])
            for _, vote in votesX.iterrows()
        ]

        metric = ITML()
        metric.fit(vote_pairs, votesY)
        if not silent:
            print("Transform")

        features = metric.transform(features)

    # Every branch reports the loaded y as the reference labels.
    labels_true = y

    if use_kmeans:
        model = KMeans(n_clusters=numClusters, random_state=0).fit(features)
        labels = model.labels_
        core_samples_mask = [0] * len(y)
    elif use_optics:
        model = OPTICS(min_samples=30, xi=.05)
        model.fit(features)
        labels = model.labels_
        core_samples_mask = [0] * len(y)
    else:
        model = DBSCAN(eps=0.6, min_samples=5).fit(features)
        # Boolean mask that is True at the indices DBSCAN reports as core samples.
        core_samples_mask = np.zeros_like(model.labels_, dtype=bool)
        core_samples_mask[model.core_sample_indices_] = True
        labels = model.labels_

    writeClusteringResult(features, labels, labels_true, core_samples_mask)

    if not silent:
        print("+ doClustering")
    return 1
Ejemplo n.º 4
0
def test_bounds_parameters_valid(bounds):
    """Check that ``fit`` accepts the supplied ``bounds`` without raising.

    ``bounds`` is forwarded as a keyword argument to ``fit`` of both the
    pair-based ``ITML`` and ``ITML_Supervised``, each trained on a tiny
    hand-written data set.

    NOTE(review): the previous docstring also promised that the fitted
    bounds attribute is a numpy array, but this test contains no such
    assertion.
    """
    # Pair-based estimator: two pairs of 2-D points labelled 1 and -1.
    pair_labels = [1, -1]
    point_pairs = np.array([
        [[-10., 0.], [10., 0.]],
        [[0., 50.], [0., -60]],
    ])
    ITML().fit(point_pairs, pair_labels, bounds=bounds)

    # Supervised estimator: four 2-D points with binary labels.
    point_labels = np.array([1, 0, 1, 0])
    points = np.array([[0, 0], [0, 1], [2, 0], [2, 1]])
    ITML_Supervised().fit(points, point_labels, bounds=bounds)
Ejemplo n.º 5
0
def test_bounds_parameters_valid(bounds):
  """Fit ITML and ITML_Supervised with ``bounds`` and expect no error.

  The same ``bounds`` value is passed to ``fit`` of both estimators.

  NOTE(review): the original description mentioned verifying that the
  resulting bounds attribute is a numpy array; the body never checks it.
  """
  first_pair = [[-10., 0.], [10., 0.]]
  second_pair = [[0., 50.], [0., -60]]
  pair_learner = ITML()
  pair_learner.fit(np.array([first_pair, second_pair]), [1, -1],
                   bounds=bounds)

  samples = np.array([[0, 0], [0, 1], [2, 0], [2, 1]])
  targets = np.array([1, 0, 1, 0])
  supervised_learner = ITML_Supervised()
  supervised_learner.fit(samples, targets, bounds=bounds)
Ejemplo n.º 6
0
def test_bounds_parameters_invalid(bounds):
    """Check that fitting with an invalid ``bounds`` value raises.

    Invalid means a value that is not array-like, or an array-like whose
    length differs from 2. Both ``ITML.fit`` and ``ITML_Supervised.fit``
    must raise some ``Exception`` when given it.
    """
    point_pairs = np.array([
        [[-10., 0.], [10., 0.]],
        [[0., 50.], [0., -60]],
    ])
    pair_labels = [1, -1]
    # Built outside the `raises` context so that only `fit` may raise.
    pair_learner = ITML()
    with pytest.raises(Exception):
        pair_learner.fit(point_pairs, pair_labels, bounds=bounds)

    points = np.array([[0, 0], [0, 1], [2, 0], [2, 1]])
    point_labels = np.array([1, 0, 1, 0])
    supervised_learner = ITML_Supervised()
    with pytest.raises(Exception):
        supervised_learner.fit(points, point_labels, bounds=bounds)
Ejemplo n.º 7
0
def test_bounds_parameters_invalid(bounds):
  """Expect an error when ``bounds`` is malformed.

  A malformed value is one that is not array-like or whose length is not 2.
  The check is done for the pair-based ``ITML`` and for ``ITML_Supervised``;
  any ``Exception`` raised by ``fit`` satisfies it.
  """
  first_pair = [[-10., 0.], [10., 0.]]
  second_pair = [[0., 50.], [0., -60]]
  pairs = np.array([first_pair, second_pair])
  # The estimator is created before entering the context manager, so a
  # failing constructor would not count as the expected error.
  pair_model = ITML()
  with pytest.raises(Exception):
    pair_model.fit(pairs, [1, -1], bounds=bounds)

  samples = np.array([[0, 0], [0, 1], [2, 0], [2, 1]])
  targets = np.array([1, 0, 1, 0])
  supervised_model = ITML_Supervised()
  with pytest.raises(Exception):
    supervised_model.fit(samples, targets, bounds=bounds)
Ejemplo n.º 8
0
    def constructSimilartyMatrixITML(self, k=5):
        print 'Now doing itml'
        num_constraints = 100
        itml = ITML()
        C = ITML.prepare_constraints(self.y_train,
                                     self.trainVectorsPCA.shape[0],
                                     num_constraints)
        itml.fit(self.trainVectorsPCA, C, verbose=True)
        self.L_itml = itml.transformer()

        name = 'itml/ITML transformer matrix with dataset shape ' + str(
            self.trainVectorsPCA.shape)
        print 'L itml shape  is ', self.L_itml.shape
        np.save(name, self.L_itml)

        # Input data transformed to the metric space by X*L.T

        self.transformedTrainITML = copy(itml.transform(self.trainVectorsPCA))
        self.transformedTestITML = copy(itml.transform(self.testVectorsPCA))
        self.transformedAllITML = copy(itml.transform(self.allDataPCA))
        # now we can simply calculate the eucledian distances on the above transformed dataset

        #Visualizing the dataset by TSNE
        projectedDigits = TSNE(random_state=randomState).fit_transform(
            self.transformedTrainITML)

        plt.scatter(projectedDigits[:, 0],
                    projectedDigits[:, 1],
                    c=self.y_train)
        plt.title(
            'ITML Transformed Train set projected to 2 Dimensions by TSNE with k='
            + str(k) + ' and num_constraints = ' + str(num_constraints))
        plt.savefig(pp, format='pdf')
        self.pwdis = copy(
            pairwise_distances(self.transformedAllITML, metric='euclidean'))
        self.D = np.zeros(self.pwdis.shape)
        for i in range(0, self.pwdis.shape[0]):
            l1 = self.pwdis[i].tolist()
            #print 'l1 is ',l1,'\n\n'
            allnearestNeighbours = sorted(range(len(l1)), key=lambda i: l1[i])
            #now set the all the weights except for k+1 to 0
            self.pwdis[i, allnearestNeighbours[k:]] = 0
            self.D[i, i] = sum(self.pwdis[i])

        print 'accuracy for ITML\n'
        self.labelPropogation()
 def constructSimilartyMatrixITML(self,k=5):
     print 'Now doing itml'
     num_constraints=100
     itml=ITML()
     C = ITML.prepare_constraints(self.y_train, self.trainVectorsPCA.shape[0], num_constraints)
     itml.fit(self.trainVectorsPCA, C, verbose=True)
     self.L_itml=itml.transformer()
     
     name='itml/ITML transformer matrix with dataset shape '+str(self.trainVectorsPCA.shape)
     print 'L itml shape  is ',self.L_itml.shape
     np.save(name,self.L_itml)
     
     # Input data transformed to the metric space by X*L.T
     
     self.transformedTrainITML=copy(itml.transform(self.trainVectorsPCA))
     self.transformedTestITML=copy(itml.transform(self.testVectorsPCA))
     self.transformedAllITML=copy(itml.transform(self.allDataPCA))
     # now we can simply calculate the eucledian distances on the above transformed dataset
     
     #Visualizing the dataset by TSNE
     projectedDigits = TSNE(random_state=randomState).fit_transform(self.transformedTrainITML)
     
     plt.scatter(projectedDigits[:,0],projectedDigits[:,1],c=self.y_train)
     plt.title('ITML Transformed Train set projected to 2 Dimensions by TSNE with k='+str(k)+' and num_constraints = '+str(num_constraints))
     plt.savefig(pp,format='pdf')
     self.pwdis=copy(pairwise_distances(self.transformedAllITML,metric='euclidean'))
     self.D=np.zeros(self.pwdis.shape)
     for i in range(0,self.pwdis.shape[0]):
         l1=self.pwdis[i].tolist()
         #print 'l1 is ',l1,'\n\n'
         allnearestNeighbours=sorted(range(len(l1)),key=lambda i : l1[i])
         #now set the all the weights except for k+1 to 0
         self.pwdis[i,allnearestNeighbours[k:]]=0
         self.D[i,i]=sum(self.pwdis[i])
     
     print 'accuracy for ITML\n'
     self.labelPropogation()
Ejemplo n.º 10
0
    def constructSimilartyMatrixITML(self):

        print 'Now doing itml'
        counter = 1
        ks = [
            3, 5, 7, 10, 12, 15, 20, 22, 25, 27, 30, 33, 35, 37, 40, 43, 45,
            47, 50, 53, 55, 57, 60, 65
        ]

        constraints = [80, 100, 120, 150, 180, 200]
        constraints = [100]
        for k in ks:
            for num_constraints in constraints:

                itml = ITML()
                self.y_train = self.y_train.reshape(-1, )
                C = ITML.prepare_constraints(self.y_train,
                                             self.trainVectorsPCA.shape[0],
                                             num_constraints)
                itml.fit(self.trainVectorsPCA, C, verbose=True)
                self.L_itml = copy(itml.transformer())

                name = 'itml/ITML transformer matrix with dataset shape ' + str(
                    self.trainVectorsPCA.shape) + ' and k=' + str(
                        k) + ' and num_constraints=' + str(num_constraints)
                #print 'L itml shape  is ',self.L_itml.shape
                np.save(name, self.L_itml)

                # Input data transformed to the metric space by X*L.T

                self.transformedTrainITML = copy(
                    itml.transform(self.trainVectorsPCA))
                self.transformedTestITML = copy(
                    itml.transform(self.testVectorsPCA))
                self.transformedAllITML = copy(itml.transform(self.allDataPCA))
                # now we can simply calculate the eucledian distances on the above transformed dataset

                #Visualizing the dataset by TSNE
                projectedDigits = TSNE(random_state=randomState).fit_transform(
                    self.transformedAllITML)

                print 'projectedDigits is ', projectedDigits.shape
                plt.figure()
                plt.scatter(projectedDigits[:, 0],
                            projectedDigits[:, 1],
                            c=self.labels)
                plt.title(
                    'ITML Transformed ALL set projected to 2 Dimensions by TSNE with'
                    + str(k) + ' and num_constraints=' + str(num_constraints))
                plt.savefig(pp, format='pdf')
                #plt.show()
                plt.close()

                self.pwdis = copy(
                    pairwise_distances(self.transformedAllITML,
                                       metric='euclidean'))
                #sigmas=[1,2.5,2,2.5,3,3.5,4,4.5,5]
                #for sigma in sigmas:

                self.D = np.zeros(self.pwdis.shape)
                for i in range(0, self.pwdis.shape[0]):
                    l1 = self.pwdis[i].tolist()
                    #print 'l1 is ',l1,'\n\n'
                    allnearestNeighbours = sorted(range(len(l1)),
                                                  key=lambda i: l1[i])
                    #now set the all the weights except for k+1 to 0
                    #since we exponentiated the distances with the minus sign we need to set the lowest weights to 0, so everything except for the last k go to 0
                    self.pwdis[i, allnearestNeighbours[k:]] = 0
                    self.D[i, i] = sum(self.pwdis[i])

                print 'accuracy for ITML for k= ', k, ' and num_constraints=' + str(
                    num_constraints), '\n'
                self.labelPropogation()
import numpy as np
from metric_learn import ITML
from sklearn.datasets import load_iris

# Demo script (Python 2 syntax): fit ITML on the iris data set and compare the
# output of transform() with a manual product using the transformer matrix.
iris_data = load_iris()
X = iris_data['data']
Y = iris_data['target']

print 'Y is ',Y.shape
print 'X.shape is ',X.shape
itml = ITML()

# Build 200 constraints from the labels, then fit the metric on the features.
num_constraints = 200
C = ITML.prepare_constraints(Y, X.shape[0], num_constraints)
itml.fit(X, C, verbose=False)
x2=itml.transform(X)

print 'x2 is ',x2
# Apply the transformer matrix by hand (X times its transpose); presumably
# printed so it can be compared with x2 above.
l=itml.transformer()
print '\n\n\nafter transforming is ',np.dot(X,l.T)
import numpy as np
from metric_learn import ITML
from sklearn.datasets import load_iris
from scipy.sparse import rand

# Demo script (Python 2 syntax): fit ITML on a 10x10 matrix built from a random
# sparse matrix whose diagonal is replaced by a constant.
x = rand(10, 10)

print 'S is ', x.todense()

# From here on x is the dense form of the random matrix.
x = x.todense()

# Copy x into mat1 element by element, except for the diagonal, which is set
# to 112.0.
mat1 = np.zeros(x.shape)
for i in range(0, mat1.shape[0]):
    mat1[i, i] = 112.0
    for j in range(0, mat1.shape[1]):
        if i == j:
            continue
        mat1[i, j] = x[i, j]

print 'mat1 is ', mat1
# Labels: the first five rows get 1, the last five get 0.
y = np.ones((10, ))
y[5:] = 0

itml = ITML()
print 'X is ', mat1.shape, ' y is ', y.shape
# Prepare 5 constraints from the labels, fit on mat1 and transform it.
num_constraints = 5
C = ITML.prepare_constraints(y, mat1.shape[0], num_constraints)
itml.fit(mat1, C, verbose=False)
xl = itml.transform(mat1)
print 'xl is ', xl
    y_lims = (y_center - max_diff / 2 - margin,
              y_center + max_diff / 2 + margin)
    x_lims = (x_center - max_diff / 2 - margin,
              x_center + max_diff / 2 + margin)

    plt.figure()

    for i, edge in enumerate(pairs):
        plt.plot(edge[:, 0],
                 edge[:, 1],
                 c='green' if y_pairs[i] == 1 else 'red',
                 alpha=0.3)
    plt.scatter(pairs[:, 0, 0], pairs[:, 0, 1], c='b')
    plt.scatter(pairs[:, 1, 0], pairs[:, 1, 1], c='b')
    plt.xlim(*x_lims)
    plt.ylim(*y_lims)
    plt.axis('equal')
    plt.savefig(name)


# Plot the pairs as they are before any metric is learned.
# (pairs, y_pairs, X and c are defined earlier, outside this excerpt.)
plot_points(pairs, y_pairs, 'pairs_without_metric')

# NOTE(review): the variable is called `mmc` but holds an ITML instance.
mmc = ITML()
mmc.fit(pairs, y_pairs)

# Transform X with the fitted model and rebuild the pairs by indexing with c
# (presumably the index array the original pairs were built from - confirm).
X_e = mmc.transform(X)
pairs = X_e[c].copy()

# Plot the same pairs again, now taken from the transformed data.
plot_points(pairs, y_pairs, 'pairs_with_metric')
import numpy as np
from metric_learn import ITML
from sklearn.datasets import load_iris

# Demo script (Python 2 syntax): learn an ITML metric on the iris data set and
# print the transformed data two ways.
iris_data = load_iris()
X = iris_data['data']
Y = iris_data['target']

print 'Y is ', Y.shape
print 'X.shape is ', X.shape
itml = ITML()

# 200 constraints are prepared from the labels before fitting on the features.
num_constraints = 200
C = ITML.prepare_constraints(Y, X.shape[0], num_constraints)
itml.fit(X, C, verbose=False)
x2 = itml.transform(X)

print 'x2 is ', x2
# Manual application of the transformer matrix (X times its transpose);
# presumably meant to be compared with x2.
l = itml.transformer()
print '\n\n\nafter transforming is ', np.dot(X, l.T)