def fit(self, x, y, testx, testy, verbose=False, autosave=True):
    """Fit one ``LMNN_GPU`` metric per part and print k-NN accuracies.

    ``x`` and ``testx`` are reshaped to ``(n_samples, self.M, -1)``.  For
    each of the ``self.M`` slices the data is passed through ``pca`` and a
    fresh ``LMNN_GPU`` is fitted on the reduced training slice; the value
    returned by ``knn`` is printed before and after fitting.  The fitted
    models and the PCA solvers are stored, in part order, in ``self.Ms``
    and ``self.PCAs``.  Finally both sets are run through
    ``self.transform`` and scored with an identity matrix as the metric.

    Args:
        x: training samples; must provide ``shape`` and ``reshape``.
        y: training labels.
        testx: test samples, same layout as ``x``.
        testy: test labels.
        verbose: forwarded to ``pca`` only; ``LMNN_GPU.fit`` is always
            called with ``verbose=False``.
        autosave: when true, pickle ``self`` to ``temp.MLMNN`` at the end.

    Returns:
        ``self``.
    """
    self.x = x = x.reshape(x.shape[0], self.M, -1)
    self.y = y
    self.testx = testx = testx.reshape(testx.shape[0], self.M, -1)
    # Bug fix: this attribute used to be assigned ``y`` (training labels).
    self.testy = testy
    self.Ms = []
    self.PCAs = []

    for i in xrange(self.M):
        part_train, part_test, _pca = pca(
            self.x[:, i, :], self.testx[:, i, :], self.Ppca,
            verbose=verbose, get_pca=True)
        lmnn = LMNN_GPU(K=self.K, mu=self.mu, maxS=self.maxS,
                        dim=part_train.shape[1])
        print('[%d]pre-train-acc: %.3f%% pre-test-acc: %.3f%% %s' % (
            i,
            knn(part_train, part_train, y, y, lmnn._M, self.K),
            knn(part_train, part_test, y, testy, lmnn._M, self.K),
            ' '*30))
        lmnn.fit(part_train, y, lr=self.lr, max_iter=self.max_iter,
                 reset=self.reset, verbose=False)
        print('[%d]post-train-acc: %.3f%% post-test-acc: %.3f%% %s' % (
            i,
            knn(part_train, part_train, y, y, lmnn._M, self.K),
            knn(part_train, part_test, y, testy, lmnn._M, self.K),
            ' '*30))
        self.Ms.append(lmnn)
        self.PCAs.append(_pca)

    trainx = self.transform(self.x)
    testx = self.transform(self.testx)
    dim = trainx.shape[1]
    print('train-acc: %.3f%% test-acc: %.3f%% %s' % (
        knn(trainx, trainx, y, y, np.eye(dim), self.K),
        knn(trainx, testx, y, testy, np.eye(dim), self.K),
        ' '*30))

    if autosave:
        print('Auto saving ...')
        # Binary mode plus a context manager: the handle is closed even if
        # pickling fails, and the pickle bytes are not newline-translated.
        with open('temp.MLMNN', 'wb') as fh:
            cPickle.dump(self, fh)
    return self
def fittest(self, trainx, testx, trainy, testy, stage):
    """Print k-NN accuracy and confusion matrices for one ``stage``.

    Both sets are mapped through ``self.transform(data, stage)``.  ``knn``
    is then called twice with the transformed training set as its first
    argument: once against itself and once against the transformed test
    set, each time with ``None`` as the metric and ``cfmatrix=True``.
    The results are only printed; nothing is returned.

    Args:
        trainx: training samples.
        testx: test samples.
        trainy: training labels.
        testy: test labels.
        stage: forwarded unchanged as the second argument of
            ``self.transform``.
    """
    train_feat = self.transform(trainx, stage)
    test_feat = self.transform(testx, stage)

    train_acc, train_cfm = knn(
        train_feat, train_feat, trainy, trainy, None, self.K,
        cfmatrix=True)
    test_acc, test_cfm = knn(
        train_feat, test_feat, trainy, testy, None, self.K,
        cfmatrix=True)

    padding = ' '*30
    print('shape: {}'.format(train_feat.shape))
    print('train-acc: %.3f%% test-acc: %.3f%% %s' % (
        train_acc, test_acc, padding))
    print('train confusion matrix:\n {}\ntest confusion matrix:\n {}'.format(
        train_cfm, test_cfm))
def fittest(self, trainx, testx, trainy, testy, G=-1, alpha=None):
    """Score the transformed data with ``knn`` and return both accuracies.

    Both sets are mapped through ``self.transform(data, G, alpha=alpha)``
    and evaluated with ``knn`` (metric ``None``, ``cfmatrix=True``), using
    the transformed training set as the first argument in both calls.
    The report is printed only when ``self.verbose`` is truthy.

    Args:
        trainx: training samples.
        testx: test samples.
        trainy: training labels.
        testy: test labels.
        G: forwarded as the second positional argument of
            ``self.transform``.
        alpha: forwarded to ``self.transform`` as ``alpha``.

    Returns:
        ``(train_acc, test_acc)`` as produced by ``knn``.
    """
    train_feat = self.transform(trainx, G, alpha=alpha)
    test_feat = self.transform(testx, G, alpha=alpha)

    train_acc, train_cfm = knn(
        train_feat, train_feat, trainy, trainy, None, self.K,
        cfmatrix=True)
    test_acc, test_cfm = knn(
        train_feat, test_feat, trainy, testy, None, self.K,
        cfmatrix=True)

    # NOTE(review): the source indentation was lost; every print,
    # including the test confusion matrix, is taken to be guarded by
    # ``self.verbose`` -- confirm against version control.
    if self.verbose:
        print('shape: {}'.format(train_feat.shape))
        print('train-acc: %.3f%% %s' % (train_acc, ' '*30))
        print('train confusion matrix:\n {}'.format(train_cfm))
        print('test-acc: %.3f%%' % (test_acc))
        print('test confusion matrix:\n {}'.format(test_cfm))

    return train_acc, test_acc
def fittest(self, trainx, testx, trainy, testy, test_range=None, **kwargs):
    """Evaluate the transform with ``knn`` for every entry of ``test_range``.

    For each ``ranges`` value the raw inputs are mapped through
    ``self.transform(data, ranges, **kwargs)`` and scored with ``knn``
    (metric ``None``, ``cfmatrix=True``); shapes, accuracies and confusion
    matrices are printed.

    Fixes relative to the previous version:
      * the transformed data no longer overwrites ``trainx`` / ``testx``,
        so the second and later ranges are applied to the raw inputs
        instead of to already-transformed data;
      * ``None`` checks use ``is`` -- ``array != None`` is an elementwise
        comparison on numpy arrays and is not a usable ``if`` condition;
      * a missing test set (``testx is None``) skips the test evaluation
        instead of failing on an unbound ``test_acc``.

    Args:
        trainx: training samples.
        testx: test samples, or ``None`` to evaluate on training data only.
        trainy: training labels.
        testy: test labels (unused when ``testx`` is ``None``).
        test_range: iterable of values passed one at a time as the second
            argument of ``self.transform``; defaults to ``[None]``.
        **kwargs: forwarded to ``self.transform``.

    Returns:
        ``(train_acc, test_acc)`` for the last evaluated range.
        ``test_acc`` is ``None`` when no test set was given; both are
        ``None`` when ``test_range`` is empty.
    """
    if test_range is None:
        test_range = [None]

    has_test = testx is not None
    train_acc = test_acc = None

    # NOTE(review): the source indentation was lost; the return is taken
    # to follow the loop (all ranges are evaluated) -- confirm.
    for ranges in test_range:
        cur_train = self.transform(trainx, ranges, **kwargs)
        if has_test:
            cur_test = self.transform(testx, ranges, **kwargs)

        train_acc, train_cfm = knn(
            cur_train, cur_train, trainy, trainy, None, self.K,
            cfmatrix=True)
        if has_test:
            test_acc, test_cfm = knn(
                cur_train, cur_test, trainy, testy, None, self.K,
                cfmatrix=True)

        print('shape: {}'.format(cur_train.shape))
        print('train-acc: %.3f%% %s' % (train_acc, ' '*30))
        print('train confusion matrix:\n {}'.format(train_cfm))
        if has_test:
            print('test-acc: %.3f%%' % (test_acc))
            print('test confusion matrix:\n {}'.format(test_cfm))

    return train_acc, test_acc
def fittest(self, trainx, testx, y, testy, M=-1):
    """Score the transformed data with ``knn``; the test set is optional.

    The training set (and the test set, when given) is mapped through
    ``self.transform(data, M)`` and evaluated with ``knn`` (metric
    ``None``, ``cfmatrix=True``).  The report is printed only when
    ``self.verbose`` is truthy.

    Fixes relative to the previous version:
      * without a test set the method returned ``test_acc``, which was
        never assigned in that case; it now returns ``train_acc``;
      * ``None`` checks use ``is not`` -- ``array != None`` is an
        elementwise comparison on numpy arrays.

    Args:
        trainx: training samples.
        testx: test samples, or ``None`` to skip the test evaluation.
        y: training labels.
        testy: test labels (unused when ``testx`` is ``None``).
        M: forwarded as the second argument of ``self.transform``.

    Returns:
        ``(train_acc, test_acc)`` when ``testx`` is given, otherwise just
        ``train_acc``.
    """
    has_test = testx is not None

    train_feat = self.transform(trainx, M)
    if has_test:
        test_feat = self.transform(testx, M)

    train_acc, train_cfm = knn(
        train_feat, train_feat, y, y, None, self.K, cfmatrix=True)
    if has_test:
        test_acc, test_cfm = knn(
            train_feat, test_feat, y, testy, None, self.K, cfmatrix=True)

    if self.verbose:
        print('shape: {}'.format(train_feat.shape))
        print('train-acc: %.3f%% %s' % (train_acc, ' '*30))
        print('train confusion matrix:\n {}'.format(train_cfm))
        # Both test prints need ``test_cfm`` / ``test_acc``, so they are
        # kept together under the test-set guard.
        if has_test:
            print('test-acc: %.3f%%' % (test_acc))
            print('test confusion matrix:\n {}'.format(test_cfm))

    if has_test:
        return train_acc, test_acc
    return train_acc
def fit(self, x, y, testx, testy, verbose=False, autosave=True):
    """Fit per-part LMNN metrics, then an LMNN on the combined features.

    Stage 1: ``x`` and ``testx`` are reshaped to
    ``(n_samples, self.M, -1)``; each of the ``self.M`` slices goes through
    ``pca`` and gets its own ``LMNN_GPU``.  Models and PCA solvers are
    stored in ``self.Ms`` / ``self.PCAs``.

    Stage 2: both sets are run through ``self.transform``, non-finite
    entries are zeroed, ``pca`` is applied again and a single
    ``LMNN_GPU`` is fitted on the result in 10 rounds of 50 iterations.
    The value returned by ``knn`` is printed before and after every fit.

    Args:
        x: training samples; must provide ``shape`` and ``reshape``.
        y: training labels.
        testx: test samples, same layout as ``x``.
        testy: test labels.
        verbose: forwarded to the per-part ``pca`` call only.
        autosave: when true, pickle ``self`` to ``temp.MLMNN`` at the end.

    Returns:
        ``self``.
    """
    self.x = x = x.reshape(x.shape[0], self.M, -1)
    self.y = y
    self.testx = testx = testx.reshape(testx.shape[0], self.M, -1)
    # Bug fix: this attribute used to be assigned ``y`` (training labels).
    self.testy = testy
    self.Ms = []
    self.PCAs = []

    # Stage 1: one metric per part.
    for i in xrange(self.M):
        part_train, part_test, _pca = pca(
            self.x[:, i, :], self.testx[:, i, :], self.Ppca,
            verbose=verbose, get_pca=True)
        lmnn = LMNN_GPU(K=self.K, mu=self.mu, maxS=self.maxS,
                        dim=part_train.shape[1])
        print('[%d]pre-train-acc: %.3f%% pre-test-acc: %.3f%% %s' % (
            i,
            knn(part_train, part_train, y, y, lmnn._M, self.K),
            knn(part_train, part_test, y, testy, lmnn._M, self.K),
            ' '*30))
        lmnn.fit(part_train, y, lr=self.lr, max_iter=self.max_iter,
                 reset=self.reset, verbose=False)
        print('[%d]post-train-acc: %.3f%% post-test-acc: %.3f%% %s' % (
            i,
            knn(part_train, part_train, y, y, lmnn._M, self.K),
            knn(part_train, part_test, y, testy, lmnn._M, self.K),
            ' '*30))
        self.Ms.append(lmnn)
        self.PCAs.append(_pca)

    trainx = self.transform(self.x)
    testx = self.transform(self.testx)
    # Replace NaN and +/-inf entries with 0 before the second PCA.
    trainx[~np.isfinite(trainx)] = 0
    testx[~np.isfinite(testx)] = 0
    print('Final train shape: {}'.format(trainx.shape))
    print('Final test shape: {}'.format(testx.shape))
    trainx, testx = pca(trainx, testx, self.Ppca, verbose=True)
    print('Final train shape: {}'.format(trainx.shape))
    print('Final test shape: {}'.format(testx.shape))

    # Stage 2: integrated metric.
    # NOTE(review): neither this model nor the second PCA solver is stored
    # on ``self``, so they are not part of the autosaved object -- confirm
    # that is intended.
    lmnn = LMNN_GPU(K=self.K, mu=self.mu, maxS=10000, dim=trainx.shape[1])
    for i in xrange(10):
        print('[%d]pre-train-acc: %.3f%% pre-test-acc: %.3f%% %s' % (
            i,
            knn(trainx, trainx, y, y, lmnn._M, self.K),
            knn(trainx, testx, y, testy, lmnn._M, self.K),
            ' '*30))
        lmnn.fit(trainx, y, lr=2e-5, max_iter=50, reset=50, verbose=False)
        print('[%d]post-train-acc: %.3f%% post-test-acc: %.3f%% %s' % (
            i,
            knn(trainx, trainx, y, y, lmnn._M, self.K),
            knn(trainx, testx, y, testy, lmnn._M, self.K),
            ' '*30))

    if autosave:
        print('Auto saving ...')
        # Binary mode plus a context manager: the handle is closed even if
        # pickling fails, and the pickle bytes are not newline-translated.
        with open('temp.MLMNN', 'wb') as fh:
            cPickle.dump(self, fh)
    return self
def fit(self, x, y, testx, testy, maxS=100, lr=1e-7, max_iter=100,
        reset=20, Part=None, verbose=False, autosave=True):
    """Train the local/global metrics with the compiled theano step.

    Pipeline, in order:
      1. optionally keep only the first ``Part`` of the ``self.M`` parts
         (this also overwrites ``self.M``);
      2. apply ``self.kernelf`` and ``normalize`` when configured;
      3. optionally reduce every part with a shared ``PCA`` (stored in
         ``self.pca``) and reshape the data to ``(n_samples, M, dim)``;
      4. compile ``self.train_local_model`` and run 40 rounds of up to
         ``max_iter`` updates, projecting every ``lM`` / ``gM`` matrix
         with ``self._numpy_project_sd`` after each update;
      5. after every round, print k-NN accuracy and confusion matrices
         and, if ``autosave`` is set, call ``self.save('temp.MLMNN')``.

    Fixes relative to the previous version:
      * after PCA the data was reshaped with ``x.shape[0]`` as the sample
        count, but at that point ``x`` has ``n_samples * M`` rows, so the
        reshape could only succeed for ``M == 1``; the original sample
        count is used instead;
      * the bare ``except:`` around the ``lM`` / ``gM`` lookup is narrowed
        to ``AttributeError`` so unrelated errors are not swallowed.

    NOTE(review): this copy of the file had lost its indentation; the
    nesting below is reconstructed from data flow and should be checked
    against version control.

    Args:
        x: training samples, one row per sample.
        y: training labels.
        testx: test samples.
        testy: test labels.
        maxS: stored in ``self.maxS``.
        lr: initial learning rate; expanded to one rate per metric
            (``2 * self.M`` entries) and adapted after every update.
        max_iter: number of updates per round.
        reset: the active set and the error baseline are recomputed every
            ``reset`` updates.
        Part: number of leading parts to keep, or ``None`` for all.
        verbose: unused by this method.
        autosave: save the model after every round.

    Returns:
        ``self``.
    """
    if Part is not None and Part != self.M:
        x = x.reshape(x.shape[0], self.M, -1)[:, :Part, :].reshape(x.shape[0], -1)
        testx = testx.reshape(testx.shape[0], self.M, -1)[:, :Part, :].reshape(testx.shape[0], -1)
        self.M = Part

    # normalize in each part
    #x = normalize(x.reshape(x.shape[0]*self.M, -1), 'l1').reshape(x.shape[0], -1)
    #testx = normalize(testx.reshape(testx.shape[0]*self.M, -1), 'l1').reshape(testx.shape[0], -1)
    # normalize in each sample
    if self.kernelf:
        x = self.kernelf(x)
        testx = self.kernelf(testx)
    if self.normalize_axis:
        x = normalize(x, axis=self.normalize_axis)
        testx = normalize(testx, axis=self.normalize_axis)

    self.maxS = maxS
    orix = x
    if self.dim == -1:
        # no pca needed
        print(x.shape)
        self.dim = x.shape[1] // self.M
        self.didpca = False
    else:
        # do the pca and store the solver
        self.didpca = True
        x = x.reshape(x.shape[0] * self.M, -1)
        print('Splited x.shape: {}'.format(x.shape))
        pca = PCA(n_components=self.dim)
        x = pca.fit_transform(x)
        self.pca = pca
        print(x.shape)
    # Bug fix: use the sample count of the un-split data; after the PCA
    # branch ``x.shape[0]`` is n_samples * M.
    x = x.reshape(orix.shape[0], self.M, self.dim)
    print('Final x.shape: {}'.format(x.shape))

    stackx = x.copy()
    # NOTE(review): this accumulates along axis 0 (samples); if a running
    # sum over parts was intended it should index ``stackx[:, i]``.  Left
    # unchanged because the consumer of ``_stackx`` is not visible here.
    for i in xrange(1, self.M):
        stackx[i] += stackx[i-1]

    try:
        lM = self.lM
        gM = self.gM
    except AttributeError:
        # Metrics not created yet: build the model first.
        self.build()
        lM = self.lM
        gM = self.gM

    updates = []
    givens = {self._x: np.asarray(x, dtype='float32'),
              self._y: np.asarray(y, dtype='int32')}
    if self.localM:
        updates.extend(self.lupdate)
    if self.globalM:
        updates.extend(self.gupdate)
        givens.update({self._stackx: np.asarray(stackx, dtype='float32')})

    self.train_local_model = theano.function(
        [self._set, self._neighborpairs, self._lr],
        [T.stack(self.lpullerror),
         T.stack(self.gpullerror),
         T.stack(self.lpusherror),
         T.stack(self.gpusherror)],
        updates=updates,
        givens=givens)

    # get_debug = theano.function(
    #     [self._set],
    #     self.debug,
    #     givens = {self._stackx: np.asarray(stackx, dtype='float32')})

    cur_x = x
    lr = np.array([lr]*(self.M*2), dtype='float32')
    for _ in xrange(40):
        neighbors = self._get_neighbors(cur_x, y)
        t = 0
        while t < max_iter:
            if t % reset == 0:
                active_set = self._get_active_set(x, y, neighbors)
                # An infinite baseline makes the first comparison after a
                # reset count as an improvement.
                last_error = np.array([np.inf]*(self.M*2))

            print('Iter: {} lr: {} '.format(t, lr))
            res = np.array(self.train_local_model(active_set, neighbors, lr)).reshape(-1, self.M*2)
            error = res.T.sum(1)
            print('\tlpull:{}\tgpull:{}\n\tlpush:{}\tgpush:{}'.format(
                res[0, :self.M], res[0, self.M:],
                res[1, :self.M], res[1, self.M:]))
            #print np.array(get_debug(active_set))

            for i in xrange(self.M):
                _M = lM[i].get_value()
                lM[i].set_value(np.array(self._numpy_project_sd(_M), dtype='float32'))
            for i in xrange(self.M):
                _M = gM[i].get_value()
                gM[i].set_value(np.array(self._numpy_project_sd(_M), dtype='float32'))

            # Grow the rate by 1% where the error went down, halve it
            # where it did not.
            lr = lr*1.01*(last_error>error) + lr*0.5*(last_error<=error)
            last_error = error
            t += 1

        cur_x = self.transform(orix)
        cur_testx = self.transform(testx)
        train_acc, train_cfm = knn(cur_x, cur_x, y, y, None, self.K, cfmatrix=True)
        test_acc, test_cfm = knn(cur_x, cur_testx, y, testy, None, self.K, cfmatrix=True)
        print('shape: {}'.format(cur_x.shape))
        print('train-acc: %.3f%% test-acc: %.3f%% %s' % (
            train_acc, test_acc, ' '*30))
        print('train confusion matrix:\n {}\ntest confusion matrix:\n {}'.format(
            train_cfm, test_cfm))

        # __y = label_binarize(y, classes=range(8))
        # __testy = label_binarize(testy, classes=range(8))
        # svm = OneVsRestClassifier(SVC(kernel='rbf')).fit(__x, __y)
        # train_acc = float((svm.predict(__x) == __y).sum())/y.shape[0]
        # test_acc = float((svm.predict(__testx) == __testy).sum())/testy.shape[0]
        # print '[svm]train-acc: %.3f%% test-acc: %.3f%% %s'%(\
        #     train_acc, test_acc, ' '*30)

        # print 'visualizing round{} ...'.format(_)
        # title = 'round{}.train'.format(_)
        # visualize(__x, y, title+'_acc{}'.format(train_acc),
        #     './visualized/{}.png'.format(title))
        # title = 'round{}.test'.format(_)
        # visualize(__testx, testy, title+'_acc{}'.format(test_acc),
        #     './visualized/{}.png'.format(title))

        if autosave:
            print('Auto saving ...')
            self.save('temp.MLMNN')
    return self
print trainy.shape # def kernel(x, y): # K = np.zeros((x.shape[0], y.shape[0])) # for i in xrange(x.shape[0]): # for j in xrange(y.shape[0]): # K[i, j] = float((x[i]*y[i]).sum())/((x[i]+y[i])**2).sum() # return K # clf = svm.SVC(kernel='linear', C=100) # pred = clf.fit(trainx, trainy).predict(testx) # print pred # print testy # print '{}/{} = {}'.format( (pred == testy).sum(), len(testy), (pred == testy).sum()/float(len(testy)) ) # print '{}'.format( pred - testy ) print knn(trainx, testx, trainy, testy, M=None, K=2, cfmatrix=True, prediction=True) from sklearn.preprocessing import normalize lmnn = LMNN_duration( dim=200, mu=0.5, K=10, kernelFunction=None, # the explicit kernel normalizeFunction=None, #normalize, # how data will be normalized verbose=True) lmnn.fit(trainx, trainy, testx, testy, tripleCount=10000, learning_rate=1e-6, max_iter=5, reset_iter=10, epochs=10, verbose=True,
variance += ele if variance > 0.95: break components += 1 print 'n_components=%d'%components pca.set_params(n_components=components) pca.fit(trainx) trainx = pca.transform(trainx) testx = pca.transform(testx) K = 9 dim = components # knn(trainx, testx, trainy, testy, np.eye(dim), K) # knn(trainx, testx, trainy, testy, np.random.rand(dim, dim), K) # My LMNN lmnn = LMNN_GPU(K=K, mu=0.5, maxS=10000, dim=dim) print 'pre-train-acc: %.3f%% pre-test-acc: %.3f%% %s'%(knn(trainx, trainx, trainy, trainy, lmnn._M, K), knn(trainx, testx, trainy, testy, lmnn._M, K), ' '*30) lmnn.fit(trainx, trainy, lr=5e-6, max_iter=1000, reset=50, verbose=True) print 'pre-train-acc: %.3f%% pre-test-acc: %.3f%% %s'%(knn(trainx, trainx, trainy, trainy, lmnn._M, K), knn(trainx, testx, trainy, testy, lmnn._M, K), ' '*30) #knn(trainx, testx, trainy, testy, lmnn.M, K) # L = lmnn.L # knn(trainx.dot(L), testx.dot(L), trainy, testy, np.eye(L.shape[1]), K) # # metric learn # lmnn = _LMNN(k=K, learn_rate=1e-5, max_iter=200) # L = lmnn.fit(trainx, trainy, verbose=True).L # knn(trainx, testx, trainy, testy, L.dot(L), K)