Ejemplo n.º 1
0
    def fit(self, x, y, testx, testy,
        maxS=100, lr=1e-7, max_iter=100, reset=20,
        verbose=False, autosave=True, rounds=40):
        """Train the metric matrices and report kNN accuracy after every round.

        Each round recomputes the neighbor pairs from the current embedding,
        runs up to ``max_iter`` update steps of the compiled Theano training
        function, then embeds the train/test data with ``self.transform``,
        prints kNN accuracy and confusion matrices, and writes visualizations
        under ``./visualized/``.

        Args:
            x: training features; passed through ``normalize`` and, unless
                ``self.dim == -1``, through a freshly fitted PCA.
                Presumably shaped (n_samples, M * features) -- confirm
                against the caller.
            y: training labels (converted to int32 for the Theano graph).
            testx: test features, only used for the per-round evaluation.
            testy: test labels, only used for the per-round evaluation.
            maxS: stored on ``self.maxS``; not used further in this method.
            lr: initial learning rate, replicated into one float32 entry per
                error term (``2 * self.M`` entries) and adapted every step.
                The adapted rates carry over from one round to the next.
            max_iter: number of update steps per round.
            reset: the active set is recomputed (and the previous error
                forgotten) every ``reset`` steps. Must be non-zero.
            verbose: accepted for interface compatibility; currently unused.
            autosave: when true, save the model to ``temp.MLMNN`` at the end.
            rounds: number of outer rounds; defaults to 40, the value that
                used to be hard-coded.

        Returns:
            self, so calls can be chained.
        """
        # Whole-row normalization is what is in effect; an earlier variant
        # normalized each of the M segments separately.
        x = normalize(x)
        testx = normalize(testx)

        self.maxS = maxS
        # Normalized input before any PCA/reshape; this is what gets passed
        # to self.transform() at the end of every round.
        orix = x

        if self.dim == -1:
            # No PCA requested.
            # NOTE(review): dim becomes the full row width, so the reshape to
            # (-1, M, dim) below groups M input rows together unless M == 1
            # -- confirm this is intended.
            self.dim = x.shape[1]
        else:
            # Fit PCA on the (n * M, -1) view and keep the solver on self.
            x = x.reshape(x.shape[0] * self.M, -1)
            pca = PCA(n_components=self.dim)
            x = pca.fit_transform(x)
            self.pca = pca
        x = x.reshape(-1, self.M, self.dim)

        # NOTE(review): stackx[i] indexes the FIRST axis of the (n, M, dim)
        # array, so this accumulates across the first M rows rather than
        # across the M segments of each row. If a per-row running sum over
        # segments was intended it would be np.cumsum(x, axis=1). Behavior is
        # left unchanged here because transform()/build() are not visible.
        stackx = x.copy()
        for i in xrange(1, self.M):
            stackx[i] += stackx[i-1]

        try:
            lM = self.lM
            gM = self.gM
        except AttributeError:
            # Matrices are not available yet: build the model, then re-read.
            self.build()
            lM = self.lM
            gM = self.gM

        # Only include the update rules of the parts that are switched on.
        updates = []
        givens = {self._x: np.asarray(x, dtype='float32'),
                  self._y: np.asarray(y, dtype='int32')}
        if self.localM:
            updates.extend(self.lupdate)
        if self.globalM:
            updates.extend(self.gupdate)
            givens.update({self._stackx: np.asarray(stackx, dtype='float32')})

        self.train_local_model = theano.function(
            [self._set, self._neighborpairs, self._lr],
            [T.stack(self.lpullerror),
             T.stack(self.gpullerror),
             T.stack(self.lpusherror),
             T.stack(self.gpusherror)],
            updates=updates,
            givens=givens)

        # Embedding used to pick neighbors; starts as the raw (n, M, dim)
        # input and is replaced by the learned embedding after each round.
        train_emb = x
        lr = np.array([lr]*(self.M*2), dtype='float32')
        for round_idx in xrange(rounds):
            neighbors = self._get_neighbors(train_emb, y)
            t = 0
            while t < max_iter:
                if t % reset == 0:
                    active_set = self._get_active_set(x, y, neighbors)
                    # inf makes the first comparison below count as "improved".
                    last_error = np.array([np.inf]*(self.M*2))

                print('Iter: {} lr: {} '.format(t, lr))

                # Four stacked outputs of length M -> two rows of 2*M:
                # row 0 = [lpull, gpull], row 1 = [lpush, gpush].
                res = np.array(
                    self.train_local_model(active_set, neighbors, lr)
                ).reshape(-1, self.M*2)
                error = res.T.sum(1)
                print('\tlpull:{}\tgpull:{}\n\tlpush:{}\tgpush:{}'.format(
                    res[0, :self.M], res[0, self.M:],
                    res[1, :self.M], res[1, self.M:]))

                # Re-project every matrix after the gradient step
                # (presumably onto the PSD cone -- see _numpy_project_sd).
                for i in xrange(self.M):
                    _M = lM[i].get_value()
                    lM[i].set_value(
                        np.array(self._numpy_project_sd(_M), dtype='float32'))
                for i in xrange(self.M):
                    _M = gM[i].get_value()
                    gM[i].set_value(
                        np.array(self._numpy_project_sd(_M), dtype='float32'))

                # Per-term step size: grow by 1% where the error went down,
                # halve it otherwise.
                lr = lr*1.01*(last_error>error) + lr*0.5*(last_error<=error)

                last_error = error
                t += 1

            # Evaluate the current embedding with kNN on train and test data.
            train_emb = self.transform(orix)
            test_emb = self.transform(testx)
            train_acc, train_cfm = knn(
                train_emb, train_emb, y, y, None, self.K, cfmatrix=True)
            test_acc, test_cfm = knn(
                train_emb, test_emb, y, testy, None, self.K, cfmatrix=True)
            print('shape: {}'.format(train_emb.shape))
            print('train-acc: %.3f%% test-acc: %.3f%% %s'%(
                train_acc, test_acc, ' '*30))
            print('train confusion matrix:\n {}\ntest confusion matrix:\n {}'.format(
                train_cfm, test_cfm))

            print('visualizing round{} ...'.format(round_idx))
            title = 'round{}.train'.format(round_idx)
            visualize(train_emb, y, title+'_acc{}'.format(train_acc),
                './visualized/{}.png'.format(title))
            title = 'round{}.test'.format(round_idx)
            visualize(test_emb, testy, title+'_acc{}'.format(test_acc),
                './visualized/{}.png'.format(title))

        if autosave:
            print('Auto saving ...')
            self.save('temp.MLMNN')
        return self
Ejemplo n.º 2
0
import matplotlib
matplotlib.use('Agg')
from tSNE.draw import visualize
import cPickle

if __name__ == '__main__':
    # Script: draw the features before and after applying a saved MLMNN model.
    # K and Time select which pickled feature file is loaded.
    K = 100
    Time = 1.0
    M = 10  # not used below

    feature_path = './features/[K={}][T={}]BoWInGroup.pkl'.format(K, Time)
    # NOTE: unpickling can execute arbitrary code -- only load feature files
    # that were produced locally.
    # NOTE(review): the file is opened in text mode ('r') as before; pickles
    # written with a binary protocol need 'rb' -- confirm against the writer.
    # The context manager makes sure the handle is closed after loading.
    with open(feature_path, 'r') as feature_file:
        trainx, testx, trainy, testy = cPickle.load(feature_file)
    print(trainx.shape)
    print(trainy.shape)

    # Plots of the raw features.
    visualize(trainx, trainy, './tSNE/pre-Train.png')
    visualize(testx, testy, './tSNE/pre-Test.png')

    # Imported here (after matplotlib.use('Agg') at the top of the file).
    from MLMNN import MLMNN
    mlmnn = MLMNN.load('./temp.MLMNN')
    trainx = mlmnn.transform(trainx)
    testx = mlmnn.transform(testx)
    print(trainx.shape)

    # Plots of the features after the learned transform.
    visualize(trainx, trainy, './tSNE/post-Train.png')
    visualize(testx, testy, './tSNE/post-Test.png')