예제 #1
0
def cifar_demo():
    """Run the CIFAR demo: train the pipeline, extract features, fit an SVM.

    Steps, in order:
      1. Train a patch-based ``pipeline.ConvLayer`` on the CIFAR training set.
      2. Pickle the trained layer to ``FLAGS.output_dir/FLAGS.model_file``
         (root process only) and load it back.
      3. Extract 2-D feature matrices for the train and test sets and dump
         them with ``mpi.dump_matrix_multi``.
      4. Normalize both sets with the mean/std computed on the training
         features, train a one-vs-all L2 SVM, pickle it (root only), and
         log the training and testing accuracy.
    """
    mpi.mkdir(FLAGS.output_dir)
    logging.info('Loading cifar data...')
    cifar = visiondata.CifarDataset(FLAGS.root, is_training=True)
    cifar_test = visiondata.CifarDataset(FLAGS.root, is_training=False)
    conv = pipeline.ConvLayer([
        pipeline.PatchExtractor([6, 6], 1),  # extracts patches
        pipeline.MeanvarNormalizer({'reg': 10}),  # normalizes the patches
        pipeline.LinearEncoder(
            {}, trainer=pipeline.ZcaTrainer({'reg': 0.1})),  # does whitening
        pipeline.ThresholdEncoder(
            {'alpha': 0.25, 'twoside': True},
            trainer=pipeline.OMPTrainer(
                {'k': 800, 'max_iter': 100})),  # does encoding
        pipeline.SpatialPooler(
            {'grid': (2, 2), 'method': 'ave'}),  # average pool
    ])
    logging.info('Training the pipeline...')
    conv.train(cifar, 50000)
    logging.info('Dumping the pipeline...')
    model_path = os.path.join(FLAGS.output_dir, FLAGS.model_file)
    if mpi.is_root():
        # Pickle data is a byte stream, so the file must be opened in binary
        # mode; the with-statement closes it, no explicit close() is needed.
        with open(model_path, 'wb') as fid:
            pickle.dump(conv, fid)
    # NOTE(review): every process reads the file, but only the root writes it
    # and no synchronization is visible here -- confirm whether a barrier is
    # required before this read when running on more than one process.
    with open(model_path, 'rb') as fid:
        conv = pickle.load(fid)
    logging.info('Extracting features...')
    Xtrain = conv.process_dataset(cifar, as_2d=True)
    mpi.dump_matrix_multi(
        Xtrain, os.path.join(FLAGS.output_dir, FLAGS.feature_file + '_train'))
    # ``np.int`` was only an alias of the builtin ``int`` and has been removed
    # from NumPy; the builtin yields the same dtype.
    Ytrain = cifar.labels().astype(int)
    Xtest = conv.process_dataset(cifar_test, as_2d=True)
    mpi.dump_matrix_multi(
        Xtest, os.path.join(FLAGS.output_dir, FLAGS.feature_file + '_test'))
    Ytest = cifar_test.labels().astype(int)

    # normalization: the test set reuses the training statistics
    m, std = classifier.feature_meanstd(Xtrain)
    Xtrain -= m
    Xtrain /= std
    Xtest -= m
    Xtest /= std

    w, b = classifier.l2svm_onevsall(Xtrain, Ytrain, 0.01)
    if mpi.is_root():
        with open(os.path.join(FLAGS.output_dir, FLAGS.svm_file), 'wb') as fid:
            pickle.dump({'m': m, 'std': std, 'w': w, 'b': b}, fid)
    # Predicted class is the column with the largest score.
    accu = np.sum(Ytrain == (np.dot(Xtrain, w) + b).argmax(axis=1)) \
        / float(len(Ytrain))
    accu_test = np.sum(Ytest == (np.dot(Xtest, w) + b).argmax(axis=1)) \
        / float(len(Ytest))

    logging.info('Training accuracy: %f' % accu)
    logging.info('Testing accuracy: %f' % accu_test)
def cifar_demo():
    """Run the CIFAR demo with a 1600-atom dictionary and 4x4 max pooling.

    Trains a patch-based ``pipeline.ConvLayer`` on the CIFAR training set,
    pickles it (root process only), extracts and dumps 2-D train/test
    features, normalizes both with the mean/std of the training features,
    trains a one-vs-all L2 SVM, pickles the classifier (root only), and logs
    the training and testing accuracy.
    """
    mpi.mkdir(FLAGS.output_dir)
    logging.info("Loading cifar data...")
    cifar = visiondata.CifarDataset(FLAGS.root, is_training=True)
    cifar_test = visiondata.CifarDataset(FLAGS.root, is_training=False)

    # try: use sub images
    # cifar = datasets.SubImageSet(cifar, [28,28], 1)
    # cifar_test = datasets.CenterRegionSet(cifar_test, [28,28])

    conv = pipeline.ConvLayer(
        [
            pipeline.PatchExtractor([6, 6], 1),  # extracts patches
            pipeline.MeanvarNormalizer({"reg": 10}),  # normalizes the patches
            pipeline.LinearEncoder(
                {}, trainer=pipeline.ZcaTrainer({"reg": 0.1})
            ),  # does whitening
            pipeline.ThresholdEncoder(
                {"alpha": 0.25, "twoside": True},
                trainer=pipeline.OMPTrainer({"k": 1600, "max_iter": 100}),
            ),  # does encoding
            pipeline.SpatialPooler({"grid": (4, 4), "method": "max"}),  # max pool
        ]
    )
    logging.info("Training the pipeline...")
    conv.train(cifar, 400000)
    logging.info("Dumping the pipeline...")
    if mpi.is_root():
        # Pickle data is a byte stream, so the file must be opened in binary
        # mode; the with-statement closes it, no explicit close() is needed.
        with open(os.path.join(FLAGS.output_dir, FLAGS.model_file), "wb") as fid:
            pickle.dump(conv, fid)
    logging.info("Extracting features...")
    Xtrain = conv.process_dataset(cifar, as_2d=True)
    mpi.dump_matrix_multi(
        Xtrain, os.path.join(FLAGS.output_dir, FLAGS.feature_file + "_train"))
    # ``np.int`` was only an alias of the builtin ``int`` and has been removed
    # from NumPy; the builtin yields the same dtype.
    Ytrain = cifar.labels().astype(int)
    Xtest = conv.process_dataset(cifar_test, as_2d=True)
    mpi.dump_matrix_multi(
        Xtest, os.path.join(FLAGS.output_dir, FLAGS.feature_file + "_test"))
    Ytest = cifar_test.labels().astype(int)

    # normalization: the test set reuses the training statistics
    m, std = classifier.feature_meanstd(Xtrain)
    Xtrain -= m
    Xtrain /= std
    Xtest -= m
    Xtest /= std

    w, b = classifier.l2svm_onevsall(Xtrain, Ytrain, 0.005)
    if mpi.is_root():
        with open(os.path.join(FLAGS.output_dir, FLAGS.svm_file), "wb") as fid:
            pickle.dump({"m": m, "std": std, "w": w, "b": b}, fid)
    # Predicted class is the column with the largest score.
    accu = np.sum(Ytrain == (np.dot(Xtrain, w) + b).argmax(axis=1)) / float(len(Ytrain))
    accu_test = np.sum(Ytest == (np.dot(Xtest, w) + b).argmax(axis=1)) / float(len(Ytest))

    logging.info("Training accuracy: %f" % accu)
    logging.info("Testing accuracy: %f" % accu_test)
예제 #3
0
 def testFeatureMeanStd(self):
     """Check classifier.feature_meanstd against a gather-and-stack reference.

     Each process draws its own random 100x50 matrix.  The reference mean
     and std are computed on the root from the vertically stacked matrices
     of all processes, broadcast, and compared with the values returned by
     ``classifier.feature_meanstd``.
     """
     local_mat = np.random.rand(100, 50)
     mean_under_test, std_under_test = classifier.feature_meanstd(local_mat)

     # Reference values: collect everything on the root and use plain numpy.
     gathered = mpi.COMM.gather(local_mat)
     mean_ref = None
     std_ref = None
     if mpi.is_root():
         stacked = np.vstack(gathered)
         mean_ref = stacked.mean(0)
         std_ref = stacked.std(0)
     mean_ref = mpi.COMM.bcast(mean_ref)
     std_ref = mpi.COMM.bcast(std_ref)

     np.testing.assert_array_almost_equal(mean_ref, mean_under_test)
     np.testing.assert_array_almost_equal(std_ref, std_under_test)
def bird_demo():
    """Run the CUB bird demo: train the pipeline, extract features, fit an SVM.

    Loads the train and test splits of ``visiondata.CUBDataset`` (optionally
    wrapping the training split in ``datasets.MirrorSet``), trains a
    fixed-size ``pipeline.ConvLayer``, extracts 2-D features, normalizes them
    with the regularized mean/std of the training features, trains a
    one-vs-all L2 SVM, logs both accuracies, and pickles the normalization
    statistics, the classifier and the dictionary of the second-to-last
    layer to ``debug_features.pickle`` via ``mpi.root_pickle``.
    """
    logging.info('Loading data...')
    bird = visiondata.CUBDataset(FLAGS.root, is_training=True,
                                 crop=FLAGS.crop, version=FLAGS.version,
                                 prefetch=True, target_size=TARGET_SIZE)
    bird_test = visiondata.CUBDataset(FLAGS.root, is_training=False,
                                      crop=FLAGS.crop, version=FLAGS.version,
                                      prefetch=True, target_size=TARGET_SIZE)
    if FLAGS.mirrored:
        bird = datasets.MirrorSet(bird)
    conv = pipeline.ConvLayer(
        [
            pipeline.PatchExtractor([FLAGS.patch, FLAGS.patch], 1),  # extracts patches
            pipeline.MeanvarNormalizer({'reg': 10}),  # normalizes the patches
            pipeline.LinearEncoder(
                {}, trainer=pipeline.ZcaTrainer({'reg': 0.1})),
            pipeline.ThresholdEncoder(
                {'alpha': 0.25, 'twoside': True},
                trainer=pipeline.OMPTrainer({'k': FLAGS.k, 'max_iter': 100})),
            pipeline.SpatialPooler({'grid': 4, 'method': 'max'}),
        ],
        fixed_size=True)
    logging.info('Training the pipeline...')
    conv.train(bird, 400000, exhaustive=True)

    logging.info('Extracting features...')
    Xtrain = conv.process_dataset(bird, as_2d=True)
    # ``np.int`` was only an alias of the builtin ``int`` and has been removed
    # from NumPy; the builtin yields the same dtype.
    Ytrain = bird.labels().astype(int)
    Xtest = conv.process_dataset(bird_test, as_2d=True)
    Ytest = bird_test.labels().astype(int)

    # normalization (reg = 0.01 to match Adam Coates' pipeline); the test set
    # reuses the training statistics
    m, std = classifier.feature_meanstd(Xtrain, reg=0.01)
    Xtrain -= m
    Xtrain /= std
    Xtest -= m
    Xtest /= std

    w, b = classifier.l2svm_onevsall(Xtrain, Ytrain, 0.005,
                                     fminargs={'maxfun': 1000})
    accu_train = classifier.Evaluator.accuracy(Ytrain, np.dot(Xtrain, w) + b)
    accu_test = classifier.Evaluator.accuracy(Ytest, np.dot(Xtest, w) + b)
    logging.info('Training accuracy: %f' % accu_train)
    logging.info('Testing accuracy: %f' % accu_test)
    mpi.root_pickle((m, std, w, b, conv[-2].dictionary), 'debug_features.pickle')
def stl_demo():
    """Run the STL-10 demo with progressively reduced feature dimensions.

    The pipeline is trained on the unlabeled split.  Features of the
    unlabeled, train and test splits are reshaped so that all leading axes
    become rows, then normalized with the mean/std of the unlabeled
    features.  Starting at ``FLAGS.fromdim`` and halving while the dimension
    is at least 100, the train/test features are reduced -- by projecting on
    eigenvectors of the covariance when ``FLAGS.svd == 1``, otherwise by
    selecting columns with ``code_ap.code_af`` and projecting with the SVD of
    ``solve(Csel, Cpred)`` -- a one-vs-all L2 SVM is trained, and both
    accuracies are logged.
    """
    logging.info('Loading stl data...')
    stl = visiondata.STL10Dataset(FLAGS.root, 'unlabeled', target_size=32)
    stl_train = visiondata.STL10Dataset(FLAGS.root, 'train', target_size=32)
    stl_test = visiondata.STL10Dataset(FLAGS.root, 'test', target_size=32)

    conv = pipeline.ConvLayer([
        pipeline.PatchExtractor([6, 6], 1),  # extracts patches
        pipeline.MeanvarNormalizer({'reg': 10}),  # normalizes the patches
        pipeline.LinearEncoder(
            {}, trainer=pipeline.ZcaTrainer({'reg': 0.1})),
        pipeline.ThresholdEncoder(
            {'alpha': 0.25, 'twoside': False},
            trainer=pipeline.NormalizedKmeansTrainer(
                {'k': FLAGS.fromdim, 'max_iter': 100})),
        pipeline.SpatialPooler(
            {'grid': (FLAGS.grid, FLAGS.grid),
             'method': FLAGS.method}),  # pools with FLAGS.method
    ])
    logging.info('Training the pipeline...')
    conv.train(stl, 400000, exhaustive=True)

    logging.info('Extracting features...')
    X = conv.process_dataset(stl, as_2d=False)
    Xtrain = conv.process_dataset(stl_train, as_2d=False)
    # ``np.int`` was only an alias of the builtin ``int`` and has been removed
    # from NumPy; the builtin yields the same dtype.
    Ytrain = stl_train.labels().astype(int)
    Xtest = conv.process_dataset(stl_test, as_2d=False)
    Ytest = stl_test.labels().astype(int)

    # before we do feature computation, try to do dimensionality reduction:
    # collapse all leading axes into rows, keeping the last axis as columns
    X.resize(np.prod(X.shape[:-1]), X.shape[-1])
    Xtrain.resize(np.prod(Xtrain.shape[:-1]), Xtrain.shape[-1])
    Xtest.resize(np.prod(Xtest.shape[:-1]), Xtest.shape[-1])

    m, std = classifier.feature_meanstd(X, 0.01)
    X -= m
    X /= std
    Xtrain -= m
    Xtrain /= std
    Xtest -= m
    Xtest /= std

    covmat = mathutil.mpi_cov(X)

    current_dim = FLAGS.fromdim
    if FLAGS.svd == 1:
        # Only the eigenvectors are used below.
        _, eigvec = np.linalg.eigh(covmat)
    while current_dim >= 100:
        if current_dim < FLAGS.fromdim:
            if FLAGS.svd == 1:
                # directly do dimensionality reduction
                U = eigvec[:, -current_dim:]
                Xtrain_red = np.dot(Xtrain, U)
                Xtest_red = np.dot(Xtest, U)
            else:
                # do subsampling
                temp = code_ap.code_af(X, current_dim, tol=current_dim * 0.01)
                logging.info("selected %d dims" % len(temp[0]))
                sel = temp[0]
                sel = mpi.COMM.bcast(sel)
                Cpred = covmat[sel]
                Csel = Cpred[:, sel]
                W = np.linalg.solve(Csel, Cpred)
                # perform svd
                U, D, _ = np.linalg.svd(W, full_matrices=0)
                U *= D
                Xtrain_red = np.dot(Xtrain[:, sel], U)
                Xtest_red = np.dot(Xtest[:, sel], U)
        else:
            # First pass: use the full-dimensional features.
            Xtrain_red = Xtrain.copy()
            Xtest_red = Xtest.copy()
        # Reshape to one row per label.  Floor division keeps the column
        # count an integer on Python 3 as well as Python 2.
        Xtrain_red.resize(Ytrain.shape[0], Xtrain_red.size // Ytrain.shape[0])
        Xtest_red.resize(Ytest.shape[0], Xtest_red.size // Ytest.shape[0])

        w, b = classifier.l2svm_onevsall(Xtrain_red, Ytrain, 0.005,
                                         fminargs={'disp': 0, 'maxfun': 1000})
        accu_train = classifier.Evaluator.accuracy(Ytrain, np.dot(Xtrain_red, w) + b)
        accu_test = classifier.Evaluator.accuracy(Ytest, np.dot(Xtest_red, w) + b)
        logging.info('%d - %d, Training accuracy: %f' % (FLAGS.fromdim, current_dim, accu_train))
        logging.info('%d - %d, Testing accuracy: %f' % (FLAGS.fromdim, current_dim, accu_test))
        # Integer halving: a float here would break the slice above.
        current_dim //= 2
예제 #6
0
                #trainer = pipeline.OMPNTrainer(
                #      {'k': 3200, 'num_active': 10, 'max_iter':100})),
        pipeline.SpatialPooler({'grid': (2,2), 'method': 'max'}) # average pool
        ])
logging.info('Training the pipeline...')
conv.train(cifar, 400000, exhaustive = True)
"""
# Load a pickled pipeline (``conv``) and a pickled pair whose second element
# is kept as ``ap_result``.
# NOTE(review): both files are opened in text mode and never explicitly
# closed -- confirm this is acceptable for the target Python version.
conv = pickle.load(open('cvpr_exemplar_centroids_conv.pickle'))
_, ap_result = pickle.load(open('cvpr_exemplar_centroids.pickle'))

logging.info('Extracting features...')
Xtrain = conv.process_dataset(cifar, as_2d = False)
# we simply use all the features to compute the covmat:
# collapse all leading axes into rows, keeping the last axis as columns
Xtrain.resize(np.prod(Xtrain.shape[:-1]), Xtrain.shape[-1])

# Normalize in place with the statistics returned by feature_meanstd.
m, std = classifier.feature_meanstd(Xtrain, 0.01)
Xtrain -= m
Xtrain /= std
covmat = mathutil.mpi_cov(Xtrain)

# do subsampling
# (the call below is disabled by the surrounding string literal; the
# ``ap_result`` loaded from the pickle above is used instead)
"""
ap_result = code_ap.code_af(Xtrain, todim)
"""
# The first element of ap_result is used as the selected indices and is
# broadcast through mpi.COMM.
sel = ap_result[0]
sel = mpi.COMM.bcast(sel)
Cpred = covmat[sel]
Csel = Cpred[:,sel]
# Crecon = Cpred^T * pinv(Csel) * Cpred, then symmetrized by averaging with
# its transpose.
Crecon = np.dot(Cpred.T, np.dot(np.linalg.pinv(Csel), Cpred))
Crecon = (Crecon + Crecon.T) / 2
# Eigenvalues of the full covariance matrix (not of Crecon).
eigval = np.linalg.eigvals(covmat)
예제 #7
0
    def Train(self,feat_list=None,type='logreg',gamma=0.0,domeanstd=True,special_bias=None,add_bias=True, weight=None, class_instance=None, method='sigmoid',factor=10.0,arch=[10],
              cv_feats=None, cv_special_bias=None,cv_class_instance=None):
        """Normalize the selected training features and fit a classifier.

        Args:
            feat_list: keys into ``self._Xtrain`` (and ``cv_feats``) naming
                the feature blocks to stack column-wise; defaults to
                ``self.features``.
            type: which classifier to train: 'linsvm', 'logreg',
                'logreg_atwv', 'nn_atwv' or 'nn_debug'.  Any other value
                trains nothing.
            gamma: stored as ``self._gamma`` and forwarded to the trainers
                (as ``weightdecay`` for 'nn_debug').
            domeanstd: when equal to ``False`` the computed mean/std are
                overwritten with 0/1, making the normalization a no-op.
            special_bias: optional extra columns appended to the normalized
                training features; also forwarded to the trainers.
            add_bias, weight, class_instance, method, factor,
            cv_class_instance: forwarded unchanged to the trainers that
                accept them.
            arch: stored as ``self._arch`` and forwarded for 'nn_atwv'.  The
                default list is shared between calls and must not be mutated.
            cv_feats: optional mapping of cross-validation feature blocks,
                stacked and normalized like the training features.
            cv_special_bias: extra columns appended to ``cv_feats`` when
                ``special_bias`` is given.

        Returns:
            ``(self.w, self.b)`` for 'linsvm' and 'logreg', the trained
            network for 'nn_debug', and ``None`` in every other case.
        """
        # Identity tests: ``== None`` / ``!= None`` on an array compares
        # element-wise and cannot be used as a condition.
        if feat_list is None:
            feat_list = self.features
        self.feat_list = feat_list
        self._gamma = gamma
        self._type = type
        self._special_bias = special_bias
        self._add_bias = add_bias
        # np.hstack needs a real sequence; passing a generator is rejected by
        # newer NumPy releases.
        Xtrain_feats = np.ascontiguousarray(np.hstack([self._Xtrain[feat] for feat in feat_list]))
        self.m, self.std = classifier.feature_meanstd(Xtrain_feats)
        # Kept as an equality test so non-bool values behave as before.
        if domeanstd == False:  # hacky, overwrite the things we computed
            self.m[:] = 0
            self.std[:] = 1
        Xtrain_feats -= self.m
        Xtrain_feats /= self.std
        if special_bias is not None:
            Xtrain_feats = np.ascontiguousarray(np.hstack((Xtrain_feats, special_bias)))
        # Cross-validation features get the same stacking and normalization.
        if cv_feats is not None:
            cv_feats = np.ascontiguousarray(np.hstack([cv_feats[feat] for feat in feat_list]))
            cv_feats -= self.m
            cv_feats /= self.std
            # Condition follows ``special_bias`` (as before) so that training
            # and CV matrices get the extra columns together.
            if special_bias is not None:
                cv_feats = np.ascontiguousarray(np.hstack((cv_feats, cv_special_bias)))
        # Classifier stage
        if type == 'linsvm':
            self.w, self.b = classifier.svm_onevsall(Xtrain_feats, self._Ytrain, self._gamma, weight=weight, special_bias=special_bias, add_bias=add_bias)
            return (self.w, self.b)
        elif type == 'logreg':
            self.w, self.b = l2logreg_onevsall(Xtrain_feats, self._Ytrain, self._gamma, weight=weight, special_bias=special_bias, add_bias=add_bias)
            return (self.w, self.b)
        elif type == 'logreg_atwv':
            self.w, self.b = Train_atwv(Xtrain_feats, class_instance=class_instance, weight=weight, special_bias=special_bias, add_bias=add_bias, method=method,
                                        factor=factor, gamma=self._gamma, cv_class_instance=cv_class_instance, cv_feats=cv_feats)
        elif type == 'nn_atwv':
            self._arch = arch
            self._weights_nn = Train_atwv_nn(Xtrain_feats, class_instance=class_instance, weight=weight, special_bias=special_bias, add_bias=add_bias,
                                             arch=self._arch, method=method, factor=factor, gamma=self._gamma, cv_class_instance=cv_class_instance, cv_feats=cv_feats)
        elif type == 'nn_debug':
            if mpi.COMM.Get_size() > 1:
                # Parenthesized single argument prints identically on
                # Python 2 and Python 3.
                print('Warning!!! Running NN training with MPI with more than one Node!')
                # FIXME: Collect X and Y at root to avoid this
            DS = ClassificationDataSet(Xtrain_feats.shape[1], 1, nb_classes=2)
            DS.setField('input', Xtrain_feats)
            DS.setField('target', self._Ytrain[:, np.newaxis])
            DS._convertToOneOfMany()
            self._nn = buildNetwork(DS.indim, 10, DS.outdim, outclass=SoftmaxLayer, fast=True)
            # Three stages with a fresh trainer each: (learning rate, epochs).
            for learning_rate, epochs in ((0.01, 8), (0.001, 8), (0.0001, 5)):
                self._nn_trainer = BackpropTrainer(self._nn, dataset=DS, momentum=0.1, verbose=True, weightdecay=gamma, learningrate=learning_rate, lrdecay=1.0)
                self._nn_trainer.trainOnDataset(DS, epochs=epochs)
            return self._nn
예제 #8
0
def cifar_demo():
    """Run the CIFAR demo with column subsampling before the SVM.

    The whitening trainer is ``pinker.SpatialPinkTrainer`` when
    ``FLAGS.trainer == "pink"`` and ``pipeline.ZcaTrainer`` otherwise.  After
    training the pipeline, train/test features are reshaped so that all
    leading axes become rows, normalized with the training mean/std, reduced
    by selecting columns with ``code_ap.code_af`` and projecting with the SVD
    of ``solve(Csel, Cpred)``, reshaped to one row per label, and classified
    with a one-vs-all L2 SVM whose accuracies are logged.
    """

    mpi.mkdir(FLAGS.output_dir)
    logging.info('Loading cifar data...')
    cifar = visiondata.CifarDataset(FLAGS.root, is_training=True)
    cifar_test = visiondata.CifarDataset(FLAGS.root, is_training=False)

    if FLAGS.trainer == "pink":
        trainer = pinker.SpatialPinkTrainer({
            'size': (FLAGS.patch, FLAGS.patch),
            'reg': 0.1
        })
    else:
        trainer = pipeline.ZcaTrainer({'reg': 0.1})

    conv = pipeline.ConvLayer([
        pipeline.PatchExtractor([FLAGS.patch, FLAGS.patch], 1),  # extracts patches
        pipeline.MeanvarNormalizer({'reg': 10}),  # normalizes the patches
        pipeline.LinearEncoder({}, trainer=trainer),
        pipeline.ThresholdEncoder(
            {'alpha': 0.0, 'twoside': False},
            trainer=pipeline.OMPTrainer(
                {'k': FLAGS.fromdim, 'max_iter': 100})),
        pipeline.SpatialPooler(
            {'grid': (FLAGS.grid, FLAGS.grid),
             'method': FLAGS.method}),  # pools with FLAGS.method
    ])
    logging.info('Training the pipeline...')
    conv.train(cifar, 400000, exhaustive=True)

    logging.info('Extracting features...')
    Xtrain = conv.process_dataset(cifar, as_2d=False)
    # ``np.int`` was only an alias of the builtin ``int`` and has been removed
    # from NumPy; the builtin yields the same dtype.
    Ytrain = cifar.labels().astype(int)
    Xtest = conv.process_dataset(cifar_test, as_2d=False)
    Ytest = cifar_test.labels().astype(int)

    # before we do feature computation, try to do dimensionality reduction:
    # collapse all leading axes into rows, keeping the last axis as columns
    Xtrain.resize(np.prod(Xtrain.shape[:-1]), Xtrain.shape[-1])
    Xtest.resize(np.prod(Xtest.shape[:-1]), Xtest.shape[-1])

    m, std = classifier.feature_meanstd(Xtrain, 0.01)
    Xtrain -= m
    Xtrain /= std
    Xtest -= m
    Xtest /= std

    covmat = mathutil.mpi_cov(Xtrain)
    if False:
        # Disabled alternative: directly do dimensionality reduction
        eigval, eigvec = np.linalg.eigh(covmat)
        U = eigvec[:, -FLAGS.todim:]
        Xtrain = np.dot(Xtrain, U)
        Xtest = np.dot(Xtest, U)
    else:
        # do subsampling
        import code_ap
        temp = code_ap.code_af(Xtrain, FLAGS.todim)
        sel = temp[0]
        sel = mpi.COMM.bcast(sel)
        Cpred = covmat[sel]
        Csel = Cpred[:, sel]
        W = np.linalg.solve(Csel, Cpred)
        # perform svd
        U, D, _ = np.linalg.svd(W, full_matrices=0)
        U *= D
        Xtrain = np.dot(Xtrain[:, sel], U)
        Xtest = np.dot(Xtest[:, sel], U)
    # Reshape to one row per label.  Floor division keeps the column count an
    # integer on Python 3 as well as Python 2.
    Xtrain.resize(Ytrain.shape[0], Xtrain.size // Ytrain.shape[0])
    Xtest.resize(Ytest.shape[0], Xtest.size // Ytest.shape[0])
    """
    # This part is used to do post-pooling over all features nystrom subsampling
    # normalization
    Xtrain.resize(Xtrain.shape[0], np.prod(Xtrain.shape[1:]))
    Xtest.resize(Xtest.shape[0], np.prod(Xtest.shape[1:]))
    m, std = classifier.feature_meanstd(Xtrain, reg = 0.01)
    # to match Adam Coates' pipeline
    Xtrain -= m
    Xtrain /= std
    Xtest -= m
    Xtest /= std
    
    covmat = mathutil.mpi_cov(Xtrain)
    eigval, eigvec = np.linalg.eigh(covmat)
    U = eigvec[:, -(200*FLAGS.grid*FLAGS.grid):]
    #U = eigvec[:,-400:] * np.sqrt(eigval[-400:])
    Xtrain = np.dot(Xtrain, U)
    Xtest = np.dot(Xtest, U)
    """

    w, b = classifier.l2svm_onevsall(Xtrain, Ytrain, 0.002,
                                     fminargs={'disp': 0, 'maxfun': 1000})
    accu_train = classifier.Evaluator.accuracy(Ytrain, np.dot(Xtrain, w) + b)
    accu_test = classifier.Evaluator.accuracy(Ytest, np.dot(Xtest, w) + b)
    logging.info('Training accuracy: %f' % accu_train)
    logging.info('Testing accuracy: %f' % accu_test)
def bird_demo():
    """Run the CUB bird demo: train the pipeline, extract features, fit an SVM.

    Loads the train and test splits of ``visiondata.CUBDataset`` (optionally
    wrapping the training split in ``datasets.MirrorSet``), trains a
    fixed-size ``pipeline.ConvLayer``, extracts 2-D features, normalizes them
    with the regularized mean/std of the training features, trains a
    one-vs-all L2 SVM, logs both accuracies, and pickles the normalization
    statistics, the classifier and the dictionary of the second-to-last
    layer to ``debug_features.pickle`` via ``mpi.root_pickle``.
    """
    logging.info('Loading data...')
    bird = visiondata.CUBDataset(FLAGS.root,
                                 is_training=True,
                                 crop=FLAGS.crop,
                                 version=FLAGS.version,
                                 prefetch=True,
                                 target_size=TARGET_SIZE)
    bird_test = visiondata.CUBDataset(FLAGS.root,
                                      is_training=False,
                                      crop=FLAGS.crop,
                                      version=FLAGS.version,
                                      prefetch=True,
                                      target_size=TARGET_SIZE)
    if FLAGS.mirrored:
        bird = datasets.MirrorSet(bird)
    conv = pipeline.ConvLayer(
        [
            pipeline.PatchExtractor([FLAGS.patch, FLAGS.patch], 1),  # extracts patches
            pipeline.MeanvarNormalizer({'reg': 10}),  # normalizes the patches
            pipeline.LinearEncoder(
                {}, trainer=pipeline.ZcaTrainer({'reg': 0.1})),
            pipeline.ThresholdEncoder(
                {'alpha': 0.25, 'twoside': True},
                trainer=pipeline.OMPTrainer({'k': FLAGS.k, 'max_iter': 100})),
            pipeline.SpatialPooler({'grid': 4, 'method': 'max'}),
        ],
        fixed_size=True)
    logging.info('Training the pipeline...')
    conv.train(bird, 400000, exhaustive=True)

    logging.info('Extracting features...')
    Xtrain = conv.process_dataset(bird, as_2d=True)
    # ``np.int`` was only an alias of the builtin ``int`` and has been removed
    # from NumPy; the builtin yields the same dtype.
    Ytrain = bird.labels().astype(int)
    Xtest = conv.process_dataset(bird_test, as_2d=True)
    Ytest = bird_test.labels().astype(int)

    # normalization (reg = 0.01 to match Adam Coates' pipeline); the test set
    # reuses the training statistics
    m, std = classifier.feature_meanstd(Xtrain, reg=0.01)
    Xtrain -= m
    Xtrain /= std
    Xtest -= m
    Xtest /= std

    w, b = classifier.l2svm_onevsall(Xtrain,
                                     Ytrain,
                                     0.005,
                                     fminargs={'maxfun': 1000})
    accu_train = classifier.Evaluator.accuracy(Ytrain, np.dot(Xtrain, w) + b)
    accu_test = classifier.Evaluator.accuracy(Ytest, np.dot(Xtest, w) + b)
    logging.info('Training accuracy: %f' % accu_train)
    logging.info('Testing accuracy: %f' % accu_test)
    mpi.root_pickle((m, std, w, b, conv[-2].dictionary),
                    'debug_features.pickle')
def cifar_demo():
    """Run the CIFAR demo with progressively reduced feature dimensions.

    After training a k-means based pipeline, train/test features are
    reshaped so that all leading axes become rows and normalized with the
    training mean/std.  Starting at ``FLAGS.fromdim`` and halving while the
    dimension is at least 100, the features are reduced -- by projecting on
    eigenvectors of the covariance when ``FLAGS.svd == 1``, otherwise by
    keeping the columns selected by ``code_ap.code_af`` -- a one-vs-all L2
    SVM is trained, and both accuracies are logged.
    """

    mpi.mkdir(FLAGS.output_dir)
    logging.info('Loading cifar data...')
    cifar = visiondata.CifarDataset(FLAGS.root, is_training=True)
    cifar_test = visiondata.CifarDataset(FLAGS.root, is_training=False)

    conv = pipeline.ConvLayer([
        pipeline.PatchExtractor([6, 6], 1),  # extracts patches
        pipeline.MeanvarNormalizer({'reg': 10}),  # normalizes the patches
        pipeline.LinearEncoder(
            {}, trainer=pipeline.ZcaTrainer({'reg': 0.1})),
        pipeline.ThresholdEncoder(
            {'alpha': 0.25, 'twoside': False},
            trainer=pipeline.NormalizedKmeansTrainer(
                {'k': FLAGS.fromdim, 'max_iter': 100})),
        pipeline.SpatialPooler(
            {'grid': (FLAGS.grid, FLAGS.grid),
             'method': FLAGS.method}),  # pools with FLAGS.method
    ])
    logging.info('Training the pipeline...')
    conv.train(cifar, 400000, exhaustive=True)

    logging.info('Extracting features...')
    Xtrain = conv.process_dataset(cifar, as_2d=False)
    # ``np.int`` was only an alias of the builtin ``int`` and has been removed
    # from NumPy; the builtin yields the same dtype.
    Ytrain = cifar.labels().astype(int)
    Xtest = conv.process_dataset(cifar_test, as_2d=False)
    Ytest = cifar_test.labels().astype(int)

    # before we do feature computation, try to do dimensionality reduction:
    # collapse all leading axes into rows, keeping the last axis as columns
    Xtrain.resize(np.prod(Xtrain.shape[:-1]), Xtrain.shape[-1])
    Xtest.resize(np.prod(Xtest.shape[:-1]), Xtest.shape[-1])

    m, std = classifier.feature_meanstd(Xtrain, 0.01)
    Xtrain -= m
    Xtrain /= std
    Xtest -= m
    Xtest /= std

    covmat = mathutil.mpi_cov(Xtrain)

    current_dim = FLAGS.fromdim
    if FLAGS.svd == 1:
        # Only the eigenvectors are used below.
        _, eigvec = np.linalg.eigh(covmat)
    while current_dim >= 100:
        if current_dim < FLAGS.fromdim:
            if FLAGS.svd == 1:
                # directly do dimensionality reduction
                U = eigvec[:, -current_dim:]
                Xtrain_red = np.dot(Xtrain, U)
                Xtest_red = np.dot(Xtest, U)
            else:
                # do subsampling
                temp = code_ap.code_af(Xtrain, current_dim)
                logging.info("selected %d dims" % len(temp[0]))
                sel = temp[0]
                Xtrain_red = np.ascontiguousarray(Xtrain[:, sel])
                Xtest_red = np.ascontiguousarray(Xtest[:, sel])
        else:
            # First pass: use the full-dimensional features.
            Xtrain_red = Xtrain.copy()
            Xtest_red = Xtest.copy()
        # Reshape to one row per label.  Floor division keeps the column
        # count an integer on Python 3 as well as Python 2.
        Xtrain_red.resize(Ytrain.shape[0], Xtrain_red.size // Ytrain.shape[0])
        Xtest_red.resize(Ytest.shape[0], Xtest_red.size // Ytest.shape[0])

        w, b = classifier.l2svm_onevsall(Xtrain_red, Ytrain, 0.005,
                                         fminargs={'disp': 0, 'maxfun': 1000})
        accu_train = classifier.Evaluator.accuracy(Ytrain, np.dot(Xtrain_red, w) + b)
        accu_test = classifier.Evaluator.accuracy(Ytest, np.dot(Xtest_red, w) + b)
        logging.info('%d - %d, Training accuracy: %f' % (FLAGS.fromdim, current_dim, accu_train))
        logging.info('%d - %d, Testing accuracy: %f' % (FLAGS.fromdim, current_dim, accu_test))
        # Integer halving: a float here would break the slice above.
        current_dim //= 2
예제 #11
0
def cifar_demo():
    """Run the CIFAR demo: train the pipeline, extract features, fit an SVM.

    Steps, in order:
      1. Train a patch-based ``pipeline.ConvLayer`` on the CIFAR training set.
      2. Pickle the trained layer to ``FLAGS.output_dir/FLAGS.model_file``
         (root process only) and load it back.
      3. Extract 2-D feature matrices for the train and test sets and dump
         them with ``mpi.dump_matrix_multi``.
      4. Normalize both sets with the mean/std computed on the training
         features, train a one-vs-all L2 SVM, pickle it (root only), and
         log the training and testing accuracy.
    """
    mpi.mkdir(FLAGS.output_dir)
    logging.info('Loading cifar data...')
    cifar = visiondata.CifarDataset(FLAGS.root, is_training=True)
    cifar_test = visiondata.CifarDataset(FLAGS.root, is_training=False)
    conv = pipeline.ConvLayer([
        pipeline.PatchExtractor([6, 6], 1),  # extracts patches
        pipeline.MeanvarNormalizer({'reg': 10}),  # normalizes the patches
        pipeline.LinearEncoder(
            {}, trainer=pipeline.ZcaTrainer({'reg': 0.1})),  # does whitening
        pipeline.ThresholdEncoder(
            {'alpha': 0.25, 'twoside': True},
            trainer=pipeline.OMPTrainer(
                {'k': 800, 'max_iter': 100})),  # does encoding
        pipeline.SpatialPooler(
            {'grid': (2, 2), 'method': 'ave'}),  # average pool
    ])
    logging.info('Training the pipeline...')
    conv.train(cifar, 50000)
    logging.info('Dumping the pipeline...')
    model_path = os.path.join(FLAGS.output_dir, FLAGS.model_file)
    if mpi.is_root():
        # Pickle data is a byte stream, so the file must be opened in binary
        # mode; the with-statement closes it, no explicit close() is needed.
        with open(model_path, 'wb') as fid:
            pickle.dump(conv, fid)
    # NOTE(review): every process reads the file, but only the root writes it
    # and no synchronization is visible here -- confirm whether a barrier is
    # required before this read when running on more than one process.
    with open(model_path, 'rb') as fid:
        conv = pickle.load(fid)
    logging.info('Extracting features...')
    Xtrain = conv.process_dataset(cifar, as_2d=True)
    mpi.dump_matrix_multi(
        Xtrain, os.path.join(FLAGS.output_dir, FLAGS.feature_file + '_train'))
    # ``np.int`` was only an alias of the builtin ``int`` and has been removed
    # from NumPy; the builtin yields the same dtype.
    Ytrain = cifar.labels().astype(int)
    Xtest = conv.process_dataset(cifar_test, as_2d=True)
    mpi.dump_matrix_multi(
        Xtest, os.path.join(FLAGS.output_dir, FLAGS.feature_file + '_test'))
    Ytest = cifar_test.labels().astype(int)

    # normalization: the test set reuses the training statistics
    m, std = classifier.feature_meanstd(Xtrain)
    Xtrain -= m
    Xtrain /= std
    Xtest -= m
    Xtest /= std

    w, b = classifier.l2svm_onevsall(Xtrain, Ytrain, 0.01)
    if mpi.is_root():
        with open(os.path.join(FLAGS.output_dir, FLAGS.svm_file), 'wb') as fid:
            pickle.dump({'m': m, 'std': std, 'w': w, 'b': b}, fid)
    # Predicted class is the column with the largest score.
    accu = np.sum(Ytrain == (np.dot(Xtrain, w) + b).argmax(axis=1)) \
        / float(len(Ytrain))
    accu_test = np.sum(Ytest == (np.dot(Xtest, w) + b).argmax(axis=1)) \
        / float(len(Ytest))

    logging.info('Training accuracy: %f' % accu)
    logging.info('Testing accuracy: %f' % accu_test)
예제 #12
0
def stl_demo():
    """Run the STL-10 demo with progressively reduced feature dimensions.

    The pipeline is trained on the unlabeled split.  Features of the
    unlabeled, train and test splits are reshaped so that all leading axes
    become rows, then normalized with the mean/std of the unlabeled
    features.  Starting at ``FLAGS.fromdim`` and halving while the dimension
    is at least 100, the train/test features are reduced -- by projecting on
    eigenvectors of the covariance when ``FLAGS.svd == 1``, otherwise by
    selecting columns with ``code_ap.code_af`` and projecting with the SVD of
    ``solve(Csel, Cpred)`` -- a one-vs-all L2 SVM is trained, and both
    accuracies are logged.
    """
    logging.info('Loading stl data...')
    stl = visiondata.STL10Dataset(FLAGS.root, 'unlabeled', target_size=32)
    stl_train = visiondata.STL10Dataset(FLAGS.root, 'train', target_size=32)
    stl_test = visiondata.STL10Dataset(FLAGS.root, 'test', target_size=32)

    conv = pipeline.ConvLayer([
        pipeline.PatchExtractor([6, 6], 1),  # extracts patches
        pipeline.MeanvarNormalizer({'reg': 10}),  # normalizes the patches
        pipeline.LinearEncoder({}, trainer=pipeline.ZcaTrainer({'reg': 0.1})),
        pipeline.ThresholdEncoder(
            {'alpha': 0.25, 'twoside': False},
            trainer=pipeline.NormalizedKmeansTrainer(
                {'k': FLAGS.fromdim, 'max_iter': 100})),
        pipeline.SpatialPooler(
            {'grid': (FLAGS.grid, FLAGS.grid),
             'method': FLAGS.method}),  # pools with FLAGS.method
    ])
    logging.info('Training the pipeline...')
    conv.train(stl, 400000, exhaustive=True)

    logging.info('Extracting features...')
    X = conv.process_dataset(stl, as_2d=False)
    Xtrain = conv.process_dataset(stl_train, as_2d=False)
    # ``np.int`` was only an alias of the builtin ``int`` and has been removed
    # from NumPy; the builtin yields the same dtype.
    Ytrain = stl_train.labels().astype(int)
    Xtest = conv.process_dataset(stl_test, as_2d=False)
    Ytest = stl_test.labels().astype(int)

    # before we do feature computation, try to do dimensionality reduction:
    # collapse all leading axes into rows, keeping the last axis as columns
    X.resize(np.prod(X.shape[:-1]), X.shape[-1])
    Xtrain.resize(np.prod(Xtrain.shape[:-1]), Xtrain.shape[-1])
    Xtest.resize(np.prod(Xtest.shape[:-1]), Xtest.shape[-1])

    m, std = classifier.feature_meanstd(X, 0.01)
    X -= m
    X /= std
    Xtrain -= m
    Xtrain /= std
    Xtest -= m
    Xtest /= std

    covmat = mathutil.mpi_cov(X)

    current_dim = FLAGS.fromdim
    if FLAGS.svd == 1:
        # Only the eigenvectors are used below.
        _, eigvec = np.linalg.eigh(covmat)
    while current_dim >= 100:
        if current_dim < FLAGS.fromdim:
            if FLAGS.svd == 1:
                # directly do dimensionality reduction
                U = eigvec[:, -current_dim:]
                Xtrain_red = np.dot(Xtrain, U)
                Xtest_red = np.dot(Xtest, U)
            else:
                # do subsampling
                temp = code_ap.code_af(X, current_dim, tol=current_dim * 0.01)
                logging.info("selected %d dims" % len(temp[0]))
                sel = temp[0]
                sel = mpi.COMM.bcast(sel)
                Cpred = covmat[sel]
                Csel = Cpred[:, sel]
                W = np.linalg.solve(Csel, Cpred)
                # perform svd
                U, D, _ = np.linalg.svd(W, full_matrices=0)
                U *= D
                Xtrain_red = np.dot(Xtrain[:, sel], U)
                Xtest_red = np.dot(Xtest[:, sel], U)
        else:
            # First pass: use the full-dimensional features.
            Xtrain_red = Xtrain.copy()
            Xtest_red = Xtest.copy()
        # Reshape to one row per label.  Floor division keeps the column
        # count an integer on Python 3 as well as Python 2.
        Xtrain_red.resize(Ytrain.shape[0], Xtrain_red.size // Ytrain.shape[0])
        Xtest_red.resize(Ytest.shape[0], Xtest_red.size // Ytest.shape[0])

        w, b = classifier.l2svm_onevsall(Xtrain_red,
                                         Ytrain,
                                         0.005,
                                         fminargs={'disp': 0, 'maxfun': 1000})
        accu_train = classifier.Evaluator.accuracy(Ytrain,
                                                   np.dot(Xtrain_red, w) + b)
        accu_test = classifier.Evaluator.accuracy(Ytest,
                                                  np.dot(Xtest_red, w) + b)
        logging.info('%d - %d, Training accuracy: %f' %
                     (FLAGS.fromdim, current_dim, accu_train))
        logging.info('%d - %d, Testing accuracy: %f' %
                     (FLAGS.fromdim, current_dim, accu_test))
        # Integer halving: a float here would break the slice above.
        current_dim //= 2
def cifar_demo():
    """Performs a demo classification on cifar.

    Steps, in order:
      1. Build and train a ConvLayer pipeline on the CIFAR training set.
      2. Extract features for the training and test sets and flatten all
         leading axes so that the last axis becomes the column axis.
      3. Normalize the columns with the training mean/std.
      4. Reduce the number of columns using the indices selected by
         code_ap.code_af (or, if the local toggle is flipped, by projecting
         on the leading eigenvectors of the covariance matrix).
      5. Reshape to one row per label, train a one-vs-all L2 SVM and log
         the training and testing accuracy.

    Reads FLAGS.output_dir, FLAGS.root, FLAGS.trainer, FLAGS.patch,
    FLAGS.fromdim, FLAGS.grid, FLAGS.method and FLAGS.todim.
    """

    mpi.mkdir(FLAGS.output_dir)
    logging.info('Loading cifar data...')
    cifar = visiondata.CifarDataset(FLAGS.root, is_training=True)
    cifar_test = visiondata.CifarDataset(FLAGS.root, is_training=False)

    # Trainer for the linear encoding stage, chosen by the --trainer flag.
    if FLAGS.trainer == "pink":
        trainer = pinker.SpatialPinkTrainer(
            {'size': (FLAGS.patch, FLAGS.patch), 'reg': 0.1})
    else:
        trainer = pipeline.ZcaTrainer({'reg': 0.1})

    conv = pipeline.ConvLayer([
        # extracts patches
        pipeline.PatchExtractor([FLAGS.patch, FLAGS.patch], 1),
        # normalizes the patches
        pipeline.MeanvarNormalizer({'reg': 10}),
        pipeline.LinearEncoder({}, trainer=trainer),
        pipeline.ThresholdEncoder(
            {'alpha': 0.0, 'twoside': False},
            trainer=pipeline.OMPTrainer(
                {'k': FLAGS.fromdim, 'max_iter': 100})),
        # pooling grid and method both come from flags
        pipeline.SpatialPooler(
            {'grid': (FLAGS.grid, FLAGS.grid), 'method': FLAGS.method}),
    ])
    logging.info('Training the pipeline...')
    conv.train(cifar, 400000, exhaustive=True)

    logging.info('Extracting features...')
    Xtrain = conv.process_dataset(cifar, as_2d=False)
    # Builtin int is what the removed np.int alias pointed to.
    Ytrain = cifar.labels().astype(int)
    Xtest = conv.process_dataset(cifar_test, as_2d=False)
    Ytest = cifar_test.labels().astype(int)

    # before we do feature computation, try to do dimensionality reduction:
    # collapse every leading axis so only the last axis remains as columns.
    Xtrain.resize(np.prod(Xtrain.shape[:-1]), Xtrain.shape[-1])
    Xtest.resize(np.prod(Xtest.shape[:-1]), Xtest.shape[-1])

    # Test data is normalized with the statistics of the training data.
    m, std = classifier.feature_meanstd(Xtrain, 0.01)
    Xtrain -= m
    Xtrain /= std
    Xtest -= m
    Xtest /= std

    covmat = mathutil.mpi_cov(Xtrain)
    # Experiment toggle (was a literal `if False:`): set to True to project
    # directly on the last FLAGS.todim eigenvectors returned by eigh.
    use_direct_projection = False
    if use_direct_projection:
        # directly do dimensionality reduction
        _, eigvec = np.linalg.eigh(covmat)
        U = eigvec[:, -FLAGS.todim:]
        Xtrain = np.dot(Xtrain, U)
        Xtest = np.dot(Xtest, U)
    else:
        # do subsampling
        import code_ap
        sel = code_ap.code_af(Xtrain, FLAGS.todim)[0]
        # Every MPI process continues with the broadcast selection.
        sel = mpi.COMM.bcast(sel)
        Cpred = covmat[sel]
        Csel = Cpred[:, sel]
        # Solve Csel * W = Cpred for W.
        W = np.linalg.solve(Csel, Cpred)
        # perform svd
        U, D, _ = np.linalg.svd(W, full_matrices=0)
        U *= D
        Xtrain = np.dot(Xtrain[:, sel], U)
        Xtest = np.dot(Xtest[:, sel], U)
    # Back to one row per label. Floor division keeps the shape argument an
    # integer even when "/" is true division.
    Xtrain.resize(Ytrain.shape[0], Xtrain.size // Ytrain.shape[0])
    Xtest.resize(Ytest.shape[0], Xtest.size // Ytest.shape[0])

    # A disabled alternative used to live here as a string literal: it
    # normalized the flattened features once more and projected them on the
    # last 200 * FLAGS.grid * FLAGS.grid eigenvectors of their covariance.

    w, b = classifier.l2svm_onevsall(Xtrain, Ytrain, 0.002,
                                     fminargs={'disp': 0, 'maxfun': 1000})
    accu_train = classifier.Evaluator.accuracy(Ytrain, np.dot(Xtrain, w) + b)
    accu_test = classifier.Evaluator.accuracy(Ytest, np.dot(Xtest, w) + b)
    logging.info('Training accuracy: %f' % accu_train)
    logging.info('Testing accuracy: %f' % accu_test)
def cifar_demo():
    """Performs a demo classification on cifar.

    Trains a ConvLayer pipeline on the CIFAR training set, extracts and
    normalizes features, then repeatedly trains a one-vs-all L2 SVM while
    halving the number of retained columns: FLAGS.fromdim, FLAGS.fromdim/2,
    ... for as long as the count is at least 100. The first pass uses all
    columns; later passes either project on the last `current_dim`
    eigenvectors of the covariance matrix (FLAGS.svd == 1) or keep the
    columns selected by code_ap.code_af. Training and testing accuracy are
    logged for every pass.

    Reads FLAGS.output_dir, FLAGS.root, FLAGS.fromdim, FLAGS.grid,
    FLAGS.method and FLAGS.svd.
    """

    mpi.mkdir(FLAGS.output_dir)
    logging.info('Loading cifar data...')
    cifar = visiondata.CifarDataset(FLAGS.root, is_training=True)
    cifar_test = visiondata.CifarDataset(FLAGS.root, is_training=False)

    conv = pipeline.ConvLayer([
        pipeline.PatchExtractor([6, 6], 1),  # extracts patches
        pipeline.MeanvarNormalizer({'reg': 10}),  # normalizes the patches
        pipeline.LinearEncoder({}, trainer=pipeline.ZcaTrainer({'reg': 0.1})),
        pipeline.ThresholdEncoder(
            {'alpha': 0.25, 'twoside': False},
            trainer=pipeline.NormalizedKmeansTrainer(
                {'k': FLAGS.fromdim, 'max_iter': 100})),
        # pooling grid and method both come from flags
        pipeline.SpatialPooler(
            {'grid': (FLAGS.grid, FLAGS.grid), 'method': FLAGS.method}),
    ])
    logging.info('Training the pipeline...')
    conv.train(cifar, 400000, exhaustive=True)

    logging.info('Extracting features...')
    Xtrain = conv.process_dataset(cifar, as_2d=False)
    # Builtin int is what the removed np.int alias pointed to.
    Ytrain = cifar.labels().astype(int)
    Xtest = conv.process_dataset(cifar_test, as_2d=False)
    Ytest = cifar_test.labels().astype(int)

    # before we do feature computation, try to do dimensionality reduction:
    # collapse every leading axis so only the last axis remains as columns.
    Xtrain.resize(np.prod(Xtrain.shape[:-1]), Xtrain.shape[-1])
    Xtest.resize(np.prod(Xtest.shape[:-1]), Xtest.shape[-1])

    # Test data is normalized with the statistics of the training data.
    m, std = classifier.feature_meanstd(Xtrain, 0.01)
    Xtrain -= m
    Xtrain /= std
    Xtest -= m
    Xtest /= std

    covmat = mathutil.mpi_cov(Xtrain)

    current_dim = FLAGS.fromdim
    if FLAGS.svd == 1:
        # Decompose once; every pass below slices the same eigenvectors.
        _, eigvec = np.linalg.eigh(covmat)
    while current_dim >= 100:
        if current_dim < FLAGS.fromdim:
            if FLAGS.svd == 1:
                # directly do dimensionality reduction
                U = eigvec[:, -current_dim:]
                Xtrain_red = np.dot(Xtrain, U)
                Xtest_red = np.dot(Xtest, U)
            else:
                # do subsampling
                # NOTE(review): other variants of this demo broadcast the
                # selection with mpi.COMM.bcast; this one does not. Confirm
                # code_af returns the same indices on every process.
                sel = code_ap.code_af(Xtrain, current_dim)[0]
                logging.info("selected %d dims" % len(sel))
                Xtrain_red = np.ascontiguousarray(Xtrain[:, sel])
                Xtest_red = np.ascontiguousarray(Xtest[:, sel])
        else:
            # First pass: keep every column, but work on copies so the
            # in-place resize below leaves Xtrain / Xtest untouched.
            Xtrain_red = Xtrain.copy()
            Xtest_red = Xtest.copy()

        # One row per label. Floor division keeps the shape argument an
        # integer even when "/" is true division.
        Xtrain_red.resize(Ytrain.shape[0],
                          Xtrain_red.size // Ytrain.shape[0])
        Xtest_red.resize(Ytest.shape[0], Xtest_red.size // Ytest.shape[0])

        w, b = classifier.l2svm_onevsall(Xtrain_red,
                                         Ytrain,
                                         0.005,
                                         fminargs={
                                             'disp': 0,
                                             'maxfun': 1000
                                         })
        accu_train = classifier.Evaluator.accuracy(Ytrain,
                                                   np.dot(Xtrain_red, w) + b)
        accu_test = classifier.Evaluator.accuracy(Ytest,
                                                  np.dot(Xtest_red, w) + b)
        logging.info('%d - %d, Training accuracy: %f' %
                     (FLAGS.fromdim, current_dim, accu_train))
        logging.info('%d - %d, Testing accuracy: %f' %
                     (FLAGS.fromdim, current_dim, accu_test))
        # Floor division: current_dim is used as a slice bound and must
        # stay an integer.
        current_dim //= 2
def cifar_demo():
    """Performs a demo classification on cifar.

    Trains a ConvLayer pipeline on the CIFAR training set, pickles it to
    FLAGS.output_dir/FLAGS.model_file (root process only), extracts 2-D
    features for the training and test sets, normalizes them with the
    training mean/std, trains a one-vs-all L2 SVM, pickles the
    normalization and SVM parameters to FLAGS.output_dir/FLAGS.svm_file
    (root process only) and logs the training and testing accuracy.

    Reads FLAGS.output_dir, FLAGS.root, FLAGS.trainer, FLAGS.patch,
    FLAGS.grid, FLAGS.method, FLAGS.model_file and FLAGS.svm_file.
    """

    mpi.mkdir(FLAGS.output_dir)
    logging.info('Loading cifar data...')
    cifar = visiondata.CifarDataset(FLAGS.root, is_training=True)
    cifar_test = visiondata.CifarDataset(FLAGS.root, is_training=False)

    # Trainer for the linear encoding stage, chosen by the --trainer flag.
    if FLAGS.trainer == "pink":
        trainer = pinker.SpatialPinkTrainer(
            {'size': (FLAGS.patch, FLAGS.patch), 'reg': 0.1})
    else:
        trainer = pipeline.ZcaTrainer({'reg': 0.1})

    conv = pipeline.ConvLayer([
        # extracts patches
        pipeline.PatchExtractor([FLAGS.patch, FLAGS.patch], 1),
        # normalizes the patches
        pipeline.MeanvarNormalizer({'reg': 10}),
        pipeline.LinearEncoder({}, trainer=trainer),
        pipeline.ThresholdEncoder(
            {'alpha': 0.0, 'twoside': False},
            trainer=pipeline.OMPTrainer({'k': 100, 'max_iter': 100})),
        # pooling grid and method both come from flags
        pipeline.SpatialPooler(
            {'grid': (FLAGS.grid, FLAGS.grid), 'method': FLAGS.method}),
    ])
    logging.info('Training the pipeline...')
    conv.train(cifar, 400000, exhaustive=True)
    logging.info('Dumping the pipeline...')
    if mpi.is_root():
        # Pickle data is binary: open with 'wb' so the dump works on
        # Python 3 and is not newline-translated on Windows. The with
        # statement closes the file, so no explicit close() is needed.
        model_path = os.path.join(FLAGS.output_dir, FLAGS.model_file)
        with open(model_path, 'wb') as fid:
            pickle.dump(conv, fid)

    logging.info('Extracting features...')
    Xtrain = conv.process_dataset(cifar, as_2d=True)
    # Builtin int is what the removed np.int alias pointed to.
    Ytrain = cifar.labels().astype(int)
    Xtest = conv.process_dataset(cifar_test, as_2d=True)
    Ytest = cifar_test.labels().astype(int)

    # normalization
    m, std = classifier.feature_meanstd(Xtrain, reg=0.01)
    # to match Adam Coates' pipeline
    Xtrain -= m
    Xtrain /= std
    Xtest -= m
    Xtest /= std

    # A disabled experiment used to live here as a string literal: it
    # projected the features on the last 400 eigenvectors of their
    # covariance (scaled by the square root of the eigenvalues) and dumped
    # the eigendecomposition to 'cifar_dump_oriol.pickle'.

    w, b = classifier.l2svm_onevsall(Xtrain, Ytrain, 0.002,
                                     fminargs={'maxfun': 4000})
    if mpi.is_root():
        # Store everything needed to apply the classifier to new features.
        svm_path = os.path.join(FLAGS.output_dir, FLAGS.svm_file)
        with open(svm_path, 'wb') as fid:
            pickle.dump({'m': m, 'std': std, 'w': w, 'b': b}, fid)

    accu_train = classifier.Evaluator.accuracy(Ytrain, np.dot(Xtrain, w) + b)
    accu_test = classifier.Evaluator.accuracy(Ytest, np.dot(Xtest, w) + b)
    logging.info('Training accuracy: %f' % accu_train)
    logging.info('Testing accuracy: %f' % accu_test)