def compute_caltech_features():
    caltech = datasets.TwoLayerDataset(FLAGS.root, ['jpg'], max_size=300)
    conv = pipeline.ConvLayer([
        dsift.DsiftExtractor(FLAGS.sift_size, FLAGS.sift_stride),
        pipeline.LLCEncoder({'k': FLAGS.llc_k},
                            trainer=pipeline.KmeansTrainer({'k': FLAGS.dict_size})),
        pipeline.PyramidPooler({'level': 3, 'method': 'max'})
    ])
    conv.train(caltech, 400000)
    feat = conv.process_dataset(caltech, as_2d=True)
    mpi.mkdir(FLAGS.feature_dir)
    if mpi.is_root():
        with open(os.path.join(FLAGS.feature_dir, FLAGS.model_file), 'w') as fid:
            pickle.dump(conv, fid)
    mpi.dump_matrix_multi(feat,
                          os.path.join(FLAGS.feature_dir, FLAGS.feature_file))
    mpi.dump_matrix_multi(caltech.labels(),
                          os.path.join(FLAGS.feature_dir, FLAGS.label_file))
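# A minimal usage sketch for reading the dumped Caltech features and labels
# back for a downstream classifier. It assumes iceberk.mpi also provides
# load_matrix_multi as the counterpart of dump_matrix_multi, and that the same
# FLAGS values are in scope; verify both against your iceberk version.
import os
from iceberk import mpi

feat = mpi.load_matrix_multi(os.path.join(FLAGS.feature_dir, FLAGS.feature_file))
labels = mpi.load_matrix_multi(os.path.join(FLAGS.feature_dir, FLAGS.label_file))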
def dump(self, target_folder):
    """Dump the current images to the target folder."""
    mpi.mkdir(target_folder)
    for idx in range(self.size()):
        name = self._raw_name[idx]
        mpi.mkdir(os.path.join(target_folder, os.path.dirname(name)))
        misc.imsave(os.path.join(target_folder, name), self._read(idx))
def cifar_demo():
    """Performs a demo classification on cifar."""
    mpi.mkdir(FLAGS.output_dir)
    logging.info('Loading cifar data...')
    cifar = visiondata.CifarDataset(FLAGS.root, is_training=True)
    cifar_test = visiondata.CifarDataset(FLAGS.root, is_training=False)

    conv = pipeline.ConvLayer([
        pipeline.PatchExtractor([6, 6], 1),  # extracts patches
        pipeline.MeanvarNormalizer({'reg': 10}),  # normalizes the patches
        pipeline.LinearEncoder({},
            trainer=pipeline.ZcaTrainer({'reg': 0.1})),  # does whitening
        pipeline.ThresholdEncoder({'alpha': 0.25, 'twoside': True},
            trainer=pipeline.OMPTrainer({'k': 800, 'max_iter': 100})),  # does encoding
        pipeline.SpatialPooler({'grid': (2, 2), 'method': 'ave'})  # average pool
    ])
    logging.info('Training the pipeline...')
    conv.train(cifar, 50000)
    logging.info('Dumping the pipeline...')
    if mpi.is_root():
        with open(os.path.join(FLAGS.output_dir, FLAGS.model_file), 'w') as fid:
            pickle.dump(conv, fid)
    with open(os.path.join(FLAGS.output_dir, FLAGS.model_file), 'r') as fid:
        conv = pickle.load(fid)
    logging.info('Extracting features...')
    Xtrain = conv.process_dataset(cifar, as_2d=True)
    mpi.dump_matrix_multi(Xtrain,
                          os.path.join(FLAGS.output_dir, FLAGS.feature_file + '_train'))
    Ytrain = cifar.labels().astype(np.int)
    Xtest = conv.process_dataset(cifar_test, as_2d=True)
    mpi.dump_matrix_multi(Xtest,
                          os.path.join(FLAGS.output_dir, FLAGS.feature_file + '_test'))
    Ytest = cifar_test.labels().astype(np.int)

    # normalization
    m, std = classifier.feature_meanstd(Xtrain)
    Xtrain -= m
    Xtrain /= std
    Xtest -= m
    Xtest /= std

    w, b = classifier.l2svm_onevsall(Xtrain, Ytrain, 0.01)
    if mpi.is_root():
        with open(os.path.join(FLAGS.output_dir, FLAGS.svm_file), 'w') as fid:
            pickle.dump({'m': m, 'std': std, 'w': w, 'b': b}, fid)

    accu = np.sum(Ytrain == (np.dot(Xtrain, w) + b).argmax(axis=1)) \
            / float(len(Ytrain))
    accu_test = np.sum(Ytest == (np.dot(Xtest, w) + b).argmax(axis=1)) \
            / float(len(Ytest))
    logging.info('Training accuracy: %f' % accu)
    logging.info('Testing accuracy: %f' % accu_test)
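# A minimal sketch of applying the SVM saved by cifar_demo to a new feature
# matrix X (any (num_samples, num_features) array produced by the same
# pipeline). The pickle layout {'m', 'std', 'w', 'b'} matches what the demo
# dumps above; svm_predict is a hypothetical helper name.
import cPickle as pickle
import numpy as np

def svm_predict(X, svm_file):
    with open(svm_file, 'r') as fid:
        model = pickle.load(fid)
    X = (X - model['m']) / model['std']  # reuse the training normalization
    return (np.dot(X, model['w']) + model['b']).argmax(axis=1)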
def cifar_demo():
    """Performs a demo classification on cifar."""
    mpi.mkdir(FLAGS.output_dir)
    logging.info("Loading cifar data...")
    cifar = visiondata.CifarDataset(FLAGS.root, is_training=True)
    cifar_test = visiondata.CifarDataset(FLAGS.root, is_training=False)
    # try: use sub images
    # cifar = datasets.SubImageSet(cifar, [28,28], 1)
    # cifar_test = datasets.CenterRegionSet(cifar_test, [28,28])

    conv = pipeline.ConvLayer([
        pipeline.PatchExtractor([6, 6], 1),  # extracts patches
        pipeline.MeanvarNormalizer({"reg": 10}),  # normalizes the patches
        pipeline.LinearEncoder({},
            trainer=pipeline.ZcaTrainer({"reg": 0.1})),  # does whitening
        pipeline.ThresholdEncoder({"alpha": 0.25, "twoside": True},
            trainer=pipeline.OMPTrainer({"k": 1600, "max_iter": 100})),  # does encoding
        pipeline.SpatialPooler({"grid": (4, 4), "method": "max"})  # max pool
    ])
    logging.info("Training the pipeline...")
    conv.train(cifar, 400000)
    logging.info("Dumping the pipeline...")
    if mpi.is_root():
        with open(os.path.join(FLAGS.output_dir, FLAGS.model_file), "w") as fid:
            pickle.dump(conv, fid)
    logging.info("Extracting features...")
    Xtrain = conv.process_dataset(cifar, as_2d=True)
    mpi.dump_matrix_multi(Xtrain,
                          os.path.join(FLAGS.output_dir, FLAGS.feature_file + "_train"))
    Ytrain = cifar.labels().astype(np.int)
    Xtest = conv.process_dataset(cifar_test, as_2d=True)
    mpi.dump_matrix_multi(Xtest,
                          os.path.join(FLAGS.output_dir, FLAGS.feature_file + "_test"))
    Ytest = cifar_test.labels().astype(np.int)

    # normalization
    m, std = classifier.feature_meanstd(Xtrain)
    Xtrain -= m
    Xtrain /= std
    Xtest -= m
    Xtest /= std

    w, b = classifier.l2svm_onevsall(Xtrain, Ytrain, 0.005)
    if mpi.is_root():
        with open(os.path.join(FLAGS.output_dir, FLAGS.svm_file), "w") as fid:
            pickle.dump({"m": m, "std": std, "w": w, "b": b}, fid)

    accu = np.sum(Ytrain == (np.dot(Xtrain, w) + b).argmax(axis=1)) / float(len(Ytrain))
    accu_test = np.sum(Ytest == (np.dot(Xtest, w) + b).argmax(axis=1)) / float(len(Ytest))
    logging.info("Training accuracy: %f" % accu)
    logging.info("Testing accuracy: %f" % accu_test)
def cifar_demo():
    """Performs a demo classification on cifar."""
    mpi.mkdir(FLAGS.output_dir)
    logging.info('Loading cifar data...')
    cifar = visiondata.CifarDataset(FLAGS.root, is_training=True)
    cifar_test = visiondata.CifarDataset(FLAGS.root, is_training=False)

    if FLAGS.trainer == "pink":
        trainer = pinker.SpatialPinkTrainer({'size': (FLAGS.patch, FLAGS.patch), 'reg': 0.1})
    else:
        trainer = pipeline.ZcaTrainer({'reg': 0.1})

    conv = pipeline.ConvLayer([
        pipeline.PatchExtractor([FLAGS.patch, FLAGS.patch], 1),  # extracts patches
        pipeline.MeanvarNormalizer({'reg': 10}),  # normalizes the patches
        pipeline.LinearEncoder({}, trainer=trainer),
        pipeline.ThresholdEncoder({'alpha': 0.0, 'twoside': False},
            trainer=pipeline.OMPTrainer({'k': 100, 'max_iter': 100})),
        pipeline.SpatialPooler({'grid': (FLAGS.grid, FLAGS.grid),
                                'method': FLAGS.method})  # pooling method set by flag
    ])
    logging.info('Training the pipeline...')
    conv.train(cifar, 400000, exhaustive=True)
    logging.info('Dumping the pipeline...')
    if mpi.is_root():
        with open(os.path.join(FLAGS.output_dir, FLAGS.model_file), 'w') as fid:
            pickle.dump(conv, fid)
    logging.info('Extracting features...')
    Xtrain = conv.process_dataset(cifar, as_2d=True)
    Ytrain = cifar.labels().astype(np.int)
    Xtest = conv.process_dataset(cifar_test, as_2d=True)
    Ytest = cifar_test.labels().astype(np.int)

    # normalization
    m, std = classifier.feature_meanstd(Xtrain, reg=0.01)  # to match Adam Coates' pipeline
    Xtrain -= m
    Xtrain /= std
    Xtest -= m
    Xtest /= std
    """
    covmat = mathutil.mpi_cov(Xtrain)
    eigval, eigvec = np.linalg.eigh(covmat)
    U = eigvec[:,-400:] * np.sqrt(eigval[-400:])
    logging.info("Dump oriol")
    mpi.root_pickle((eigval, eigvec), 'cifar_dump_oriol.pickle')
    Xtrain = np.dot(Xtrain, U)
    Xtest = np.dot(Xtest, U)
    """
    w, b = classifier.l2svm_onevsall(Xtrain, Ytrain, 0.002,
                                     fminargs={'maxfun': 4000})
    if mpi.is_root():
        with open(os.path.join(FLAGS.output_dir, FLAGS.svm_file), 'w') as fid:
            pickle.dump({'m': m, 'std': std, 'w': w, 'b': b}, fid)

    accu_train = classifier.Evaluator.accuracy(Ytrain, np.dot(Xtrain, w) + b)
    accu_test = classifier.Evaluator.accuracy(Ytest, np.dot(Xtest, w) + b)
    logging.info('Training accuracy: %f' % accu_train)
    logging.info('Testing accuracy: %f' % accu_test)
def testMkdir(self):
    mpi.mkdir(_MPI_TEST_DIR)
    self.assertTrue(os.path.exists(_MPI_TEST_DIR))
import cPickle as pickle
from matplotlib import pyplot
from iceberk import visualize, mpi
from scipy import misc
import numpy as np
import matplotlib

matplotlib.rcParams['ps.useafm'] = True
matplotlib.rcParams['pdf.use14corefonts'] = True
matplotlib.rcParams['text.usetex'] = True

mpi.mkdir('distribution')
pyplot.ion()
dictionary, before_pooling, after_pooling = pickle.load(
    open('distribution_before_after_pooling.pickle'))
after_pooling -= after_pooling.min(0)
corr_before = np.corrcoef(before_pooling.T)
corr_after = np.corrcoef(after_pooling.T)
# do random sampling for visualization
before_pooling = before_pooling[np.random.randint(before_pooling.shape[0], size=1000)]
after_pooling = after_pooling[np.random.randint(after_pooling.shape[0], size=1000)]
vis = visualize.PatchVisualizer()
im = vis.show_multiple(dictionary)
misc.imsave('distribution/1.png', im[:8, :8])
misc.imsave('distribution/2.png', im[:8, -8:])
misc.imsave('distribution/3.png', im[-8:, :8])
misc.imsave('distribution/4.png', im[-8:, -8:])
def cifar_demo():
    """Performs a demo classification on cifar."""
    mpi.mkdir(FLAGS.output_dir)
    logging.info('Loading cifar data...')
    cifar = visiondata.CifarDataset(FLAGS.root, is_training=True)
    cifar_test = visiondata.CifarDataset(FLAGS.root, is_training=False)

    if FLAGS.trainer == "pink":
        trainer = pinker.SpatialPinkTrainer({'size': (FLAGS.patch, FLAGS.patch), 'reg': 0.1})
    else:
        trainer = pipeline.ZcaTrainer({'reg': 0.1})

    conv = pipeline.ConvLayer([
        pipeline.PatchExtractor([FLAGS.patch, FLAGS.patch], 1),  # extracts patches
        pipeline.MeanvarNormalizer({'reg': 10}),  # normalizes the patches
        pipeline.LinearEncoder({}, trainer=trainer),
        pipeline.ThresholdEncoder({'alpha': 0.0, 'twoside': False},
            trainer=pipeline.OMPTrainer({'k': FLAGS.fromdim, 'max_iter': 100})),
        pipeline.SpatialPooler({'grid': (FLAGS.grid, FLAGS.grid),
                                'method': FLAGS.method})  # pooling method set by flag
    ])
    logging.info('Training the pipeline...')
    conv.train(cifar, 400000, exhaustive=True)
    logging.info('Extracting features...')
    Xtrain = conv.process_dataset(cifar, as_2d=False)
    Ytrain = cifar.labels().astype(np.int)
    Xtest = conv.process_dataset(cifar_test, as_2d=False)
    Ytest = cifar_test.labels().astype(np.int)

    # before we do feature computation, try to do dimensionality reduction
    Xtrain.resize(np.prod(Xtrain.shape[:-1]), Xtrain.shape[-1])
    Xtest.resize(np.prod(Xtest.shape[:-1]), Xtest.shape[-1])

    m, std = classifier.feature_meanstd(Xtrain, 0.01)
    Xtrain -= m
    Xtrain /= std
    Xtest -= m
    Xtest /= std

    covmat = mathutil.mpi_cov(Xtrain)
    if False:
        # directly do dimensionality reduction
        eigval, eigvec = np.linalg.eigh(covmat)
        U = eigvec[:, -FLAGS.todim:]
        Xtrain = np.dot(Xtrain, U)
        Xtest = np.dot(Xtest, U)
    else:
        # do subsampling
        import code_ap
        temp = code_ap.code_af(Xtrain, FLAGS.todim)
        sel = temp[0]
        sel = mpi.COMM.bcast(sel)
        Cpred = covmat[sel]
        Csel = Cpred[:, sel]
        W = np.linalg.solve(Csel, Cpred)
        # perform svd
        U, D, _ = np.linalg.svd(W, full_matrices=0)
        U *= D
        Xtrain = np.dot(Xtrain[:, sel], U)
        Xtest = np.dot(Xtest[:, sel], U)
    Xtrain.resize(Ytrain.shape[0], Xtrain.size / Ytrain.shape[0])
    Xtest.resize(Ytest.shape[0], Xtest.size / Ytest.shape[0])

    """
    # This part is used to do post-pooling over all features nystrom subsampling
    # normalization
    Xtrain.resize(Xtrain.shape[0], np.prod(Xtrain.shape[1:]))
    Xtest.resize(Xtest.shape[0], np.prod(Xtest.shape[1:]))
    m, std = classifier.feature_meanstd(Xtrain, reg = 0.01)
    # to match Adam Coates' pipeline
    Xtrain -= m
    Xtrain /= std
    Xtest -= m
    Xtest /= std
    covmat = mathutil.mpi_cov(Xtrain)
    eigval, eigvec = np.linalg.eigh(covmat)
    U = eigvec[:, -(200*FLAGS.grid*FLAGS.grid):]
    #U = eigvec[:,-400:] * np.sqrt(eigval[-400:])
    Xtrain = np.dot(Xtrain, U)
    Xtest = np.dot(Xtest, U)
    """

    w, b = classifier.l2svm_onevsall(Xtrain, Ytrain, 0.002,
                                     fminargs={'disp': 0, 'maxfun': 1000})
    accu_train = classifier.Evaluator.accuracy(Ytrain, np.dot(Xtrain, w) + b)
    accu_test = classifier.Evaluator.accuracy(Ytest, np.dot(Xtest, w) + b)
    logging.info('Training accuracy: %f' % accu_train)
    logging.info('Testing accuracy: %f' % accu_test)
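# A self-contained numpy sketch of the column-subsampling (Nystrom-style) step
# used in the else branch above: given the feature covariance covmat and a set
# of selected feature indices sel, solving Csel * W = Cpred yields weights that
# let the selected columns, orthogonalized through the SVD of W, approximate
# the full feature space. Data and indices here are synthetic stand-ins for
# the pipeline features and the code_ap.code_af output.
import numpy as np

n, d, k = 1000, 256, 64
X = np.random.randn(n, d)
covmat = np.cov(X, rowvar=0)
sel = np.random.permutation(d)[:k]           # stand-in for code_ap.code_af(X, k)[0]
Cpred = covmat[sel]                          # k x d block of the covariance
Csel = Cpred[:, sel]                         # k x k block among selected dims
W = np.linalg.solve(Csel, Cpred)             # reconstruction weights
U, D, _ = np.linalg.svd(W, full_matrices=0)  # orthogonalize the projection
U *= D
X_red = np.dot(X[:, sel], U)                 # reduced (n x k) representation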
        pipeline.MeanvarNormalizer({'reg': 0.01}),
        pipeline.LinearEncoder({}, trainer=pipeline.ZcaTrainer({'reg': 0.01})),
        pipeline.ReLUEncoder({'twoside': False},
            trainer=pipeline.NormalizedKmeansTrainer({'k': 1600, 'max_iter': 100})),
        pipeline.PyramidPooler({'level': 3, 'method': 'max'})
    ], prev=CONV)
CONV_SPM_GAMMA = 0.01

logging.debug('Loading data...')
train_data = visiondata.CUBDataset(ROOT, True, crop=CROP, subset=SUBSET,
                                   target_size=TARGET_SIZE, prefetch=True)
test_data = visiondata.CUBDataset(ROOT, False, crop=CROP, subset=SUBSET,
                                  target_size=TARGET_SIZE, prefetch=True)
mpi.mkdir(CONVOLUTION_OUTPUT)
if MIRRORED:
    train_data = datasets.MirrorSet(train_data)
    # note that we do not mirror test data.
logging.debug('Training convolutional NN...')
CONV.train(train_data, 400000, exhaustive=True)
CONV2.train(train_data, 400000, exhaustive=True)
mpi.root_pickle(CONV2, CONVOLUTION_FILE)
Xtrain = CONV2.process_dataset(train_data)
Ytrain = train_data.labels().astype(np.int)
Xtest = CONV2.process_dataset(test_data)
Ytest = test_data.labels().astype(np.int)
# normalization
m, std = classifier.feature_meanstd(Xtrain, reg=0.01)
from jiayq.experiments.feature_selection import pcfs
import logging
import numpy as np
import os

if mpi.is_root():
    logging.basicConfig(level=logging.DEBUG)

stl_folder = '/u/vis/x1/common/STL_10/stl10_matlab'
NUM_REDUCED_DICT = 64
model_file_first = '/u/vis/ttmp/jiayq/stl/conv.pickle'
model_file_second = '/u/vis/ttmp/jiayq/stl/conv_second.pickle'
order_file = '/u/vis/ttmp/jiayq/stl/order.npy'
covmat_file = '/u/vis/ttmp/jiayq/stl/covmat.npy'
mpi.mkdir('/u/vis/ttmp/jiayq/stl/')

logging.info("Loading stl dataset...")
stl = visiondata.STL10Dataset(stl_folder, 'unlabeled')

################################################################################
# Train the first layer
################################################################################
if os.path.exists(model_file_first):
    logging.info("skipping the first layer training...")
    conv = pickle.load(open(model_file_first, 'r'))
else:
    logging.info("Setting up the convolutional layer...")
    conv = pipeline.ConvLayer([
        pipeline.PatchExtractor([5, 5], 1),
        pipeline.MeanvarNormalizer({'reg': 10}),
import cPickle as pickle
from iceberk import visualize, mpi
from scipy import misc
import numpy as np
import matplotlib
from matplotlib import pyplot

matplotlib.rcParams["ps.useafm"] = True
matplotlib.rcParams["pdf.use14corefonts"] = True
matplotlib.rcParams["text.usetex"] = True

mpi.mkdir("centroids")
mpi.mkdir("distribution")
dictionary, ap_result = pickle.load(open("cvpr_exemplar_centroids.pickle"))
vis = visualize.PatchVisualizer()
im = vis.show_multiple(dictionary, bg_func=np.max)
misc.imsave("centroids/dictionary.png", im)
im = vis.show_multiple(dictionary[ap_result[0]], bg_func=np.max)
misc.imsave("centroids/dictionary_major.png", im)

eigval, eigval_rec, eigval_random = pickle.load(
    open("cvpr_exemplar_centroids_covmat_eigvals.pickle"))
eigval = np.sort(eigval)[::-1]
eigval_rec = np.sort(eigval_rec)[::-1]
eigval_random = np.sort(eigval_random)[::-1]
fig = pyplot.figure()
pyplot.plot(np.log(eigval[:600]), "g-", lw=2)
pyplot.plot(np.log(eigval[:256]), "b-.", lw=2)
pyplot.plot(np.log(eigval_rec[:256]), "r--", lw=2)
def cifar_demo():
    """Performs a demo classification on cifar."""
    mpi.mkdir(FLAGS.output_dir)
    logging.info('Loading cifar data...')
    cifar = visiondata.CifarDataset(FLAGS.root, is_training=True)
    cifar_test = visiondata.CifarDataset(FLAGS.root, is_training=False)

    conv = pipeline.ConvLayer([
        pipeline.PatchExtractor([6, 6], 1),  # extracts patches
        pipeline.MeanvarNormalizer({'reg': 10}),  # normalizes the patches
        pipeline.LinearEncoder({}, trainer=pipeline.ZcaTrainer({'reg': 0.1})),
        pipeline.ThresholdEncoder({'alpha': 0.25, 'twoside': False},
            trainer=pipeline.NormalizedKmeansTrainer({'k': FLAGS.fromdim, 'max_iter': 100})),
        pipeline.SpatialPooler({'grid': (FLAGS.grid, FLAGS.grid),
                                'method': FLAGS.method})  # pooling method set by flag
    ])
    logging.info('Training the pipeline...')
    conv.train(cifar, 400000, exhaustive=True)
    logging.info('Extracting features...')
    Xtrain = conv.process_dataset(cifar, as_2d=False)
    Ytrain = cifar.labels().astype(np.int)
    Xtest = conv.process_dataset(cifar_test, as_2d=False)
    Ytest = cifar_test.labels().astype(np.int)

    # before we do feature computation, try to do dimensionality reduction
    Xtrain.resize(np.prod(Xtrain.shape[:-1]), Xtrain.shape[-1])
    Xtest.resize(np.prod(Xtest.shape[:-1]), Xtest.shape[-1])

    # not only do we remove the mean of each feature, we also remove the mean
    # of each data point, similar to contrast normalization
    Xtrain -= Xtrain.mean(axis=1)[:, np.newaxis]
    Xtest -= Xtest.mean(axis=1)[:, np.newaxis]

    m, std = classifier.feature_meanstd(Xtrain, 0.01)
    Xtrain -= m
    Xtrain /= std
    Xtest -= m
    Xtest /= std

    covmat = mathutil.mpi_cov(Xtrain)

    current_dim = FLAGS.fromdim
    if FLAGS.svd == 1:
        eigval, eigvec = np.linalg.eigh(covmat)
    while current_dim >= 100:
        if current_dim < FLAGS.fromdim:
            if FLAGS.svd == 1:
                # directly do dimensionality reduction
                U = eigvec[:, -current_dim:]
                Xtrain_red = np.dot(Xtrain, U)
                Xtest_red = np.dot(Xtest, U)
            else:
                # do subsampling
                import code_ap
                temp = code_ap.code_af(Xtrain, current_dim)
                logging.info("selected %d dims" % len(temp[0]))
                sel = temp[0]
                sel = mpi.COMM.bcast(sel)
                Cpred = covmat[sel]
                Csel = Cpred[:, sel]
                W = np.linalg.solve(Csel, Cpred)
                # perform svd
                U, D, _ = np.linalg.svd(W, full_matrices=0)
                U *= D
                Xtrain_red = np.dot(Xtrain[:, sel], U)
                Xtest_red = np.dot(Xtest[:, sel], U)
            Xtrain_red.resize(Ytrain.shape[0], Xtrain_red.size / Ytrain.shape[0])
            Xtest_red.resize(Ytest.shape[0], Xtest_red.size / Ytest.shape[0])
        else:
            Xtrain_red = Xtrain.copy()
            Xtest_red = Xtest.copy()
            Xtrain_red.resize(Ytrain.shape[0], Xtrain_red.size / Ytrain.shape[0])
            Xtest_red.resize(Ytest.shape[0], Xtest_red.size / Ytest.shape[0])

        w, b = classifier.l2svm_onevsall(Xtrain_red, Ytrain, 0.005,
                                         fminargs={'disp': 0, 'maxfun': 1000})
        accu_train = classifier.Evaluator.accuracy(Ytrain, np.dot(Xtrain_red, w) + b)
        accu_test = classifier.Evaluator.accuracy(Ytest, np.dot(Xtest_red, w) + b)
        logging.info('%d - %d, Training accuracy: %f' % (FLAGS.fromdim, current_dim, accu_train))
        logging.info('%d - %d, Testing accuracy: %f' % (FLAGS.fromdim, current_dim, accu_test))
        current_dim /= 2
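# A tiny self-contained numpy sketch of the FLAGS.svd == 1 branch above:
# PCA-style reduction that keeps the top-k eigenvectors of the covariance.
# Synthetic data; the key detail is that np.linalg.eigh returns eigenvalues in
# ascending order, hence the [-k:] slice for the largest ones.
import numpy as np

X = np.random.randn(500, 128)
covmat = np.cov(X, rowvar=0)
eigval, eigvec = np.linalg.eigh(covmat)
k = 32
U = eigvec[:, -k:]     # top-k eigenvectors (largest eigenvalues come last)
X_red = np.dot(X, U)   # (500, 32) reduced features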
gflags.DEFINE_string("model", "", "The model file") gflags.DEFINE_string("folder", "", "The input folder that contains the data") gflags.DEFINE_string("output", "", "The output folder") FLAGS = gflags.FLAGS FLAGS(sys.argv) if FLAGS.folder == "" or FLAGS.model == "": sys.exit(1) model = np.load(FLAGS.model) w = model['w'] b = model['b'] if not os.path.exists(FLAGS.output): mpi.mkdir(FLAGS.output) files = glob.glob(os.path.join(FLAGS.folder, '*.mat')) files.sort() for i in range(mpi.RANK, len(files), mpi.SIZE): file = files[i] print '%d / %d: %s' % (i, len(files), file) fid = h5py.File(file, 'r') features = fid['features'] pred = np.dot(features, w) pred += b fidout = h5py.File(os.path.join(FLAGS.output, os.path.basename(file)), 'w') fidout['pred'] = pred fid.close() fidout.close()