def bird_demo():
    """Train a single-layer feature pipeline on CUB and evaluate a linear SVM.

    Configuration is read from the module-level ``FLAGS`` (``root``,
    ``crop``, ``version``, ``mirrored``, ``patch``, ``k``) and from
    ``TARGET_SIZE``.  The steps are:

    1. load the training and testing splits (only the training split is
       wrapped in ``datasets.MirrorSet``, and only when ``FLAGS.mirrored``);
    2. train the convolutional layer on the training split;
    3. extract 2-D feature matrices and standardize both splits with the
       mean/std computed on the training features;
    4. fit ``classifier.l2svm_onevsall`` and log train/test accuracy;
    5. pickle ``(m, std, w, b, conv[-2].dictionary)`` to
       ``debug_features.pickle`` via ``mpi.root_pickle``.

    Returns:
        None.
    """
    logging.info('Loading data...')
    bird = visiondata.CUBDataset(FLAGS.root,
                                 is_training=True,
                                 crop=FLAGS.crop,
                                 version=FLAGS.version,
                                 prefetch=True,
                                 target_size=TARGET_SIZE)
    bird_test = visiondata.CUBDataset(FLAGS.root,
                                      is_training=False,
                                      crop=FLAGS.crop,
                                      version=FLAGS.version,
                                      prefetch=True,
                                      target_size=TARGET_SIZE)
    if FLAGS.mirrored:
        # Only the training split is mirrored; the test split is left as is.
        bird = datasets.MirrorSet(bird)

    conv = pipeline.ConvLayer([
        pipeline.PatchExtractor([FLAGS.patch, FLAGS.patch], 1),  # extracts patches
        pipeline.MeanvarNormalizer({'reg': 10}),  # normalizes the patches
        pipeline.LinearEncoder({},
                               trainer=pipeline.ZcaTrainer({'reg': 0.1})),
        pipeline.ThresholdEncoder({'alpha': 0.25, 'twoside': True},
                                  trainer=pipeline.OMPTrainer(
                                      {'k': FLAGS.k, 'max_iter': 100})),
        pipeline.SpatialPooler({'grid': 4, 'method': 'max'}),
    ], fixed_size=True)

    logging.info('Training the pipeline...')
    conv.train(bird, 400000, exhaustive=True)

    logging.info('Extracting features...')
    # ``np.int`` was only an alias of the builtin ``int`` and has been removed
    # from NumPy (1.24+); the builtin yields exactly the same dtype.
    Xtrain = conv.process_dataset(bird, as_2d=True)
    Ytrain = bird.labels().astype(int)
    Xtest = conv.process_dataset(bird_test, as_2d=True)
    Ytest = bird_test.labels().astype(int)

    # normalization: statistics come from the training features only and are
    # applied in place to both splits.
    m, std = classifier.feature_meanstd(Xtrain, reg=0.01)
    # to match Adam Coates' pipeline
    Xtrain -= m
    Xtrain /= std
    Xtest -= m
    Xtest /= std

    w, b = classifier.l2svm_onevsall(Xtrain, Ytrain, 0.005,
                                     fminargs={'maxfun': 1000})
    accu_train = classifier.Evaluator.accuracy(Ytrain, np.dot(Xtrain, w) + b)
    accu_test = classifier.Evaluator.accuracy(Ytest, np.dot(Xtest, w) + b)
    # Lazy %-style arguments: formatting only happens if the record is emitted.
    logging.info('Training accuracy: %f', accu_train)
    logging.info('Testing accuracy: %f', accu_test)
    mpi.root_pickle((m, std, w, b, conv[-2].dictionary),
                    'debug_features.pickle')
# Baseline: eigenvalue spectrum of the covariance reconstructed from `todim`
# randomly chosen rows/columns, accumulated over 10 random draws.
eigval_random = np.zeros_like(eigval_recon)
for i in range(10):
    # draw a random subset of `todim` feature indices
    sel = np.arange(covmat.shape[0])
    np.random.shuffle(sel)
    sel = sel[:todim]
    Cpred = covmat[sel]
    Csel = Cpred[:, sel]
    # reconstruct the full matrix from the selected rows/columns
    Crecon = Cpred.T.dot(np.linalg.pinv(Csel).dot(Cpred))
    # symmetrize before taking the eigenvalues
    Crecon = (Crecon + Crecon.T) / 2
    eigval_temp = np.linalg.eigvals(Crecon)
    eigval_random += np.sort(eigval_temp)
# combine the per-process sums, then divide by (draws per process * processes)
eigval_random = mpi.COMM.allreduce(eigval_random)
eigval_random /= 10 * mpi.SIZE
"""
mpi.root_pickle(conv, 'cvpr_exemplar_centroids_conv.pickle')
mpi.root_pickle((conv[-2].dictionary, ap_result), 'cvpr_exemplar_centroids.pickle')
"""
mpi.root_pickle((eigval, eigval_recon, eigval_random),
                'cvpr_exemplar_centroids_covmat_eigvals.pickle')

# perform sampling
# sample post-pooling guys
# undo the earlier standardization in place (scale back, then shift back)
Xtrain *= std
Xtrain += m
sampler = mathutil.ReservoirSampler(2000)
for i in range(covmat.shape[0]):
    label = ap_result[1][i]
    centroid_id = ap_result[0][label]
    if centroid_id == i:
        # column i is the centroid of its own group: nothing to pair it with
        continue
    sampler.consider(Xtrain[:, [i, centroid_id]])
CONV_SPM_GAMMA = 0.01

# Load the CUB training and testing splits with the module-level settings.
logging.debug('Loading data...')
train_data = visiondata.CUBDataset(ROOT, True,
                                   crop=CROP,
                                   subset=SUBSET,
                                   target_size=TARGET_SIZE,
                                   prefetch=True)
test_data = visiondata.CUBDataset(ROOT, False,
                                  crop=CROP,
                                  subset=SUBSET,
                                  target_size=TARGET_SIZE,
                                  prefetch=True)
mpi.mkdir(CONVOLUTION_OUTPUT)
if MIRRORED:
    train_data = datasets.MirrorSet(train_data)
    # note that we do not mirror test data.

# Train both pipelines on the training split, then persist the second one.
logging.debug('Training convolutional NN...')
CONV.train(train_data, 400000, exhaustive=True)
CONV2.train(train_data, 400000, exhaustive=True)
mpi.root_pickle(CONV2, CONVOLUTION_FILE)

# ``np.int`` was only an alias of the builtin ``int`` and has been removed
# from NumPy (1.24+); the builtin yields exactly the same dtype.
Xtrain = CONV2.process_dataset(train_data)
Ytrain = train_data.labels().astype(int)
Xtest = CONV2.process_dataset(test_data)
Ytest = test_data.labels().astype(int)

# normalization: statistics come from the training features only and are
# applied in place to both splits.
m, std = classifier.feature_meanstd(Xtrain, reg=0.01)
# to match Adam Coates' pipeline
Xtrain -= m
Xtrain /= std
Xtest -= m
Xtest /= std
w, b = classifier.l2svm_onevsall(Xtrain, Ytrain, 0.005,
                                 fminargs={'maxfun': 500})
"If set, perform classification with tree regularization.") gflags.DEFINE_float("reg", 0.01, "The regularization term used in the classification.") gflags.FLAGS(sys.argv) FLAGS = gflags.FLAGS mpi.root_log_level(level=logging.DEBUG) if FLAGS.extract: train_data = visiondata.CUBDataset(ROOT, True, crop = CROP, target_size = TARGET_SIZE, prefetch = True) test_data = visiondata.CUBDataset(ROOT, False, crop = CROP, target_size = TARGET_SIZE, prefetch = True) if MIRRORED: train_data = datasets.MirrorSet(train_data) CONV.train(train_data, 400000, exhaustive = True) mpi.root_pickle(CONV, __file__ + ".conv.pickle") Xtrain = CONV.process_dataset(train_data, as_2d = True) Xtest = CONV.process_dataset(test_data, as_2d = True) Ytrain = train_data.labels() Ytest = test_data.labels() m, std = classifier.feature_meanstd(Xtrain) Xtrain -= m Xtrain /= std Xtest -= m Xtest /= std mpi.dump_matrix_multi(Xtrain, os.path.join(FEATDIR,'Xtrain')) mpi.dump_matrix_multi(Xtest, os.path.join(FEATDIR,'Xtest')) mpi.dump_matrix_multi(Ytrain, os.path.join(FEATDIR,'Ytrain')) mpi.dump_matrix_multi(Ytest, os.path.join(FEATDIR,'Ytest')) else: Xtrain = mpi.load_matrix_multi(os.path.join(FEATDIR,'Xtrain'))
logging.info('Skipping first layer training') except Exception, e: conv = pipeline.ConvLayer([ pipeline.PatchExtractor([6,6], 1), # extracts patches pipeline.MeanvarNormalizer({'reg': 10}), # normalizes the patches pipeline.LinearEncoder({}, trainer = pipeline.ZcaTrainer({'reg': 0.1})), # Does whitening pipeline.ThresholdEncoder({'alpha': 0.0, 'twoside': False}, trainer = pipeline.NormalizedKmeansTrainer( {'k': 1600, 'max_iter':100})), # does encoding pipeline.SpatialPooler({'grid': (2,2), 'method': 'ave'}) ]) logging.info('Training the pipeline...') conv.train(cifar, 400000, exhaustive=True) mpi.root_pickle(conv, 'cifar_conv.pickle') # do pruning try: selected_idx = pickle.load(open('cifar_selected_idx.pickle')) logging.info('Skipping first layer pruning') except Exception, e: features = conv.sample(cifar, 200000, True) mpi.dump_matrix_multi(features, '/u/vis/ttmp/jiayq/cifar/cifar_feature_pooled_sample') m, std = mathutil.mpi_meanstd(features) features -= m features /= std covmat = mathutil.mpi_cov(features, reg = 0.01) if mpi.is_root(): selected_idx = pcfs.max_variance_feature_selection(covmat, 800) else:
# Command-line switches selecting which classification variant to run.
gflags.DEFINE_bool(
    "flat", False,
    "If set, perform flat classification.")
gflags.DEFINE_bool(
    "hier", False,
    "If set, perform hierarchical classification.")
gflags.DEFINE_bool(
    "hierlog", False,
    "If set, perform hierarchical classification with log info gain.")
gflags.DEFINE_bool(
    "treereg", False,
    "If set, perform classification with tree regularization.")
gflags.DEFINE_float(
    "reg", 0.01,
    "The regularization term used in the classification.")
FLAGS = gflags.FLAGS
FLAGS(sys.argv)

mpi.root_log_level(level=logging.DEBUG)

if FLAGS.extract:
    # Extraction mode: train the pipeline, compute features for both splits,
    # standardize them with the training statistics and dump them to FEATDIR.
    train_data = visiondata.CUBDataset(
        ROOT, True, crop=CROP, target_size=TARGET_SIZE, prefetch=True)
    test_data = visiondata.CUBDataset(
        ROOT, False, crop=CROP, target_size=TARGET_SIZE, prefetch=True)
    if MIRRORED:
        train_data = datasets.MirrorSet(train_data)
    CONV.train(train_data, 400000, exhaustive=True)
    mpi.root_pickle(CONV, __file__ + ".conv.pickle")
    Xtrain = CONV.process_dataset(train_data, as_2d=True)
    Xtest = CONV.process_dataset(test_data, as_2d=True)
    Ytrain = train_data.labels()
    Ytest = test_data.labels()
    m, std = classifier.feature_meanstd(Xtrain)
    # in-place standardization, training split first and then the test split
    for _feats in (Xtrain, Xtest):
        _feats -= m
        _feats /= std
    # dump order matches the file names: Xtrain, Xtest, Ytrain, Ytest
    for _name, _mat in (("Xtrain", Xtrain), ("Xtest", Xtest),
                        ("Ytrain", Ytrain), ("Ytest", Ytest)):
        mpi.dump_matrix_multi(_mat, os.path.join(FEATDIR, _name))
else:
    # Otherwise reuse the features dumped by a previous extraction run.
    Xtrain = mpi.load_matrix_multi(os.path.join(FEATDIR, "Xtrain"))
prefetch=True) test_data = visiondata.CUBDataset(ROOT, False, crop=CROP, subset=SUBSET, target_size=TARGET_SIZE, prefetch=True) mpi.mkdir(CONVOLUTION_OUTPUT) if MIRRORED: train_data = datasets.MirrorSet(train_data) # note that we do not mirror test data. logging.debug('Training convolutional NN...') CONV.train(train_data, 400000, exhaustive=True) CONV2.train(train_data, 400000, exhaustive=True) mpi.root_pickle(CONV2, CONVOLUTION_FILE) Xtrain = CONV2.process_dataset(train_data) Ytrain = train_data.labels().astype(np.int) Xtest = CONV2.process_dataset(test_data) Ytest = test_data.labels().astype(np.int) # normalization m, std = classifier.feature_meanstd(Xtrain, reg=0.01) # to match Adam Coates' pipeline Xtrain -= m Xtrain /= std Xtest -= m Xtest /= std w, b = classifier.l2svm_onevsall(Xtrain, Ytrain,
logging.debug('Training the pipeline...') conv.train(cifar, 400000, exhaustive=True) for pid, pool_size in enumerate(pool_sizes): conv[-1] = pipeline.SpatialPooler({'grid': (pool_size, pool_size), 'method': 'rms'}) logging.debug('Extracting features...') Xtrain = conv.process_dataset(cifar, as_2d = True) Ytrain = cifar.labels().astype(np.int) Xtest = conv.process_dataset(cifar_test, as_2d = True) Ytest = cifar_test.labels().astype(np.int) # normalization m, std = classifier.feature_meanstd(Xtrain, reg = 0.01) # to match Adam Coates' pipeline Xtrain -= m Xtrain /= std Xtest -= m Xtest /= std w, b = classifier.l2svm_onevsall(Xtrain, Ytrain, 0.005, fminargs={'disp': 0, 'maxfun': 1000}) accu_train = classifier.Evaluator.accuracy(Ytrain, np.dot(Xtrain, w) + b) accu_test = classifier.Evaluator.accuracy(Ytest, np.dot(Xtest, w) + b) logging.debug('Training accuracy: %f' % accu_train) logging.info('code %d, pool %d, testing accuracy: %f' % (code_size, pool_size, accu_test)) accuracy_record[cid, pid] = accu_test mpi.root_pickle((code_sizes, pool_sizes, accuracy_record), 'cifar_code_pool_size_comparison.pickle')
def bird_demo():
    """Run the CUB bird demo: train features, fit an SVM, report accuracy.

    All settings come from the module-level ``FLAGS`` object (``root``,
    ``crop``, ``version``, ``mirrored``, ``patch``, ``k``) and the
    ``TARGET_SIZE`` constant.  The function trains a ``pipeline.ConvLayer``
    on the training split, extracts 2-D features for both splits,
    standardizes them with training-set statistics, fits
    ``classifier.l2svm_onevsall`` and logs the train/test accuracy.  The
    normalization statistics, the SVM parameters and ``conv[-2].dictionary``
    are written to ``debug_features.pickle`` through ``mpi.root_pickle``.

    Returns:
        None.
    """
    logging.info('Loading data...')
    bird = visiondata.CUBDataset(FLAGS.root,
                                 is_training=True,
                                 crop=FLAGS.crop,
                                 version=FLAGS.version,
                                 prefetch=True,
                                 target_size=TARGET_SIZE)
    bird_test = visiondata.CUBDataset(FLAGS.root,
                                      is_training=False,
                                      crop=FLAGS.crop,
                                      version=FLAGS.version,
                                      prefetch=True,
                                      target_size=TARGET_SIZE)
    if FLAGS.mirrored:
        # Mirroring is applied to the training split only.
        bird = datasets.MirrorSet(bird)

    conv = pipeline.ConvLayer(
        [
            pipeline.PatchExtractor([FLAGS.patch, FLAGS.patch],
                                    1),  # extracts patches
            pipeline.MeanvarNormalizer({'reg': 10}),  # normalizes the patches
            pipeline.LinearEncoder({},
                                   trainer=pipeline.ZcaTrainer({'reg': 0.1})),
            pipeline.ThresholdEncoder(
                {'alpha': 0.25, 'twoside': True},
                trainer=pipeline.OMPTrainer({'k': FLAGS.k, 'max_iter': 100})),
            pipeline.SpatialPooler({'grid': 4, 'method': 'max'}),
        ],
        fixed_size=True)

    logging.info('Training the pipeline...')
    conv.train(bird, 400000, exhaustive=True)

    logging.info('Extracting features...')
    # The ``np.int`` alias no longer exists in NumPy >= 1.24; the builtin
    # ``int`` is what it pointed to, so the resulting dtype is unchanged.
    Xtrain = conv.process_dataset(bird, as_2d=True)
    Ytrain = bird.labels().astype(int)
    Xtest = conv.process_dataset(bird_test, as_2d=True)
    Ytest = bird_test.labels().astype(int)

    # normalization (in place; test features reuse the training statistics)
    m, std = classifier.feature_meanstd(Xtrain, reg=0.01)
    # to match Adam Coates' pipeline
    Xtrain -= m
    Xtrain /= std
    Xtest -= m
    Xtest /= std

    w, b = classifier.l2svm_onevsall(Xtrain,
                                     Ytrain,
                                     0.005,
                                     fminargs={'maxfun': 1000})
    accu_train = classifier.Evaluator.accuracy(Ytrain, np.dot(Xtrain, w) + b)
    accu_test = classifier.Evaluator.accuracy(Ytest, np.dot(Xtest, w) + b)
    # Pass values as logging arguments so formatting is deferred to the
    # logging framework.
    logging.info('Training accuracy: %f', accu_train)
    logging.info('Testing accuracy: %f', accu_test)
    mpi.root_pickle((m, std, w, b, conv[-2].dictionary),
                    'debug_features.pickle')
# Pick the training target encoding and the loss that goes with it.
if FLAGS.svm:
    # do svm
    target = classifier.to_one_of_k_coding(Ytrain, fill=-1)
    loss = classifier.Loss2.loss_hinge
else:
    # ``np.int`` was only an alias of the builtin ``int`` and has been removed
    # from NumPy (1.24+); the builtin yields exactly the same dtype.
    target = Ytrain.astype(int)
    loss = classifier.Loss2.loss_multiclass_logistic_yvector

solver = classifier.SolverStochastic(
    FLAGS.reg,
    loss,
    classifier.Reg.reg_l2,
    args={'mode': 'adagrad',
          'base_lr': 1e-7,
          'minibatch': FLAGS.minibatch,
          'num_iter': 1000,
          'callback': callback})
sampler = mathutil.NdarraySampler((Xtrain, target, None))
w, b = solver.solve(sampler, None, K=1000)

# Training accuracy of the learned linear model (argmax over class scores).
pred = (np.dot(Xtrain, w) + b).argmax(1)
accu_train = classifier.Evaluator.accuracy(Ytrain, pred)
logging.info("Reg %f, train accu %f", FLAGS.reg, accu_train)

# The output file name encodes the regularization value and the variant;
# ``hier`` takes precedence over ``svm`` exactly as in the original chain.
if FLAGS.hier:
    pickle_suffix = ".hier.pickle"
elif FLAGS.svm:
    pickle_suffix = ".svm.pickle"
else:
    pickle_suffix = ".pickle"
mpi.root_pickle((w, b, FLAGS.reg, accu_train),
                __file__ + str(FLAGS.reg) + pickle_suffix)
else: target = Ytrain.astype(np.int) loss = classifier.Loss2.loss_multiclass_logistic_yvector solver = classifier.SolverStochastic(FLAGS.reg, loss, classifier.Reg.reg_l2, args={ 'mode': 'adagrad', 'base_lr': 1e-7, 'minibatch': FLAGS.minibatch, 'num_iter': 1000, 'callback': callback }) sampler = mathutil.NdarraySampler((Xtrain, target, None)) w, b = solver.solve(sampler, None, K=1000) pred = (np.dot(Xtrain, w) + b).argmax(1) accu_train = classifier.Evaluator.accuracy(Ytrain, pred) logging.info("Reg %f, train accu %f" % \ (FLAGS.reg, accu_train)) if FLAGS.hier: mpi.root_pickle((w, b, FLAGS.reg, accu_train), __file__ + str(FLAGS.reg) + ".hier.pickle") elif FLAGS.svm: mpi.root_pickle((w, b, FLAGS.reg, accu_train), __file__ + str(FLAGS.reg) + ".svm.pickle") else: mpi.root_pickle((w, b, FLAGS.reg, accu_train), __file__ + str(FLAGS.reg) + ".pickle")
pipeline.PatchExtractor([5, 5], 1), # extracts patches pipeline.MeanvarNormalizer({'reg': 10}), # normalizes the patches pipeline.LinearEncoder({}, trainer = pipeline.ZcaTrainer({'reg': 0.1})), #pipeline.SpatialMeanNormalizer({'channels': 3}), pipeline.ThresholdEncoder({'alpha': 0.25, 'twoside': False}, trainer = pipeline.OMPTrainer( {'k': 3200, 'max_iter':100})), pipeline.KernelPooler(\ {'kernel': pipeline.KernelPooler.kernel_uniform(15), 'method': 'max', 'stride': 1}) ], fixed_size = True) conv.train(regions_data, 400000) mpi.root_pickle(conv, "conv.pickle") # so let's get the regions' features after pooling. regions_pooled = conv.process_dataset(regions_data) mpi.dump_matrix_multi(regions_pooled, '/tscratch/tmp/jiayq/pooled_lda/regions_pooled') logging.info("Feature shape:" + str(regions_pooled.shape[1:])) std = mathutil.mpi_std(regions_pooled.reshape(regions_pooled.shape[0], \ np.prod(regions_pooled.shape[1:]))) # compute the std mean std.resize(np.prod(regions_pooled.shape[1:-1]), regions_pooled.shape[-1]) std = std.mean(axis=0) std_order = np.argsort(std) # now, compute the within-class std regions_pooled_view = regions_pooled.reshape(
import cPickle as pickle
import logging
from iceberk import mpi, datasets, visiondata, pipeline, classifier
import numpy as np
import os
import sys

# Script: train a single-layer convolutional pipeline on the ILSVRC-2010
# training images and dump the trained pipeline with mpi.root_pickle.
mpi.root_log_level(logging.DEBUG)

logging.info('Loading the dataset...')
ilsvrc = visiondata.ILSVRCDataset(
    '/u/vis/x1/common/ILSVRC-2010/train/',
    ['jpeg'],
    prefetch=False,
    center_crop=256)

conv = pipeline.ConvLayer([
    # extracts patches
    pipeline.PatchExtractor([6, 6], 1),
    # normalizes the patches
    pipeline.MeanvarNormalizer(dict(reg=10)),
    # Does whitening
    pipeline.LinearEncoder(
        {}, trainer=pipeline.ZcaTrainer(dict(reg=0.1))),
    # does encoding
    pipeline.ThresholdEncoder(
        dict(alpha=0.25, twoside=True),
        trainer=pipeline.OMPTrainer(dict(k=1600, max_iter=100))),
    # average pool
    pipeline.SpatialPooler(dict(grid=(2, 2), method='ave')),
])

logging.info('Training the pipeline...')
conv.train(ilsvrc, 400000)

logging.info('Dumping the pipeline...')
mpi.root_pickle(conv, 'ilsvrc_vanilla_conv.pickle')
pipeline.PatchExtractor([5, 5], 1), # extracts patches pipeline.MeanvarNormalizer({'reg': 10}), # normalizes the patches pipeline.LinearEncoder({}, trainer = pipeline.ZcaTrainer({'reg': 0.1})), #pipeline.SpatialMeanNormalizer({'channels': 3}), pipeline.ThresholdEncoder({'alpha': 0.25, 'twoside': False}, trainer = pipeline.OMPTrainer( {'k': 3200, 'max_iter':100})), pipeline.KernelPooler(\ {'kernel': pipeline.KernelPooler.kernel_uniform(15), 'method': 'max', 'stride': 1}) ], fixed_size = True) conv.train(regions_data, 400000) mpi.root_pickle(conv, "conv.pickle") # so let's get the regions' features after pooling. regions_pooled = conv.process_dataset(regions_data) mpi.dump_matrix_multi(regions_pooled, '/tscratch/tmp/jiayq/pooled_lda/regions_pooled') logging.info("Feature shape:" + str(regions_pooled.shape[1:])) std = mathutil.mpi_std(regions_pooled.reshape(regions_pooled.shape[0], \ np.prod(regions_pooled.shape[1:]))) # compute the std mean std.resize(np.prod(regions_pooled.shape[1:-1]), regions_pooled.shape[-1]) std = std.mean(axis=0) std_order = np.argsort(std) # now, compute the within-class std regions_pooled_view = regions_pooled.reshape(regions_pooled.shape[0],
}, trainer=pipeline.NormalizedKmeansTrainer({ 'k': 1600, 'max_iter': 100 })), # does encoding pipeline.SpatialPooler({ 'grid': (2, 2), 'method': 'ave' }) ]) logging.info('Training the pipeline...') conv.train(cifar, 400000, exhaustive=True) mpi.root_pickle(conv, 'cifar_conv.pickle') # do pruning try: selected_idx = pickle.load(open('cifar_selected_idx.pickle')) logging.info('Skipping first layer pruning') except Exception, e: features = conv.sample(cifar, 200000, True) mpi.dump_matrix_multi( features, '/u/vis/ttmp/jiayq/cifar/cifar_feature_pooled_sample') m, std = mathutil.mpi_meanstd(features) features -= m features /= std covmat = mathutil.mpi_cov(features, reg=0.01) if mpi.is_root(): selected_idx = pcfs.max_variance_feature_selection(covmat, 800)
}, fminargs={ 'maxfun': 20, 'disp': 0 }) sampler = mathutil.NdarraySampler((Xtrain, target, None)) w, b = solver.solve(sampler) logging.info("Stochastic LBFGS done.") skf = StratifiedKFold(Ytrain, k=10) skf_results = [] for train_index, test_index in skf: param_init = (w, b) solver = classifier.SolverStochastic( FLAGS.reg, classifier.Loss2.loss_multiclass_logistic, classifier.Reg.reg_l2, args={ 'mode': 'adagrad', 'base_lr': 1e-4, 'minibatch': FLAGS.minibatch, 'num_iter': 1000 }) del target target = classifier.to_one_of_k_coding(Ytrain[train_index], fill=0) sampler = mathutil.NdarraySampler((Xtrain[train_index], target, None)) ww, bb = solver.solve(sampler, param_init) skf_results.append((ww, bb, train_index, test_index)) mpi.root_pickle(skf_results, __file__ + str(FLAGS.reg) + ".pickle")
logging.info("Performing classification")
target = classifier.to_one_of_k_coding(Ytrain, fill=0)

# stochastic lbfgs - we play a little trick by using all the training data to
# do initial lbfgs
solver = classifier.SolverStochastic(
    FLAGS.reg,
    classifier.Loss2.loss_multiclass_logistic,
    classifier.Reg.reg_l2,
    args=dict(mode='lbfgs', minibatch=FLAGS.minibatch, num_iter=20),
    fminargs=dict(maxfun=20, disp=0))
sampler = mathutil.NdarraySampler((Xtrain, target, None))
w, b = solver.solve(sampler)
logging.info("Stochastic LBFGS done.")

# Refine the shared initial solution separately on each of the 10 stratified
# folds, keeping the per-fold parameters together with the fold indices.
skf = StratifiedKFold(Ytrain, k=10)
skf_results = []
for train_index, test_index in skf:
    param_init = (w, b)
    solver = classifier.SolverStochastic(
        FLAGS.reg,
        classifier.Loss2.loss_multiclass_logistic,
        classifier.Reg.reg_l2,
        args=dict(mode='adagrad',
                  base_lr=1e-4,
                  minibatch=FLAGS.minibatch,
                  num_iter=1000))
    # drop the previous target matrix before building the one for this fold
    del target
    target = classifier.to_one_of_k_coding(Ytrain[train_index], fill=0)
    sampler = mathutil.NdarraySampler((Xtrain[train_index], target, None))
    ww, bb = solver.solve(sampler, param_init)
    skf_results.append((ww, bb, train_index, test_index))

mpi.root_pickle(skf_results, "%s%s.pickle" % (__file__, FLAGS.reg))