def testLoadMulti(self):
    """A matrix saved as a single .npy file must equal the same matrix
    loaded back from its multi-file shards."""
    base = os.path.join(os.path.dirname(__file__), 'data', 'dumploadmulti')
    from_single = mpi.load_matrix(os.path.join(base, 'single_file.npy'))
    from_shards = mpi.load_matrix_multi(os.path.join(base, 'multiple_files'))
    np.testing.assert_array_equal(from_single, from_shards)
def testLoadMulti(self):
    """Single-file, prefix-based multi-file, explicit .npy-list and explicit
    .mat-list loading must all yield the same matrix."""
    folder = os.path.join(os.path.dirname(__file__), 'data', 'dumploadmulti')
    reference = mpi.load_matrix(os.path.join(folder, 'single_file.npy'))
    # load by shard prefix
    by_prefix = mpi.load_matrix_multi(os.path.join(folder, 'multiple_files'))
    # load from an explicit, sorted list of .npy shards
    npy_shards = sorted(glob.glob(
        os.path.join(folder, 'multiple_files*.npy')))
    by_npy_list = mpi.load_matrix_multi(npy_shards)
    # load from an explicit, sorted list of .mat shards; the matrix is
    # stored under the variable name 'data' inside each .mat file
    mat_shards = sorted(glob.glob(
        os.path.join(folder, 'multiple_files*.mat')))
    by_mat_list = mpi.load_matrix_multi(mat_shards, name='data')
    for candidate in (by_prefix, by_npy_list, by_mat_list):
        np.testing.assert_array_equal(reference, candidate)
def testDumpLoad(self):
    """dump_matrix/load_matrix round-trip for 1-D, 2-D and 3-D inputs.

    dump_matrix concatenates the rows from every MPI rank into one file,
    so on the root the raw file holds local_size * mpi.SIZE rows;
    load_matrix then hands each rank back a slab of the original local
    shape.
    """
    local_size = 2
    sources = [np.random.rand(*shape)
               for shape in ((local_size,),
                             (local_size, 2),
                             (local_size, 2, 3))]
    for mat in sources:
        mpi.dump_matrix(mat, _MPI_DUMP_TEST_FILE)
        if mpi.is_root():
            dumped = np.load(_MPI_DUMP_TEST_FILE)
            self.assertEqual(dumped.shape,
                             (local_size * mpi.SIZE,) + mat.shape[1:])
        reloaded = mpi.load_matrix(_MPI_DUMP_TEST_FILE)
        self.assertEqual(mat.shape, reloaded.shape)
def testLoadMulti(self):
    """load_matrix_multi on a shard prefix reproduces the single-file matrix."""
    testdir = os.path.dirname(__file__)

    def _fixture(leaf):
        # Path to a file inside the bundled test-data folder.
        return os.path.join(testdir, 'data', 'dumploadmulti', leaf)

    expected = mpi.load_matrix(_fixture('single_file.npy'))
    actual = mpi.load_matrix_multi(_fixture('multiple_files'))
    np.testing.assert_array_equal(expected, actual)
def testDumpLoad(self):
    """Dumping then loading preserves the local shape for 1-D/2-D/3-D input."""
    local_size = 2
    mat_1d = np.random.rand(local_size)
    mat_2d = np.random.rand(local_size, 2)
    mat_3d = np.random.rand(local_size, 2, 3)
    for mat in (mat_1d, mat_2d, mat_3d):
        mpi.dump_matrix(mat, _MPI_DUMP_TEST_FILE)
        if mpi.is_root():
            # The on-disk matrix stacks every rank's local rows.
            on_disk = np.load(_MPI_DUMP_TEST_FILE)
            self.assertEqual(on_disk.shape,
                             (local_size * mpi.SIZE, ) + mat.shape[1:])
        back = mpi.load_matrix(_MPI_DUMP_TEST_FILE)
        self.assertEqual(mat.shape, back.shape)
def testLoadMulti(self):
    """Every supported multi-file loading mode agrees with the single file."""
    root = os.path.join(os.path.dirname(__file__), 'data', 'dumploadmulti')
    data1 = mpi.load_matrix(os.path.join(root, 'single_file.npy'))
    # 1) implicit: hand load_matrix_multi the shard prefix
    data2 = mpi.load_matrix_multi(os.path.join(root, 'multiple_files'))
    np.testing.assert_array_equal(data1, data2)
    # 2) explicit list of .npy shards, sorted for a deterministic order
    npy_files = glob.glob(os.path.join(root, 'multiple_files*.npy'))
    npy_files.sort()
    np.testing.assert_array_equal(data1, mpi.load_matrix_multi(npy_files))
    # 3) explicit list of .mat shards, stored under the name 'data'
    mat_files = glob.glob(os.path.join(root, 'multiple_files*.mat'))
    mat_files.sort()
    np.testing.assert_array_equal(
        data1, mpi.load_matrix_multi(mat_files, name='data'))
# Visualize every affinity-propagation centroid together with its most
# related cluster members. ap_result presumably holds (exemplar indices,
# per-atom assignments, similarity matrix) — TODO confirm against the
# producer of ap_result.
for i, id in enumerate(ap_result[0]):  # NOTE: `id` shadows the builtin
    print(i)
    centroid = dictionary[id]
    neighbors = np.flatnonzero(ap_result[1] == i)
    # Order members by decreasing similarity to the exemplar and keep 11
    # (per the original note: the "10 most related" plus one more).
    order = np.argsort(-ap_result[2][id, neighbors])
    neighbors = neighbors[order][:11]
    if len(neighbors) > 0:
        misc.imsave('centroids/%d.png' % (i), vis.show_single(centroid))
        misc.imsave('centroids/%d-neighbors.png' % (i),
                    vis.show_multiple(dictionary[neighbors], 1, np.max))

# Load the sampled pooled / unpooled feature distributions.
within_cluster_samples = mpi.load_matrix(
    'cvpr_exemplar_centroids_distribution_within_cluster_postpooling.npy')
between_centroids_samples = mpi.load_matrix(
    'cvpr_exemplar_centroids_distribution_between_cluster_postpooling.npy')
within_cluster_samples_prepooling = mpi.load_matrix(
    'cvpr_exemplar_centroids_distribution_within_cluster_prepooling.npy')

# Feature-feature correlation matrices (rows are samples, hence the .T).
corr_within_cluster = np.corrcoef(within_cluster_samples.T)
corr_between_centroids = np.corrcoef(between_centroids_samples.T)
corr_within_cluster_prepooling = np.corrcoef(
    within_cluster_samples_prepooling.T)

# Subsample 2000 rows (with replacement) from each set for visualization.
within_cluster_samples = within_cluster_samples[
    np.random.randint(within_cluster_samples.shape[0], size=2000)]
between_centroids_samples = between_centroids_samples[
    np.random.randint(between_centroids_samples.shape[0], size=2000)]
within_cluster_samples_prepooling = within_cluster_samples_prepooling[
    np.random.randint(within_cluster_samples_prepooling.shape[0], size=2000)]
########
# Configuration
########
FEATDIR = "/tscratch/tmp/jiayq/imagenet-sbow/"
MODEL_NAME = 'script_train_imagenet_sbow_stochastic.py.flat.0.001.pickle'
NTRIAL = 10000
SETSIZE = 10
FLAGS = gflags.FLAGS
FLAGS(sys.argv)

########
# Main script
########
# Rank-dependent seed so every MPI worker draws a distinct random stream.
np.random.seed(42 + mpi.RANK)
mpi.root_log_level(level=logging.DEBUG)
logging.info("Loading data...")

# The training features are sharded over multiple files; val and test are
# single matrices. Each feature tensor is flattened to 2-D (samples x dims).
Xtrain = mpi.load_matrix_multi(os.path.join(FEATDIR, 'train', 'Xtrain'))
Ytrain = mpi.load_matrix(os.path.join(FEATDIR, 'train', 'Ytrain.npy'))
Xtrain.resize(Xtrain.shape[0], np.prod(Xtrain.shape[1:]))
Xval = mpi.load_matrix(os.path.join(FEATDIR, 'val', 'Xval'))
Yval = mpi.load_matrix(os.path.join(FEATDIR, 'val', 'Yval'))
Xval.resize(Xval.shape[0], np.prod(Xval.shape[1:]))
Xtest = mpi.load_matrix(os.path.join(FEATDIR, 'test', 'Xtest'))
Ytest = mpi.load_matrix(os.path.join(FEATDIR, 'test', 'Ytest'))
Xtest.resize(Xtest.shape[0], np.prod(Xtest.shape[1:]))

# Scale each row: row /= (l2 norm / feature dim), i.e. the resulting row
# norm equals the feature dimension. The 1e-8 keeps all-zero rows finite.
for row in range(Xtrain.shape[0]):
    Xtrain[row] /= np.sqrt(
        np.dot(Xtrain[row], Xtrain[row]) + 1e-8) / Xtrain.shape[1]
for row in range(Xval.shape[0]):
    Xval[row] /= np.sqrt(np.dot(Xval[row], Xval[row]) + 1e-8) / Xval.shape[1]
# ap_result: presumably (exemplar indices, per-atom assignments, similarity
# matrix) — TODO confirm. For every cluster, dump the exemplar patch plus a
# montage of its most similar members.
for i, id in enumerate(ap_result[0]):  # `id` shadows the builtin
    print(i)
    members = np.flatnonzero(ap_result[1] == i)
    # Most-similar-first ordering; keep 11 entries (matches the original
    # "10 most related" note plus one extra slot).
    members = members[np.argsort(-ap_result[2][id, members])][:11]
    if len(members) > 0:
        misc.imsave("centroids/%d.png" % (i),
                    vis.show_single(dictionary[id]))
        misc.imsave("centroids/%d-neighbors.png" % (i),
                    vis.show_multiple(dictionary[members], 1, np.max))

within_cluster_samples = mpi.load_matrix(
    "cvpr_exemplar_centroids_distribution_within_cluster_postpooling.npy")
between_centroids_samples = mpi.load_matrix(
    "cvpr_exemplar_centroids_distribution_between_cluster_postpooling.npy")
within_cluster_samples_prepooling = mpi.load_matrix(
    "cvpr_exemplar_centroids_distribution_within_cluster_prepooling.npy")

# Correlation across feature dimensions (samples are rows, hence the .T).
corr_within_cluster = np.corrcoef(within_cluster_samples.T)
corr_between_centroids = np.corrcoef(between_centroids_samples.T)
corr_within_cluster_prepooling = np.corrcoef(
    within_cluster_samples_prepooling.T)


def _subsample(samples, count=2000):
    # A random subset of `count` rows (with replacement) for plotting.
    return samples[np.random.randint(samples.shape[0], size=count)]


within_cluster_samples = _subsample(within_cluster_samples)
between_centroids_samples = _subsample(between_centroids_samples)
within_cluster_samples_prepooling = _subsample(
    within_cluster_samples_prepooling)
########
# Configuration and command-line flags
########
FEATDIR = "/u/vis/x1/common/ILSVRC-2010/SBOW/"
gflags.DEFINE_float("reg", 0.01, "The reg term")
gflags.DEFINE_integer("minibatch", 10000, "The minibatch size")
gflags.DEFINE_bool("svm", False, "If set, run SVM")
FLAGS = gflags.FLAGS
FLAGS(sys.argv)

########
# Main script
########
np.random.seed(42 + mpi.RANK)  # per-rank seed: independent random streams
mpi.root_log_level(level=logging.DEBUG)
logging.info("Loading data...")

# Training features are sharded across files; val/test each fit in one.
# Every feature tensor is flattened to 2-D (samples x flattened dims).
Xtrain = mpi.load_matrix_multi(os.path.join(FEATDIR, 'train', 'Xtrain'))
Ytrain = mpi.load_matrix(os.path.join(FEATDIR, 'train', 'Ytrain.npy'))
Xtrain.resize(Xtrain.shape[0], np.prod(Xtrain.shape[1:]))
Xval = mpi.load_matrix(os.path.join(FEATDIR, 'val', 'Xval'))
Yval = mpi.load_matrix(os.path.join(FEATDIR, 'val', 'Yval'))
Xval.resize(Xval.shape[0], np.prod(Xval.shape[1:]))
Xtest = mpi.load_matrix(os.path.join(FEATDIR, 'test', 'Xtest'))
Ytest = mpi.load_matrix(os.path.join(FEATDIR, 'test', 'Ytest'))
Xtest.resize(Xtest.shape[0], np.prod(Xtest.shape[1:]))


def _scale_rows_in_place(X):
    # row /= (l2 norm / feature dim); the 1e-8 keeps zero rows finite.
    for n in range(X.shape[0]):
        X[n] /= np.sqrt(np.dot(X[n], X[n]) + 1e-8) / X.shape[1]


_scale_rows_in_place(Xtrain)
_scale_rows_in_place(Xval)
+ "/ILSVRC-2010-LLC-SIFT-subcategory-model.npz" ######## # Main script ######## np.random.seed(int(time.time()) + mpi.RANK * 100) mpi.root_log_level(level=logging.DEBUG) logging.info("Loading data...") if FLAGS.preload: files = glob.glob(FEATDIR + 'n*.mat') files.sort() if mpi.is_root(): print 'files', files Xtrain = mpi.load_matrix_multi(files, name='features') Ytrain = mpi.load_matrix(FEATDIR + 'labels_ascii_sorted.npy').astype(np.int) sampler = mathutil.NdarraySampler([Xtrain, Ytrain, None], copy=False) else: base_sampler = mathutil.PrefetchFileSampler(\ [FEATDIR + '*.mat', FEATDIR + 'labels_ascii_sorted.npy', None]) sampler = mathutil.PostProcessSampler(\ base_sampler, [lambda X: X, lambda Y: Y.astype(np.int), None]) loss = classifier.Loss2.loss_multiclass_logistic_yvector callback = None
None]) loss = classifier.Loss2.loss_multiclass_logistic DUMPNAME = "/u/vis/x1/jiayq/ILSVRC-2010-LLC-SIFT-model-hier.npz" else: sampler = mathutil.PostProcessSampler( base_sampler, [lambda X: X.astype('float64') / np.sqrt(inner1d(X, X)[:, np.newaxis]), lambda Y: Y.astype(np.int), None]) loss = classifier.Loss2.loss_multiclass_logistic_yvector files = glob.glob(VALDIR + '/*.npy') Xval = np.vstack([np.load(f) for f in sorted(files)]).astype('float64') Xval /= np.sqrt(inner1d(Xval, Xval)[:, np.newaxis]) Yval = mpi.load_matrix(VAL_LABEL) callback = [lambda wb: classifier.Evaluator.accuracy( Yval, (np.dot(Xval, wb[0]) + wb[1]).argmax(1))] logging.info("Performing classification") if os.path.exists(DUMPNAME): resume = DUMPNAME else: resume = None # Do search for reg, base_lr param_grid = { 'base_lr': [.1, .01, .001, .0001], 'reg': [1e-3, 1e-5, 1e-8, 1e-10] } param_settings = [dict(zip(('base_lr', 'reg'), x)) for x in list(itertools.product(param_grid['base_lr'], param_grid['reg']))]
FEATDIR = '/tscratch/tmp/sergeyk/imagenet-sbow/'
RESULTSDIR = '/u/vis/x1/sergeyk/imagenet-sbow/'
LABELS = '/u/vis/x1/jiayq/ILSVRC/{}_predict/labels_ascii_sorted.npy'
########
# Main script
########
# Load the trained classifier weights. wb presumably packs (w, b, ...,
# train_accuracy, ...) given the wb[0]/wb[1]/wb[-2] uses below — TODO
# confirm against the training script that produced the pickle.
# FIX: open the pickle inside a context manager so the file handle is not
# leaked, and open in binary mode as pickle expects.
with open('/u/vis/jiayq/codes/python/imagenet_exp/'
          'script_train_imagenet_sbow_stochastic.py0.0001.pickle',
          'rb') as _model_file:
    wb = pickle.load(_model_file)
np.random.seed(42 + mpi.RANK)
mpi.root_log_level(level=logging.DEBUG)
for s in ['val', 'test', 'train']:
    logging.info("Loading data...")
    if s == 'train':
        print('Train accuracy is claimed to be: {:.3f}'.format(wb[-2]))
        # The training features are sharded over multiple matrices.
        X = mpi.load_matrix_multi(os.path.join(FEATDIR, s, 'X{}'.format(s)))
    else:
        X = mpi.load_matrix(os.path.join(FEATDIR, s, 'X{}'.format(s)))
    # Flatten to 2-D (samples x dims) and rescale each row so that
    # row /= (l2 norm / feature dim); 1e-8 guards all-zero rows.
    X.resize(X.shape[0], np.prod(X.shape[1:]))
    X /= np.sqrt(inner1d(X, X)[:, np.newaxis] + 1e-8) / X.shape[1]
    Y = mpi.load_matrix(LABELS.format(s))
    print(X.shape)
    print(Y.shape)
    logging.info("Evaluating...")
    # Linear scores per class; argmax gives the predicted label.
    prob = np.dot(X, wb[0]) + wb[1]
    print('Accuracy on {}: {:.3f}'.format(
        s, classifier.Evaluator.accuracy(Y, prob.argmax(1))))
    np.save(os.path.join(RESULTSDIR, '{}_prob.npy'.format(s)), prob)
# Evaluate a dumped linear model on the ILSVRC-2010 LLC-SIFT validation set.
import cPickle as pickle
from iceberk import mpi, classifier, mathutil
import numpy as np
import logging
import os, sys

VALDIR = "/tscratch/tmp/jiayq/ILSVRC-2010-LLC-SIFT-val/"
DUMPNAME = "/u/vis/x1/jiayq/ILSVRC-2010-LLC-SIFT-model.npz"

mpi.root_log_level(logging.DEBUG)

# Sharded validation features plus their labels.
Xval = mpi.load_matrix_multi(VALDIR + 'Xval')
Yval = mpi.load_matrix(VALDIR + 'labels_ascii_sorted.npy')

# Linear prediction with the stored weight matrix 'w' and bias 'b'.
npzfile = np.load(DUMPNAME)
pred = np.dot(Xval, npzfile['w']) + npzfile['b']

# Top-1 and top-5 accuracy.
accu = classifier.Evaluator.accuracy(Yval, pred.argmax(1))
accu5 = classifier.Evaluator.top_k_accuracy(Yval, pred, 5)
logging.debug('accu: %f, %f', accu, accu5)
# TODO: also report the training accuracy.
def testLoadMulti(self):
    """Single-file and multi-file loading agree on the bundled fixture."""
    data_dir = os.path.join(os.path.dirname(__file__),
                            "data", "dumploadmulti")
    single = mpi.load_matrix(os.path.join(data_dir, "single_file.npy"))
    multi = mpi.load_matrix_multi(os.path.join(data_dir, "multiple_files"))
    np.testing.assert_array_equal(single, multi)
gflags.DEFINE_bool("svm", False, "If set, run SVM") gflags.DEFINE_bool("hier", False, "If set, use hierarchical loss") FLAGS = gflags.FLAGS FLAGS(sys.argv) ######## # Main script ######## if mpi.SIZE > 1: raise RuntimeError, "This script runs on single machines only." np.random.seed(42 + mpi.RANK) mpi.root_log_level(level=logging.DEBUG) logging.info("Loading data...") Xtrain = mpi.load_matrix_multi(os.path.join(FEATDIR, 'train', 'Xtrain')) Ytrain = mpi.load_matrix(os.path.join(FEATDIR, 'train', 'Ytrain.npy')) Xtrain.resize(Xtrain.shape[0], np.prod(Xtrain.shape[1:])) # normalize to unit length for i in range(Xtrain.shape[0]): Xtrain[i] /= np.sqrt(np.dot(Xtrain[i], Xtrain[i]) + 1e-8) / Xtrain.shape[1] logging.info("Performing classification") target = classifier.to_one_of_k_coding(Ytrain, fill=0) # stochastic lbfgs - we play a little trick by using all the training data to do initial lbfgs solver = classifier.SolverStochastic(FLAGS.reg, classifier.Loss2.loss_multiclass_logistic, classifier.Reg.reg_l2, args={ 'mode': 'lbfgs',
gflags.DEFINE_bool("svm", False, "If set, run SVM") gflags.DEFINE_bool("hier", False, "If set, use hierarchical loss") FLAGS = gflags.FLAGS FLAGS(sys.argv) ######## # Main script ######## if mpi.SIZE > 1: raise RuntimeError, "This script runs on single machines only." np.random.seed(42 + mpi.RANK) mpi.root_log_level(level=logging.DEBUG) logging.info("Loading data...") Xtrain = mpi.load_matrix_multi(os.path.join(FEATDIR,'train', 'Xtrain')) Ytrain = mpi.load_matrix(os.path.join(FEATDIR,'train', 'Ytrain.npy')) Xtrain.resize(Xtrain.shape[0], np.prod(Xtrain.shape[1:])) # normalize to unit length for i in range(Xtrain.shape[0]): Xtrain[i] /= np.sqrt(np.dot(Xtrain[i],Xtrain[i]) + 1e-8) / Xtrain.shape[1] logging.info("Performing classification") target = classifier.to_one_of_k_coding(Ytrain, fill = 0) # stochastic lbfgs - we play a little trick by using all the training data to do initial lbfgs solver = classifier.SolverStochastic(FLAGS.reg, classifier.Loss2.loss_multiclass_logistic, classifier.Reg.reg_l2, args = {'mode': 'lbfgs', 'minibatch': FLAGS.minibatch, 'num_iter': 20}, fminargs = {'maxfun': 20, 'disp': 0})