def testLoadMulti(self):
    """Check that every multi-file loading path yields the single-file matrix."""
    base = os.path.join(os.path.dirname(__file__), 'data', 'dumploadmulti')
    # Ground truth: the whole matrix stored in one .npy file.
    data_single = mpi.load_matrix(os.path.join(base, 'single_file.npy'))
    # Load the sharded copy by its common filename prefix.
    data_prefix = mpi.load_matrix_multi(os.path.join(base, 'multiple_files'))
    # Load the same shards from an explicit, sorted list of .npy files.
    npy_shards = glob.glob(os.path.join(base, 'multiple_files*.npy'))
    npy_shards.sort()
    data_npy_list = mpi.load_matrix_multi(npy_shards)
    # Load the .mat shards, picking the variable stored under 'data'.
    mat_shards = glob.glob(os.path.join(base, 'multiple_files*.mat'))
    mat_shards.sort()
    data_mat_list = mpi.load_matrix_multi(mat_shards, name='data')
    np.testing.assert_array_equal(data_single, data_prefix)
    np.testing.assert_array_equal(data_single, data_npy_list)
    np.testing.assert_array_equal(data_single, data_mat_list)
# Example #2
 def testLoadMulti(self):
     """A sharded dump must load to exactly the same array as the single file."""
     data_dir = os.path.join(os.path.dirname(__file__), 'data', 'dumploadmulti')
     expected = mpi.load_matrix(os.path.join(data_dir, 'single_file.npy'))
     actual = mpi.load_matrix_multi(os.path.join(data_dir, 'multiple_files'))
     np.testing.assert_array_equal(expected, actual)
# Example #3
 def testLoadMulti(self):
     """Cross-check single-file, prefix, .npy-list and .mat-list loading."""
     def _p(leaf):
         # Shorthand for a path under this test's data directory.
         return os.path.join(os.path.dirname(__file__), 'data',
                             'dumploadmulti', leaf)
     reference = mpi.load_matrix(_p('single_file.npy'))
     from_prefix = mpi.load_matrix_multi(_p('multiple_files'))
     # Explicit sorted shard lists, once as .npy files and once as .mat
     # files (the latter stores the matrix under the variable 'data').
     from_npy = mpi.load_matrix_multi(
         sorted(glob.glob(_p('multiple_files*.npy'))))
     from_mat = mpi.load_matrix_multi(
         sorted(glob.glob(_p('multiple_files*.mat'))), name='data')
     np.testing.assert_array_equal(reference, from_prefix)
     np.testing.assert_array_equal(reference, from_npy)
     np.testing.assert_array_equal(reference, from_mat)
# Example #4
 def testLoadMulti(self):
     """load_matrix and load_matrix_multi must agree on the dumped matrix."""
     join = os.path.join
     testdir = os.path.dirname(__file__)
     single = mpi.load_matrix(join(testdir, 'data', 'dumploadmulti',
                                   'single_file.npy'))
     sharded = mpi.load_matrix_multi(join(testdir, 'data', 'dumploadmulti',
                                          'multiple_files'))
     np.testing.assert_array_equal(single, sharded)
'''
Created on Jan 16, 2013

@author: jiayq
'''
import cPickle as pickle
import logging
from matplotlib import pyplot
from iceberk import mpi, visiondata, pipeline, datasets, mathutil, visualize
import numpy as np

# DEBUG-level logging on the MPI root process only.
mpi.root_log_level(logging.DEBUG)
try:
    # Fast path: reuse a cached pipeline and the pooled regions already on
    # disk (N = 10 presumably selects the number of shards to load --
    # TODO confirm against mpi.load_matrix_multi's signature).
    conv = pickle.load(open('conv.pickle'))
    regions_pooled = mpi.load_matrix_multi(\
                      '/tscratch/tmp/jiayq/pooled_lda/regions_pooled', N = 10)
except IOError:
    # compute the features
    logging.info("Generating the data...")
    # CUB-200-2011 birds training set, cropped with a 1.2x margin and
    # resized to 256x256 per image.
    bird = visiondata.CUBDataset('/u/vis/x1/common/CUB_200_2011',
            is_training=True, crop = 1.2, prefetch=True, target_size = [256,256])
    # Sample 100k 25x25 patches and reshape them to (n, 25, 25, 3).
    regions = pipeline.PatchExtractor([25,25], 1).sample(bird, 100000)
    regions.resize((regions.shape[0],) + (25,25,3))
    regions_data = datasets.NdarraySet(regions)
    try:
        # The pipeline may be cached even when the pooled matrix is not.
        conv = pickle.load(open('conv.pickle'))
    except IOError:
        logging.info("Training the feature extraction pipeline...")
        # NOTE(review): this ConvLayer definition is truncated in this snippet.
        conv = pipeline.ConvLayer([
                pipeline.PatchExtractor([5, 5], 1), # extracts patches
                pipeline.MeanvarNormalizer({'reg': 10}), # normalizes the patches
# Command-line flags (gflags style).
gflags.DEFINE_integer("minibatch", 100000, "The minibatch size")
gflags.DEFINE_bool("svm", False, "If set, run SVM")
gflags.DEFINE_bool("hier", False, "If set, use hierarchical loss")
FLAGS = gflags.FLAGS
FLAGS(sys.argv)

########
# Main script
########
# Guard: this script assumes a single MPI process (Python 2 raise syntax).
if mpi.SIZE > 1:
    raise RuntimeError, "This script runs on single machines only."

# Rank-dependent seed so randomness would differ per rank.
np.random.seed(42 + mpi.RANK)
mpi.root_log_level(level=logging.DEBUG)
logging.info("Loading data...")
# Training features are sharded across files; labels are one .npy file.
Xtrain = mpi.load_matrix_multi(os.path.join(FEATDIR, 'train', 'Xtrain'))
Ytrain = mpi.load_matrix(os.path.join(FEATDIR, 'train', 'Ytrain.npy'))
# Flatten all trailing feature dimensions into a single axis.
Xtrain.resize(Xtrain.shape[0], np.prod(Xtrain.shape[1:]))

# normalize to unit length
# NOTE(review): the trailing division by Xtrain.shape[1] scales each row to
# norm Xtrain.shape[1], not unit norm -- confirm this is intended.
for i in range(Xtrain.shape[0]):
    Xtrain[i] /= np.sqrt(np.dot(Xtrain[i], Xtrain[i]) + 1e-8) / Xtrain.shape[1]

logging.info("Performing classification")
# One-of-k encode labels, zeros in the negative positions.
target = classifier.to_one_of_k_coding(Ytrain, fill=0)

# stochastic lbfgs - we play a little trick by using all the training data to do initial lbfgs
# NOTE(review): this call is truncated in this snippet (the args dict is cut off).
solver = classifier.SolverStochastic(FLAGS.reg,
                                     classifier.Loss2.loss_multiclass_logistic,
                                     classifier.Reg.reg_l2,
                                     args={
# local import
import tax

########
# Settings
########

FEATDIR = "/u/vis/ttmp/jiayq/cifar100/"

########
# Main script
########
# Single-process guard (Python 2 raise syntax).
if mpi.SIZE > 1:
    raise RuntimeError, "This script runs on single machine only."

# Test features are sharded across files; labels cast to int for indexing.
Xtest = mpi.load_matrix_multi(os.path.join(FEATDIR, 'Xtest'))
Ytest = mpi.load_matrix_multi(os.path.join(FEATDIR, 'Ytest')).astype(np.int)

# Information-gain matrix over CIFAR-100 label pairs (local tax module).
infogain = tax.cifar_info_gain()

# Per-true-label mean and max information gain across predictions.
info_mean = infogain.mean(axis=1)
info_max = infogain.max(axis=1)

# Expected info gain of a random guess vs. the best achievable guess.
randguess = sum([info_mean[y] for y in Ytest]) / float(len(Ytest))
bestguess = sum([info_max[y] for y in Ytest]) / float(len(Ytest))

print 'Random guess baseline:', randguess
print 'Best guess baseline:', bestguess

# NOTE(review): the body of this loop is truncated in this snippet.
for filename in sys.argv[1:]:
    data = pickle.load(open(filename))
# Example #8
 def testLoadMulti(self):
     """Round-trip check: the multi-file dump equals the single-file matrix."""
     here = os.path.dirname(__file__)
     def _data_path(leaf):
         return os.path.join(here, "data", "dumploadmulti", leaf)
     expected = mpi.load_matrix(_data_path("single_file.npy"))
     actual = mpi.load_matrix_multi(_data_path("multiple_files"))
     np.testing.assert_array_equal(expected, actual)
    # (This branch computes features from scratch; the matching `if` header
    # lies outside this snippet.)
    # Cache the trained conv pipeline next to this script.
    mpi.root_pickle(CONV, __file__ + ".conv.pickle")
    Xtrain = CONV.process_dataset(train_data, as_2d=True)
    Xtest = CONV.process_dataset(test_data, as_2d=True)
    Ytrain = train_data.labels()
    Ytest = test_data.labels()
    # Standardize both splits with training-set mean/std only.
    m, std = classifier.feature_meanstd(Xtrain)
    Xtrain -= m
    Xtrain /= std
    Xtest -= m
    Xtest /= std
    # Persist everything so later runs can take the `else` branch below.
    mpi.dump_matrix_multi(Xtrain, os.path.join(FEATDIR, "Xtrain"))
    mpi.dump_matrix_multi(Xtest, os.path.join(FEATDIR, "Xtest"))
    mpi.dump_matrix_multi(Ytrain, os.path.join(FEATDIR, "Ytrain"))
    mpi.dump_matrix_multi(Ytest, os.path.join(FEATDIR, "Ytest"))
else:
    # Features were computed in an earlier run; just reload them.
    Xtrain = mpi.load_matrix_multi(os.path.join(FEATDIR, "Xtrain"))
    Xtest = mpi.load_matrix_multi(os.path.join(FEATDIR, "Xtest"))
    Ytrain = mpi.load_matrix_multi(os.path.join(FEATDIR, "Ytrain"))
    Ytest = mpi.load_matrix_multi(os.path.join(FEATDIR, "Ytest"))

if FLAGS.flat:
    logging.info("Performing flat classification")
    # Multiclass logistic regression with L2 regularization (L-BFGS capped
    # at 1000 function evaluations).
    solver = classifier.SolverMC(
        FLAGS.reg, classifier.Loss.loss_multiclass_logistic, classifier.Reg.reg_l2, fminargs={"maxfun": 1000}
    )
    w, b = solver.solve(Xtrain, classifier.to_one_of_k_coding(Ytrain, fill=0))
    # NOTE(review): raw score matrices are passed to accuracy() here, while
    # other snippets pass pred.argmax(1) -- presumably accuracy() handles
    # 2-D scores itself; verify.
    pred = np.dot(Xtrain, w) + b
    accu_train = classifier.Evaluator.accuracy(Ytrain, pred)
    pred = np.dot(Xtest, w) + b
    accu_test = classifier.Evaluator.accuracy(Ytest, pred)
    logging.info("Reg %f, train accu %f, test accu %f" % (FLAGS.reg, accu_train, accu_test))
    # (Feature-computation branch; the matching `if` header is outside this
    # snippet.) Cache the trained conv pipeline next to this script.
    mpi.root_pickle(CONV, __file__ + ".conv.pickle")
    Xtrain = CONV.process_dataset(train_data, as_2d = True)
    Xtest = CONV.process_dataset(test_data, as_2d = True)
    Ytrain = train_data.labels()
    Ytest = test_data.labels()
    # Standardize using training-set statistics only.
    m, std = classifier.feature_meanstd(Xtrain)
    Xtrain -= m
    Xtrain /= std
    Xtest -= m
    Xtest /= std
    # Persist features/labels so later runs can reload via the `else` branch.
    mpi.dump_matrix_multi(Xtrain, os.path.join(FEATDIR,'Xtrain'))
    mpi.dump_matrix_multi(Xtest, os.path.join(FEATDIR,'Xtest'))
    mpi.dump_matrix_multi(Ytrain, os.path.join(FEATDIR,'Ytrain'))
    mpi.dump_matrix_multi(Ytest, os.path.join(FEATDIR,'Ytest'))
else:
    # Reload previously dumped features.
    Xtrain = mpi.load_matrix_multi(os.path.join(FEATDIR,'Xtrain'))
    Xtest = mpi.load_matrix_multi(os.path.join(FEATDIR,'Xtest'))
    Ytrain = mpi.load_matrix_multi(os.path.join(FEATDIR,'Ytrain'))
    Ytest = mpi.load_matrix_multi(os.path.join(FEATDIR,'Ytest'))

if FLAGS.flat:
    logging.info("Performing flat classification")
    # L2-regularized multiclass logistic regression via L-BFGS.
    solver = classifier.SolverMC(FLAGS.reg,
                                 classifier.Loss.loss_multiclass_logistic,
                                 classifier.Reg.reg_l2,
                                 fminargs = {'maxfun': 1000})
    w,b = solver.solve(Xtrain, classifier.to_one_of_k_coding(Ytrain, fill=0))
    # NOTE(review): raw scores (not argmax) are handed to accuracy() --
    # presumably it accepts 2-D scores; verify. Snippet truncated after this.
    pred = np.dot(Xtrain, w) + b
    accu_train = classifier.Evaluator.accuracy(Ytrain, pred)
    pred = np.dot(Xtest, w) + b
    accu_test = classifier.Evaluator.accuracy(Ytest, pred)
import cPickle as pickle
from iceberk import mpi, classifier, mathutil
import numpy as np
import logging
import os, sys

# Locations of the validation features and the saved linear model.
VALDIR = "/tscratch/tmp/jiayq/ILSVRC-2010-LLC-SIFT-val/"
DUMPNAME = "/u/vis/x1/jiayq/ILSVRC-2010-LLC-SIFT-model.npz"

mpi.root_log_level(logging.DEBUG)

# Validation features (sharded) and labels (single .npy, ascii-sorted order).
Xval = mpi.load_matrix_multi(VALDIR + 'Xval')
Yval = mpi.load_matrix(VALDIR + 'labels_ascii_sorted.npy')
# Linear model weights 'w' and bias 'b' stored in an .npz archive.
npzfile = np.load(DUMPNAME)
pred = np.dot(Xval, npzfile['w']) + npzfile['b']
# Top-1 and top-5 accuracy on the validation set.
accu = classifier.Evaluator.accuracy(Yval, 
        pred.argmax(1))
accu5 = classifier.Evaluator.top_k_accuracy(Yval, pred, 5)
logging.debug('accu: %f, %f', accu, accu5)

# perform training accuracy 
# local import
import tax

########
# Settings
########

FEATDIR = "/u/vis/ttmp/jiayq/cifar100/"

########
# Main script
########
# Single-process guard (Python 2 raise syntax).
if mpi.SIZE > 1:
    raise RuntimeError, "This script runs on single machine only."

# Sharded test features; labels cast to int so they can index arrays below.
Xtest = mpi.load_matrix_multi(os.path.join(FEATDIR,'Xtest'))
Ytest = mpi.load_matrix_multi(os.path.join(FEATDIR,'Ytest')).astype(np.int)
    
# Information-gain matrix over CIFAR-100 label pairs (local tax module).
infogain = tax.cifar_info_gain()

# Per-true-label mean and max information gain across predictions.
info_mean = infogain.mean(axis=1)
info_max = infogain.max(axis=1)

# Expected info gain of a random guess vs. the best achievable guess.
randguess = sum([info_mean[y] for y in Ytest]) / float(len(Ytest))
bestguess = sum([info_max[y] for y in Ytest]) / float(len(Ytest))

print 'Random guess baseline:', randguess
print 'Best guess baseline:', bestguess

# NOTE(review): the body of this loop is truncated in this snippet.
for filename in sys.argv[1:]:
    data = pickle.load(open(filename))
# Example #13
import cPickle as pickle
from iceberk import mpi, classifier, mathutil
import numpy as np
import logging
import os, sys

# Locations of the validation features and the saved linear model.
VALDIR = "/tscratch/tmp/jiayq/ILSVRC-2010-LLC-SIFT-val/"
DUMPNAME = "/u/vis/x1/jiayq/ILSVRC-2010-LLC-SIFT-model.npz"

mpi.root_log_level(logging.DEBUG)

# Validation features (sharded) and labels (single .npy, ascii-sorted order).
Xval = mpi.load_matrix_multi(VALDIR + 'Xval')
Yval = mpi.load_matrix(VALDIR + 'labels_ascii_sorted.npy')
# Linear model weights 'w' and bias 'b' from an .npz archive.
npzfile = np.load(DUMPNAME)
pred = np.dot(Xval, npzfile['w']) + npzfile['b']
# Top-1 and top-5 validation accuracy.
accu = classifier.Evaluator.accuracy(Yval, pred.argmax(1))
accu5 = classifier.Evaluator.top_k_accuracy(Yval, pred, 5)
logging.debug('accu: %f, %f', accu, accu5)

# perform training accuracy
# Settings
########
FEATDIR = "/u/vis/x1/common/ILSVRC-2010/SBOW/"
# Command-line flags (gflags style).
gflags.DEFINE_float("reg", 0.01, "The reg term")
gflags.DEFINE_integer("minibatch", 10000, "The minibatch size")
gflags.DEFINE_bool("svm", False, "If set, run SVM")
FLAGS = gflags.FLAGS
FLAGS(sys.argv)

########
# Main script
########
# Rank-dependent seed for MPI runs.
np.random.seed(42 + mpi.RANK)
mpi.root_log_level(level=logging.DEBUG)
logging.info("Loading data...")
# Train features are sharded over files; val/test are single matrices.
Xtrain = mpi.load_matrix_multi(os.path.join(FEATDIR,'train', 'Xtrain'))
Ytrain = mpi.load_matrix(os.path.join(FEATDIR,'train', 'Ytrain.npy'))
# Flatten trailing feature dimensions into one axis.
Xtrain.resize(Xtrain.shape[0], np.prod(Xtrain.shape[1:]))

Xval = mpi.load_matrix(os.path.join(FEATDIR, 'val', 'Xval'))
Yval = mpi.load_matrix(os.path.join(FEATDIR, 'val', 'Yval'))
Xval.resize(Xval.shape[0], np.prod(Xval.shape[1:]))

Xtest = mpi.load_matrix(os.path.join(FEATDIR, 'test', 'Xtest'))
Ytest = mpi.load_matrix(os.path.join(FEATDIR, 'test', 'Ytest'))
Xtest.resize(Xtest.shape[0], np.prod(Xtest.shape[1:]))

# normalize to unit length
# NOTE(review): dividing by Xtrain.shape[1] scales rows to norm shape[1],
# not unit norm -- confirm intended. The second loop body is truncated in
# this snippet.
for i in range(Xtrain.shape[0]):
    Xtrain[i] /= np.sqrt(np.dot(Xtrain[i],Xtrain[i]) + 1e-8) / Xtrain.shape[1]
for i in range(Xval.shape[0]):
# Example #15
# Output model path, parameterized by the --name flag.
DUMPNAME = "/u/vis/x1/jiayq/ILSVRC/subcategory/" + FLAGS.name \
        + "/ILSVRC-2010-LLC-SIFT-subcategory-model.npz"

########
# Main script
########
# Time-based, rank-dependent seed so each MPI rank samples differently.
np.random.seed(int(time.time()) + mpi.RANK * 100)
mpi.root_log_level(level=logging.DEBUG)
logging.info("Loading data...")

if FLAGS.preload:
    # Load every .mat feature shard (variable 'features') into memory and
    # sample minibatches from the in-memory arrays.
    files = glob.glob(FEATDIR + 'n*.mat')
    files.sort()
    if mpi.is_root():
        print 'files', files
    Xtrain = mpi.load_matrix_multi(files, name='features')
    Ytrain = mpi.load_matrix(FEATDIR + 'labels_ascii_sorted.npy').astype(np.int)
    sampler = mathutil.NdarraySampler([Xtrain, Ytrain, None], copy=False)
else:
    # Stream minibatches from disk instead of preloading everything.
    base_sampler = mathutil.PrefetchFileSampler(\
            [FEATDIR + '*.mat',
             FEATDIR + 'labels_ascii_sorted.npy',
             None])
    # Cast labels to int on the fly; features pass through unchanged.
    sampler = mathutil.PostProcessSampler(\
            base_sampler,
            [lambda X: X,
            lambda Y: Y.astype(np.int),
            None])

# Per-sample multiclass logistic loss (labels given as a vector).
loss = classifier.Loss2.loss_multiclass_logistic_yvector
# Example #16
'''
Created on Jan 16, 2013

@author: jiayq
'''
import cPickle as pickle
import logging
from matplotlib import pyplot
from iceberk import mpi, visiondata, pipeline, datasets, mathutil, visualize
import numpy as np

# DEBUG-level logging on the MPI root process only.
mpi.root_log_level(logging.DEBUG)
try:
    # Fast path: reuse a cached pipeline and pooled regions already on disk
    # (N = 10 presumably selects the number of shards -- TODO confirm
    # against mpi.load_matrix_multi's signature).
    conv = pickle.load(open('conv.pickle'))
    regions_pooled = mpi.load_matrix_multi(\
                      '/tscratch/tmp/jiayq/pooled_lda/regions_pooled', N = 10)
except IOError:
    # compute the features
    logging.info("Generating the data...")
    # CUB-200-2011 birds training set, 1.2x crop margin, 256x256 images.
    bird = visiondata.CUBDataset('/u/vis/x1/common/CUB_200_2011',
                                 is_training=True,
                                 crop=1.2,
                                 prefetch=True,
                                 target_size=[256, 256])
    # Sample 100k 25x25 patches and reshape to (n, 25, 25, 3).
    regions = pipeline.PatchExtractor([25, 25], 1).sample(bird, 100000)
    regions.resize((regions.shape[0], ) + (25, 25, 3))
    regions_data = datasets.NdarraySet(regions)
    try:
        # The pipeline may be cached even when the pooled matrix is not.
        conv = pickle.load(open('conv.pickle'))
    except IOError:
        # NOTE(review): the pipeline construction that follows is truncated
        # in this snippet.
        logging.info("Training the feature extraction pipeline...")
# Feature/result locations; the labels path is templated on the split name.
FEATDIR = '/tscratch/tmp/sergeyk/imagenet-sbow/'
RESULTSDIR = '/u/vis/x1/sergeyk/imagenet-sbow/'
LABELS = '/u/vis/x1/jiayq/ILSVRC/{}_predict/labels_ascii_sorted.npy'

########
# Main script
########
# load the classifier weights
# wb is presumably a tuple (w, b, ..., train_accuracy, ...): indices 0, 1
# and -2 are used below -- verify against the training script that wrote it.
wb = pickle.load(open('/u/vis/jiayq/codes/python/imagenet_exp/script_train_imagenet_sbow_stochastic.py0.0001.pickle'))

np.random.seed(42 + mpi.RANK)
mpi.root_log_level(level=logging.DEBUG)
# Score every split and dump the raw score matrices to RESULTSDIR.
for s in ['val', 'test', 'train']:
    logging.info("Loading data...")
    if s == 'train':
        print('Train accuracy is claimed to be: {:.3f}'.format(wb[-2]))
        # train is multiple matrices
        X = mpi.load_matrix_multi(os.path.join(FEATDIR, s, 'X{}'.format(s)))
    else:
        X = mpi.load_matrix(os.path.join(FEATDIR, s, 'X{}'.format(s)))
    # Flatten trailing feature dimensions.
    X.resize(X.shape[0], np.prod(X.shape[1:]))
    # Row-normalize; NOTE(review): dividing by X.shape[1] scales rows to
    # norm X.shape[1], not unit length -- confirm intended.
    X /= np.sqrt(inner1d(X, X)[:, np.newaxis] + 1e-8) / X.shape[1]
    Y = mpi.load_matrix(LABELS.format(s))
    print(X.shape)
    print(Y.shape)
    logging.info("Evaluating...")
    # Linear scores; argmax over classes gives the predicted label.
    prob = np.dot(X, wb[0]) + wb[1]
    print('Accuracy on {}: {:.3f}'.format(s, classifier.Evaluator.accuracy(Y, prob.argmax(1))))
    np.save(os.path.join(RESULTSDIR, '{}_prob.npy'.format(s)), prob)