Example #1
import os
import urllib.request  # urllib.urlretrieve in Python 2
import numpy as np
import h5py

# getPYDIR, createIfAbsent and readData are helpers defined elsewhere in the project
def _processFashionMNIST():
    pfile = getPYDIR() + '/datasets/fashion_mnist/proc-fashion_mnist.h5'
    DIR = os.path.dirname(pfile)
    createIfAbsent(DIR)
    # Download the four Fashion-MNIST archives if they are not already present
    if not os.path.exists(os.path.join(DIR, 'train-images-idx3-ubyte.gz')):
        print('Downloading data')
        urllib.request.urlretrieve('http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/train-images-idx3-ubyte.gz', os.path.join(DIR, 'train-images-idx3-ubyte.gz'))
        urllib.request.urlretrieve('http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/train-labels-idx1-ubyte.gz', os.path.join(DIR, 'train-labels-idx1-ubyte.gz'))
        urllib.request.urlretrieve('http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/t10k-images-idx3-ubyte.gz', os.path.join(DIR, 't10k-images-idx3-ubyte.gz'))
        urllib.request.urlretrieve('http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/t10k-labels-idx1-ubyte.gz', os.path.join(DIR, 't10k-labels-idx1-ubyte.gz'))
    # Reuse the processed file if it already exists
    if os.path.exists(pfile):
        print('Found: ', pfile)
        return pfile
    print(DIR)
    X, Y = readData(os.path.join(DIR, 'train-images-idx3-ubyte.gz'), os.path.join(DIR, 'train-labels-idx1-ubyte.gz'))
    # Fixed seed so the 10,000-example validation split is reproducible
    np.random.seed(0)
    idxshuf   = np.random.permutation(X.shape[0])
    valid_idx = idxshuf[:10000]
    train_idx = idxshuf[10000:]
    # Scale pixel intensities to [0, 1]
    train_x, train_y = np.clip(X[train_idx] / 255., a_min=0.0, a_max=1.0), Y[train_idx]
    valid_x, valid_y = np.clip(X[valid_idx] / 255., a_min=0.0, a_max=1.0), Y[valid_idx]
    test_x, test_y = readData(os.path.join(DIR, 't10k-images-idx3-ubyte.gz'), os.path.join(DIR, 't10k-labels-idx1-ubyte.gz'))
    test_x = np.clip(test_x / 255., a_min=0.0, a_max=1.0)
    print('Processing Fashion MNIST')
    # Persist all six splits into a single HDF5 file
    h5f = h5py.File(pfile, mode='w')
    h5f.create_dataset('train',   data=train_x)
    h5f.create_dataset('train_y', data=train_y)
    h5f.create_dataset('test',    data=test_x)
    h5f.create_dataset('test_y',  data=test_y)
    h5f.create_dataset('valid',   data=valid_x)
    h5f.create_dataset('valid_y', data=valid_y)
    h5f.close()
    # Sanity check: shapes and value ranges of every split
    for dd in [train_x, train_y, valid_x, valid_y, test_x, test_y]:
        print(dd.shape, dd.min(), dd.max())
    print('Done processing Fashion MNIST....', pfile)
    return pfile
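
Once written, the processed file can be loaded back with h5py. A minimal sketch, assuming the file produced by _processFashionMNIST above; the dataset keys match the create_dataset calls, and the 50,000/10,000 train/validation split follows from the permutation:

import h5py

with h5py.File('proc-fashion_mnist.h5', 'r') as h5f:
    train_x, train_y = h5f['train'][:], h5f['train_y'][:]
    valid_x, valid_y = h5f['valid'][:], h5f['valid_y'][:]
    test_x,  test_y  = h5f['test'][:],  h5f['test_y'][:]
# train_x holds the 50,000 examples left after the 10,000-example validation split
print(train_x.shape, valid_x.shape, test_x.shape)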
Example #2
import sys
import time
sys.path.append('../')
""" Change this to modify the loadDataset function """
from load import loadDataset
"""
"params" will contain a hashmap where the default
parameters have been modified by any command-line
options given to this script.
"""
from parse_args_dkf import parse
params = parse()
""" Some utility functions from theanomodels """
from utils.misc import removeIfExists, createIfAbsent, mapPrint, saveHDF5, displayTime
""" Load the dataset into a hashmap. See load.py for details. """
dataset = loadDataset()
params['savedir'] += '-template'
createIfAbsent(params['savedir'])
"""
Add dataset and NADE parameters to "params",
which will become part of the model.
"""
for k in ['dim_observations', 'data_type']:
    params[k] = dataset[k]
mapPrint('Options: ', params)
if params['use_nade']:
    params['data_type'] = 'binary_nade'
""" Import the DKF model and its learn/evaluate functions """
start_time = time.time()
from stinfmodel.dkf import DKF
import stinfmodel.learning as DKF_learn
import stinfmodel.evaluate as DKF_evaluate
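
The script only assumes a few keys on the hashmap that loadDataset returns. A hypothetical stand-in, for illustration only; the real loader lives in load.py, and the key names below are the ones consumed above plus the splits written by the Fashion-MNIST processor in Example #1:

import numpy as np

def loadDataset():
    """ Hypothetical stand-in for load.loadDataset (illustration only) """
    dataset = {}
    dataset['train'] = np.random.rand(50000, 784)  # placeholder observations
    dataset['valid'] = np.random.rand(10000, 784)
    dataset['test']  = np.random.rand(10000, 784)
    dataset['dim_observations'] = dataset['train'].shape[1]
    dataset['data_type'] = 'binary'
    return dataset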
Example #3
from collections import OrderedDict
from sklearn.feature_extraction.text import TfidfTransformer
from optvaemodels.vae import VAE
import optvaemodels.vae_learn as VAE_learn
import optvaemodels.vae_evaluate as VAE_evaluate
from optvaedatasets.load import loadDataset
from utils.misc import createIfAbsent

models, epochval = OrderedDict(), OrderedDict()
models['wikicorp-pl-2-finopt'] = './chkpt-wikicorp-finopt/VAE_lr-8_0e-04-ph-400-qh-400-ds-100-pl-2-ql-2-nl-relu-bs-500-ep-52-plr-1_0e-02-ar-0-otype-finopt-ns-100-etype-mlp-ll-mult-itype-tfidfl20_01_-uid'
epochval['wikicorp-pl-2-finopt'] = '50'

MODELS_TO_USE = list(models.keys())
print('Evaluating on: ', MODELS_TO_USE)

SAVEDIR = './evaluate_if_dir/'
createIfAbsent(SAVEDIR)

DNAME = ''
dataset_wiki = loadDataset('wikicorp')
additional_attrs_wiki = {}


def getTF(dataset):
    """ Fit a TF-IDF transformer on the training counts and return
    the per-term inverse-document-frequency weights. """
    tfidf = TfidfTransformer(norm=None)
    tfidf.fit(dataset['train'])
    return tfidf.idf_


additional_attrs_wiki['idf'] = getTF(dataset_wiki)
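
For intuition, the idf_ vector returned by getTF assigns one weight per vocabulary term, with rarer terms weighted higher. A small self-contained illustration on toy counts, not the Wikipedia data:

import numpy as np
from sklearn.feature_extraction.text import TfidfTransformer

# Three toy documents over a four-term vocabulary
counts = np.array([[3, 0, 1, 0],
                   [2, 0, 0, 1],
                   [0, 1, 0, 1]])
tfidf = TfidfTransformer(norm=None)
tfidf.fit(counts)
# With sklearn's default smoothing, idf_[j] = ln((1 + n_docs) / (1 + df_j)) + 1;
# term 1 (one document) gets a larger weight than term 0 (two documents).
print(tfidf.idf_)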

for mname in MODELS_TO_USE: