Example 1
import matplotlib.pyplot as plt
from utils.misc import loadHDF5

def load_model_elbo():
    # Let's look at the statistics saved at epoch 990 (the EP990 suffix in the filename).
    # DIM_STOCHASTIC is assumed to be defined at module level.
    stats = loadHDF5('./chkpt/lorenz/DMM_lr-0_0008-dh-40-ds-'+str(DIM_STOCHASTIC)+'-nl-relu-bs-200-ep-1000-rs-80-rd-0_1-infm-R-tl-2-el-2-ar-2_0-use_p-approx-rc-lstm-uid-EP990-stats.h5')
    print([(k, stats[k].shape) for k in stats.keys()])
    plt.figure(figsize=(8, 10))
    plt.plot(stats['train_bound'][:, 0], stats['train_bound'][:, 1], '-o', color='g', label='Train')
    plt.plot(stats['valid_bound'][:, 0], stats['valid_bound'][:, 1], '-*', color='b', label='Validate')
    plt.legend()
    plt.xlabel('Epochs')
    plt.ylabel(r'Upper Bound on $-\log p(x)$')
    plt.show()
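
All of these examples rely on a loadHDF5 helper from utils.misc whose implementation is not shown here. A minimal sketch, assuming it simply reads every top-level dataset of the file into a dict of NumPy arrays:

# Hypothetical stand-in for utils.misc.loadHDF5; assumes a flat file
# whose top-level entries are all datasets.
import h5py
import numpy as np

def loadHDF5(fname):
    with h5py.File(fname, 'r') as f:
        return {k: np.array(f[k]) for k in f.keys()}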
Example 2
import matplotlib.pyplot as plt
from utils.misc import loadHDF5

def plot_loss(unique_id):
    # Plot the train/validation bounds checkpointed at epoch 100
    stats = loadHDF5('./dmm_models/{}-EP100-stats.h5'.format(unique_id))
    plt.plot(stats['train_bound'][:, 0],
             stats['train_bound'][:, 1],
             '-o',
             color='g',
             label='Train')
    plt.plot(stats['valid_bound'][:, 0],
             stats['valid_bound'][:, 1],
             '-*',
             color='b',
             label='Validate')
    plt.legend()
    plt.xlabel('Epochs')
    plt.ylabel(r'Upper Bound on $-\log p(x)$')
    plt.savefig('./dmm_models/{}-EP100-loss.png'.format(unique_id),
                format='png')
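
Calling it only requires the unique identifier of a finished run; the identifier below is made up:

plot_loss('dmm-run-001')  # hypothetical uid; expects ./dmm_models/dmm-run-001-EP100-stats.h5 to exist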
Example 3
    def load_from_hdf5_latent(self, dname="mimic-cancer", feat_name='mu', ssi=False, cohort=False, nrows=None):
        # Load the latent representation instead of the raw features
        representations = loadHDF5('/data/ml2/vishakh/SHARED/representations.h5')

        # We need the labels regardless, and we still care which class each example belongs to
        data_dict = loadDataset(dname)
        if ssi:
            feat_name = 'ssi-' + feat_name

        # Only the inputs change; the labels come from the original dataset
        self.x_train = representations['train-vae-' + feat_name]
        self.x_test = representations['test-vae-' + feat_name]
        self.x_valid = representations['valid-vae-' + feat_name]

        if cohort:
            self.y_valid = data_dict['valid_c']
            self.y_train = data_dict['train_c']
            self.y_test = data_dict['test_c']
        else:
            self.y_valid = data_dict['valid_y']
            self.y_train = data_dict['train_y']
            self.y_test = data_dict['test_y']

        if nrows is not None:
            print("Truncating rows")
            self.x_train = self.x_train[0:nrows]
            self.x_test = self.x_test[0:nrows]
            self.x_valid = self.x_valid[0:nrows]

            self.y_valid = self.y_valid[0:nrows]
            self.y_train = self.y_train[0:nrows]
            self.y_test = self.y_test[0:nrows]
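
The method assumes representations.h5 holds one dataset per split and feature name (e.g. 'train-vae-mu'). A self-contained toy of that layout, with made-up shapes, written and read back with h5py:

# Toy illustration of the expected file layout; the shapes are invented.
import h5py
import numpy as np

with h5py.File('representations-toy.h5', 'w') as f:
    for split in ['train', 'valid', 'test']:
        f.create_dataset(split + '-vae-mu', data=np.random.randn(50, 10))

with h5py.File('representations-toy.h5', 'r') as f:
    print([(k, f[k].shape) for k in f.keys()])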
Example 4
import os
import numpy as np
from utils.misc import saveHDF5, loadHDF5

# SAVEDIR is assumed to be defined earlier; the guard opening this first
# block is reconstructed to mirror the linear-matrices-2 block below.
if not os.path.exists(SAVEDIR+'/linear-matrices.h5'):
    os.system('mkdir -p '+SAVEDIR)
    print('Creating linear matrices')
    linmat = {}
    np.random.seed(0)
    # Square transition matrices and bias vectors for three state sizes
    linmat['Wtrans_10']  = np.random.randn(10, 10)*0.05
    linmat['Wtrans_100'] = np.random.randn(100, 100)*0.05
    linmat['Wtrans_250'] = np.random.randn(250, 250)*0.05
    linmat['btrans_10']  = np.random.randn(10,)*0.05
    linmat['btrans_100'] = np.random.randn(100,)*0.05
    linmat['btrans_250'] = np.random.randn(250,)*0.05
    # Emission matrices map a d-dimensional state to a 2d-dimensional observation
    linmat['Wobs_10']  = np.random.randn(10, 20)*0.05
    linmat['Wobs_100'] = np.random.randn(100, 200)*0.05
    linmat['Wobs_250'] = np.random.randn(250, 500)*0.05
    saveHDF5(SAVEDIR+'/linear-matrices.h5', linmat)
    saved_matrices = linmat
else:
    print('Loading linear matrices')
    saved_matrices = loadHDF5(SAVEDIR+'/linear-matrices.h5')

if not os.path.exists(SAVEDIR+'/linear-matrices-2.h5'):
    os.system('mkdir -p '+SAVEDIR)
    print('Creating linear matrices')
    linmat = {}
    np.random.seed(0)
    linmat['Wtrans_10']  = np.random.randn(10, 10)*0.05
    linmat['Wtrans_100'] = np.random.randn(100, 100)*0.05
    linmat['Wtrans_250'] = np.random.randn(250, 250)*0.05
    linmat['btrans_10']  = np.random.randn(10,)*0.05
    linmat['btrans_100'] = np.random.randn(100,)*0.05
    linmat['btrans_250'] = np.random.randn(250,)*0.05
    # Here the emission matrices are square: state and observation dims match
    linmat['Wobs_10']  = np.random.randn(10, 10)*0.05
    linmat['Wobs_100'] = np.random.randn(100, 100)*0.05
    linmat['Wobs_250'] = np.random.randn(250, 250)*0.05
    saveHDF5(SAVEDIR+'/linear-matrices-2.h5', linmat)  # assumed completion, mirroring the first block
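
saveHDF5 is the write-side counterpart of loadHDF5 and is likewise not shown; a minimal sketch under the same flat-dict assumption:

# Hypothetical stand-in for utils.misc.saveHDF5: write each entry of a
# dict of NumPy arrays as a top-level HDF5 dataset.
import h5py

def saveHDF5(fname, data_dict):
    with h5py.File(fname, 'w') as f:
        for k, v in data_dict.items():
            f.create_dataset(k, data=v)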
Example 5
import matplotlib as mpl
import matplotlib.pyplot as plt
mpl.rcParams['axes.labelsize'] = 20
mpl.rcParams['legend.fontsize'] = 20

import glob, os, sys, time
import numpy as np
sys.path.append('../')
from utils.misc import getConfigFile, readPickle, displayTime, loadHDF5
start_time = time.time()
from model_th.dmm import DMM
import model_th.learning as DMM_learn
import model_th.evaluate as DMM_evaluate
displayTime('importing DMM', start_time, time.time())

# Let's look at the statistics saved at epoch 30 (the EP30 suffix in the filename)
stats = loadHDF5(
    './chkpt-ipython/DMM_lr-0_0008-dh-40-ds-2-nl-relu-bs-200-ep-40-rs-80-rd-0_1-infm-R-tl-2-el-2-ar-2_0-use_p-approx-rc-lstm-uid-EP30-stats.h5'
)
print([(k, stats[k].shape) for k in stats.keys()])

plt.figure(figsize=(8, 10))
plt.plot(stats['train_bound'][:, 0],
         stats['train_bound'][:, 1],
         '-o',
         color='g',
         label='Train')
plt.plot(stats['valid_bound'][:, 0],
         stats['valid_bound'][:, 1],
         '-*',
         color='b',
         label='Validate')
plt.legend()
Example 6
"""
import glob
from utils.misc import loadHDF5, getConfigFile, readPickle
DIR = './'
datasets = ['20newsgroups', 'rcv2']
result_best = {}
result_last = {}
for dataset in datasets:
    print('Dataset: ', dataset)
    for f in glob.glob(DIR + '/chkpt-' + dataset + '-*/*evaluate.h5'):
        if 'mnist' in f or 'qvary' in f:
            continue
        dataset = f.split('chkpt-')[1].split('-')[0]
        opt_type = f.split('chkpt-')[1].split('-')[1].split('/')[0]
        params = readPickle(getConfigFile(f.replace('evaluate.h5', '')))[0]
        dset = loadHDF5(f)
        if params['opt_type'] == 'finopt':
            name = str(params['p_layers']) + '-M' + str(
                params['n_steps']) + '-' + params['input_type']
        else:
            name = str(params['p_layers']) + '-M1-' + params['input_type']
        result_best[params['dataset'] + '-' + name] = (dset['perp_0_best'],
                                                       dset['perp_f_best'])
        result_last[params['dataset'] + '-' + name] = (dset['test_perp_0'],
                                                       dset['test_perp_f'])
        print(name, (dset['perp_0_best'], dset['perp_f_best']))
for dataset in datasets:
    for itype in ['normalize', 'tfidf']:
        for layer in ['0', '2']:
            for M in ['M1', 'M100']:
                name = dataset + '-' + layer + '-' + M + '-' + itype
Example 7
"""
Compile aggregate timing information for different runs
"""
import glob, os
import numpy as np
from utils.misc import loadHDF5, getConfigFile, readPickle

for f in glob.glob('./chkpt-*/*-EP50-stats.h5'):
    code = 'ds' + os.path.basename(f).split('-ql')[0].split('ds')[1]
    if 'finopt' in f:
        code = 'finopt-' + code
    else:
        code = 'none-' + code
    data = loadHDF5(f)
    params = readPickle(getConfigFile(f))[0]
    code = params['dataset'] + '-' + code
    runtimes = []
    # Skip epochs on which a checkpoint was written, since saving inflates
    # the measured batch time
    for edata in data['batch_time']:
        if int(edata[0]) % params['savefreq'] == 0:
            continue
        else:
            runtimes.append(edata[1])
    print(code, np.mean(runtimes))
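
The loop above assumes data['batch_time'] rows are (epoch, seconds) pairs. A toy check of the exclusion logic, with invented numbers:

import numpy as np

# Invented (epoch, seconds) rows; with savefreq = 10, epochs 0 and 10 are dropped
batch_time = np.array([[0, 5.0], [1, 1.0], [2, 1.2], [10, 6.0], [11, 1.1]])
savefreq = 10
runtimes = [t for ep, t in batch_time if int(ep) % savefreq != 0]
print(np.mean(runtimes))  # mean over epochs 1, 2 and 11 only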
Example 8
import os
from midi.utils import midiread, midiwrite
# loadHDF5 and readPickle come from utils.misc as in the earlier examples;
# getLowestError's home module is assumed to be the same
from utils.misc import loadHDF5, readPickle, getLowestError
from model_th.dmm import DMM
# assert os.system('timidity -h')==0, 'Install Timidity from http://timidity.sourceforge.net/'

# ========================================

# #change the dataset to one of ['jsb','nottingham','musedata','piano']
# DATASET= 'jsb'
# DATASET= 'ipython'
DATASET = 'synthetic'
DIR = '../expt/chkpt-' + DATASET + '/'
# DIR    = './chkpt-'+DATASET+'/'
# assert os.path.exists('../expt/chkpt-'+DATASET+'/'),'Run the shell files in ../expt first'
# prefix = 'DMM_lr-0_0008-dh-200-ds-100-nl-relu-bs-20-ep-2000-rs-600-rd-0_1-infm-R-tl-2-el-2-ar-2000_0-use_p-approx-rc-lstm-DKF-ar'
prefix = 'DMM_lr-0_0008-dh-200-ds-100-nl-relu-bs-20-ep-20-rs-600-rd-0_1-infm-R-tl-2-el-2-ar-2_0-use_p-approx-rc-lstm-uid'
# prefix = 'DMM_lr-0_0008-dh-40-ds-2-nl-relu-bs-200-ep-40-rs-80-rd-0_1-infm-R-tl-2-el-2-ar-2_0-use_p-approx-rc-lstm-uid'
stats = loadHDF5(os.path.join(DIR, prefix + '-final.h5'))
# stats  = loadHDF5(os.path.join('chkpt-ipython/DMM_lr-0_0008-dh-40-ds-2-nl-relu-bs-200-ep-40-rs-80-rd-0_1-infm-R-tl-2-el-2-ar-2_0-use_p-approx-rc-lstm-uid-EP30-stats.h5'))
epochMin, valMin, idxMin = getLowestError(stats['valid_bound'])
pfile = os.path.join(DIR, prefix + '-config.pkl')

params = readPickle(pfile, quiet=True)[0]
print('Hyperparameters in: ', pfile, 'Found: ', os.path.exists(pfile))
EP = '-EP' + str(int(epochMin))
reloadFile = os.path.join(DIR, prefix + EP + '-params.npz')
print('Model parameters in: ', reloadFile)
# Don't load the training functions for the model since it's time-consuming
params['validate_only'] = True
dmm_reloaded = DMM(params, paramFile=pfile, reloadFile=reloadFile)

# forViz/chkpt-ipython/DMM_lr-0_0008-dh-40-ds-2-nl-relu-bs-200-ep-40-rs-80-rd-0_1-infm-R-tl-2-el-2-ar-2_0-use_p-approx-rc-lstm-uid-EP30-stats.h5
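
getLowestError itself is not shown in any of these snippets. Judging from how its return values are used above (an epoch, the bound at that epoch, and an index), a plausible sketch, assuming each row of valid_bound is an (epoch, bound) pair:

# Guess at the helper's behaviour, not the repository's actual code.
import numpy as np

def getLowestError(bound):
    # bound: (N, 2) array of (epoch, validation bound) rows
    idx = np.argmin(bound[:, 1])
    return bound[idx, 0], bound[idx, 1], idx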