def load_model_elbo():
    # Let's look at the statistics saved at epoch 990
    stats = loadHDF5('./chkpt/lorenz/DMM_lr-0_0008-dh-40-ds-' + str(DIM_STOCHASTIC) +
                     '-nl-relu-bs-200-ep-1000-rs-80-rd-0_1-infm-R-tl-2-el-2-ar-2_0-use_p-approx-rc-lstm-uid-EP990-stats.h5')
    print [(k, stats[k].shape) for k in stats.keys()]
    plt.figure(figsize=(8, 10))
    plt.plot(stats['train_bound'][:, 0], stats['train_bound'][:, 1], '-o', color='g', label='Train')
    plt.plot(stats['valid_bound'][:, 0], stats['valid_bound'][:, 1], '-*', color='b', label='Validate')
    plt.legend()
    plt.xlabel('Epochs')
    plt.ylabel(r'Upper Bound on $-\log p(x)$')
    plt.show()
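# loadHDF5 lives in utils.misc of this repo and is not shown here; the sketch
# below is a guess at its behavior (a flat HDF5 file read into a dict of
# numpy arrays), not the actual implementation.
import h5py

def loadHDF5_sketch(fname):
    with h5py.File(fname, 'r') as f:
        # Read every top-level dataset into memory, keyed by name
        return dict((k, f[k][...]) for k in f.keys())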
def plot_loss(unique_id):
    stats = loadHDF5('./dmm_models/{}-EP100-stats.h5'.format(unique_id))
    plt.plot(stats['train_bound'][:, 0], stats['train_bound'][:, 1], '-o', color='g', label='Train')
    plt.plot(stats['valid_bound'][:, 0], stats['valid_bound'][:, 1], '-*', color='b', label='Validate')
    plt.legend()
    plt.xlabel('Epochs')
    plt.ylabel(r'Upper Bound on $-\log p(x)$')
    plt.savefig('./dmm_models/{}-EP100-loss.png'.format(unique_id), format='png')
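# Hypothetical invocation of plot_loss; the unique_id below is illustrative
# and should be replaced with the prefix of a real checkpoint in ./dmm_models/.
plot_loss('DMM_lr-0_0008-dh-40-ds-2-uid')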
def load_from_hdf5_latent(self, dname='mimic-cancer', feat_name='mu',
                          ssi=False, cohort=False, nrows=None):
    # Load the latent representation instead of the raw features
    representations = loadHDF5('/data/ml2/vishakh/SHARED/representations.h5')
    # We still need the labels, and we still care which class each row is in
    data_dict = loadDataset(dname)
    if ssi:
        feat_name = 'ssi-' + feat_name
    # Only the features (xs) change; the labels come from the raw dataset
    self.x_train = representations['train-vae-' + feat_name]
    self.x_test = representations['test-vae-' + feat_name]
    self.x_valid = representations['valid-vae-' + feat_name]
    if cohort:
        self.y_valid = data_dict['valid_c']
        self.y_train = data_dict['train_c']
        self.y_test = data_dict['test_c']
    else:
        self.y_valid = data_dict['valid_y']
        self.y_train = data_dict['train_y']
        self.y_test = data_dict['test_y']
    if nrows is not None:
        print "Truncating rows"
        self.x_train = self.x_train[0:nrows]
        self.x_test = self.x_test[0:nrows]
        self.x_valid = self.x_valid[0:nrows]
        self.y_valid = self.y_valid[0:nrows]
        self.y_train = self.y_train[0:nrows]
        self.y_test = self.y_test[0:nrows]
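# Hedged usage sketch: the method above expects to be bound to a class that
# holds the data splits. LatentDataset is hypothetical, and the paths it
# reads must exist for this to run.
class LatentDataset(object):
    load_from_hdf5_latent = load_from_hdf5_latent

ds = LatentDataset()
ds.load_from_hdf5_latent(dname='mimic-cancer', feat_name='mu', nrows=500)
print ds.x_train.shape, ds.y_train.shape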
if not os.path.exists(SAVEDIR + '/linear-matrices.h5'):
    os.system('mkdir -p ' + SAVEDIR)
    print 'Creating linear matrices'
    linmat = {}
    np.random.seed(0)
    linmat['Wtrans_10'] = np.random.randn(10, 10) * 0.05
    linmat['Wtrans_100'] = np.random.randn(100, 100) * 0.05
    linmat['Wtrans_250'] = np.random.randn(250, 250) * 0.05
    linmat['btrans_10'] = np.random.randn(10,) * 0.05
    linmat['btrans_100'] = np.random.randn(100,) * 0.05
    linmat['btrans_250'] = np.random.randn(250,) * 0.05
    linmat['Wobs_10'] = np.random.randn(10, 20) * 0.05
    linmat['Wobs_100'] = np.random.randn(100, 200) * 0.05
    linmat['Wobs_250'] = np.random.randn(250, 500) * 0.05
    saveHDF5(SAVEDIR + '/linear-matrices.h5', linmat)
    saved_matrices = linmat
else:
    print 'Loading linear matrices'
    saved_matrices = loadHDF5(SAVEDIR + '/linear-matrices.h5')

# Second set of matrices: square emission maps (latent and observed dims match)
if not os.path.exists(SAVEDIR + '/linear-matrices-2.h5'):
    os.system('mkdir -p ' + SAVEDIR)
    print 'Creating linear matrices'
    linmat = {}
    np.random.seed(0)
    linmat['Wtrans_10'] = np.random.randn(10, 10) * 0.05
    linmat['Wtrans_100'] = np.random.randn(100, 100) * 0.05
    linmat['Wtrans_250'] = np.random.randn(250, 250) * 0.05
    linmat['btrans_10'] = np.random.randn(10,) * 0.05
    linmat['btrans_100'] = np.random.randn(100,) * 0.05
    linmat['btrans_250'] = np.random.randn(250,) * 0.05
    linmat['Wobs_10'] = np.random.randn(10, 10) * 0.05
    linmat['Wobs_100'] = np.random.randn(100, 100) * 0.05
    linmat['Wobs_250'] = np.random.randn(250, 250) * 0.05
    saveHDF5(SAVEDIR + '/linear-matrices-2.h5', linmat)
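# Hedged example of how the saved matrices are presumably used: Wtrans/btrans
# as a linear latent transition and Wobs as a linear emission map (note the
# 10 -> 20 dimensional Wobs_10 in the first set). The batch below is made up.
z = np.random.randn(5, 10)                                 # 5 latent states of dim 10
z_next = np.dot(z, saved_matrices['Wtrans_10']) + saved_matrices['btrans_10']
x_obs = np.dot(z_next, saved_matrices['Wobs_10'])          # emit 20-dim observations
print z_next.shape, x_obs.shape                            # (5, 10) (5, 20)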
import glob, os, sys, time
import numpy as np
import matplotlib as mpl
import matplotlib.pyplot as plt

mpl.rcParams['axes.labelsize'] = 20
mpl.rcParams['legend.fontsize'] = 20

sys.path.append('../')
from utils.misc import getConfigFile, readPickle, displayTime, loadHDF5

start_time = time.time()
from model_th.dmm import DMM
import model_th.learning as DMM_learn
import model_th.evaluate as DMM_evaluate
displayTime('importing DMM', start_time, time.time())

# Let's look at the statistics saved at epoch 30
stats = loadHDF5(
    './chkpt-ipython/DMM_lr-0_0008-dh-40-ds-2-nl-relu-bs-200-ep-40-rs-80-rd-0_1-infm-R-tl-2-el-2-ar-2_0-use_p-approx-rc-lstm-uid-EP30-stats.h5'
)
print [(k, stats[k].shape) for k in stats.keys()]
plt.figure(figsize=(8, 10))
plt.plot(stats['train_bound'][:, 0], stats['train_bound'][:, 1], '-o', color='g', label='Train')
plt.plot(stats['valid_bound'][:, 0], stats['valid_bound'][:, 1], '-*', color='b', label='Validate')
plt.legend()
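# Hedged sketch: valid_bound is assumed to hold [epoch, bound] rows, so the
# best checkpoint epoch can be read off with an argmin over the second column
# (this mirrors what getLowestError, used further below, presumably returns).
vb = stats['valid_bound']
best = np.argmin(vb[:, 1])
print 'Best epoch:', int(vb[best, 0]), 'bound:', vb[best, 1]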
""" import glob from utils.misc import loadHDF5, getConfigFile, readPickle DIR = './' datasets = ['20newsgroups', 'rcv2'] result_best = {} result_last = {} for dataset in datasets: print 'Dataset: ', dataset for f in glob.glob(DIR + '/chkpt-' + dataset + '-*/*evaluate.h5'): if 'mnist' in f or 'qvary' in f: continue dataset = f.split('chkpt-')[1].split('-')[0] opt_type = f.split('chkpt-')[1].split('-')[1].split('/')[0] params = readPickle(getConfigFile(f.replace('evaluate.h5', '')))[0] dset = loadHDF5(f) if params['opt_type'] == 'finopt': name = str(params['p_layers']) + '-M' + str( params['n_steps']) + '-' + params['input_type'] else: name = str(params['p_layers']) + '-M1-' + params['input_type'] result_best[params['dataset'] + '-' + name] = (dset['perp_0_best'], dset['perp_f_best']) result_last[params['dataset'] + '-' + name] = (dset['test_perp_0'], dset['test_perp_f']) print name, (dset['perp_0_best'], dset['perp_f_best']) for dataset in datasets: for itype in ['normalize', 'tfidf']: for layer in ['0', '2']: for M in ['M1', 'M100']: name = dataset + '-' + layer + '-' + M + '-' + itype
""" Compile aggregate timing information for different runs """ import glob,os import numpy as np from utils.misc import loadHDF5,getConfigFile,readPickle for f in glob.glob('./chkpt-*/*-EP50-stats.h5'): code = 'ds'+os.path.basename(f).split('-ql')[0].split('ds')[1] if 'finopt' in f: code = 'finopt-'+code else: code = 'none-'+code data = loadHDF5(f) params=readPickle(getConfigFile(f))[0] code = params['dataset']+'-'+code runtimes = [] for edata in data['batch_time']: if int(edata[0])%params['savefreq']==0: continue else: runtimes.append(edata[1]) print code, np.mean(runtimes)
from midi.utils import midiread, midiwrite
# assert os.system('timidity -h') == 0, 'Install Timidity from http://timidity.sourceforge.net/'

# ======================================== #
# Change the dataset to one of ['jsb', 'nottingham', 'musedata', 'piano']
# DATASET = 'jsb'
# DATASET = 'ipython'
DATASET = 'synthetic'
DIR = '../expt/chkpt-' + DATASET + '/'
# DIR = './chkpt-' + DATASET + '/'
# assert os.path.exists('../expt/chkpt-' + DATASET + '/'), 'Run the shell files in ../expt first'

# prefix = 'DMM_lr-0_0008-dh-200-ds-100-nl-relu-bs-20-ep-2000-rs-600-rd-0_1-infm-R-tl-2-el-2-ar-2000_0-use_p-approx-rc-lstm-DKF-ar'
prefix = 'DMM_lr-0_0008-dh-200-ds-100-nl-relu-bs-20-ep-20-rs-600-rd-0_1-infm-R-tl-2-el-2-ar-2_0-use_p-approx-rc-lstm-uid'
# prefix = 'DMM_lr-0_0008-dh-40-ds-2-nl-relu-bs-200-ep-40-rs-80-rd-0_1-infm-R-tl-2-el-2-ar-2_0-use_p-approx-rc-lstm-uid'
stats = loadHDF5(os.path.join(DIR, prefix + '-final.h5'))
# stats = loadHDF5(os.path.join('chkpt-ipython/DMM_lr-0_0008-dh-40-ds-2-nl-relu-bs-200-ep-40-rs-80-rd-0_1-infm-R-tl-2-el-2-ar-2_0-use_p-approx-rc-lstm-uid-EP30-stats.h5'))
epochMin, valMin, idxMin = getLowestError(stats['valid_bound'])

pfile = os.path.join(DIR, prefix + '-config.pkl')
params = readPickle(pfile, quiet=True)[0]
print 'Hyperparameters in: ', pfile, 'Found: ', os.path.exists(pfile)

EP = '-EP' + str(int(epochMin))
reloadFile = os.path.join(DIR, prefix + EP + '-params.npz')
print 'Model parameters in: ', reloadFile

# Don't load the training functions for the model since it's time-consuming
params['validate_only'] = True
dmm_reloaded = DMM(params, paramFile=pfile, reloadFile=reloadFile)
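# midiread/midiwrite are imported above but unused in this snippet. A minimal
# hedged example of writing a piano roll to disk with midiwrite; the random
# roll, pitch range (21, 109), and time step 0.3 are assumptions here.
import numpy as np
roll = (np.random.rand(64, 88) > 0.95).astype('float32')  # 64 steps x 88 pitches
midiwrite('sample.mid', roll, (21, 109), 0.3)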