def run_model():
    """Load the Lorenz dataset, summarise it, and train a DMM on it.

    The parameters come from ``set_parameters(dataset)``. A short description
    of every split is printed, a ``DMM`` is built with its configuration file
    placed under ``params['savedir']``, and ``DMM_learn.learn`` is run on the
    training split with the validation split as the evaluation set.

    Returns:
        The value returned by ``DMM_learn.learn``.
    """
    # Load/generate the data and derive the model parameters from it.
    dataset = load_lorenz()
    params = set_parameters(dataset)
    print(params)

    # Every print below passes a single pre-formatted string, so the emitted
    # text is the same whether ``print`` is the Python 2 statement or the
    # Python 3 function.
    print('%s %s' % ('Dimensionality of the observations: ',
                     dataset['dim_observations']))
    print('%s %s' % ('Data type of features:', dataset['data_type']))
    for dtype in ['train', 'valid', 'test']:
        split = dataset[dtype]
        print('%s %s %s %s' % ('dtype: ', dtype,
                               ' type(dataset[dtype]): ', type(split)))
        print([(k, type(split[k]), split[k].shape) for k in split])
        print('--------\n')

    # File where `params` for this choice of model and data will be saved.
    pfile = os.path.join(params['savedir'],
                         params['unique_id'] + '-config.pkl')
    print('%s %s' % ('Checkpoint prefix: ', pfile))
    dmm = DMM(params, paramFile=pfile)

    # savef is the prefix for the checkpoints; it reuses the same save
    # directory as the configuration file.
    savef = os.path.join(params['savedir'], params['unique_id'])
    savedata = DMM_learn.learn(dmm, dataset['train'],
                               epoch_start=0,
                               epoch_end=params['epochs'],
                               batch_size=200,
                               savefreq=params['savefreq'],
                               savefile=savef,
                               dataset_eval=dataset['valid'],
                               shuffle=True)
    # Hand the training result back instead of dropping it.
    return savedata
def run_model(dataset, params):
    """Train a DMM on ``dataset`` with the configuration in ``params``.

    ``set_extra_parameters`` is called first with both arguments (presumably
    it completes ``params`` in place -- confirm against its definition). Every
    parameter is then printed, a ``DMM`` is built with its configuration file
    under ``params['savedir']``, and ``DMM_learn.learn`` is run.

    Args:
        dataset: Mapping that provides the ``'train'`` and ``'valid'`` splits.
        params: Mapping read here for ``'savedir'``, ``'unique_id'``,
            ``'epochs'``, ``'batch_size'`` and ``'savefreq'``.

    Returns:
        The value returned by ``DMM_learn.learn``.
    """
    set_extra_parameters(params=params, dataset=dataset)

    # A single formatted string prints as "key value" on both Python 2 and 3;
    # ``print(key, value)`` would print a tuple repr under the Python 2
    # print statement.
    for key, value in params.items():
        print('%s %s' % (key, value))

    # File where `params` for this choice of model and data will be saved.
    pfile = os.path.join(params['savedir'],
                         params['unique_id'] + '-config.pkl')
    print('%s %s' % ('Checkpoint prefix: ', pfile))
    dmm = DMM(params, paramFile=pfile)

    # savef is the prefix for the checkpoints; it reuses the same save
    # directory as the configuration file.
    savef = os.path.join(params['savedir'], params['unique_id'])
    savedata = DMM_learn.learn(dmm, dataset['train'],
                               epoch_start=0,
                               epoch_end=params['epochs'],
                               batch_size=params['batch_size'],
                               savefreq=params['savefreq'],
                               savefile=savef,
                               dataset_eval=dataset['valid'],
                               shuffle=True)
    # Hand the training result back instead of dropping it.
    return savedata
# Script section: shrink the default configuration for a small dataset, then
# train a DMM. `params` and `dataset` are expected to exist already.
params['dim_observations'] = dataset['dim_observations']

# The dataset is small, so override some of the default parameters ...
params['dim_stochastic'] = 2
params['dim_hidden'] = 40
params['rnn_size'] = 80
params['epochs'] = 40
params['batch_size'] = 200
# ... and rewrite the matching fragments of the unique ID so it keeps
# describing the values actually in use.
params['unique_id'] = (params['unique_id']
                       .replace('ds-100', 'ds-2')
                       .replace('dh-200', 'dh-40')
                       .replace('rs-600', 'rs-80')
                       .replace('ep-2000', 'ep-40')
                       .replace('bs-20', 'bs-200'))

# Directory for the checkpoints.
# NOTE(review): plain concatenation -- this only yields a sub-directory if
# params['savedir'] already ends with a path separator; confirm.
params['savedir'] = params['savedir'] + 'synthetic/'
# Create it (and any missing parents) without going through a shell, so paths
# with spaces or shell metacharacters are handled correctly.
if not os.path.isdir(params['savedir']):
    os.makedirs(params['savedir'])

# File where `params` for this choice of model and data will be saved.
pfile = os.path.join(params['savedir'], params['unique_id'] + '-config.pkl')
# Single pre-formatted string: same output under Python 2 and Python 3.
print('%s %s' % ('Checkpoint prefix: ', pfile))
dmm = DMM(params, paramFile=pfile)

# savef is the prefix for the checkpoints; it reuses the same save directory.
savef = os.path.join(params['savedir'], params['unique_id'])
savedata = DMM_learn.learn(dmm, dataset['train'],
                           epoch_start=0,
                           epoch_end=params['epochs'],
                           # Read the value set above instead of repeating 200.
                           batch_size=params['batch_size'],
                           savefreq=params['savefreq'],
                           savefile=savef,
                           dataset_eval=dataset['valid'],
                           shuffle=True)
def _pack_split(x, ids, mask_value):
    """Build the dataset entry for one split from its padded array.

    NaNs in a copy of ``x`` are replaced by ``mask_value``; the mask is True
    wherever the first feature (index 0 on the last axis) of ``x`` is not NaN.
    """
    filled = x.copy()
    filled[np.isnan(filled)] = mask_value
    return {
        'tensor': filled,
        'mask': np.logical_not(np.isnan(x[:, :, 0])),
        'id': ids,
    }


def _ensure_dir(path):
    """Create ``path`` (with parents) if it does not exist, without a shell."""
    if not os.path.isdir(path):
        os.makedirs(path)


def train_dmm(train_data, test_data, cols, dim_latent, unique_id,
              time_col='time_since_transplant', id_col='TRR_ID'):
    """Fit a DMM on longitudinal data and return the learning result.

    The train and test frames are concatenated, a per-subject elapsed-time
    column is derived, and the rows are split into train / validation / test
    by subject id. Each split is converted with ``df_to_padded``, NaNs are
    replaced by a sentinel value, and a ``DMM`` is trained with
    ``DMM_learn.learn`` using the validation split for evaluation.

    Args:
        train_data: DataFrame of non-test rows; 90% of its distinct ids are
            sampled for training, the remaining ids form the validation set.
        test_data: DataFrame of test rows.
        cols: Sequence of column names passed to ``df_to_padded``; its length
            is used as the observation dimensionality.
        dim_latent: Value stored as ``'dim_stochastic'`` in the parameters.
        unique_id: Identifier used in the config and checkpoint file names.
        time_col: Name of the raw time column.
        id_col: Name of the subject id column.

    Returns:
        The value returned by ``DMM_learn.learn``.
    """
    df = pandas.concat([train_data, test_data])

    # Format time so that it can be indexed.
    df['time2'] = df[time_col].apply(mod_time)
    # This differs from time since transplant because some people miss
    # albumin, bilirubin, creatinine or acute rej episode at time of
    # transplant: elapsed time is measured from each subject's first row.
    first_time = df.groupby(id_col)['time2'].transform('min')
    df['t_elapsed'] = (df['time2'] - first_time).astype(int)

    # Split the non-test ids 90/10 into train and validation.
    # NOTE(review): the sample is not seeded, so the split differs per run.
    nontest_ids = train_data[id_col].drop_duplicates()
    train_ids = nontest_ids.sample(frac=0.9)
    # Validation ids are the held-out *non-test* ids only. Selecting "every
    # row not in train_ids" from the concatenated frame would also pull all
    # test rows into the validation split.
    val_ids = nontest_ids[~nontest_ids.isin(train_ids)]

    train = df[df[id_col].isin(train_ids)]
    val = df[df[id_col].isin(val_ids)]
    test = df[df[id_col].isin(test_data[id_col])]

    # Reformat to a matrix; validation and test are padded to the training
    # sequence length.
    x_train, id_train = df_to_padded(df=train, column_names=cols,
                                     id_col=id_col, t_col='t_elapsed')
    max_visits = x_train.shape[1]
    x_val, id_val = df_to_padded(df=val, column_names=cols, id_col=id_col,
                                 t_col='t_elapsed', max_seq_len=max_visits)
    x_test, id_test = df_to_padded(df=test, column_names=cols, id_col=id_col,
                                   t_col='t_elapsed', max_seq_len=max_visits)

    # Sentinel written in place of NaN entries in the tensors.
    mask_value = -1.3371337
    dataset = {
        # len() instead of .shape[0] so plain lists of names work too.
        'dim_observations': len(cols),
        'data_type': 'real',
        'train': _pack_split(x_train, id_train, mask_value),
        'valid': _pack_split(x_val, id_val, mask_value),
        'test': _pack_split(x_test, id_test, mask_value),
    }

    params = {
        'dim_observations': dataset['dim_observations'],
        'data_type': dataset['data_type'],
        'dataset': 'srtr',
        'epochs': 10,
        'seed': 1,
        'init_weight': 0.1,
        'dim_stochastic': dim_latent,
        'expt_name': 'something',
        'reg_value': 0.05,
        'reloadFile': './NOSUCHFILE',
        'reg_spec': '_',
        'dim_hidden': max_visits,
        'lr': 0.0008,
        'reg_type': 'l2',
        'init_scheme': 'uniform',
        'optimizer': 'adam',
        'use_generative_prior': 'approx',
        'maxout_stride': 4,
        'batch_size': 512,
        'savedir': './dmm_models',
        'forget_bias': -5.0,
        'inference_model': 'R',
        'emission_layers': 2,
        'savefreq': 100,
        'rnn_cell': 'lstm',
        'rnn_size': max_visits,
        'paramFile': './NOSUCHFILE',
        'nonlinearity': 'relu',
        'rnn_dropout': 0.1,
        'transition_layers': 2,
        'anneal_rate': 2.0,
        'debug': False,
        'validate_only': False,
        'transition_type': 'mlp',
        'unique_id': unique_id,
        'leaky_param': 0.0,
    }

    # Directory for the checkpoints.
    _ensure_dir(params['savedir'])

    # File where `params` for this choice of model and data will be saved.
    pfile = os.path.join(params['savedir'],
                         params['unique_id'] + '-config.pkl')
    # Single pre-formatted string: same output under Python 2 and Python 3.
    print('%s %s' % ('Checkpoint prefix: ', pfile))
    dmm = DMM(params, paramFile=pfile)

    savef = os.path.join(params['savedir'], params['unique_id'])
    # NOTE(review): epoch_end is fixed at 101 while params['epochs'] is 10;
    # kept as written -- confirm which value is intended.
    savedata = DMM_learn.learn(dmm, dataset['train'],
                               epoch_start=0,
                               epoch_end=101,
                               batch_size=params['batch_size'],
                               savefreq=params['savefreq'],
                               savefile=savef,
                               dataset_eval=dataset['valid'],
                               shuffle=True)
    return savedata