def compare_mlps(
        X_train,
        Y_train,
        X_test,
        Y_test,
        hidden_size=HIDDEN_SIZE,
        batch_size=BATCH_SIZE,
        n_iters=N_ITERS):
    """Train an MLP and a BNN side by side and record their test accuracy.

    Both networks use the same three-layer layout
    ``input -> hidden_size -> hidden_size -> Y_train.shape[1]`` and are
    trained on the same reshuffled data in every epoch.

    Args:
        X_train: Training inputs. When 3-D, the two trailing axes are
            flattened into one (and ``X_test`` is flattened the same way).
        Y_train: Training targets, 2-D; its second axis sets the output width.
        X_test: Test inputs with the same layout as ``X_train``.
        Y_test: Test targets, 2-D. The true class is taken as the argmax of
            each row (presumably one-hot rows -- confirm against the caller).
        hidden_size: Width of the two hidden layers.
        batch_size: Batch size used for both training and prediction.
        n_iters: Number of epochs to run.

    Returns:
        Tuple ``(mlp_accuracies, bnn_accuracies)``: two lists holding one
        test-accuracy value per epoch.
    """
    # Reshape if the training data is 3d tensors
    if X_train.ndim == 3:
        X_train = X_train.reshape(
            (X_train.shape[0], X_train.shape[1] * X_train.shape[2]))
        X_test = X_test.reshape(
            (X_test.shape[0], X_test.shape[1] * X_test.shape[2]))

    layers = [
        (X_train.shape[1], hidden_size),
        (hidden_size, hidden_size),
        (hidden_size, Y_train.shape[1]),
    ]
    mlp = MLP(layers)
    bnn = BNN(layers)

    y_true = np.argmax(Y_test, axis=1)
    mlp_accuracies = []
    bnn_accuracies = []
    # Evaluation order (MLP first, then BNN) matches the printed output order.
    contenders = (
        ('MLP', mlp, mlp_accuracies),
        ('BNN', bnn, bnn_accuracies),
    )

    for epoch in range(n_iters):
        # Train models.
        # print(...) with a single argument behaves the same on Python 2 and
        # Python 3; the bare print statement is a SyntaxError on Python 3.
        print('Epoch: {}'.format(epoch))
        X_train, Y_train = shuffle(X_train, Y_train)
        mlp.batch_train(X_train, Y_train, batch_size)
        bnn.batch_train(X_train, Y_train, batch_size)

        # Test models
        for label, model, history in contenders:
            Y_hats = model.batch_predict(X_test, batch_size)
            y_guess = np.argmax(Y_hats, axis=1)
            accuracy = accuracy_score(y_true, y_guess)
            print('{}:\t{}'.format(label, accuracy))
            history.append(accuracy)

    return mlp_accuracies, bnn_accuracies
def __init__(self,
             num_channels=3,
             img_size=(64, 64),
             method='naive language',
             batch_size=5):
    """Build the encoder, recurrent encoder, decoder, dynamics and language parts.

    Args:
        num_channels: Forwarded to both ``Encoder`` and ``Decoder``.
        img_size: Stored on the instance as ``self.img_size``.
        method: Stored as ``self.method``; names the language-conditioning
            variant.
        batch_size: Forwarded to ``RNN_FeatEncoder`` and stored as
            ``self.batch_size``.
    """
    super(ODE, self).__init__()

    # Plain configuration values (not modules, so their position is free).
    self.img_size = img_size
    self.method = method
    self.batch_size = batch_size

    # Sub-modules are created in a fixed order on purpose: it determines
    # parameter registration order and the order random init draws happen.
    self.encoder = Encoder(num_channels=num_channels)
    self.rnn = RNN_FeatEncoder(batch_size=batch_size)
    self.decoder = Decoder(num_channels=num_channels)

    # Dynamics network handed to the ODE solver; built with bnn=False.
    dynamics_in = 256 + 16
    dynamics_out = 256
    self.bnn = BNN(dynamics_in, dynamics_out,
                   act='tanh', n_hidden=128, bnn=False)
    self.bnn.draw_f()

    self.language_encoder = Language_Encoder()

    # Linear projections: language feature -> mu / logvar, video -> feature.
    self.lan_mu_proj = nn.Linear(128, 128)
    self.lan_var_proj = nn.Linear(128, 128)
    self.vid_feat_proj = nn.Linear(256, 128)
def trainNN(data, nn_hyperParams, train_hyperParams, param_save_path, logFile=None):
    """Train a BNN with Adam and early stopping on the validation loss.

    Args:
        data: Mapping with ``'train'`` and ``'valid'`` entries. Each entry is
            indexable with ``[0]`` (inputs) and ``[1]`` (targets), both 2-D
            arrays sliced along their first axis.
        nn_hyperParams: Passed unchanged to ``BNN``.
        train_hyperParams: Mapping that provides ``'batchSize'``,
            ``'adamLr'``, ``'tf_config'``, ``'nEpochs'`` and
            ``'lookahead_epochs'``.
        param_save_path: Checkpoint path; parameters are saved there every
            time the validation loss improves.
        logFile: Optional writable file-like object that receives one line
            per epoch.

    Returns:
        The ``BNN`` instance that was trained.

    Raises:
        ZeroDivisionError: If a split holds fewer rows than one batch, so the
            per-batch average cannot be formed.

    Note:
        Only full batches are used; rows beyond the last full batch of each
        split are ignored.
    """
    batch_size = train_hyperParams['batchSize']
    N_train, _ = data['train'][0].shape
    N_valid, _ = data['valid'][0].shape
    # Floor division keeps these integers on Python 3 as well; a plain `/`
    # would produce floats there and range() would reject them.
    nTrainBatches = N_train // batch_size
    nValidBatches = N_valid // batch_size

    # init the Bayesian neural network
    model = BNN(nn_hyperParams)

    # get training op
    optimizer = tf.train.AdamOptimizer(train_hyperParams['adamLr']).minimize(
        model.loss_fn)

    # get op to save the model
    persister = tf.train.Saver()

    def get_batch(split, batch_idx):
        """Return the (inputs, targets) rows of one full batch of `split`."""
        start = batch_idx * batch_size
        stop = start + batch_size
        return data[split][0][start:stop, :], data[split][1][start:stop, :]

    with tf.Session(config=train_hyperParams['tf_config']) as s:
        s.run(tf.initialize_all_variables())

        # for early stopping
        best_loss = 10000000.
        best_epoch = 0

        for epoch_idx in range(train_hyperParams['nEpochs']):

            # training
            train_loss = 0.
            for batch_idx in range(nTrainBatches):
                x, y = get_batch('train', batch_idx)
                _, loss_fn_val = s.run([optimizer, model.loss_fn],
                                       {model.X: x, model.Y: y})
                train_loss += loss_fn_val

            # validation
            valid_loss = 0.
            for batch_idx in range(nValidBatches):
                x, y = get_batch('valid', batch_idx)
                valid_loss += s.run(model.loss_fn, {model.X: x, model.Y: y})

            # check for validation-loss improvement
            star_printer = ""
            train_loss /= nTrainBatches
            valid_loss /= nValidBatches
            if valid_loss < best_loss:
                best_loss = valid_loss
                best_epoch = epoch_idx
                star_printer = "***"
                # save the parameters
                persister.save(s, param_save_path)

            # log training progress
            logging_str = "Epoch %d. Train Loss: %.3f, Validation Loss: %.3f %s" % (
                epoch_idx + 1, train_loss, valid_loss, star_printer)
            print(logging_str)
            if logFile:
                logFile.write(logging_str + "\n")
                logFile.flush()

            # check for convergence: stop when the validation loss has not
            # improved for more than `lookahead_epochs` epochs, or on NaN.
            if epoch_idx - best_epoch > train_hyperParams[
                    'lookahead_epochs'] or np.isnan(train_loss):
                break

    return model
def construct_forward_model(obs_dim=11,
                            act_dim=3,
                            rew_dim=1,
                            hidden_dim=200,
                            num_networks=7,
                            num_elites=5,
                            session=None):
    """Assemble and finalize a BNN with four swish hidden layers.

    The first layer takes ``obs_dim + act_dim`` inputs and the last layer
    emits ``obs_dim + rew_dim`` outputs. Each layer carries its own
    weight-decay coefficient.

    Args:
        obs_dim: Observation size.
        act_dim: Action size.
        rew_dim: Reward size.
        hidden_dim: Width of every hidden layer.
        num_networks: Stored under ``'num_networks'`` in the BNN params.
        num_elites: Stored under ``'num_elites'`` in the BNN params.
        session: Stored under ``'sess'`` in the BNN params.

    Returns:
        The finalized ``BNN`` model.
    """
    banner = '[ BNN ] Observation dim {} | Action dim: {} | Hidden dim: {}'
    print(banner.format(obs_dim, act_dim, hidden_dim))

    model = BNN({
        'name': 'BNN',
        'num_networks': num_networks,
        'num_elites': num_elites,
        'sess': session,
    })

    # Input layer: the only one that needs an explicit input size.
    input_layer = FC(hidden_dim,
                     input_dim=obs_dim + act_dim,
                     activation="swish",
                     weight_decay=0.000025)
    model.add(input_layer)

    # Remaining hidden layers differ only in their weight decay.
    for decay in (0.00005, 0.000075, 0.000075):
        model.add(FC(hidden_dim, activation="swish", weight_decay=decay))

    # Output layer, created without an activation argument.
    model.add(FC(obs_dim + rew_dim, weight_decay=0.0001))

    optimizer_args = {"learning_rate": 0.001}
    model.finalize(tf.train.AdamOptimizer, optimizer_args)
    return model
def trainBNN(data, bnn_hyperParams, hyperParams, logFile=None, outfile_base_name="", y_scalers=None):
    """Train and evaluate a BNN on each data split, then report averages.

    For every split the variables are re-initialized, the model is trained
    for ``hyperParams['nEpochs']`` epochs by maximizing ``model.elbo_obj``,
    and test RMSE / log likelihood are computed. Weights of every fifth split
    are pickled for later inspection.

    Args:
        data: Nested mapping with ``data['train']['x']`` of shape
            ``(n_splits, N_train, d)`` plus ``data['train']['y']``,
            ``data['test']['x']`` and ``data['test']['y']`` indexed the same
            way. The training arrays are shuffled IN PLACE every epoch.
        bnn_hyperParams: Passed to ``BNN``. ``'batchSize'`` is written into it
            and ``'like_noise_prec'`` is read from it.
        hyperParams: Mapping that provides ``'batchSize'``, ``'adamLr'``,
            ``'tf_config'`` and ``'nEpochs'``.
        logFile: Optional writable file-like object; receives every line that
            is printed.
        outfile_base_name: Inserted into the name of the weight dump files.
        y_scalers: Per-split objects exposing ``mean_`` and ``scale_``
            (presumably fitted scikit-learn scalers -- confirm). Required
            despite the ``None`` default.

    Returns:
        None.

    Raises:
        TypeError: If ``y_scalers`` is None.

    Note:
        The dump path is built from the module-level ``inArgs.experimentDir``,
        which must exist in the enclosing module.
    """
    # y_scalers is indexed unconditionally at test time, so None can never
    # work. Fail before training starts instead of after the first split.
    if y_scalers is None:
        raise TypeError("trainBNN() requires y_scalers (one scaler per split)")

    batch_size = hyperParams['batchSize']
    n_splits, N_train, _ = data['train']['x'].shape
    nTrainBatches = N_train // batch_size
    bnn_hyperParams['batchSize'] = batch_size

    def emit(message):
        """Print `message` and mirror it to logFile when one was supplied."""
        print(message)
        if logFile:
            logFile.write(message + "\n")
            logFile.flush()

    # init Bayes NN
    model = BNN(bnn_hyperParams)

    # get training op (Adam minimizes, so negate the ELBO to maximize it)
    optimizer = tf.train.AdamOptimizer(hyperParams['adamLr']).minimize(-model.elbo_obj)

    test_log_likelihoods = []
    test_rmses = []

    with tf.Session(config=hyperParams['tf_config']) as s:
        for split_idx in range(n_splits):
            # Start every split from freshly initialized variables.
            s.run(tf.initialize_all_variables())
            best_elbo = -10000000.

            ### TRAIN MODEL ###
            for epoch_idx in range(hyperParams['nEpochs']):

                # shuffle data after every epoch; the fancy-indexed right-hand
                # side is a copy, so the in-place assignment is safe.
                training_idxs = list(range(N_train))
                shuffle(training_idxs)
                data['train']['x'][split_idx, :, :] = data['train']['x'][split_idx, training_idxs, :]
                data['train']['y'][split_idx, :, :] = data['train']['y'][split_idx, training_idxs, :]

                # training
                train_elbo = 0.
                exp_ll = 0.
                kld = 0.
                for batch_idx in range(nTrainBatches):
                    start = batch_idx * batch_size
                    stop = start + batch_size
                    x = data['train']['x'][split_idx, start:stop, :]
                    y = data['train']['y'][split_idx, start:stop, :]

                    _, elbo_val, exp_ll_val, kld_val = s.run(
                        [optimizer, model.elbo_obj, model.exp_ll, model.kld],
                        {model.X: x, model.Y: y})
                    train_elbo += elbo_val
                    exp_ll += exp_ll_val
                    kld += kld_val

                # check for ELBO improvement
                star_printer = ""
                train_elbo /= nTrainBatches
                exp_ll /= nTrainBatches
                kld /= nTrainBatches
                if train_elbo > best_elbo:
                    best_elbo = train_elbo
                    star_printer = "***"

                # log training progress every 500 epochs
                if (epoch_idx + 1) % 500 == 0:
                    emit("Epoch %d. Expected LL: %.3f, KLD: %.3f, Train ELBO: %.3f %s"
                         % (epoch_idx + 1, exp_ll, kld, train_elbo, star_printer))

            ### SAVE WEIGHTS TO INSPECT SHRINKAGE BEHAVIOR
            if split_idx % 5 == 0:
                weight_matrices = {
                    'mus': [s.run(m) for m in model.params['mu']],
                    'sigmas': [s.run(sig) for sig in model.params['sigma']],
                }
                weights_path = (inArgs.experimentDir + "/params/weights_post_"
                                + outfile_base_name + "_splitIdx_"
                                + str(split_idx) + ".pkl")
                # Context manager guarantees the dump file is closed.
                with open(weights_path, "wb") as weights_file:
                    cp.dump(weight_matrices, weights_file)

            ### TEST MODEL ###
            test_metrics = model.get_test_metrics(
                500,
                y_mu=y_scalers[split_idx].mean_,
                y_scale=y_scalers[split_idx].scale_,
                likelihood_noise_prec=bnn_hyperParams['like_noise_prec'])
            test_rmse, test_ll = s.run(
                test_metrics,
                {model.X: data['test']['x'][split_idx, :, :],
                 model.Y: data['test']['y'][split_idx, :, :]})
            test_log_likelihoods.append(test_ll)
            test_rmses.append(test_rmse)

            # Goes through the logFile parameter; the previous code wrote to
            # an undefined name here.
            emit("\n\nRun #%d, Test RMSE: %.3f, Test Log Likelihood: %.3f \n\n"
                 % (split_idx, test_rmses[-1], test_log_likelihoods[-1]))

    emit("\n\n\n\n Avg Test RMSE: %.3f +- %.3f, Avg Test Log Likelihood: %.3f +- %.3f"
         % (np.mean(test_rmses), np.std(test_rmses),
            np.mean(test_log_likelihoods), np.std(test_log_likelihoods)))
def test_compilation(self):
    """Smoke test: constructing a two-layer BNN must not raise."""
    layer_shapes = [(300, 200), (200, 100)]
    _ = BNN(layer_shapes)
def construct_model(sess, obs_dim, act_dim, model_hyperparams):
    """Create a BNN, stack its fully connected layers and build its graph.

    Layer sizes come from the constructed model itself: ``model.hidden_dim``,
    ``model.context_dim`` and ``model.output_dim`` are read after ``BNN`` is
    instantiated.

    Args:
        sess: Forwarded to ``BNN`` as its first argument.
        obs_dim: Observation size; part of the first layer's input width.
        act_dim: Action size; part of the first layer's input width.
        model_hyperparams: Forwarded unchanged to ``BNN``.

    Returns:
        The ``BNN`` model after ``build_graph()`` has been called.
    """
    model = BNN(sess, obs_dim, act_dim, model_hyperparams)

    # First layer consumes observation, action and context together.
    first_layer = FC(model.hidden_dim,
                     input_dim=obs_dim + act_dim + model.context_dim,
                     activation="swish",
                     weight_decay=0.000025)
    model.add(first_layer)

    # The other hidden layers only differ in their weight decay.
    for decay in (0.00005, 0.000075, 0.000075):
        model.add(FC(model.hidden_dim, activation="swish", weight_decay=decay))

    # Output layer, created without an activation argument.
    model.add(FC(model.output_dim, weight_decay=0.0001))

    model.build_graph()
    return model
class ODE(nn.Module):
    """Video model that rolls a latent state forward with an ODE solver.

    A start state is formed from the encoded first frame plus either random
    noise, a language feature, or a sequence-derived latent. It is integrated
    over 10 time points with ``odeint`` using ``self.bnn`` as the dynamics
    function, and every time point is decoded into a frame.

    Relies on the module-level names ``device`` and ``odeint``.
    """

    def __init__(self, num_channels=3, img_size=(64, 64), method='naive language', batch_size=5):
        """Build the encoder, recurrent encoder, decoder, dynamics and language parts.

        Args:
            num_channels: Forwarded to both ``Encoder`` and ``Decoder``.
            img_size: Stored as ``self.img_size``.
            method: Language-conditioning variant; ``forward`` handles
                ``'naive language'`` and ``'nce'``.
            batch_size: Forwarded to ``RNN_FeatEncoder`` and stored as
                ``self.batch_size``.
        """
        super(ODE, self).__init__()
        self.encoder = Encoder(num_channels=num_channels)
        self.rnn = RNN_FeatEncoder(batch_size=batch_size)
        self.decoder = Decoder(num_channels=num_channels)
        # Dynamics function handed to odeint in forward(); built with bnn=False.
        self.bnn = BNN(256 + 16, 256, act='tanh', n_hidden=128, bnn=False)
        self.bnn.draw_f()
        self.language_encoder = Language_Encoder()
        self.img_size = img_size
        self.method = method
        self.lan_mu_proj = nn.Linear(128, 128)
        self.lan_var_proj = nn.Linear(128, 128)
        self.vid_feat_proj = nn.Linear(256, 128)
        self.batch_size = batch_size

    def forward(self, input, pos_language=None, neg_language=None):
        """Predict 10 frames from a frame sequence or a single frame.

        Args:
            input: Video trajectory in training, or a single image in testing.
                With language, a size-1 second axis selects the single-image
                path; otherwise the tensor is reshaped as 10 frames of
                3 x 64 x 64 per batch element.
            pos_language: Language feature from VisualCommet, or None for
                generation without language.
            neg_language: Negative language feature; only read by the 'nce'
                method on the sequence path.

        Returns:
            ``recon`` of shape ``(batch, 10, 3, 64, 64)`` when no language is
            given or only one frame is given; otherwise the tuple
            ``(recon, mu, logvar, contrastive_loss)``.

        Raises:
            ValueError: If language is given and ``self.method`` is neither
                ``'naive language'`` nor ``'nce'``.
        """
        n_frames = 10
        batchsize = input.shape[0]

        # Fail with a clear message instead of an unbound `state` further down.
        if pos_language is not None and self.method not in ('naive language', 'nce'):
            raise ValueError(
                "unsupported method {!r}; expected 'naive language' or 'nce'".format(self.method))

        # mu / logvar / contrastive_loss only exist on the sequence path with
        # language. Deciding this up front keeps the language-free path from
        # hitting unbound names at the return statement.
        is_sequence_training = pos_language is not None and input.shape[1] != 1

        if pos_language is None:
            latent_total = self.encoder(input).view([batchsize, -1])
            state = torch.randn(batchsize, 128).to(device)
            init_state = latent_total
        elif not is_sequence_training:
            # Only given the first image
            latent_total = self.encoder(input.squeeze(1)).view([batchsize, 1, -1])
            language_feature = self.language_encoder(pos_language)
            if self.method == 'naive language':
                state = torch.randn(batchsize, 128).to(device) * language_feature + language_feature
            else:  # 'nce'
                mu = self.lan_mu_proj(language_feature)
                logvar = self.lan_var_proj(language_feature)
                state = self.decoder.reparametrize(mu, logvar)
            init_state = latent_total[:, 0]
        else:
            # Training time, given a sequence
            contrastive_loss = 0
            latent_total = self.encoder(
                input.reshape([batchsize * n_frames, 3, 64, 64])).view([batchsize, n_frames, -1])
            h = self.rnn.init_h.to(device)
            c = self.rnn.init_c.to(device)
            # Feed the frames to the RNN from last to first.
            for i in reversed(range(n_frames)):
                latent, h, c = self.rnn(
                    latent_total[:, i:i + 1, :].permute(1, 0, 2), h, c)
            state_dist = self.rnn.linearlay(latent.view([batchsize, 256]))

            if self.method == 'naive language':
                # First half of the RNN summary is mu, second half is logvar.
                mu = state_dist[:, :128]
                logvar = state_dist[:, 128:]
                language_feature = self.language_encoder(pos_language)
                video_latent = self.decoder.reparametrize(mu, logvar)
                state = video_latent * language_feature + language_feature
            else:  # 'nce'
                video_feature = self.vid_feat_proj(state_dist)
                pos_language_feature = self.language_encoder(pos_language)
                neg_language_feature = self.language_encoder(neg_language)
                pos_sim = torch.sum(video_feature * pos_language_feature, dim=1)
                neg_sim = torch.sum(video_feature * neg_language_feature, dim=1)
                contrastive_loss = torch.mean(
                    torch.exp(neg_sim) / (torch.exp(pos_sim) + 1e-10))
                mu = self.lan_mu_proj(pos_language_feature)
                # Both parameters come from the POSITIVE language feature, as
                # on the single-image path. The negative feature was used for
                # logvar before, which made training disagree with inference.
                logvar = self.lan_var_proj(pos_language_feature)
                state = self.decoder.reparametrize(mu, logvar)
            init_state = latent_total[:, 0]

        concat_state = torch.cat([state, init_state], 1)
        ts1 = torch.tensor(np.linspace(1, 10, n_frames)).to(device)
        output_latent = odeint(self.bnn, concat_state, ts1)
        # NOTE(review): if `odeint` is torchdiffeq's, its output is time-major
        # (n_frames, batch, 256) and this reshape would interleave time and
        # batch for batch sizes > 1 -- confirm whether a permute is needed.
        recon = self.decoder(output_latent.reshape(
            [batchsize * n_frames, 256])).reshape([batchsize, n_frames, 3, 64, 64])

        if is_sequence_training:
            return recon, mu, logvar, contrastive_loss
        return recon