def compare_mlps(
    X_train,
    Y_train,
    X_test,
    Y_test,
    hidden_size=HIDDEN_SIZE,
    batch_size=BATCH_SIZE,
    n_iters=N_ITERS,
):
    """Train an MLP and a BNN side by side and track their test accuracy.

    Both models are built from the same three-layer shape specification and
    are trained on the same shuffled data each epoch. After every epoch both
    are evaluated on the test set.

    Args:
        X_train: training inputs; 2-d, or 3-d (flattened to 2-d here).
        Y_train: training targets; ``Y_train.shape[1]`` is used as the
            output layer width (presumably one-hot labels -- confirm).
        X_test: test inputs; flattened together with ``X_train``.
        Y_test: test targets; the per-row argmax is used as the true class.
        hidden_size: width of the two hidden layers.
        batch_size: batch size used for both training and prediction.
        n_iters: number of epochs to run.

    Returns:
        A tuple ``(mlp_accuracies, bnn_accuracies)`` of per-epoch accuracy
        lists, one entry per epoch.
    """
    # Flatten the trailing two axes when the training data is a 3-d tensor.
    # The decision is made on X_train only; X_test is assumed to have the
    # same rank.
    if X_train.ndim == 3:
        X_train = X_train.reshape(
            (X_train.shape[0], X_train.shape[1] * X_train.shape[2]))
        X_test = X_test.reshape(
            (X_test.shape[0], X_test.shape[1] * X_test.shape[2]))

    # (input width, output width) for each of the three layers.
    layers = [
        (X_train.shape[1], hidden_size),
        (hidden_size, hidden_size),
        (hidden_size, Y_train.shape[1]),
    ]

    mlp = MLP(layers)
    bnn = BNN(layers)
    y_true = np.argmax(Y_test, axis=1)

    mlp_accuracies = []
    bnn_accuracies = []

    for epoch in range(n_iters):
        # Train both models on the same shuffled ordering.
        # print(...) with a single argument behaves identically on
        # Python 2 and Python 3, unlike the print statement.
        print('Epoch: {}'.format(epoch))
        X_train, Y_train = shuffle(X_train, Y_train)
        mlp.batch_train(X_train, Y_train, batch_size)
        bnn.batch_train(X_train, Y_train, batch_size)

        # Evaluate the MLP.
        Y_hats = mlp.batch_predict(X_test, batch_size)
        y_guess = np.argmax(Y_hats, axis=1)
        mlp_acc = accuracy_score(y_true, y_guess)
        print('MLP:\t{}'.format(mlp_acc))
        mlp_accuracies.append(mlp_acc)

        # Evaluate the BNN.
        Y_hats = bnn.batch_predict(X_test, batch_size)
        y_guess = np.argmax(Y_hats, axis=1)
        bnn_acc = accuracy_score(y_true, y_guess)
        print('BNN:\t{}'.format(bnn_acc))
        bnn_accuracies.append(bnn_acc)

    return mlp_accuracies, bnn_accuracies
Exemple #2
0
 def __init__(self,
              num_channels=3,
              img_size=(64, 64),
              method='naive language',
              batch_size=5):
     """Build the sub-modules of the ODE model and store its settings.

     Args:
         num_channels: forwarded to both ``Encoder`` and ``Decoder``.
         img_size: stored on the instance as ``self.img_size``.
         method: stored on the instance as ``self.method``
             (presumably selects how language features are used --
             confirm against ``forward``).
         batch_size: forwarded to ``RNN_FeatEncoder`` and stored as
             ``self.batch_size``.
     """
     super(ODE, self).__init__()
     # NOTE: keep the construction order below stable; it fixes the order
     # in which sub-modules are registered on this nn.Module.
     self.encoder = Encoder(num_channels=num_channels)
     self.rnn = RNN_FeatEncoder(batch_size=batch_size)
     self.decoder = Decoder(num_channels=num_channels)
     # Constructed with bnn=False; presumably the first two positional
     # arguments are input/output widths -- TODO confirm against BNN.
     self.bnn = BNN(256 + 16, 256, act='tanh', n_hidden=128, bnn=False)
     # draw_f() is called once, at construction time.
     self.bnn.draw_f()
     self.language_encoder = Language_Encoder()
     self.img_size = img_size
     self.method = method
     # Linear projections: two 128 -> 128 and one 256 -> 128.
     self.lan_mu_proj = nn.Linear(128, 128)
     self.lan_var_proj = nn.Linear(128, 128)
     self.vid_feat_proj = nn.Linear(256, 128)
     self.batch_size = batch_size
Exemple #3
0
def trainNN(data,
            nn_hyperParams,
            train_hyperParams,
            param_save_path,
            logFile=None):
    """Train a BNN with Adam, early-stopping on the validation loss.

    Each epoch runs one pass over the full training batches, then one pass
    over the full validation batches. Whenever the mean validation loss
    improves, the parameters are checkpointed to ``param_save_path``.
    Training stops when the validation loss has not improved for more than
    ``lookahead_epochs`` epochs, or when the training loss becomes NaN.

    Args:
        data: mapping with 'train' and 'valid' entries; index ``[0]`` holds
            the 2-d inputs and ``[1]`` the targets, both sliced by row.
        nn_hyperParams: passed unchanged to ``BNN``.
        train_hyperParams: mapping providing 'batchSize', 'adamLr',
            'tf_config', 'nEpochs' and 'lookahead_epochs'.
        param_save_path: checkpoint path used by the saver.
        logFile: optional writable file object; each epoch's log line is
            written to it and flushed.

    Returns:
        The ``BNN`` instance. The session is closed before returning, so
        the best parameters should be restored from ``param_save_path``.
    """
    N_train, _ = data['train'][0].shape
    N_valid, _ = data['valid'][0].shape
    batch_size = train_hyperParams['batchSize']
    # Floor division keeps the batch counts integral on Python 3 as well;
    # rows beyond the last full batch are not used.
    nTrainBatches = N_train // batch_size
    nValidBatches = N_valid // batch_size

    def _batch(split, batch_idx):
        # Return the (x, y) row slice for one full batch of the split.
        start = batch_idx * batch_size
        stop = (batch_idx + 1) * batch_size
        return data[split][0][start:stop, :], data[split][1][start:stop, :]

    # init the Bayesian neural network
    model = BNN(nn_hyperParams)

    # get training op
    optimizer = tf.train.AdamOptimizer(train_hyperParams['adamLr']).minimize(
        model.loss_fn)

    # get op to save the model
    persister = tf.train.Saver()

    with tf.Session(config=train_hyperParams['tf_config']) as s:
        s.run(tf.initialize_all_variables())

        # for early stopping
        best_loss = 10000000.
        best_epoch = 0

        for epoch_idx in range(train_hyperParams['nEpochs']):

            # training
            train_loss = 0.
            for batch_idx in range(nTrainBatches):
                x, y = _batch('train', batch_idx)
                _, loss_fn_val = s.run([optimizer, model.loss_fn], {
                    model.X: x,
                    model.Y: y
                })
                train_loss += loss_fn_val

            # validation
            valid_loss = 0.
            for batch_idx in range(nValidBatches):
                x, y = _batch('valid', batch_idx)
                valid_loss += s.run(model.loss_fn, {model.X: x, model.Y: y})

            # check for validation-loss improvement
            star_printer = ""
            train_loss /= nTrainBatches
            valid_loss /= nValidBatches
            if valid_loss < best_loss:
                best_loss = valid_loss
                best_epoch = epoch_idx
                star_printer = "***"
                # save the parameters
                persister.save(s, param_save_path)

            # log training progress
            logging_str = "Epoch %d.  Train Loss: %.3f,  Validation Loss: %.3f %s" % (
                epoch_idx + 1, train_loss, valid_loss, star_printer)
            print(logging_str)
            if logFile:
                logFile.write(logging_str + "\n")
                logFile.flush()

            # check for convergence
            if epoch_idx - best_epoch > train_hyperParams[
                    'lookahead_epochs'] or np.isnan(train_loss):
                break

    return model
Exemple #4
0
def construct_forward_model(obs_dim=11,
                            act_dim=3,
                            rew_dim=1,
                            hidden_dim=200,
                            num_networks=7,
                            num_elites=5,
                            session=None):
    """Assemble and finalize a BNN that maps (obs, action) to (obs, reward).

    The network is a stack of four ``hidden_dim``-wide swish layers followed
    by a linear output layer of width ``obs_dim + rew_dim``. Each layer has
    its own weight-decay coefficient. The model is finalized with an Adam
    optimizer at learning rate 0.001.

    Args:
        obs_dim: observation width.
        act_dim: action width; the first layer takes ``obs_dim + act_dim``.
        rew_dim: reward width, added to ``obs_dim`` for the output layer.
        hidden_dim: width of every hidden layer.
        num_networks: forwarded to ``BNN`` under the 'num_networks' key.
        num_elites: forwarded to ``BNN`` under the 'num_elites' key.
        session: forwarded to ``BNN`` under the 'sess' key.

    Returns:
        The finalized ``BNN`` instance.
    """
    banner = '[ BNN ] Observation dim {} | Action dim: {} | Hidden dim: {}'
    print(banner.format(obs_dim, act_dim, hidden_dim))

    ensemble = BNN({
        'name': 'BNN',
        'num_networks': num_networks,
        'num_elites': num_elites,
        'sess': session,
    })

    # Input layer: the only one that needs an explicit input width.
    ensemble.add(
        FC(hidden_dim,
           input_dim=obs_dim + act_dim,
           activation="swish",
           weight_decay=0.000025))

    # Remaining hidden layers differ only in their weight decay.
    for decay in (0.00005, 0.000075, 0.000075):
        ensemble.add(FC(hidden_dim, activation="swish", weight_decay=decay))

    # Linear output head.
    ensemble.add(FC(obs_dim + rew_dim, weight_decay=0.0001))

    ensemble.finalize(tf.train.AdamOptimizer, {"learning_rate": 0.001})
    return ensemble
def trainBNN(data, bnn_hyperParams, hyperParams, logFile=None, outfile_base_name="", y_scalers=None):
    """Train and evaluate a BNN on every data split, maximizing the ELBO.

    For each split the variables are re-initialized and the model is trained
    for ``hyperParams['nEpochs']`` epochs, then evaluated on that split's
    test set. The training rows of ``data`` are shuffled IN PLACE every
    epoch. For every fifth split the posterior means and sigmas are pickled
    for later inspection.

    Args:
        data: nested mapping; ``data['train']['x']`` has shape
            (n_splits, N_train, d) and ``data['train']['y']``,
            ``data['test']['x']`` and ``data['test']['y']`` are indexed by
            split along their first axis.
        bnn_hyperParams: passed to ``BNN``; its 'batchSize' entry is
            overwritten here and 'like_noise_prec' is read for testing.
        hyperParams: mapping providing 'batchSize', 'adamLr', 'tf_config'
            and 'nEpochs'.
        logFile: optional writable file object that receives every message
            that is printed.
        outfile_base_name: inserted into the weight-dump file name.
        y_scalers: per-split scalers exposing ``mean_`` and ``scale_``;
            required despite the ``None`` default.

    Raises:
        ValueError: if ``y_scalers`` is None. Raised before any training,
            because the test step cannot run without the scalers.
    """
    if y_scalers is None:
        raise ValueError("trainBNN requires y_scalers (one per split) to compute test metrics")

    def _log(message):
        # Print a message and mirror it to logFile when one was supplied.
        print(message)
        if logFile:
            logFile.write(message + "\n")
            logFile.flush()

    batch_size = hyperParams['batchSize']
    n_splits, N_train, _ = data['train']['x'].shape
    # Rows beyond the last full batch are not used for training.
    nTrainBatches = int(N_train/batch_size)
    bnn_hyperParams['batchSize'] = batch_size

    # init Bayes NN
    model = BNN(bnn_hyperParams)

    # get training op (minimize the negative ELBO)
    optimizer = tf.train.AdamOptimizer(hyperParams['adamLr']).minimize(-model.elbo_obj)

    test_log_likelihoods = []
    test_rmses = []
    with tf.Session(config=hyperParams['tf_config']) as s:

        for split_idx in range(n_splits):
            # Fresh parameters for every split.
            s.run(tf.initialize_all_variables())
            best_elbo = -10000000.

            ### TRAIN MODEL ###
            for epoch_idx in range(hyperParams['nEpochs']):
                # Shuffle the split's training rows every epoch; x and y use
                # the same permutation. The fancy index on the right-hand
                # side yields a copy, so the in-place write is safe.
                training_idxs = list(range(N_train))
                shuffle(training_idxs)
                data['train']['x'][split_idx, :, :] = data['train']['x'][split_idx, training_idxs, :]
                data['train']['y'][split_idx, :, :] = data['train']['y'][split_idx, training_idxs, :]

                # training
                train_elbo = 0.
                exp_ll = 0.
                kld = 0.
                for batch_idx in range(nTrainBatches):
                    start = batch_idx*batch_size
                    stop = (batch_idx+1)*batch_size
                    x = data['train']['x'][split_idx, start:stop, :]
                    y = data['train']['y'][split_idx, start:stop, :]
                    _, elbo_val, exp_ll_val, kld_val = s.run(
                        [optimizer, model.elbo_obj, model.exp_ll, model.kld],
                        {model.X: x, model.Y: y})
                    train_elbo += elbo_val
                    exp_ll += exp_ll_val
                    kld += kld_val

                # check for ELBO improvement
                star_printer = ""
                train_elbo /= nTrainBatches
                exp_ll /= nTrainBatches
                kld /= nTrainBatches
                if train_elbo > best_elbo:
                    best_elbo = train_elbo
                    star_printer = "***"

                # log training progress every 500 epochs
                if (epoch_idx+1) % 500 == 0:
                    _log("Epoch %d.  Expected LL: %.3f,  KLD: %.3f,  Train ELBO: %.3f %s" %(epoch_idx+1, exp_ll, kld, train_elbo, star_printer))

            ### SAVE WEIGHTS TO INSPECT SHRINKAGE BEHAVIOR
            if split_idx % 5 == 0:
                weight_matrices = {}
                weight_matrices['mus'] = [s.run(m) for m in model.params['mu']]
                weight_matrices['sigmas'] = [s.run(sig) for sig in model.params['sigma']]
                # NOTE(review): `inArgs` and `cp` are not defined in this
                # function; they are expected at module level.
                weights_path = inArgs.experimentDir+"/params/weights_post_"+outfile_base_name+"_splitIdx_"+str(split_idx)+".pkl"
                # Context manager so the pickle file is flushed and closed.
                with open(weights_path, "wb") as weights_file:
                    cp.dump(weight_matrices, weights_file)

            ### TEST MODEL ###
            scaler = y_scalers[split_idx]
            metrics_op = model.get_test_metrics(
                500,
                y_mu=scaler.mean_,
                y_scale=scaler.scale_,
                likelihood_noise_prec=bnn_hyperParams['like_noise_prec'])
            test_rmse, test_ll = s.run(
                metrics_op,
                {model.X: data['test']['x'][split_idx,:,:], model.Y: data['test']['y'][split_idx,:,:]})
            test_log_likelihoods.append(test_ll)
            test_rmses.append(test_rmse)
            _log("\n\nRun #%d, Test RMSE: %.3f, Test Log Likelihood: %.3f \n\n" %(split_idx, test_rmses[-1], test_log_likelihoods[-1]))

    _log("\n\n\n\n Avg Test RMSE: %.3f +- %.3f,  Avg Test Log Likelihood: %.3f +- %.3f" %(np.mean(test_rmses), np.std(test_rmses), np.mean(test_log_likelihoods), np.std(test_log_likelihoods)))
 def test_compilation(self):
     """Smoke test: a two-layer BNN can be constructed without raising."""
     layer_shapes = [(300, 200), (200, 100)]
     BNN(layer_shapes)
Exemple #7
0
def construct_model(sess, obs_dim, act_dim, model_hyperparams):
    """Create a BNN, stack its fully connected layers and build its graph.

    The layer widths are read from the BNN instance itself (``hidden_dim``,
    ``context_dim``, ``output_dim``), which presumably derives them from
    ``model_hyperparams`` -- confirm against the BNN constructor.

    Args:
        sess: session handed to ``BNN``.
        obs_dim: observation width.
        act_dim: action width.
        model_hyperparams: hyper-parameter container handed to ``BNN``.

    Returns:
        The ``BNN`` instance after ``build_graph()`` has been called.
    """
    net = BNN(sess, obs_dim, act_dim, model_hyperparams)

    # First layer consumes observation, action and context.
    net.add(
        FC(net.hidden_dim,
           input_dim=obs_dim + act_dim + net.context_dim,
           activation="swish",
           weight_decay=0.000025))

    # Three more swish layers, each with its own weight decay.
    for decay in (0.00005, 0.000075, 0.000075):
        net.add(FC(net.hidden_dim, activation="swish", weight_decay=decay))

    # Linear output layer.
    net.add(FC(net.output_dim, weight_decay=0.0001))

    # The graph is built directly; finalize() is not called on this model.
    net.build_graph()
    return net
Exemple #8
0
class ODE(nn.Module):
    """Latent-ODE video model optionally conditioned on language features.

    An image encoder produces per-frame latents, and a state vector is
    derived from noise, from language, or from an RNN over the frame
    latents. The concatenated state is integrated over 10 time steps with
    ``odeint`` using ``self.bnn`` as the dynamics function, then decoded
    back to frames.

    ``method`` selects how language is used: ``'naive language'`` or
    ``'nce'``.
    """

    def __init__(self,
                 num_channels=3,
                 img_size=(64, 64),
                 method='naive language',
                 batch_size=5):
        """Build the sub-modules and store the settings.

        Args:
            num_channels: forwarded to ``Encoder`` and ``Decoder``.
            img_size: stored as ``self.img_size``.
            method: ``'naive language'`` or ``'nce'``; read in ``forward``.
            batch_size: forwarded to ``RNN_FeatEncoder`` and stored.
        """
        super(ODE, self).__init__()
        # Construction order is kept stable: it fixes sub-module
        # registration order.
        self.encoder = Encoder(num_channels=num_channels)
        self.rnn = RNN_FeatEncoder(batch_size=batch_size)
        self.decoder = Decoder(num_channels=num_channels)
        self.bnn = BNN(256 + 16, 256, act='tanh', n_hidden=128, bnn=False)
        self.bnn.draw_f()
        self.language_encoder = Language_Encoder()
        self.img_size = img_size
        self.method = method
        self.lan_mu_proj = nn.Linear(128, 128)
        self.lan_var_proj = nn.Linear(128, 128)
        self.vid_feat_proj = nn.Linear(256, 128)
        self.batch_size = batch_size

    def forward(self, input, pos_language=None, neg_language=None):
        """Reconstruct a 10-frame sequence from a video or a single image.

        Args:
            input: a video trajectory in training, or a single image in
                testing. With language, ``input.shape[1] == 1`` selects the
                single-image path; otherwise a 10-frame sequence of 3x64x64
                images is assumed (these sizes are hard-coded below).
            pos_language: language feature from VisualCommet, or None to
                run without language conditioning.
            neg_language: negative language feature; only used by the
                ``'nce'`` method on the sequence path.

        Returns:
            ``recon`` alone when ``pos_language`` is None or when
            ``input.shape[1] == 1``; otherwise the tuple
            ``(recon, mu, logvar, contrastive_loss)``.

        Raises:
            ValueError: if language is given and ``self.method`` is neither
                ``'naive language'`` nor ``'nce'``.
        """
        batchsize = input.shape[0]

        if pos_language is None:
            # No language: the state is pure noise.
            latent_total = self.encoder(input).view([batchsize, -1])
            state = torch.randn(batchsize, 128).to(device)
            init_state = latent_total
        elif input.shape[1] == 1:
            # Only given the first image
            if self.method == "naive language":
                latent_total = self.encoder(input.squeeze(1)).view(
                    [batchsize, 1, -1])
                language_feature = self.language_encoder(pos_language)
                # Noise scaled and shifted by the language feature.
                state = torch.randn(batchsize, 128).to(
                    device) * language_feature + language_feature
                init_state = latent_total[:, 0]
            elif self.method == 'nce':
                latent_total = self.encoder(input.squeeze(1)).view(
                    [batchsize, 1, -1])
                language_feature = self.language_encoder(pos_language)
                mu = self.lan_mu_proj(language_feature)
                logvar = self.lan_var_proj(language_feature)
                state = self.decoder.reparametrize(mu, logvar)
                init_state = latent_total[:, 0]
            else:
                raise ValueError(
                    "unsupported method {!r}; expected 'naive language' "
                    "or 'nce'".format(self.method))
        else:
            contrastive_loss = 0
            # Training time, given a sequence
            latent_total = self.encoder(
                input.reshape([batchsize * 10, 3, 64,
                               64])).view([batchsize, 10, -1])
            h = self.rnn.init_h.to(device)
            c = self.rnn.init_c.to(device)
            # Feed the frames to the RNN last-to-first.
            for i in reversed(range(10)):
                latent, h, c = self.rnn(
                    latent_total[:, i:i + 1, :].permute(1, 0, 2), h, c)
            state_dist = self.rnn.linearlay(latent.view([batchsize, 256]))

            if self.method == 'naive language':
                # First half of state_dist is the mean, second half logvar.
                mu = state_dist[:, :128]
                logvar = state_dist[:, 128:]
                language_feature = self.language_encoder(pos_language)
                video_latent = self.decoder.reparametrize(mu, logvar)
                state = video_latent * language_feature + language_feature
                init_state = latent_total[:, 0]
            elif self.method == 'nce':
                video_feature = self.vid_feat_proj(state_dist)
                pos_language_feature = self.language_encoder(pos_language)
                neg_language_feature = self.language_encoder(neg_language)
                pos_sim = torch.sum(video_feature * pos_language_feature,
                                    dim=1)
                neg_sim = torch.sum(video_feature * neg_language_feature,
                                    dim=1)
                contrastive_loss = torch.mean(
                    torch.exp(neg_sim) / (torch.exp(pos_sim) + 1e-10))
                mu = self.lan_mu_proj(pos_language_feature)
                # NOTE(review): logvar is projected from the NEGATIVE
                # language feature, while the single-image 'nce' path uses
                # the positive one. Looks like a typo -- confirm before
                # changing, as it alters training.
                logvar = self.lan_var_proj(neg_language_feature)
                state = self.decoder.reparametrize(mu, logvar)
                init_state = latent_total[:, 0]
            else:
                raise ValueError(
                    "unsupported method {!r}; expected 'naive language' "
                    "or 'nce'".format(self.method))

        concat_state = torch.cat([state, init_state], 1)
        # Integrate the dynamics at t = 1, 2, ..., 10.
        ts1 = torch.tensor(np.linspace(1, 10, 10)).to(device)
        output_latent = odeint(self.bnn, concat_state, ts1)
        recon = self.decoder(output_latent.reshape(
            [batchsize * 10, 256])).reshape([batchsize, 10, 3, 64, 64])

        # mu / logvar / contrastive_loss exist only on the sequence path
        # with language. Without language there is nothing else to return;
        # previously a multi-channel image here hit a NameError.
        if pos_language is None or input.shape[1] == 1:
            return recon
        return recon, mu, logvar, contrastive_loss