Example #1
def pretrain_model():
    print("======MLM & NSP Pretraining======")
    """ MLM & NSP Pretraining 
    You can modify this function by yourself.
    This function does not affects your final score.
    """
    train_dataset = ParagraphDataset(os.path.join('data', 'imdb_train.csv'))
    train_dataset = PretrainDataset(train_dataset)
    val_dataset = ParagraphDataset(os.path.join('data', 'imdb_val.csv'))
    val_dataset = PretrainDataset(val_dataset)
    model = MLMandNSPmodel(train_dataset.token_num)

    model_name = 'pretrained'

    MLM_train_losses, MLM_val_losses, NSP_train_losses, NSP_val_losses \
            = pretraining(model, model_name, train_dataset, val_dataset)

    torch.save(model.state_dict(), model_name + '_final.pth')

    with open(model_name + '_result.pkl', 'wb') as f:
        pickle.dump((MLM_train_losses, MLM_val_losses, NSP_train_losses,
                     NSP_val_losses), f)

    utils.plot_values(MLM_train_losses,
                      MLM_val_losses,
                      title=model_name + "_mlm")
    utils.plot_values(NSP_train_losses,
                      NSP_val_losses,
                      title=model_name + "_nsp")

    print("Final MLM training loss: {:06.4f}".format(MLM_train_losses[-1]))
    print("Final MLM validation loss: {:06.4f}".format(MLM_val_losses[-1]))
    print("Final NSP training loss: {:06.4f}".format(NSP_train_losses[-1]))
    print("Final NSP validation loss: {:06.4f}".format(NSP_val_losses[-1]))
Example #2
def run():
    # build the mdp
    start = time.time()
    room_size = 3
    num_rooms = 5
    mdp = maze_mdp.MazeMDP(room_size=room_size, num_rooms=num_rooms)

    # build the agent
    m = Manager()
    init_dict = {(s, a): 0 for s in mdp.states for a in mdp.ACTIONS + [None]}
    shared_weights = m.dict(init_dict)
    shared_value_weights = m.dict(init_dict)
    agent = async_actor_critic.AsyncActorCritic(actions=mdp.ACTIONS, discount=mdp.DISCOUNT, 
        weights=shared_weights, value_weights=shared_value_weights, tau=.3, learning_rate=.5)

    # build a single experiment
    rewards = m.list()
    start_state_values = m.list()
    max_steps = (2 * room_size * num_rooms) ** 2
    exp = experiment.Experiment(mdp=mdp, agent=agent, num_episodes=800, max_steps=max_steps,
        rewards=rewards, start_state_values=start_state_values)

    # run the experiment
    multiexperiment = experiment.MultiProcessExperiment(experiment=exp, num_agents=NUM_PROCESSES)
    multiexperiment.run()

    # report results
    end = time.time()
    print('took {} seconds to converge'.format(end - start))
    mdp.print_state_values(shared_value_weights)
    optimal = mdp.EXIT_REWARD + (2 * room_size * num_rooms * mdp.MOVE_REWARD)
    utils.plot_values(rewards, optimal, 'rewards')
    utils.plot_values(start_state_values, optimal, 'start state value')
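Example #2 shares the actor and critic weights across worker processes through multiprocessing.Manager dicts. The MazeMDP, AsyncActorCritic, and experiment classes are not reproduced here; the sketch below, with a hypothetical worker function, only illustrates that shared-dict pattern and its lock-free updates (concurrent writes can race, which asynchronous actor-critic tolerates).

from multiprocessing import Manager, Process

def worker(shared_weights, key, delta):
    # Read-modify-write on the proxy dict; there is no lock, so concurrent
    # updates may overwrite each other.
    shared_weights[key] = shared_weights.get(key, 0.0) + delta

if __name__ == '__main__':
    manager = Manager()
    shared_weights = manager.dict({'w': 0.0})
    workers = [Process(target=worker, args=(shared_weights, 'w', 0.1))
               for _ in range(4)]
    for p in workers:
        p.start()
    for p in workers:
        p.join()
    print(dict(shared_weights))  # close to {'w': 0.4}, minus any lost updates
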
Example #3
def train_model():
    print("======IMDB Training======")
    """ IMDB Training 
    You can modify this function by yourself.
    This function does not affects your final score.
    """
    train_dataset = IMDBdataset(os.path.join('data', 'imdb_train.csv'))
    val_dataset = IMDBdataset(os.path.join('data', 'imdb_val.csv'))
    model = IMDBmodel(train_dataset.token_num)

    model_name = 'imdb'

    # You can choose whether to enable fine-tuning
    fine_tuning = True

    if fine_tuning:
        model_name += '_fine_tuned'
        pretrained_model_path = 'pretrained_final.pth'

        # You can use a model which has been pretrained over 200 epochs by TA
        # If you use this saved model, you should mention it in the report
        #
        # pretrained_model_path = 'pretrained_byTA.pth'

    else:
        model_name += '_no_fine_tuned'
        pretrained_model_path = None

    train_losses, val_losses, train_accuracies, val_accuracies \
            = training(model, model_name, train_dataset, val_dataset,
                       pretrained_model_path=pretrained_model_path)

    torch.save(model.state_dict(), model_name + '_final.pth')

    with open(model_name + '_result.pkl', 'wb') as f:
        pickle.dump(
            (train_losses, val_losses, train_accuracies, val_accuracies), f)

    utils.plot_values(train_losses, val_losses, title=model_name + "_losses")
    utils.plot_values(train_accuracies,
                      val_accuracies,
                      title=model_name + "_accuracies")

    print("Final training loss: {:06.4f}".format(train_losses[-1]))
    print("Final validation loss: {:06.4f}".format(val_losses[-1]))
    print("Final training accuracy: {:06.4f}".format(train_accuracies[-1]))
    print("Final validation accuracy: {:06.4f}".format(val_accuracies[-1]))
Example #4
    else:
        criterion = nn.CrossEntropyLoss()
        optimizer = optim.Adam(model.parameters(), lr=learning_rate)
        model.to(device)
        print("Running on:", device)
        train_losses, val_losses, train_accuracies, val_accuracies = train(
            model, train_set, val_set, criterion, optimizer, batch_size,
            num_epochs)
        torch.save(model.state_dict(), pretrained_model_path)

        print("Final training loss: {:06.4f}".format(train_losses[-1]))
        print("Final validation loss: {:06.4f}".format(val_losses[-1]))
        print("Final training accuracy: {:06.4f}".format(train_accuracies[-1]))
        print("Final validation accuracy: {:06.4f}".format(val_accuracies[-1]))

        plot_values(
            train_losses,
            val_losses,
            title="Losses",
            path="./losses/" + setting +
            f"_loss_hid({hidden_dim})_hid2({hidden_dim2})_dropout5_bn.png")
        plot_values(
            train_accuracies,
            val_accuracies,
            title="Accuracies",
            path="./accuracies/" + setting +
            f"_acc_hid({hidden_dim})_hid2({hidden_dim2})_dropout5_bn.png")

    # TODO: Predict test data  ## save to answer_private.txt
    # predict(model, test_data)
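The prediction step is left as a TODO in Example #4. A hedged sketch of what such a predict helper might look like follows, assuming the test set yields input tensors and that one predicted label is written per line of answer_private.txt; the loader behaviour and output format are assumptions.

import torch
from torch.utils.data import DataLoader

def predict(model, test_set, device, batch_size=64, path="answer_private.txt"):
    # Hypothetical helper: run the model over the test set and dump argmax labels.
    model.eval()
    loader = DataLoader(test_set, batch_size=batch_size, shuffle=False)
    labels = []
    with torch.no_grad():
        for inputs in loader:
            logits = model(inputs.to(device))
            labels.extend(logits.argmax(dim=-1).tolist())
    with open(path, "w") as f:
        f.writelines(f"{label}\n" for label in labels)
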
Example #5
NUM_EPISODES = 100_000


def monte_carlo_online_control(num_episodes=NUM_EPISODES):
    # init Q(s,a)=0, N(s,a)=0 for every s a
    Q = state_action_map()
    N = state_action_map()
    N_s = state_map()

    for k in range(num_episodes):
        if k % 1000 == 0:
            print(f'{k} / {num_episodes}')
        pi = e_greedy(Q, N_s)
        episode, reward = sample_episode(pi)
        explored = set()
        for s, a in episode:
            if s not in explored:
                explored.add(s)
                N[s, a] = N[s, a] + 1
                N_s[s] = N_s[s] + 1
                Q[s, a] = Q[s, a] + (1 / N[s, a]) * (reward - Q[s, a])
            # print(f'{s}{a} -> {Q[s, a]}')

    return Q


if __name__ == '__main__':
    Q_vals = monte_carlo_online_control()
    plot_values(Q_vals)
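
Example #5 relies on e_greedy, state_action_map, state_map, and sample_episode, none of which are shown. Below is a minimal sketch of the first three, assuming an Easy21-style exploration schedule epsilon = N0 / (N0 + N_s[s]) and a placeholder action set; both choices are assumptions.

import random
from collections import defaultdict

ACTIONS = ('hit', 'stick')  # placeholder; the real action set is not shown
N0 = 100.0

def state_action_map():
    # Q(s, a) and N(s, a) default to zero for unseen (state, action) pairs.
    return defaultdict(float)

def state_map():
    # N(s) defaults to zero for unseen states.
    return defaultdict(int)

def e_greedy(Q, N_s, actions=ACTIONS, n0=N0):
    # Return a policy: explore with probability n0 / (n0 + N_s[s]), else act greedily.
    def pi(s):
        epsilon = n0 / (n0 + N_s[s])
        if random.random() < epsilon:
            return random.choice(actions)
        return max(actions, key=lambda a: Q[s, a])
    return pi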