Example #1
def run(sample_num, dqn_model, isRandom):
    # Initialize the LSTM model and allocate it on the GPU
    model_LSTM = MyLSTM(n_letters, hidden_size_LSTM, nlayers_LSTM, True, True,
                        hidden_dropout_prob_LSTM, bidirectional_LSTM,
                        batch_size_LSTM, cuda_LSTM)
    model_LSTM.cuda()

    # Load the data based on the selection mode (DQN-sampled or random)
    if not isRandom:
        with open(
                'sampled_data/data_sampled_' + str(dqn_model) + '_' +
                str(sample_num), 'rb') as b:
            dataset_train = pickle.load(b)
        write_loss = open('sampled_data/data_sampled_dqn_loss_' +
                          str(dqn_model) + '_' + str(sample_num) + '.csv',
                          'w',
                          encoding='UTF-8',
                          newline='')
    else:
        with open(
                'sampled_data/data_sampled_random_' + str(dqn_model) + '_' +
                str(sample_num), 'rb') as b:
            dataset_train = pickle.load(b)
        write_loss = open('sampled_data/data_sampled_random_loss_' +
                          str(dqn_model) + '_' + str(sample_num) + '.csv',
                          'w',
                          encoding='UTF-8',
                          newline='')

    writer = csv.DictWriter(
        write_loss,
        fieldnames=['Epoch', 'Train_loss', 'Train_ppl', 'Val_loss'])
    writer.writeheader()  # Write the header row so the CSV is self-describing

    # LSTM Training Part
    # At any point, you can hit Ctrl + C to break out of training early.
    try:
        for epoch in range(1, n_epoch + 1):
            print("# Epoch", epoch)

            # Train the LSTM on the selected training data
            model_LSTM, train_loss, train_ppl = w_t_RL.train(
                model_LSTM, dataset_train, epoch)
            # Evaluate the current validation loss
            val_loss = w_t_RL.evaluate(model_LSTM, dataset_val, epoch)
            writer.writerow({
                'Epoch': str(epoch),
                'Train_loss': str(train_loss),
                'Train_ppl': str(train_ppl),
                'Val_loss': str(val_loss)
            })

    except KeyboardInterrupt:
        print('-' * 89)
        print('Exiting from training early')

    write_loss.close()
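The helper module `w_t_RL` is not included in these excerpts. Below is a minimal sketch of the interface the calls above assume: the 3-tuple returned by `train` and the scalar returned by `evaluate` are inferred from the call sites, while the loss criterion, learning rate, and the plain-SGD update are placeholders, not the repository's actual training code.

import math

import torch
import torch.nn as nn

criterion = nn.CrossEntropyLoss()
lr = 0.1  # assumed learning rate

def train(model, dataset, epoch):
    # One pass over the batchified dataset; returns the model plus the average
    # loss and its perplexity, matching the 3-tuple unpacked in run() above.
    model.train()
    total_loss, n_batches = 0.0, 0
    for inputs, targets in dataset:
        model.zero_grad()
        output = model(inputs)  # assumes MyLSTM returns per-token logits
        loss = criterion(output.view(-1, output.size(-1)), targets.view(-1))
        loss.backward()
        with torch.no_grad():
            for p in model.parameters():
                if p.grad is not None:
                    p.add_(p.grad, alpha=-lr)  # plain SGD step (assumed)
        total_loss += loss.item()
        n_batches += 1
    avg_loss = total_loss / max(n_batches, 1)
    return model, avg_loss, math.exp(avg_loss)

def evaluate(model, dataset, epoch):
    # Returns a single scalar validation loss, matching how val_loss is used.
    model.eval()
    total_loss, n_batches = 0.0, 0
    with torch.no_grad():
        for inputs, targets in dataset:
            output = model(inputs)
            total_loss += criterion(output.view(-1, output.size(-1)),
                                    targets.view(-1)).item()
            n_batches += 1
    return total_loss / max(n_batches, 1)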
Example #2
# Initialize optimizer to update the DQN
optimizer = optim.RMSprop(model.parameters())

# Loop over episodes
for i_ep in range(N_ep):
    if i_ep > 0:
        # Load the new state dict of DQN model
        model.load_state_dict(
            torch.load('dqn_models/DQN_' + str(i_ep - 1) + '.pt'))
        # Load the replay memory
        with open('dqn_models/replay_memory_' + str(i_ep - 1), 'rb') as handle:
            replay_memory = pickle.load(handle)

    # Initialize the LSTM model and allocate it on the GPU
    model_LSTM = MyLSTM(n_letters, hidden_size_LSTM, nlayers_LSTM, True, True,
                        hidden_dropout_prob_LSTM, bidirectional_LSTM,
                        batch_size_LSTM, cuda_LSTM)
    model_LSTM.cuda()

    # Construct the batchified data from which training data will be selected
    dataset = select_batch(sentence_list)

    # Stores batchified sentences selected for language modeling (training data)
    dataset_train = np.array([])

    uni_seen_list = []  # Initialize unigram seen list
    bi_seen_list = []  # Initialize bigram seen list
    tri_seen_list = []  # Initialize trigram seen list
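The replay memory loaded at the top of this example feeds a Q-learning update that these excerpts do not show. The following is a sketch of one optimization step over sampled (state, action, reward, next state) transitions, assuming `model` maps a state tensor to per-action Q-values and using `gamma = 0.8` from Example #3; it is not the repository's actual code.

import random

import torch
import torch.nn.functional as F

def optimize_dqn(model, optimizer, replay_memory, batch_size=32, gamma=0.8):
    # Sample a minibatch of (state, action, reward, next_state) transitions
    # and regress Q(s, a) toward r + gamma * max_a' Q(s', a').
    if len(replay_memory) < batch_size:
        return
    batch = random.sample(replay_memory, batch_size)
    states, actions, rewards, next_states = zip(*batch)
    states = torch.stack(states)            # assumes states are tensors
    actions = torch.tensor(actions).unsqueeze(1)
    rewards = torch.tensor(rewards, dtype=torch.float32)
    next_states = torch.stack(next_states)

    q_sa = model(states).gather(1, actions).squeeze(1)  # Q(s, a) actually taken
    with torch.no_grad():
        target = rewards + gamma * model(next_states).max(1).values
    loss = F.smooth_l1_loss(q_sa, target)

    optimizer.zero_grad()
    loss.backward()
    optimizer.step()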
Example #3
# ...  # Max. number of data that can be selected for language modeling
dataset_train = []  # Stores batchified sentences selected for language modeling
replay_memory = []  # Stores (state, action, reward, next state) transitions for Q-learning
gamma = 0.8  # Discount factor for the Q-learning target
N_ep = 10  # Number of episodes

# Loop over episodes
for i_ep in range(N_ep):
    # select the batchified data to be trained
    dataset = select_batch(sentence_list)

    # Initialize the LSTM model and allocate it on the GPU
    model_LSTM = MyLSTM(n_letters, hidden_size_LSTM, nlayers_LSTM, True, True,
                        hidden_dropout_prob_LSTM, bidirectional_LSTM,
                        batch_size_LSTM, cuda_LSTM)
    model_LSTM.cuda()

    optimizer = optim.RMSprop(model.parameters())  # Re-create the DQN optimizer at the start of each episode
    torch.save(model_LSTM.state_dict(), 'prev.pt')

    uni_seen_list = []  # Initialize unigram seen list
    bi_seen_list = []  # Initialize bigram seen list
    tri_seen_list = []  # Initialize trigram seen list

    idx = 0
    for data in dataset:
        # Construct the state: a scalar measuring how different this batch is
        # from the data selected into dataset_train so far
        state, uni_seen_list, bi_seen_list, tri_seen_list = create_feature(
            data, uni_seen_list, bi_seen_list, tri_seen_list)
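`create_feature` is also not included in these excerpts. From the call site it maps a batch plus the three seen-n-gram lists to a scalar state and the updated lists; one plausible reading, in which the state is the fraction of n-grams in the batch not seen before, is sketched below (hypothetical implementation).

def create_feature(data, uni_seen_list, bi_seen_list, tri_seen_list):
    # The scalar "state" is the fraction of n-grams in this batch that have
    # not been seen before; the seen lists are updated and returned, matching
    # the call site above. `data` is assumed to be a token sequence.
    tokens = list(data)
    grams = [tokens,
             list(zip(tokens, tokens[1:])),
             list(zip(tokens, tokens[1:], tokens[2:]))]
    seen_lists = [uni_seen_list, bi_seen_list, tri_seen_list]

    unseen, total = 0, 0
    for ngrams, seen in zip(grams, seen_lists):
        total += len(ngrams)
        for g in ngrams:
            if g not in seen:
                unseen += 1
                seen.append(g)

    state = unseen / max(total, 1)
    return state, uni_seen_list, bi_seen_list, tri_seen_list

Plain lists mirror the `*_seen_list` names used in the snippets; sets would make the membership tests much cheaper as the lists grow.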
Example #4
def run(sample_num, dqn_model, isRandom):
    # Initialize the LSTM model and allocate it on the GPU
    model_LSTM = MyLSTM(n_letters, hidden_size_LSTM, nlayers_LSTM, True, True,
                        hidden_dropout_prob_LSTM, bidirectional_LSTM,
                        batch_size_LSTM, cuda_LSTM)
    model_LSTM.cuda()

    # Load the data based on the selection mode (DQN-sampled or random)
    if not isRandom:
        with open(
                'sampled_data/data_sampled_' + str(dqn_model) + '_' +
                str(sample_num), 'rb') as b:
            dataset_train = pickle.load(b)

        with open(
                'sampled_data/value_sampled_' + str(dqn_model) + '_' +
                str(sample_num), 'rb') as b:
            dataset_value_dqn = pickle.load(b)

        write_loss = open('sampled_data/data_sampled_dqn_loss_' +
                          str(dqn_model) + '_' + str(sample_num) + '.csv',
                          'w',
                          encoding='UTF-8',
                          newline='')
        write_val_diff = open('sampled_data/val_sampled_dqn_diff_' +
                              str(dqn_model) + '_' + str(sample_num) + '.csv',
                              'w',
                              encoding='UTF-8',
                              newline='')
    else:
        with open(
                'sampled_data/data_sampled_random_' + str(dqn_model) + '_' +
                str(sample_num), 'rb') as b:
            dataset_train = pickle.load(b)

        with open(
                'sampled_data/value_sampled_random_' + str(dqn_model) + '_' +
                str(sample_num), 'rb') as b:
            dataset_value_dqn = pickle.load(b)

        write_loss = open('sampled_data/data_sampled_random_loss_' +
                          str(dqn_model) + '_' + str(sample_num) + '.csv',
                          'w',
                          encoding='UTF-8',
                          newline='')
        write_val_diff = open('sampled_data/val_sampled_random_diff_' +
                              str(dqn_model) + '_' + str(sample_num) + '.csv',
                              'w',
                              encoding='UTF-8',
                              newline='')

    writer = csv.DictWriter(
        write_loss,
        fieldnames=['Epoch', 'Train_loss', 'Train_ppl', 'Val_loss'])
    writer.writeheader()
    writer_value = csv.DictWriter(
        write_val_diff, fieldnames=['Epoch', 'Iteration', 'Reward', 'Value'])
    writer_value.writeheader()  # Header rows keep both CSVs self-describing

    # LSTM Training Part
    # At any point, you can hit Ctrl + C to break out of training early.
    try:
        for epoch in range(1, n_epoch + 1):
            print("# Epoch", epoch)

            # Loop through the groups of N_options options
            for i in range(len(dataset_train)):
                # Validation loss before training on this group
                loss_prev = w_t_RL.evaluate(model_LSTM, dataset_val, epoch)
                # Train the LSTM on the i-th selected group
                model_LSTM, train_loss, train_ppl = w_t_RL.train(
                    model_LSTM, [dataset_train[i]], epoch)
                # Validation loss after training on this group
                loss_curr = w_t_RL.evaluate(model_LSTM, dataset_val, epoch)
                # Reward: the drop in validation loss caused by this group
                reward = loss_prev - loss_curr
                writer_value.writerow({
                    'Epoch': str(epoch),
                    'Iteration': str(i),
                    'Reward': str(reward),
                    'Value': str(dataset_value_dqn[i])
                })
                # print (reward, dataset_value_dqn[i])

            writer.writerow({
                'Epoch': str(epoch),
                'Train_loss': str(train_loss),
                'Train_ppl': str(train_ppl),
                'Val_loss': str(loss_curr)
            })

    except KeyboardInterrupt:
        print('-' * 89)
        print('Exiting from training early')

    write_loss.close()
    write_val_diff.close()
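Since each row of the value CSV pairs a realized reward with the DQN's value estimate for the same group, checking how well the two track each other only needs the file. A small post-hoc helper (hypothetical, not part of the source; it relies on the header row written by `writeheader()` above):

import csv

def reward_value_correlation(path):
    # Pearson correlation between logged rewards and DQN value estimates.
    rewards, values = [], []
    with open(path, encoding='UTF-8', newline='') as f:
        for row in csv.DictReader(f):
            rewards.append(float(row['Reward']))
            values.append(float(row['Value']))
    n = len(rewards)
    if n < 2:
        return float('nan')
    mr, mv = sum(rewards) / n, sum(values) / n
    cov = sum((r - mr) * (v - mv) for r, v in zip(rewards, values))
    sr = sum((r - mr) ** 2 for r in rewards) ** 0.5
    sv = sum((v - mv) ** 2 for v in values) ** 0.5
    if sr == 0 or sv == 0:
        return float('nan')
    return cov / (sr * sv)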