def run(sample_num, dqn_model, isRandom):
    # Initialize the LSTM model and allocate the CUDA memory
    model_LSTM = MyLSTM(n_letters, hidden_size_LSTM, nlayers_LSTM, True, True,
                        hidden_dropout_prob_LSTM, bidirectional_LSTM,
                        batch_size_LSTM, cuda_LSTM)
    model_LSTM.cuda()

    # Load the data based on the selection mode (DQN-sampled data or random data)
    if not isRandom:
        with open('sampled_data/data_sampled_' + str(dqn_model) + '_' +
                  str(sample_num), 'rb') as b:
            dataset_train = pickle.load(b)
        write_loss = open('sampled_data/data_sampled_dqn_loss_' + str(dqn_model) +
                          '_' + str(sample_num) + '.csv',
                          'w', encoding='UTF-8', newline='')
    else:
        with open('sampled_data/data_sampled_random_' + str(dqn_model) + '_' +
                  str(sample_num), 'rb') as b:
            dataset_train = pickle.load(b)
        write_loss = open('sampled_data/data_sampled_random_loss_' + str(dqn_model) +
                          '_' + str(sample_num) + '.csv',
                          'w', encoding='UTF-8', newline='')

    writer = csv.DictWriter(
        write_loss, fieldnames=['Epoch', 'Train_loss', 'Train_ppl', 'Val_loss'])
    writer.writeheader()  # Write the header row (DictWriter does not do this automatically)

    # LSTM training part.
    # At any point, you can hit Ctrl + C to break out of training early.
    try:
        for epoch in range(1, n_epoch + 1):
            print("# Epoch", epoch)
            # Train the LSTM on the selected training data
            model_LSTM, train_loss, train_ppl = w_t_RL.train(
                model_LSTM, dataset_train, epoch)
            # Evaluate the current validation loss
            val_loss = w_t_RL.evaluate(model_LSTM, dataset_val, epoch)
            writer.writerow({
                'Epoch': str(epoch),
                'Train_loss': str(train_loss),
                'Train_ppl': str(train_ppl),
                'Val_loss': str(val_loss)
            })
    except KeyboardInterrupt:
        print('-' * 89)
        print('Exiting from training early')
    write_loss.close()
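# A minimal usage sketch for run(), assuming the matching pickled sample files
# already exist under sampled_data/. The sample index (0) and DQN model id (0)
# below are hypothetical placeholders, not values taken from the original code.
run(sample_num=0, dqn_model=0, isRandom=False)  # train on DQN-selected data
run(sample_num=0, dqn_model=0, isRandom=True)   # train on the random baseline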
# Initialize the optimizer that updates the DQN
optimizer = optim.RMSprop(model.parameters())

# Loop over episodes
for i_ep in range(N_ep):
    if i_ep > 0:
        # Load the DQN state dict saved at the end of the previous episode
        model.load_state_dict(
            torch.load('dqn_models/DQN_' + str(i_ep - 1) + '.pt'))
        # Load the replay memory from the previous episode
        with open('dqn_models/replay_memory_' + str(i_ep - 1), 'rb') as handle:
            replay_memory = pickle.load(handle)

    # Initialize the LSTM model and allocate the CUDA memory
    model_LSTM = MyLSTM(n_letters, hidden_size_LSTM, nlayers_LSTM, True, True,
                        hidden_dropout_prob_LSTM, bidirectional_LSTM,
                        batch_size_LSTM, cuda_LSTM)
    model_LSTM.cuda()

    # Construct the batchified data from which training data will be selected
    dataset = select_batch(sentence_list)
    # Stores batchified sentences selected for language modeling (training data)
    dataset_train = np.array([])
    uni_seen_list = []  # Initialize the unigram seen-list
    bi_seen_list = []   # Initialize the bigram seen-list
    tri_seen_list = []  # Initialize the trigram seen-list
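# A minimal sketch of the replay memory that the episode loop above pickles and
# reloads. This is an assumption: the code only shows that transitions of the
# form (state, action, reward, next_state) are stored, so a plain list with
# uniform random sampling (standard DQN practice) is used for illustration;
# push_transition and sample_transitions are hypothetical helper names.
import random
from collections import namedtuple

Transition = namedtuple('Transition', ('state', 'action', 'reward', 'next_state'))

def push_transition(memory, capacity, *args):
    # Append a transition, dropping the oldest one once capacity is reached
    if len(memory) >= capacity:
        memory.pop(0)
    memory.append(Transition(*args))

def sample_transitions(memory, batch_size):
    # Uniformly sample a minibatch of transitions for the Q-learning update
    return random.sample(memory, min(batch_size, len(memory)))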
)  # Max. number of data that can be selected for language modeling
dataset_train = []  # Stores batchified sentences selected for language modeling
replay_memory = []  # Stores the transitions (state, action, reward, next state) for Q-learning
gamma = 0.8  # Discount factor for the Q-learning target
N_ep = 10  # Number of episodes

# Loop over episodes
for i_ep in range(N_ep):
    # Select the batchified data from which training data will be drawn
    dataset = select_batch(sentence_list)

    # Initialize the LSTM model and allocate the CUDA memory
    model_LSTM = MyLSTM(n_letters, hidden_size_LSTM, nlayers_LSTM, True, True,
                        hidden_dropout_prob_LSTM, bidirectional_LSTM,
                        batch_size_LSTM, cuda_LSTM)
    model_LSTM.cuda()
    optimizer = optim.RMSprop(model.parameters())
    torch.save(model_LSTM.state_dict(), 'prev.pt')

    uni_seen_list = []  # Initialize the unigram seen-list
    bi_seen_list = []   # Initialize the bigram seen-list
    tri_seen_list = []  # Initialize the trigram seen-list
    idx = 0
    for data in dataset:
        # Construct the state (how different this input is from the data
        # selected so far, represented as a scalar value)
        state, uni_seen_list, bi_seen_list, tri_seen_list = create_feature(
            data, uni_seen_list, bi_seen_list, tri_seen_list)
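# A hedged reconstruction of the novelty feature: the loop above only shows
# the signature of create_feature(), so this sketch assumes the state is the
# fraction of n-grams in the batch not seen in previously selected data,
# averaged over n = 1, 2, 3. The function name and token representation are
# hypothetical, chosen to mirror the seen-lists maintained above.
def create_feature_sketch(tokens, uni_seen, bi_seen, tri_seen):
    """Return a scalar novelty feature and the updated seen-lists."""
    ratios = []
    for n, seen in ((1, uni_seen), (2, bi_seen), (3, tri_seen)):
        grams = [tuple(tokens[i:i + n]) for i in range(len(tokens) - n + 1)]
        if grams:
            # Fraction of n-grams in this batch that are new
            ratios.append(sum(g not in seen for g in grams) / len(grams))
            # Record the newly observed n-grams
            seen.extend(g for g in grams if g not in seen)
    state = sum(ratios) / len(ratios) if ratios else 0.0
    return state, uni_seen, bi_seen, tri_seen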
def run(sample_num, dqn_model, isRandom):
    # Initialize the LSTM model and allocate the CUDA memory
    model_LSTM = MyLSTM(n_letters, hidden_size_LSTM, nlayers_LSTM, True, True,
                        hidden_dropout_prob_LSTM, bidirectional_LSTM,
                        batch_size_LSTM, cuda_LSTM)
    model_LSTM.cuda()

    # Load the data based on the selection mode (DQN-sampled data or random data)
    if not isRandom:
        with open('sampled_data/data_sampled_' + str(dqn_model) + '_' +
                  str(sample_num), 'rb') as b:
            dataset_train = pickle.load(b)
        with open('sampled_data/value_sampled_' + str(dqn_model) + '_' +
                  str(sample_num), 'rb') as b:
            dataset_value_dqn = pickle.load(b)
        write_loss = open('sampled_data/data_sampled_dqn_loss_' + str(dqn_model) +
                          '_' + str(sample_num) + '.csv',
                          'w', encoding='UTF-8', newline='')
        write_val_diff = open('sampled_data/val_sampled_dqn_diff_' + str(dqn_model) +
                              '_' + str(sample_num) + '.csv',
                              'w', encoding='UTF-8', newline='')
    else:
        with open('sampled_data/data_sampled_random_' + str(dqn_model) + '_' +
                  str(sample_num), 'rb') as b:
            dataset_train = pickle.load(b)
        with open('sampled_data/value_sampled_random_' + str(dqn_model) + '_' +
                  str(sample_num), 'rb') as b:
            dataset_value_dqn = pickle.load(b)
        write_loss = open('sampled_data/data_sampled_random_loss_' + str(dqn_model) +
                          '_' + str(sample_num) + '.csv',
                          'w', encoding='UTF-8', newline='')
        write_val_diff = open('sampled_data/val_sampled_random_diff_' + str(dqn_model) +
                              '_' + str(sample_num) + '.csv',
                              'w', encoding='UTF-8', newline='')

    writer = csv.DictWriter(
        write_loss, fieldnames=['Epoch', 'Train_loss', 'Train_ppl', 'Val_loss'])
    writer_value = csv.DictWriter(
        write_val_diff, fieldnames=['Epoch', 'Iteration', 'Reward', 'Value'])
    writer.writeheader()        # Write the CSV header rows
    writer_value.writeheader()

    # LSTM training part.
    # At any point, you can hit Ctrl + C to break out of training early.
    try:
        for epoch in range(1, n_epoch + 1):
            print("# Epoch", epoch)
            # Loop through the selected batches, one group of N_options options at a time
            for i in range(len(dataset_train)):
                # Evaluate the validation loss before training on this batch
                loss_prev = w_t_RL.evaluate(model_LSTM, dataset_val, epoch)
                # Train the LSTM on the i-th selected batch
                model_LSTM, train_loss, train_ppl = w_t_RL.train(
                    model_LSTM, [dataset_train[i]], epoch)
                # Evaluate the validation loss after training on this batch
                loss_curr = w_t_RL.evaluate(model_LSTM, dataset_val, epoch)
                # Reward: the drop in validation loss caused by this batch
                reward = loss_prev - loss_curr
                writer_value.writerow({
                    'Epoch': str(epoch),
                    'Iteration': str(i),
                    'Reward': str(reward),
                    'Value': str(dataset_value_dqn[i])
                })
                writer.writerow({
                    'Epoch': str(epoch),
                    'Train_loss': str(train_loss),
                    'Train_ppl': str(train_ppl),
                    'Val_loss': str(loss_curr)
                })
    except KeyboardInterrupt:
        print('-' * 89)
        print('Exiting from training early')
    write_loss.close()
    write_val_diff.close()  # Close the reward/value log as well
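# A small follow-up sketch, assuming numpy is available: once run() has written
# the reward/value CSV, the agreement between the observed reward
# (loss_prev - loss_curr) and the DQN's predicted value can be checked with a
# simple correlation. reward_value_correlation is a hypothetical helper; the
# column names match the writer_value fieldnames above.
import csv
import numpy as np

def reward_value_correlation(path):
    # Read the per-iteration rewards and DQN value estimates back in
    with open(path, encoding='UTF-8', newline='') as f:
        rows = list(csv.DictReader(f))
    rewards = np.array([float(r['Reward']) for r in rows])
    values = np.array([float(r['Value']) for r in rows])
    # Pearson correlation between realized rewards and predicted values
    return np.corrcoef(rewards, values)[0, 1]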