single_run_epoch_rewards_test.append(run_epoch()) pbar.set_description( "Avg reward: {:0.6f} | Ewma reward: {:0.6f}".format( np.mean(single_run_epoch_rewards_test), utils.ewma(single_run_epoch_rewards_test))) return single_run_epoch_rewards_test if __name__ == '__main__': state_texts = utils.load_data('game.tsv') dictionary = utils.bag_of_words(state_texts) state_dim = len(dictionary) action_dim = NUM_ACTIONS * NUM_OBJECTS # set up the game framework.load_game_data() epoch_rewards_test = [] # shape NUM_RUNS * NUM_EPOCHS for _ in range(NUM_RUNS): epoch_rewards_test.append(run()) epoch_rewards_test = np.array(epoch_rewards_test) x = np.arange(NUM_EPOCHS) fig, axis = plt.subplots() axis.plot(x, np.mean(epoch_rewards_test, axis=0)) # plot reward per epoch averaged per run axis.set_xlabel('Epochs') axis.set_ylabel('reward') axis.set_title(('Linear: nRuns=%d, Epilon=%.2f, Epi=%d, alpha=%.4f' %
optimizer = optim.SGD(model.parameters(), lr=ALPHA) single_run_epoch_rewards_test = [] pbar = tqdm(range(NUM_EPOCHS), ncols=80) for _ in pbar: single_run_epoch_rewards_test.append(run_epoch()) pbar.set_description( "Avg reward: {:0.6f} | Ewma reward: {:0.6f}".format( np.mean(single_run_epoch_rewards_test), utils.ewma(single_run_epoch_rewards_test))) return single_run_epoch_rewards_test if __name__ == '__main__': pass ''' state_texts = utils.load_data('game.tsv') dictionary = utils.bag_of_words(state_texts) framework.load_game_data() ''' state_dim = K * 3 + 1 action_dim = NUM_ACTIONS # set up the game game = framework.NewsVendorGame(K, Kpr, Kst, Kpe, Ktr, CWarehouse, CTruck, Price, dmax) epoch_rewards_test = [] # shape NUM_RUNS * NUM_EPOCHS for _ in range(NUM_RUNS):