def run():
    """Execute one complete run and collect the test reward of every epoch.

    Resets the module-level ``q_func`` table to zeros with shape
    ``(NUM_ROOM_DESC, NUM_QUESTS, NUM_ACTIONS, NUM_OBJECTS)``, then calls
    ``run_epoch()`` ``NUM_EPOCHS`` times while a progress bar displays
    running statistics.

    Returns:
        A list holding the value returned by ``run_epoch()`` for each
        epoch, in execution order.
    """
    global q_func
    table_shape = (NUM_ROOM_DESC, NUM_QUESTS, NUM_ACTIONS, NUM_OBJECTS)
    q_func = np.zeros(table_shape)

    status_template = "Avg reward: {:0.6f} | Ewma reward: {:0.6f}"
    test_rewards = []
    progress = tqdm(range(NUM_EPOCHS), ncols=80)
    for _ in progress:
        test_rewards.append(run_epoch())
        # Refresh the bar label with the mean and the EWMA of all rewards
        # collected so far in this run.
        avg_reward = np.mean(test_rewards)
        ewma_reward = utils.ewma(test_rewards)
        progress.set_description(
            status_template.format(avg_reward, ewma_reward))

    return test_rewards
def run():
    """Execute one complete run and collect the test reward of every epoch.

    Resets the module-level ``theta`` array to zeros with shape
    ``[action_dim, state_dim]``, then calls ``run_epoch()`` ``NUM_EPOCHS``
    times while a progress bar displays running statistics.

    Returns:
        A list holding the value returned by ``run_epoch()`` for each
        epoch, in execution order.
    """
    global theta
    weight_shape = [action_dim, state_dim]
    theta = np.zeros(weight_shape)

    status_template = "Avg reward: {:0.6f} | Ewma reward: {:0.6f}"
    test_rewards = []
    progress = tqdm(range(NUM_EPOCHS), ncols=80)
    for _ in progress:
        test_rewards.append(run_epoch())
        # Refresh the bar label with the mean and the EWMA of all rewards
        # collected so far in this run.
        avg_reward = np.mean(test_rewards)
        ewma_reward = utils.ewma(test_rewards)
        progress.set_description(
            status_template.format(avg_reward, ewma_reward))

    return test_rewards
def run():
    """Execute one complete run and collect the test reward of every epoch.

    Rebinds the module-level ``model`` to a new
    ``DQN(state_dim, NUM_ACTIONS, NUM_OBJECTS)`` and ``optimizer`` to an SGD
    optimizer over that model's parameters with learning rate ``ALPHA``.
    It then calls ``run_epoch()`` ``NUM_EPOCHS`` times while a progress bar
    displays running statistics.

    Returns:
        A list holding the value returned by ``run_epoch()`` for each
        epoch, in execution order.
    """
    global model, optimizer
    model = DQN(state_dim, NUM_ACTIONS, NUM_OBJECTS)
    optimizer = optim.SGD(model.parameters(), lr=ALPHA)

    status_template = "Avg reward: {:0.6f} | Ewma reward: {:0.6f}"
    test_rewards = []
    progress = tqdm(range(NUM_EPOCHS), ncols=80)
    for _ in progress:
        test_rewards.append(run_epoch())
        # Refresh the bar label with the mean and the EWMA of all rewards
        # collected so far in this run.
        avg_reward = np.mean(test_rewards)
        ewma_reward = utils.ewma(test_rewards)
        progress.set_description(
            status_template.format(avg_reward, ewma_reward))

    return test_rewards
def run():
    """Execute one complete run and collect the test reward of every epoch.

    Rebinds the module-level ``model`` to a new
    ``nn.Linear(state_dim, action_dim)`` layer and ``optimizer`` to an SGD
    optimizer over that layer's parameters with learning rate ``ALPHA``.
    It then calls ``run_epoch()`` ``NUM_EPOCHS`` times while a progress bar
    displays running statistics.

    Returns:
        A list holding the value returned by ``run_epoch()`` for each
        epoch, in execution order.
    """
    global model
    global optimizer
    model = nn.Linear(state_dim, action_dim)
    # NOTE: optim.Adam(model.parameters()) was tried as an alternative
    # optimizer; plain SGD is the one in use.
    optimizer = optim.SGD(model.parameters(), lr=ALPHA)

    status_template = "Avg reward: {:0.6f} | Ewma reward: {:0.6f}"
    test_rewards = []
    progress = tqdm(range(NUM_EPOCHS), ncols=80)
    for _ in progress:
        test_rewards.append(run_epoch())
        # Refresh the bar label with the mean and the EWMA of all rewards
        # collected so far in this run.
        avg_reward = np.mean(test_rewards)
        ewma_reward = utils.ewma(test_rewards)
        progress.set_description(
            status_template.format(avg_reward, ewma_reward))

    return test_rewards