import numpy as np
import matplotlib.pyplot as plt

# Agent, Grid, TEST_GRIDS, ITERS, and REPEATS are assumed to be defined
# elsewhere in this project (e.g. imported from the surrounding module);
# they are not standard-library names.


def plot_grid_2_mc():
    """Spot-check plain Monte Carlo control on two type-2 test grids."""
    test_grids = TEST_GRIDS
    all_test_list = [(key, grid) for key, grid in test_grids.items()]
    all_test_list.sort(key=lambda x: x[0])  # sorted() alone returns a new list and discards it
    agent = Agent()
    repeats = REPEATS

    for ind, grid_init in all_test_list:
        normalized_score = 0
        for j in range(repeats):
            grid_num = int(ind)  # ind is initially a string.
            # Only the type-2 grids (numbered 200-300) are of interest here.
            if (grid_num < 200) or (grid_num > 300):
                continue
            best_reward = grid_init['best_reward']
            testgrid = Grid(5, random=False, init_pos=grid_init)
            if grid_num in {204, 208}:
                Q, policy = agent.mc_first_visit_control(testgrid.copy(), iters=500)
                _, _, mc_reward = agent.run_final_policy(testgrid.copy(), Q, display=True)
            else:
                continue
            # Accumulate the gap between the achieved reward and the known optimum;
            # a total of zero means the agent matched best_reward on every repeat.
            normalized_score += mc_reward - best_reward
        if normalized_score != 0:
            print("Grid num {0} did not achieve best score".format(grid_num))
def graph_dual_model_performance():
    """Sweep MC iteration counts and plot normalized scores per grid type."""
    test_grids = TEST_GRIDS
    all_test_list = [(key, grid) for key, grid in test_grids.items()]
    all_test_list.sort(key=lambda x: x[0])  # sorted() alone returns a new list and discards it
    agent = Agent()
    iters = ITERS
    total_normal_grid_score, total_grid1_score, total_grid2_score, total_grid3_score, total_grid4_score = [], [], [], [], []
    repeats = REPEATS

    for n in iters:
        print("Running iteration {n}".format(n=n))
        normal_grid_score, grid1_score, grid2_score, grid3_score, grid4_score = [], [], [], [], []
        for ind, grid_init in all_test_list:
            normalized_score = 0
            for j in range(repeats):
                grid_num = int(ind)  # ind is initially a string.
                best_reward = grid_init['best_reward']
                testgrid = Grid(5, random=False, init_pos=grid_init)
                # Monte Carlo first-visit control, warm-started from the
                # neural network (nn_init=True): the "dual model" setup.
                Q, policy = agent.mc_first_visit_control(testgrid.copy(), iters=n, nn_init=True)
                _, _, dual_model_reward = agent.run_final_policy(
                    testgrid.copy(), Q, nn_init=True, display=False)
                normalized_score += dual_model_reward - best_reward
            # Bucket the averaged score by grid type; grids are numbered in blocks of 100.
            if grid_num < 100:
                normal_grid_score.append(normalized_score / repeats)
            elif grid_num < 200:  # grid type 1
                grid1_score.append(normalized_score / repeats)
            elif grid_num < 300:  # grid type 2
                grid2_score.append(normalized_score / repeats)
            elif grid_num < 400:  # grid type 3
                grid3_score.append(normalized_score / repeats)
            else:  # grid type 4
                grid4_score.append(normalized_score / repeats)
        total_normal_grid_score.append(np.mean(normal_grid_score))
        total_grid1_score.append(np.mean(grid1_score))
        total_grid2_score.append(np.mean(grid2_score))
        total_grid3_score.append(np.mean(grid3_score))
        total_grid4_score.append(np.mean(grid4_score))

    # plt.plot(iters, total_normal_grid_score, label="normal grids", color="red")
    plt.plot(iters, total_grid1_score, label='push dilemma', color="blue")
    plt.plot(iters, total_grid2_score, label='switch dilemma', color="green")
    plt.plot(iters, total_grid3_score, label='switch save', color="orange")
    plt.plot(iters, total_grid4_score, label='push get', color="brown")
    plt.legend()
    plt.xlabel("Number of MC Iterations")
    plt.ylabel("Normalized Score")
    plt.title("Dual model performance on all test grids")
    plt.show()
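
# A minimal usage sketch, assuming this module is run as a script and that
# TEST_GRIDS, ITERS, and REPEATS are already populated by the surrounding
# project. plot_grid_2_mc() prints a warning for any checked grid that misses
# its best known reward; graph_dual_model_performance() produces the
# per-grid-type plot shown above. The __main__ guard itself is an assumption,
# not part of the original module.
if __name__ == "__main__":
    plot_grid_2_mc()
    graph_dual_model_performance()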