def question_1():
    # Specify hyper-parameters
    agent = Agent()
    environment = Environment()
    rlglue = RLGlue(environment, agent)
    num_episodes = 200
    num_runs = 50
    max_eps_steps = 100000

    steps = np.zeros([num_runs, num_episodes])
    for r in range(num_runs):
        print("run number : ", r)
        rlglue.rl_init()
        for e in range(num_episodes):
            rlglue.rl_episode(max_eps_steps)
            steps[r, e] = rlglue.num_ep_steps()
    np.save('steps', steps)
    plotGraph()

    # Second experiment: a single long run used only for the 3D value plot.
    del agent, environment, rlglue
    agent = Agent()
    environment = Environment()
    rlglue = RLGlue(environment, agent)
    num_episodes = 1000
    num_runs = 1
    max_eps_steps = 100000

    steps = np.zeros([num_runs, num_episodes])
    for r in range(num_runs):
        print("run number : ", r)
        rlglue.rl_init()
        for e in range(num_episodes):
            print("Episode number: " + str(e))
            rlglue.rl_episode(max_eps_steps)
            steps[r, e] = rlglue.num_ep_steps()
    rlglue.rl_agent_message("plot3DGraph")
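# plotGraph() is called above but not defined in this snippet. A minimal
# sketch of what it plausibly does, assuming it reads the 'steps.npy' array
# saved just before the call (the filename and labels are assumptions):
import numpy as np
import matplotlib.pyplot as plt

def plotGraph():
    steps = np.load('steps.npy')        # shape [num_runs, num_episodes]
    mean_steps = steps.mean(axis=0)     # average learning curve over runs
    plt.plot(mean_steps)
    plt.xlabel('Episode')
    plt.ylabel('Steps per episode (mean over runs)')
    plt.savefig('learning_curve.png')
    plt.show()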
def question_4():
    # Specify hyper-parameters
    agent = Agent()
    environment = Environment()
    rlglue = RLGlue(environment, agent)
    num_episodes = 200
    num_runs = 50
    max_eps_steps = 1000000

    steps = np.zeros([num_runs, num_episodes])
    rewards = []
    for r in range(num_runs):
        print("run number : ", r + 1)
        rlglue.rl_init()
        for e in range(num_episodes):
            rlglue.rl_episode(max_eps_steps)
            steps[r, e] = rlglue.num_ep_steps()
        # Total reward accumulated over this run; one entry per run.
        rewards.append(rlglue.total_reward())

    mean = sum(rewards) / len(rewards)
    stder = statistics.stdev(rewards) / math.sqrt(len(rewards))
    print("mean:", mean)
    print("standard error:", stder)
    np.save('bonus_steps', steps)
    np.save("mean", mean)
    np.save("stder", stder)
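# With one total reward per run, the mean and standard error above give an
# approximate 95% confidence interval as mean +/- 1.96 * stder. A minimal
# sketch reading back the values saved by question_4:
import numpy as np

mean = float(np.load('mean.npy'))
stder = float(np.load('stder.npy'))
lo, hi = mean - 1.96 * stder, mean + 1.96 * stder
print("approx. 95% CI for mean total reward: [{:.2f}, {:.2f}]".format(lo, hi))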
def question_1(num_episodes):
    # Specify hyper-parameters
    agent = Agent()
    environment = Environment()
    rlglue = RLGlue(environment, agent)
    max_eps_steps = 100000

    steps = np.zeros(num_episodes)
    rlglue.rl_init()
    for e in tqdm(range(num_episodes)):
        rlglue.rl_episode(max_eps_steps)
        steps[e] = rlglue.num_ep_steps()
    return steps
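# This variant returns a single run's step counts, so averaging over runs
# moves to the caller. A minimal driver sketch (the run count and output
# filename are assumptions):
num_runs = 50
num_episodes = 200
all_steps = np.zeros([num_runs, num_episodes])
for r in range(num_runs):
    print("run number : ", r)
    all_steps[r] = question_1(num_episodes)
np.save('steps', all_steps)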
def question_1():
    agent = Agent()
    environment = Environment()
    rlglue = RLGlue(environment, agent)
    num_episodes = 200
    num_runs = 5
    max_eps_steps = 100000

    steps = np.zeros([num_runs, num_episodes])
    for r in range(num_runs):
        print("run number : ", r)
        rlglue.rl_init()
        for e in range(num_episodes):
            rlglue.rl_episode(max_eps_steps)
            steps[r, e] = rlglue.num_ep_steps()
    np.save('steps', steps)
def question_3():
    agent = Agent()
    environment = Environment()
    rlglue = RLGlue(environment, agent)
    num_episodes = 1000
    num_runs = 1
    max_eps_steps = 100000

    steps = np.zeros([num_runs, num_episodes])
    # Only one run: this experiment exists to extract the learned values.
    for r in range(num_runs):
        print("1000 episode run : ", r)
        rlglue.rl_init()
        for e in range(num_episodes):
            rlglue.rl_episode(max_eps_steps)
            steps[r, e] = rlglue.num_ep_steps()
    # Get the value function grids: [X, Y, Z] is position, velocity, state-value.
    return rlglue.rl_agent_message(1)
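# The [X, Y, Z] grids returned by question_3 can be rendered as a 3D value
# surface. A minimal sketch, assuming the agent message returns three 2-D
# arrays of matching shape:
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D  # noqa: F401 (registers 3D projection)

X, Y, Z = question_3()
fig = plt.figure()
ax = fig.add_subplot(111, projection='3d')
ax.plot_surface(X, Y, Z)
ax.set_xlabel('Position')
ax.set_ylabel('Velocity')
ax.set_zlabel('State value')
plt.savefig('value_surface.png')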
def part3():
    # Specify hyper-parameters
    agent = Agent()
    environment = Environment()
    rlglue = RLGlue(environment, agent)
    num_episodes = 200
    num_runs = 1
    max_eps_steps = 100000

    steps = np.zeros([num_runs, num_episodes])
    for r in range(num_runs):
        print("run number : ", r)
        rlglue.rl_init()
        for e in range(num_episodes):
            rlglue.rl_episode(max_eps_steps)
            steps[r, e] = rlglue.num_ep_steps()
    np.save('steps', steps)

    # Sample the learned value function on a grid over the Mountain Car
    # state space: position in [-1.2, 0.5], velocity in [-0.07, 0.07].
    grid_size = 50
    num_of_actions = 3
    heights = np.zeros([grid_size, grid_size])
    fout = open('value', 'w')
    for i in range(grid_size):
        for j in range(grid_size):
            pos = -1.2 + (i * 1.7 / grid_size)
            vel = -0.07 + (j * 0.14 / grid_size)
            q = []
            for a in range(num_of_actions):
                inds = agent.F((pos, vel), a)        # active tile indices
                q.append(np.sum(agent.weights[inds]))
            height = max(q)                          # greedy state value
            heights[i, j] = height
            fout.write(repr(-height) + ' ')          # negated: cost-to-go
        fout.write('\n')
    fout.close()
    np.save('heights', heights)
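# A minimal visualization sketch for the grid saved by part3 (assumes the
# 'heights.npy' file written above; the negation matches the cost-to-go
# values written to the text file):
import numpy as np
import matplotlib.pyplot as plt

heights = np.load('heights.npy')
plt.imshow(-heights.T, origin='lower', aspect='auto',
           extent=[-1.2, 0.5, -0.07, 0.07])
plt.colorbar(label='-max_a q(s, a)')
plt.xlabel('Position')
plt.ylabel('Velocity')
plt.savefig('cost_to_go.png')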
def question_1():
    # Specify hyper-parameters
    agent = Agent()
    environment = Environment()
    rlglue = RLGlue(environment, agent)
    num_episodes = 200
    num_runs = 50
    max_eps_steps = 100000

    steps = np.zeros([num_runs, num_episodes])
    for r in range(num_runs):
        print("run number : ", r)
        st = time.time()
        rlglue.rl_init()
        for e in range(num_episodes):
            rlglue.rl_episode(max_eps_steps)
            steps[r, e] = rlglue.num_ep_steps()
        finish = time.time() - st
        print(str(finish) + " seconds elapsed")
    np.save('steps', steps)
i = 0
for key in sorted_params_names:
    # Send each grid-search parameter to the agent as a 'name value' message.
    msg_to_send = key + ' ' + str(grid_search_params[key][combination[i]])
    print(msg_to_send)
    agent.agent_message(msg_to_send)
    i += 1

rl_glue = RLGlue(agent_obj=agent, env_obj=env)
for r in range(int(initial_params['num_runs'])):
    print('run: ' + str(r))
    agent.random_seed = r
    rl_glue.rl_init()
    agent.epsilon = float(initial_params['initial_epsilon'])
    for e in range(int(initial_params['total_episodes'])):
        rl_glue.rl_episode(max_episode_steps)
        agent.epsilon = compute_epsilon(current_epsilon=agent.epsilon)
        episodes_steps[j, r, e] = rl_glue.num_ep_steps()
        Q_t[j, r, e] = agent.Q
        if initial_params['agent'] != 'sarsa0':
            Phi_t[j, r, e] = agent.Phi
        if 'pies' in initial_params['agent']:
            agent.xi = compute_xi(current_xi=agent.xi, decay=agent.decay,
                                  decay_param=agent.decay_param)
    print('path length', len(agent.path))
j += 1

# Finding the best parameter setting for grid search based on AUC
# (area under the mean learning curve; ties broken at random).
best_param_set_index = np.random.choice(
    np.flatnonzero(
        np.trapz(np.mean(episodes_steps, 1))
        == np.trapz(np.mean(episodes_steps, 1)).min()))
best_params_string = ''
for index in range(0, len(tuple(itertools.product(*test_coord))[best_param_set_index])):
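# compute_epsilon and compute_xi are called above but not defined in this
# fragment. Hypothetical sketches of one plausible reading (multiplicative
# decay); only the signatures are taken from the calls above, everything
# else is an assumption:
def compute_epsilon(current_epsilon, decay_rate=0.99, min_epsilon=0.01):
    # Multiplicatively decay exploration, with a floor.
    return max(min_epsilon, current_epsilon * decay_rate)

def compute_xi(current_xi, decay, decay_param):
    # Decay xi by decay_param when the decay flag is set.
    if decay:
        return current_xi * decay_param
    return current_xi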
print("training process with {} planning step".format(ite)) # Create and pass agent and environment objects to RLGlue environment = DynaQEnvironment() agent = DynaQAgent(ite) rlglue = RLGlue(environment, agent) del agent, environment # don't use these anymore for run in range(num_runs): print("run number: {}\n".format(run)) # set seed for reproducibility np.random.seed(run) # initialize RL-Glue rlglue.rl_init() # loop over episodes for episode in range(num_episodes): rlglue.rl_episode() result[episode] += rlglue.num_ep_steps() data = rlglue.rl_agent_message( "Q for all states in the episode") Q.append(data) result = result / num_runs output.append(result) np.save("output", output)