def main():
    agent_dict = []
    errors = []
    wall1 = MovingCube(1)
    print('began running at %s' % datetime.datetime.now().strftime("%a, %d %B %Y %H:%M:%S"))
    for i in range(1):
        learner = NeuralNetwork(cru.AGENT_LEARNER_NETWORK_SHAPE, cru.linear_relu, min=-0.1, max=0.1)
        curious_agent = CuriousAgent(0)
        curious_agent.learner = learner
        # sqv.set_global('AGENTS_COUNT', 1)
        # NOTE: [deepcopy(learner)] * 2 deep-copies the learner once and repeats the
        # same copy, so both entries alias one object (see the sketch after main()).
        d = activate_agent(10, 1000, render=False, print_info=False, reset_env=False,
                           reset_agent=True, agents=[curious_agent, wall1],
                           init_learners=[deepcopy(learner)] * 2,
                           get_last_step_avg_error=True,
                           moving_walls_amount=1, moving_wall_start_index=1)
        agent_dict.append(get_agent_dict(d))
        print('finished running #%i at %s' % (i + 1, datetime.datetime.now().strftime("%a, %d %B %Y %H:%M:%S")))
    agent_dict = join_dict_list(agent_dict)
    fig1, ax1 = plot_together(np.arange(len(agent_dict['last_errors'])),
                              [agent_dict['last_errors'], {'label': 'curious', 'color': 'blue'}],
                              title='Total Errors STD',
                              axis_labels=['epoch', 'last error'])
    plt.show()
    from IPython import embed
    embed()
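
# Editor's sketch (not original code): the list-repetition pitfall flagged in main()
# above. `[deepcopy(x)] * 2` makes one deep copy referenced twice; a comprehension
# makes independent copies. `_demo_list_aliasing` is a hypothetical helper, never called.
def _demo_list_aliasing(learner):
    from copy import deepcopy
    shared = [deepcopy(learner)] * 2                      # one copy, two references
    independent = [deepcopy(learner) for _ in range(2)]   # two distinct copies
    assert shared[0] is shared[1]
    assert independent[0] is not independent[1]
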
def main():
    env = gym.make("square-v0")
    state = env.reset()
    agent = CuriousAgent()
    error = 1.0
    tds = []
    errors = []
    timesteps = []
    rewards = []
    costs = []
    infos = []
    for timestep in range(200):
        state, error, info, td, reward, prediction = agent.take_step(env, state, error)
        errors.append(error)
        tds.append(td)
        rewards.append(reward)
        infos.append(info)
        timesteps.append(timestep)
        if timestep % 50 == 0:
            print("state: " + str(state))
            print("pred: " + str(np.round(prediction)))
        # learner_c = agent.train(300)
        # costs.append(np.sqrt(learner_c))
        env.render()
    print(agent.learner_alpha)
    print(agent.value_alpha)
    print(agent.epsilon)
    locs = info_to_location(infos)
    scales = loc_to_scalar(locs)
    print(scales)
    import matplotlib.pyplot as plt
    plt.figure()  # was plt.figure(timesteps, scales); plt.figure() takes no data arguments
    plt.plot(timesteps, tds)
    plt.title("TDS")
    plt.axis([min(timesteps), max(timesteps), min(tds), max(tds)])
    fig, ax = plt.subplots(1, 1)
    ax.plot(timesteps, errors)  # dropped a stray ax.plot(timesteps) that plotted timesteps against their index
    ax.set_title("Errors")
    ax.axis([min(timesteps), max(timesteps), min(errors), max(errors)])  # was ax([...]); an Axes is not callable
    plt.figure()
    plt.plot(timesteps, rewards)
    plt.title("Rewards")
    plt.axis([min(timesteps), max(timesteps), min(rewards), max(rewards)])
    plt.show()
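
# Editor's sketch (not original code): the three figure blocks in main() above repeat
# one pattern. A small helper using only plain matplotlib would collapse them;
# `_plot_series` is a hypothetical name.
def _plot_series(timesteps, values, title):
    import matplotlib.pyplot as plt
    fig, ax = plt.subplots(1, 1)
    ax.plot(timesteps, values)
    ax.set_title(title)
    ax.axis([min(timesteps), max(timesteps), min(values), max(values)])
    return fig, ax

# Usage, equivalent to the inline blocks above:
#     _plot_series(timesteps, tds, "TDS")
#     _plot_series(timesteps, errors, "Errors")
#     _plot_series(timesteps, rewards, "Rewards")
#     plt.show()
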
def main():
    agent_dict = []
    random_dict = []
    random_agent = RandomAgent(0)
    wall1, wall2 = MovingCube(1), MovingCube(2)
    print('began running at %s' % datetime.datetime.now().strftime("%a, %d %B %Y %H:%M:%S"))
    color_map_agent = []
    sqv.set_global("AGENTS_COUNT", NUMBER_OF_AGENTS)
    for i in range(1):
        learner = NeuralNetwork(cru.AGENT_LEARNER_NETWORK_SHAPE, cru.linear_relu, min=-0.01, max=0.01)
        curious_agent = CuriousAgent(0)
        curious_agent.learner = deepcopy(learner)
        d = activate_agent(MAX_STEPS, number_of_epoches=NUM_OF_EPOCHES, render=False,
                           print_info=False, reset_env=False, agents=[curious_agent],
                           get_avg_errors=False, get_values_field=True, number_of_error_agents=1)
        curious_agent.learner = deepcopy(learner)
        d1 = activate_agent(MAX_STEPS, number_of_epoches=1, render=False, print_info=False,
                            reset_env=False, agents=[curious_agent], get_avg_errors=True,
                            get_values_field=True, number_of_error_agents=1)
        d['total_errors'] = d1['total_errors']
        agent_dict.append(get_agent_dict(d))
        color_map_agent.append(stats.get_color_map(curious_agent))
        random_agent.learner = learner
        d = activate_agent(MAX_STEPS, render=False, print_info=False, reset_env=False,
                           agents=[random_agent], get_avg_errors=True, number_of_error_agents=1)
        random_dict.append(get_agent_dict(d))
        print('finished running #%i at %s' % (i + 1, datetime.datetime.now().strftime("%a, %d %B %Y %H:%M:%S")))
    means_curious = []
    for run in agent_dict:
        means_curious.append(run['total_errors'])
    std_agent = np.array(means_curious).std(axis=0)
    color_map_agent = np.array(color_map_agent).mean(axis=0)
    figs, axes = draw_color_maps(color_map_agent)
    means_random = []
    for run in random_dict:
        means_random.append(run['total_errors'])
    std_random = np.array(means_random).std(axis=0)
    agent_dict = join_dict_list(agent_dict)
    # draw_plots(agent_dict)
    random_dict = join_dict_list(random_dict)
    # draw_plots(random_dict)
    # fig, ax, q = plot_field(*agent_dict['fields'], title='Agent Value Field', color=agent_dict['fields_colors'])
    errors_rate_curious = agent_dict['total_errors']
    errors_rate_random = random_dict['total_errors']
    last_td_agent = np.zeros((len(agent_dict['epoches_tds']),))
    for i, v in enumerate(agent_dict['epoches_tds']):
        last_td_agent[i] = v[-1]
    fig, ax = plot_together(np.arange(len(last_td_agent)),
                            [last_td_agent, {'label': 'curious', 'color': 'blue'}],
                            title='Epochs Last TD',
                            axis_labels=['epoch', 'last TD'])
    fig1, ax1 = plot_together(random_dict['timesteps'],
                              [errors_rate_curious, {'label': 'curious', 'color': 'blue'}],
                              [errors_rate_random, {'label': 'random', 'color': 'red'}],
                              title='Total Errors STD',
                              std=[std_agent, std_random],
                              axis_labels=['steps', 'total error'])
    fig2, ax2 = plot_together(random_dict['timesteps'],
                              [errors_rate_curious, {'label': 'curious', 'color': 'blue'}],
                              [errors_rate_random, {'label': 'random', 'color': 'red'}],
                              title='Total Errors Means',
                              means=[means_curious, means_random],
                              axis_labels=['steps', 'total error'])
    fig3, ax3 = plot_together(random_dict['timesteps'][:-1],
                              [stats.derivative(errors_rate_curious), {'label': 'curious', 'color': 'blue'}],
                              [stats.derivative(errors_rate_random), {'label': 'random', 'color': 'red'}],
                              title='Total Errors Derivative',
                              axis_labels=['steps', 'total error'])
    fig1.savefig('./plots/std.png')
    fig2.savefig('./plots/means.png')
    plt.show()
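
# Editor's sketch (toy data, not original code): the aggregation in main() above
# stacks per-run total-error curves and reduces over the run axis (axis=0), giving
# a per-step mean and std across runs.
def _demo_run_statistics():
    import numpy as np
    runs = np.array([[1.0, 2.0, 3.0],      # run 0: total error per step
                     [2.0, 4.0, 6.0]])     # run 1
    mean_curve = runs.mean(axis=0)         # per-step mean across runs -> [1.5, 3.0, 4.5]
    std_curve = runs.std(axis=0)           # per-step std across runs  -> [0.5, 1.0, 1.5]
    return mean_curve, std_curve
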
def activate_agent(epoch_time, number_of_epoches=1, number_of_agents=1, reset_agent=True,
                   agents=None, render=True, print_info=True, reset_env=False, env=None,
                   get_avg_errors=False, set_cube=0, get_values_field=False,
                   moving_walls_amount=0, moving_wall_start_index=0, init_learners=None,
                   get_last_step_avg_error=False, number_of_error_agents=1):
    if env is None:
        env = gym.make('square-v0')
    states = env.reset(render=render)
    if agents is None:
        agents = []
    number_of_agents = max(number_of_agents, len(agents))
    for i in range(len(agents), number_of_agents):
        agents.append(CuriousAgent(i))

    # Per-agent bookkeeping, one list per agent.
    list_of_q = []
    total_errors = [[] for _ in range(number_of_agents)]
    last_errors = [[] for _ in range(number_of_agents)]
    agent_errors = [0] * number_of_agents
    tds = [[] for _ in range(number_of_agents)]
    errors = [[] for _ in range(number_of_agents)]
    timesteps = [[] for _ in range(number_of_agents)]
    rewards = [[] for _ in range(number_of_agents)]
    costs = [[] for _ in range(number_of_agents)]
    infos = [[] for _ in range(number_of_agents)]
    epoches_errors = [[] for _ in range(number_of_agents)]
    epoch_error = [[] for _ in range(number_of_agents)]
    epoches_tds = [[] for _ in range(number_of_agents)]
    epoch_td = [[] for _ in range(number_of_agents)]

    # Snapshot each agent's max-Q value at every grid cell and direction before training.
    values_before = [[np.zeros((sqv.RECT_WIDTH + 1, sqv.RECT_HEIGHT + 1)) for _ in range(4)]
                     for _ in range(number_of_agents)]
    for t in range(number_of_agents):
        for x in range(sqv.RECT_WIDTH + 1):
            for y in range(sqv.RECT_HEIGHT + 1):
                env.agents[t]["loc"] = np.array([x, y])
                for i in range(4):
                    ob, _, _, _ = env.step(np.array([0]), 0)
                    values_before[t][i][x, y] += np.amax(agents[t].q_function.hypot(ob))
        env.agents[t]['loc'] = np.array(sqv.INIT_LOCATIONS[t])
        env.agents[t]['dir'] = np.array(sqv.INIT_DIRECTIONS[t])

    for timestep in range(number_of_epoches * epoch_time):
        for i, agent in enumerate(agents):
            state = states[i]
            error = agent_errors[i]
            state, error, info, td, reward, prediction = agent.take_step(env, state, error)
            states[i] = state
            agent_errors[i] = error
            errors[i].append(error)
            tds[i].append(td)
            rewards[i].append(reward)
            infos[i].append(info)
            timesteps[i].append(timestep)
            epoch_td[i].append(td)
            epoch_error[i].append(error)
            list_of_q.append(deepcopy(agent.q_function.layers))
            if print_info:
                if timestep % PRINT_STATE_PRED == 0:
                    print("state: " + str(state))
                    print("prediction: " + str(np.round(prediction)))
                if timestep % PRINT_TIME_STEP == 0:
                    print("time step: " + str(timestep))
            # Epoch boundary: archive this epoch's buffers and optionally reset the agent.
            if timestep % epoch_time == 0 and timestep != 0:
                epoches_errors[i].append(epoch_error[i])
                epoch_error[i] = []
                epoches_tds[i].append(epoch_td[i])
                epoch_td[i] = []
                if get_last_step_avg_error and i < number_of_error_agents:
                    last_errors[i].append(stats.average_errors_on_trained_agent(agent, env))
                if reset_agent:
                    agent.reset_network()
                    if init_learners is not None:
                        agent.learner = deepcopy(init_learners[i])
                    env.agents[i]["loc"] = env.square_space.sample()
                    states[i] = env._get_all_observations()[i]
            # Epoch boundary, after the last agent: optionally rebuild the environment.
            if reset_env and i + 1 == len(agents) and timestep % epoch_time == 0 and timestep != 0:
                if render:
                    env.close()
                # randint(15, 15) is degenerate: both dimensions are always 15.
                sqv.set_global('RECT_WIDTH', random.randint(15, 15))
                sqv.set_global('RECT_HEIGHT', random.randint(15, 15))
                env = gym.make('square-v0')
                states = env.reset(render=render)
                for c in range(moving_walls_amount):
                    sqv.INIT_LOCATIONS[c + moving_wall_start_index] = env.square_space.sample()
                    sqv.INIT_DIRECTIONS[c + moving_wall_start_index] = random.choice(stats.ALL_DIRECTIONS)
                for c in range(set_cube):
                    sqv.INIT_LOCATIONS[c + number_of_agents + moving_walls_amount] = env.square_space.sample()
            if get_avg_errors and i < number_of_error_agents:
                total_errors[i].append(stats.func2(agent, env))
            # learner_c = agent.train(300)
            # costs.append(np.sqrt(learner_c))
        if render:
            env.render()

    # Flush the buffers of the final (possibly partial) epoch.
    for i in range(number_of_agents):
        epoches_errors[i].append(epoch_error[i])
        epoches_tds[i].append(epoch_td[i])

    # Snapshot the value maps again after training.
    values = [[np.zeros((sqv.RECT_WIDTH + 1, sqv.RECT_HEIGHT + 1)) for _ in range(4)]
              for _ in range(number_of_agents)]
    for t in range(number_of_agents):
        for x in range(sqv.RECT_WIDTH + 1):
            for y in range(sqv.RECT_HEIGHT + 1):
                env.agents[t]["loc"] = np.array([x, y])
                for i in range(4):
                    ob, _, _, _ = env.step(np.array([0]), 0)
                    values[t][i][x, y] += np.amax(agents[t].q_function.hypot(ob))
    if render:
        env.close()

    ret = {}
    if get_values_field:
        q = []
        cs = []
        for agent in agents:
            v, c = stats.get_agent_value_field(agent, env)
            q.append(v)
            cs.append(c)
        ret['fields'] = q
        ret['fields_colors'] = cs
    ret['agents'] = agents
    ret['tds'] = tds
    ret['errors'] = errors
    ret['timesteps'] = timesteps
    ret['rewards'] = rewards
    ret['costs'] = costs
    ret['infos'] = infos
    ret['epoches_errors'] = epoches_errors
    ret['epoches_tds'] = epoches_tds
    ret['values_before'] = values_before
    ret['values'] = values
    ret['total_errors'] = total_errors
    ret['last_errors'] = last_errors
    return ret
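
# Editor's sketch (not original code): a minimal call into activate_agent, assuming
# the 'square-v0' environment is registered and CuriousAgent is constructible as in
# the scripts above. Only keys actually placed in `ret` are read; `_demo_activate_agent`
# is a hypothetical helper.
def _demo_activate_agent():
    agent = CuriousAgent(0)
    d = activate_agent(10, number_of_epoches=2, render=False, print_info=False,
                       agents=[agent])
    # Per-agent series are indexed by agent position; agent 0 here.
    return d['errors'][0], d['tds'][0], d['values']
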
def main():
    d_nonreset = {}
    d_reset = {}
    d_nontrained = {}
    for i in range(100):
        agent = CuriousAgent(0)
        d = activate_agent(10, 10, render=False, print_info=False, agents=[deepcopy(agent)])
        reset_trained_agent = d['agents']
        reset_trained_agent[0].reset_network()
        reset_trained_agent[0].learner = deepcopy(agent.learner)
        # reset_trained_agent[0].q_alpha = agent.q_alpha
        d = activate_agent(100, reset_agent=False, render=False, print_info=False, agents=[deepcopy(agent)])
        nonreset_trained_agent = d['agents']
        nonreset_trained_agent[0].reset_network()
        nonreset_trained_agent[0].learner = deepcopy(agent.learner)
        # draw_plots(get_agent_dict(d, 0), plot_values_before=False, plot_values=False, plot_errors=False, plot_locations_bars=False)
        # nonreset_trained_agent[0].q_alpha = agent.q_alpha
        # sqv.set_global('RECT_WIDTH', random.randint(10, 20))
        # sqv.set_global('RECT_HEIGHT', random.randint(10, 20))
        d = activate_agent(100, agents=reset_trained_agent, render=False, print_info=False)
        d = get_agent_dict(d, 0)
        if i == 0:
            for j in d:
                d_reset[j] = np.array(d[j]) if isinstance(d[j], list) else d[j]
        else:
            for j in d_reset:
                if isinstance(d_reset[j], np.ndarray) and d_reset[j].dtype == 'float':
                    d_reset[j] = (float(i) * d_reset[j] + np.array(d[j])) / float(i + 1)
        d = activate_agent(100, agents=nonreset_trained_agent, render=False, print_info=False)
        d = get_agent_dict(d, 0)
        if i == 0:
            for j in d:
                d_nonreset[j] = np.array(d[j]) if isinstance(d[j], list) else d[j]
        else:
            for j in d_nonreset:
                if isinstance(d_nonreset[j], np.ndarray) and d_nonreset[j].dtype == 'float':
                    d_nonreset[j] = (float(i) * d_nonreset[j] + np.array(d[j])) / float(i + 1)
        d = activate_agent(100, render=False, print_info=False, agents=[deepcopy(agent)])
        d = get_agent_dict(d, 0)
        if i == 0:
            for j in d:
                d_nontrained[j] = np.array(d[j]) if isinstance(d[j], list) else d[j]
        else:
            for j in d_nontrained:
                if isinstance(d_nontrained[j], np.ndarray) and d_nontrained[j].dtype == 'float':
                    d_nontrained[j] = (float(i) * d_nontrained[j] + np.array(d[j])) / float(i + 1)
        print("finished running #%i" % i)
    # draw_plots(d_reset, use_alpha=True, plot_locs_on_errors=False, plot_locs_on_tds=False, plot_values=False, plot_values_before=False)
    # draw_plots(d_nonreset, use_alpha=True, plot_locs_on_errors=False, plot_locs_on_tds=False, plot_values=False, plot_values_before=False)
    # draw_plots(d_nontrained, use_alpha=False, plot_locs_on_errors=False, plot_locs_on_tds=False, plot_values=False, plot_values_before=False)
    plot_together(d_nontrained['timesteps'],
                  [d_nontrained['errors'], {'label': 'non trained'}],
                  [d_nonreset['errors'], {'label': 'non reset'}],
                  [d_reset['errors'], {'label': 'reset'}],
                  title='Errors')
    plot_together(d_nontrained['timesteps'],
                  [d_nontrained['tds'], {'label': 'non trained'}],
                  [d_nonreset['tds'], {'label': 'non reset'}],
                  [d_reset['tds'], {'label': 'reset'}],
                  title='TDs')
    from IPython import embed
    embed()
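
# Editor's sketch (not original code): the (float(i) * old + new) / float(i + 1)
# updates in main() above are an incremental mean. After processing run i (0-based),
# the stored array equals the element-wise mean of all i + 1 runs, without keeping
# every run in memory. `_demo_incremental_mean` is a hypothetical helper that checks
# the equivalence against np.mean.
def _demo_incremental_mean():
    import numpy as np
    runs = [np.random.rand(5) for _ in range(100)]
    mean = runs[0].copy()
    for i, run in enumerate(runs[1:], start=1):
        mean = (float(i) * mean + run) / float(i + 1)
    assert np.allclose(mean, np.mean(runs, axis=0))
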