Example #1
def main():
    agent_dict = []
    errors = []
    wall1 = MovingCube(1)
    print('began running at %s' % datetime.datetime.now().strftime("%a, %d %B %Y %H:%M:%S"))
    for i in range(1):

        learner = NeuralNetwork(cru.AGENT_LEARNER_NETWORK_SHAPE, cru.linear_relu, min=-0.1, max=0.1)
        curious_agent = CuriousAgent(0)
        curious_agent.learner = learner
        #sqv.set_global('AGENTS_COUNT', 1)
        d = activate_agent(10, 1000, render=False, print_info=False, reset_env=False, reset_agent=True,
                           agents=[curious_agent, wall1], init_learners=[deepcopy(learner)]*2, get_last_step_avg_error=True,
                           moving_walls_amount=1, moving_wall_start_index=1)

        agent_dict.append(get_agent_dict(d))
        print('finished running #%i at %s' % (i + 1, datetime.datetime.now().strftime("%a, %d %B %Y %H:%M:%S")))
    agent_dict = join_dict_list(agent_dict)
    fig1, ax1 = plot_together(np.arange(len(agent_dict['last_errors'])), [agent_dict['last_errors'],{'label':'curious', 'color':'blue'}],
                              title='Total Errors STD', axis_labels=['epoch', 'last error'])

    plt.show()

    from IPython import embed
    embed()
Example #2
def main():
    env = gym.make("square-v0")
    state = env.reset()
    agent = CuriousAgent()
    error = 1.0
    tds = []
    errors = []
    timesteps = []
    rewards = []
    costs = []
    infos = []

    for timestep in range(200):
        state, error, info, td, reward, prediction = agent.take_step(
            env, state, error)
        errors.append(error)
        tds.append(td)
        rewards.append(reward)
        infos.append(info)
        timesteps.append(timestep)
        if timestep % 50 == 0:
            print("state: " + str(state))
            print("pred: " + str(np.round(prediction)))
        #learner_c = agent.train(300)
        #costs.append(np.sqrt(learner_c))
        env.render()

    print(agent.learner_alpha)
    print(agent.value_alpha)
    print(agent.epsilon)

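    # Convert the collected step infos to grid locations and then to scalar values for a quick printout.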
    locs = info_to_location(infos)
    scales = loc_to_scalar(locs)
    print(scales)
    import matplotlib.pyplot as plt
    plt.figure()
    plt.plot(timesteps, tds)
    plt.title("TDS")
    plt.axis([min(timesteps), max(timesteps), min(tds), max(tds)])

    fig, ax = plt.subplots(1, 1)
    ax.plot(timesteps, errors)
    ax.set_title("Errors")
    ax.axis([min(timesteps), max(timesteps), min(errors), max(errors)])

    plt.figure()
    plt.plot(timesteps, rewards)
    plt.title("Rewards")
    plt.axis([min(timesteps), max(timesteps), min(rewards), max(rewards)])

    plt.show()
Example #3
def main():
    agent_dict = []
    random_dict = []

    random_agent = RandomAgent(0)
    wall1, wall2 = MovingCube(1), MovingCube(2)
    print('began running at %s' %
          datetime.datetime.now().strftime("%a, %d %B %Y %H:%M:%S"))
    color_map_agent = []
    sqv.set_global("AGENTS_COUNT", NUMBER_OF_AGENTS)
    for i in range(1):

        learner = NeuralNetwork(cru.AGENT_LEARNER_NETWORK_SHAPE,
                                cru.linear_relu,
                                min=-0.01,
                                max=0.01)
        curious_agent = CuriousAgent(0)
        curious_agent.learner = deepcopy(learner)
        d = activate_agent(MAX_STEPS,
                           number_of_epoches=NUM_OF_EPOCHES,
                           render=False,
                           print_info=False,
                           reset_env=False,
                           agents=[curious_agent],
                           get_avg_errors=False,
                           get_values_field=True,
                           number_of_error_agents=1)
        curious_agent.learner = deepcopy(learner)
        d1 = activate_agent(MAX_STEPS,
                            number_of_epoches=1,
                            render=False,
                            print_info=False,
                            reset_env=False,
                            agents=[curious_agent],
                            get_avg_errors=True,
                            get_values_field=True,
                            number_of_error_agents=1)
        d['total_errors'] = d1['total_errors']
        agent_dict.append(get_agent_dict(d))
        color_map_agent.append(stats.get_color_map(curious_agent))
        random_agent.learner = learner
        d = activate_agent(MAX_STEPS,
                           render=False,
                           print_info=False,
                           reset_env=False,
                           agents=[random_agent],
                           get_avg_errors=True,
                           number_of_error_agents=1)

        random_dict.append(get_agent_dict(d))

        print(
            'finished running #%i at %s' %
            (i + 1, datetime.datetime.now().strftime("%a, %d %B %Y %H:%M:%S")))

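    # Collect per-run total-error curves; their std across runs feeds the 'Total Errors STD' plot below.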
    means_curious = []
    for i in agent_dict:
        means_curious.append(i['total_errors'])
    std_agent = np.array(means_curious).std(axis=0)

    color_map_agent = np.array(color_map_agent).mean(axis=0)

    figs, axes = draw_color_maps(color_map_agent)

    means_random = []
    for i in random_dict:
        means_random.append(i['total_errors'])
    std_random = np.array(means_random).std(axis=0)

    agent_dict = join_dict_list(agent_dict)
    #draw_plots(agent_dict)
    random_dict = join_dict_list(random_dict)
    #draw_plots(random_dict)

    #fig, ax ,q = plot_field(*agent_dict['fields'], title='Agent Value Field', color=agent_dict['fields_colors'])

    errors_rate_curious = agent_dict['total_errors']
    errors_rate_random = random_dict['total_errors']
    last_td_agent = np.zeros((len(agent_dict['epoches_tds']), ))

    for i, v in enumerate(agent_dict['epoches_tds']):
        last_td_agent[i] = v[-1]

    fig, ax = plot_together(
        np.arange(len(last_td_agent)),
        [last_td_agent, {
            'label': 'curious',
            'color': 'blue'
        }],
        title='Epochs Last TD',
        axis_labels=['epoch', 'last TD'])

    fig1, ax1 = plot_together(
        random_dict['timesteps'],
        [errors_rate_curious, {
            'label': 'curious',
            'color': 'blue'
        }], [errors_rate_random, {
            'label': 'random',
            'color': 'red'
        }],
        title='Total Errors STD',
        std=[std_agent, std_random],
        axis_labels=['steps', 'total error'])

    fig2, ax2 = plot_together(
        random_dict['timesteps'],
        [errors_rate_curious, {
            'label': 'curious',
            'color': 'blue'
        }], [errors_rate_random, {
            'label': 'random',
            'color': 'red'
        }],
        title='Total Errors Means',
        means=[means_curious, means_random],
        axis_labels=['steps', 'total error'])

    fig3, ax3 = plot_together(random_dict['timesteps'][:-1], [
        stats.derivative(errors_rate_curious), {
            'label': 'curious',
            'color': 'blue'
        }
    ], [
        stats.derivative(errors_rate_random), {
            'label': 'random',
            'color': 'red'
        }
    ],
                              title='Total Errors Derivative',
                              axis_labels=['steps', 'total error'])

    fig1.savefig('./plots/std.png')
    fig2.savefig('./plots/means.png')
    plt.show()
Example #4
def activate_agent(epoch_time, number_of_epoches=1, number_of_agents=1, reset_agent=True, agents=None,
                   render=True, print_info=True, reset_env=False, env=None, get_avg_errors=False, set_cube=0,
                   get_values_field=False, moving_walls_amount=0, moving_wall_start_index=0, init_learners=None,
                   get_last_step_avg_error=False, number_of_error_agents=1):
    if env is None:
        env = gym.make('square-v0')
    states = env.reset(render=render)
    if agents is None:
        agents = []
    number_of_agents = max(number_of_agents, len(agents))
    for i in range(len(agents), number_of_agents):
        agents.append(CuriousAgent(i))

    list_of_q = []

    total_errors = [[] for _ in range(number_of_agents)]
    last_errors = [[] for _ in range(number_of_agents)]

    agent_errors = [0] * number_of_agents
    tds = [[] for _ in range(number_of_agents)]
    errors = [[] for _ in range(number_of_agents)]
    timesteps = [[] for _ in range(number_of_agents)]
    rewards = [[] for _ in range(number_of_agents)]
    costs = [[] for _ in range(number_of_agents)]
    infos = [[] for _ in range(number_of_agents)]
    epoches_errors = [[] for _ in range(number_of_agents)]
    epoch_error = [[] for _ in range(number_of_agents)]
    epoches_tds = [[] for _ in range(number_of_agents)]
    epoch_td = [[] for _ in range(number_of_agents)]
    values_before = [[np.zeros((sqv.RECT_WIDTH + 1, sqv.RECT_HEIGHT + 1)) for _ in range(4)] for _ in range(number_of_agents)]

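    # Sample each agent's value estimate at every grid cell before training;
    # the inner loop of 4 presumably cycles the facing direction via repeated action-0 steps.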
    for t in range(number_of_agents):
        for x in range(sqv.RECT_WIDTH + 1):
            for y in range(sqv.RECT_HEIGHT + 1):
                env.agents[t]["loc"] = np.array([x, y])
                for i in range(4):
                    ob, _, _, _ = env.step(np.array([0]), 0)
                    values_before[t][i][x, y] += np.amax(agents[t].q_function.hypot(ob))
        env.agents[t]['loc'] = np.array(sqv.INIT_LOCATIONS[t])
        env.agents[t]['dir'] = np.array(sqv.INIT_DIRECTIONS[t])

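    # Main loop: each agent takes one step per timestep, per-step and per-epoch statistics
    # are accumulated, and agents/environment are optionally reset at epoch boundaries.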
    for timestep in range(number_of_epoches * epoch_time):
        for i, agent in enumerate(agents):
            state = states[i]
            error = agent_errors[i]
            state, error, info, td, reward, prediction = agent.take_step(env, state, error)
            states[i] = state
            agent_errors[i] = error
            errors[i].append(error)
            tds[i].append(td)
            rewards[i].append(reward)
            infos[i].append(info)
            timesteps[i].append(timestep)
            epoch_td[i].append(td)
            epoch_error[i].append(error)
            list_of_q.append(deepcopy(agent.q_function.layers))
            if print_info:
                if timestep % PRINT_STATE_PRED == 0:
                    print("state: " + str(state))
                    print("prediction: " + str(np.round(prediction)))
                if timestep % PRINT_TIME_STEP == 0:
                    print("time step: " + str(timestep))
            if timestep % epoch_time == 0 and timestep != 0:
                epoches_errors[i].append(epoch_error[i])
                epoch_error[i] = []
                epoches_tds[i].append(epoch_td[i])
                epoch_td[i] = []
                if get_last_step_avg_error and i < number_of_error_agents:
                    last_errors[i].append(stats.average_errors_on_trained_agent(agent, env))
                if reset_agent:
                    agent.reset_network()
                    if init_learners is not None:
                        agent.learner = deepcopy(init_learners[i])
                    env.agents[i]["loc"] = env.square_space.sample()
                    states[i] = env._get_all_observations()[i]
            if reset_env and i + 1 == len(agents) and timestep % epoch_time == 0 and timestep != 0:
                if render:
                    env.close()
                sqv.set_global('RECT_WIDTH', random.randint(15, 15))
                sqv.set_global('RECT_HEIGHT', random.randint(15, 15))
                env = gym.make('square-v0')
                states = env.reset(render=render)
                for c in range(moving_walls_amount):
                    sqv.INIT_LOCATIONS[c + moving_wall_start_index] = env.square_space.sample()
                    sqv.INIT_DIRECTIONS[c + moving_wall_start_index] = random.choice(stats.ALL_DIRECTIONS)
                for c in range(set_cube):
                    sqv.INIT_LOCATIONS[c + number_of_agents + moving_walls_amount] = env.square_space.sample()
            if get_avg_errors and i < number_of_error_agents:
                total_errors[i].append(stats.func2(agent, env))
        # learner_c = agent.train(300)
        # costs.append(np.sqrt(learner_c))
        if render:
            env.render()
    for i in range(number_of_agents):
        epoches_errors[i].append(epoch_error[i])
        epoches_tds[i].append(epoch_td[i])

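    # Repeat the grid scan after training so 'values' can be compared with 'values_before'.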
    values = [[np.zeros((sqv.RECT_WIDTH + 1, sqv.RECT_HEIGHT + 1)) for _ in range(4)] for _ in range(number_of_agents)]

    for t in range(number_of_agents):
        for x in range(sqv.RECT_WIDTH + 1):
            for y in range(sqv.RECT_HEIGHT + 1):
                env.agents[t]["loc"] = np.array([x, y])
                for i in range(4):
                    ob, _, _, _ = env.step(np.array([0]), 0)
                    values[t][i][x, y] += np.amax(agents[t].q_function.hypot(ob))

    if render:
        env.close()

    ret = {}
    if get_values_field:
        q = []
        cs = []
        for i in agents:
            v, c = stats.get_agent_value_field(i, env)
            q.append(v)
            cs.append(c)
        ret['fields'] = q
        ret['fields_colors'] = cs

    ret['agents'] = agents
    ret['tds'] = tds
    ret['errors'] = errors
    ret['timesteps'] = timesteps
    ret['rewards'] = rewards
    ret['costs'] = costs
    ret['infos'] = infos
    ret['epoches_errors'] = epoches_errors
    ret['epoches_tds'] = epoches_tds
    ret['values_before'] = values_before
    ret['values'] = values
    ret['total_errors'] = total_errors
    ret['last_errors'] = last_errors

    return ret
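
A minimal usage sketch, assuming CuriousAgent, activate_agent, and the square-v0 environment are importable as in the examples above; activate_agent returns a dict of per-agent traces (errors, tds, rewards, ...):

agent = CuriousAgent(0)
# 10 steps per epoch, 5 epochs, no rendering or per-step printing
d = activate_agent(10, number_of_epoches=5, render=False, print_info=False, agents=[agent])
print(len(d['errors'][0]))  # one prediction-error value per timestep (here 50)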
Example #5
def main():

    d_nonreset = {}
    d_reset = {}
    d_nontrained = {}

    for i in range(100):
        agent = CuriousAgent(0)
        d = activate_agent(10,
                           10,
                           render=False,
                           print_info=False,
                           agents=[deepcopy(agent)])
        reset_trained_agent = d['agents']
        reset_trained_agent[0].reset_network()
        reset_trained_agent[0].learner = deepcopy(agent.learner)
        #reset_trained_agent[0].q_alpha = agent.q_alpha

        d = activate_agent(100,
                           reset_agent=False,
                           render=False,
                           print_info=False,
                           agents=[deepcopy(agent)])
        nonreset_trained_agent = d['agents']
        nonreset_trained_agent[0].reset_network()
        nonreset_trained_agent[0].learner = deepcopy(agent.learner)

        #draw_plots(get_agent_dict(d,0), plot_values_before=False, plot_values=False,plot_errors=False, plot_locations_bars=False)
        #nonreset_trained_agent[0].q_alpha = agent.q_alpha

        #sqv.set_global('RECT_WIDTH', random.randint(10, 20))
        #sqv.set_global('RECT_HEIGHT', random.randint(10, 20))

        d = activate_agent(100,
                           agents=reset_trained_agent,
                           render=False,
                           print_info=False)
        d = get_agent_dict(d, 0)
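        # Maintain a running mean over runs: copy the arrays on the first run, then fold each new run into the average.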
        if i == 0:
            for j in d:
                d_reset[j] = np.array(d[j]) if isinstance(d[j], list) else d[j]
        else:
            for j in d_reset:
                if isinstance(d_reset[j],
                              np.ndarray) and d_reset[j].dtype == 'float':
                    d_reset[j] = (float(i) * d_reset[j] +
                                  np.array(d[j])) / float(i + 1)

        d = activate_agent(100,
                           agents=nonreset_trained_agent,
                           render=False,
                           print_info=False)
        d = get_agent_dict(d, 0)
        if i == 0:
            for j in d:
                d_nonreset[j] = np.array(d[j]) if isinstance(d[j],
                                                             list) else d[j]
        else:
            for j in d_nonreset:
                if isinstance(d_nonreset[j],
                              np.ndarray) and d_nonreset[j].dtype == 'float':
                    d_nonreset[j] = (float(i) * d_nonreset[j] +
                                     np.array(d[j])) / float(i + 1)

        d = activate_agent(100,
                           render=False,
                           print_info=False,
                           agents=[deepcopy(agent)])
        d = get_agent_dict(d, 0)
        if i == 0:
            for j in d:
                d_nontrained[j] = np.array(d[j]) if isinstance(d[j],
                                                               list) else d[j]
        else:
            for j in d_nontrained:
                if isinstance(d_nontrained[j],
                              np.ndarray) and d_nontrained[j].dtype == 'float':
                    d_nontrained[j] = (float(i) * d_nontrained[j] +
                                       np.array(d[j])) / float(i + 1)

        print("finished running #%i" % i)

    #draw_plots(d_reset, use_alpha=True, plot_locs_on_errors=False, plot_locs_on_tds=False, plot_values=False, plot_values_before=False)

    #draw_plots(d_nonreset, use_alpha=True, plot_locs_on_errors=False, plot_locs_on_tds=False, plot_values=False, plot_values_before=False)

    #draw_plots(d_nontrained, use_alpha=False, plot_locs_on_errors=False, plot_locs_on_tds=False, plot_values=False, plot_values_before=False)

    plot_together(d_nontrained['timesteps'],
                  [d_nontrained['errors'], {
                      'label': 'non trained'
                  }], [d_nonreset['errors'], {
                      'label': 'non reset'
                  }], [d_reset['errors'], {
                      'label': 'reset'
                  }],
                  title='Errors')

    plot_together(d_nontrained['timesteps'],
                  [d_nontrained['tds'], {
                      'label': 'non trained'
                  }], [d_nonreset['tds'], {
                      'label': 'non reset'
                  }], [d_reset['tds'], {
                      'label': 'reset'
                  }],
                  title='TDs')

    from IPython import embed
    embed()