Example no. 1
import gym
import networkx as nx
import numpy as np


def generate_graph(loop_no=None):
    wall_1, wall_2, key_1, key_2, door_1, door_2 = generate_room(loop_no)
    G = nx.DiGraph()
    # Collect transitions from 100 random-policy rollouts. The first 50
    # episodes start from a random cell; the rest start past the first
    # door with key 1 already carried and door 1 already open.
    env_name = 'gym_minigrid.envs:MiniGrid-MainRoom-v1'
    for i in range(100):
        if i < 50:
            rand_x = np.random.randint(1, 3)
            rand_y = np.random.randint(5, 7)
            start = (rand_x, rand_y)
            env = gym.make(env_name,
                           agent_start=start,
                           key_pos=[key_1, key_2],
                           wall_pos=[wall_1, wall_2],
                           door_pos=[door_1, door_2])
        else:
            # Start at (6, 1) unless key 2 occupies that cell.
            start_pos = (6, 2)
            if key_2 != (6, 1):
                start_pos = (6, 1)
            env = gym.make(env_name,
                           agent_start=start_pos,
                           keys_carried=[1, 0],
                           door_state=[1, 0],
                           key_pos=[key_1, key_2],
                           wall_pos=[wall_1, wall_2],
                           door_pos=[door_1, door_2])
        number_of_actions = env.action_space.n
        agent = QLearningAgent(number_of_actions,
                               gamma=0.9,
                               alpha=0.12,
                               epsilon=0.1)
        state = env.reset()
        # Roll out a uniform-random policy for at most 1000 steps, feeding
        # each transition to the agent and into the graph.
        for _ in range(1000):
            action = np.random.randint(0, number_of_actions)
            next_state, reward, done, _ = env.step(action)
            agent.update(state, action, next_state, reward)

            if done:
                # The terminal transition is not recorded as an edge.
                break
            G.add_edge(state, next_state, action=action)
            state = next_state
    return G, number_of_actions
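A minimal usage sketch follows, assuming generate_graph and its dependencies (the gym_minigrid environment registration and QLearningAgent) are importable from this codebase; the loop_no value is a placeholder:

# Hypothetical call; loop_no=0 is a placeholder value.
G, n_actions = generate_graph(loop_no=0)
print(G.number_of_nodes(), 'states,', G.number_of_edges(), 'transitions')

# Each edge stores the action that produced the transition.
for state, next_state, data in list(G.edges(data=True))[:3]:
    print(state, '->', next_state, 'via action', data['action'])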
Example no. 2
import sys

import numpy as np


def run_q_learning(env, num_episodes, gamma, alpha, epsilon):
    agent = QLearningAgent(env.action_space.n,
                           gamma=gamma,
                           alpha=alpha,
                           epsilon=epsilon)

    stats = {
        'episode_lengths': np.zeros(num_episodes),
        'episode_rewards': np.zeros(num_episodes)
    }

    for i_episode in range(num_episodes):

        if (i_episode + 1) % 20 == 0:
            print("\rEpisode {}/{}.".format(i_episode + 1, num_episodes),
                  end="")
            sys.stdout.flush()

        state = env.reset()
        done = False
        t = 0
        while not done:
            action = agent.step(state)
            next_state, reward, done, _ = env.step(action)

            # Update statistics (t is the zero-based step index)
            stats['episode_rewards'][i_episode] += reward
            stats['episode_lengths'][i_episode] = t

            agent.update(state, action, next_state, reward)
            t += 1
            state = next_state

    print()

    return agent, stats
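All three examples construct a QLearningAgent with step and update methods, but the class itself is not shown on this page. Below is a minimal sketch of a tabular agent matching those calls; the internals (epsilon-greedy selection, a defaultdict Q-table, a one-step backup) are assumptions, not the codebase's actual implementation:

import random
from collections import defaultdict

import numpy as np


class QLearningAgent:
    """Sketch of a tabular Q-learning agent; internals are assumptions."""

    def __init__(self, n_actions, gamma=0.9, alpha=0.1, epsilon=0.1):
        self.n_actions = n_actions
        self.gamma = gamma      # discount factor
        self.alpha = alpha      # learning rate
        self.epsilon = epsilon  # exploration rate
        self.q = defaultdict(lambda: np.zeros(n_actions))

    def step(self, state):
        # Epsilon-greedy action selection over the current Q-values.
        if random.random() < self.epsilon:
            return random.randrange(self.n_actions)
        return int(np.argmax(self.q[state]))

    def update(self, state, action, next_state, reward):
        # One-step Q-learning backup: Q(s,a) += alpha * (TD target - Q(s,a)).
        td_target = reward + self.gamma * np.max(self.q[next_state])
        self.q[state][action] += self.alpha * (td_target - self.q[state][action])

States are used as dictionary keys here, so they must be hashable (the tuples used in these examples qualify).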
Example no. 3
import os
import pickle
import sys

import numpy as np
import pandas as pd

# RESULTS_PATH, EpisodeStats, QLearningAgent, execute_option and global_set
# are module-level names assumed to be defined elsewhere in this codebase.


def run_q_learning(num_episodes,
                   max_eps_length,
                   env,
                   dom_no,
                   loop_no,
                   run_no,
                   with_options=False,
                   factored=False):
    if with_options:
        print('with skills')
        if factored:
            skills_path = (RESULTS_PATH + 'generated_options/' + str(dom_no) +
                           '/' + str(loop_no) + '/' + str(run_no) +
                           '_factored_skills.pickle')
        else:
            skills_path = (RESULTS_PATH + 'generated_options/' + str(dom_no) +
                           '/generated_skills.pickle')
        # Each loaded skill is exposed to the agent as one extra action.
        with open(skills_path, "rb") as skills_file:
            skills = pickle.load(skills_file)
        number_of_actions = env.action_space.n + len(skills)
    else:
        number_of_actions = env.action_space.n
    agent = QLearningAgent(number_of_actions,
                           gamma=0.9,
                           alpha=0.12,
                           epsilon=0.1)

    stats = EpisodeStats(episode_lengths=np.zeros(num_episodes),
                         episode_rewards=np.zeros(num_episodes))

    # Inputs (state + option index) and resulting states of each option
    # execution; written to CSV at the end of the run.
    input_list = []
    output_list = []
    for i_episode in range(num_episodes):

        if (i_episode + 1) % 20 == 0:
            print("\rEpisode {}/{}.".format(i_episode + 1, num_episodes),
                  end="")
            sys.stdout.flush()

        state = env.reset()
        for t in range(max_eps_length):

            action = agent.step(state)
            if action >= env.action_space.n:
                # Actions beyond the primitive range index into the skills.
                option_input = state + (action - env.action_space.n, )
                input_list.append(list(option_input))
                option = skills[action - env.action_space.n]
                if factored:
                    state_in = option.factor_state(state, global_set)
                else:
                    state_in = state
                if option.in_initialisation_set(state_in):
                    next_state, reward, done, _, total_steps = execute_option(
                        25,
                        env,
                        option,
                        state_in,
                        factored=factored,
                        state_uf=state)
                    stats.episode_lengths[i_episode] = t + total_steps - 1
                else:
                    # Option is not applicable in this state: stay put
                    # and take a -1 step penalty.
                    next_state = state
                    reward = -1
                    done = False
                output_list.append(list(next_state))
            else:
                next_state, reward, done, _ = env.step(action)
                stats.episode_lengths[i_episode] = t

            # Update statistics
            stats.episode_rewards[i_episode] += reward

            agent.update(state, action, next_state, reward)

            if done:
                break

            state = next_state
    # Copy global_set so appending 'Action' does not mutate the shared list.
    cols = list(global_set)
    df_out = pd.DataFrame.from_records(output_list, columns=cols)
    df_in = pd.DataFrame.from_records(input_list, columns=cols + ['Action'])
    data_path = './data/' + str(loop_no)
    if not (os.path.isdir(data_path)):
        os.makedirs(data_path)
    if run_no > 0:
        # Merge in the data collected during the previous task run.
        df_in2 = pd.read_csv(data_path + '/input_data_task_' +
                             str(run_no - 1) + '.csv',
                             index_col=False)
        df_out2 = pd.read_csv(data_path + '/output_data_task_' +
                              str(run_no - 1) + '.csv',
                              index_col=False)
        df_in = pd.concat([df_in, df_in2], ignore_index=True, sort=False)
        df_out = pd.concat([df_out, df_out2], ignore_index=True, sort=False)
    # Drop leftover index columns ('Unnamed: 0') from earlier CSV round-trips.
    df_in.drop(df_in.filter(regex='Unname'), axis=1, inplace=True)
    df_out.drop(df_out.filter(regex='Unname'), axis=1, inplace=True)
    df_in.to_csv(data_path + '/input_data_task_' + str(run_no) + '.csv')
    df_out.to_csv(data_path + '/output_data_task_' + str(run_no) + '.csv')
    return agent, stats
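A hypothetical invocation sketch: the environment id, hyperparameter values, and episode counts below are assumptions, and RESULTS_PATH plus the pickled skills must already exist before calling with with_options=True.

import gym

# Hypothetical values; MiniGrid-MainRoom-v1 must be registered as above.
env = gym.make('MiniGrid-MainRoom-v1')
agent, stats = run_q_learning(num_episodes=200,
                              max_eps_length=500,
                              env=env,
                              dom_no=0,
                              loop_no=0,
                              run_no=0,
                              with_options=False)
print('mean episode reward:', stats.episode_rewards.mean())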