Example #1
def main():

    MAP = map_8  # (map layout, map size) tuple for the 8x8 FrozenLake map
    map_size = MAP[1]

    beta = 0.999
    env = FrozenLakeEnv(desc=MAP[0], is_slippery=True)
    print("Game Map:")
    env.render()

    start_time = time.time()
    v, pi = sync_policy_iteration(env, beta=beta)
    v_np, pi_np = np.array(v), np.array(pi)
    end_time = time.time()
    run_time['Sync Policy Iteration'] = end_time - start_time
    print("time:", run_time['Sync Policy Iteration'])

    print_results(v, pi, map_size, env, beta, 'sync_pi_gs')

    from copy import deepcopy
    temp_dict = deepcopy(run_time)
    print("All:")
    for _ in range(len(temp_dict)):
        min_v = float('inf')
        for k, v in temp_dict.items():
            if v is None:
                continue
            if v < min_v:
                min_v = v
                name = k
        temp_dict[name] = float('inf')
        print(name + ": " + str(min_v))
        print()
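Example #1 calls sync_policy_iteration, which is defined elsewhere in the project. The sketch below shows what a synchronous policy-iteration solver for a Gym FrozenLakeEnv could look like, assuming the classic env.nS, env.nA, and env.P[s][a] transition tables; it is illustrative only, not the project's actual implementation.

import numpy as np

def sync_policy_iteration(env, beta=0.999, tol=1e-8):
    """Sketch: alternate full policy evaluation and greedy improvement
    until the policy stops changing."""
    nS, nA = env.nS, env.nA
    v = np.zeros(nS)
    pi = np.zeros(nS, dtype=int)

    def backup(s, a):
        # Expected one-step return of taking action a in state s.
        return sum(p * (r + beta * v[s2] * (not done))
                   for p, s2, r, done in env.P[s][a])

    while True:
        # Policy evaluation: sweep until the value function converges.
        while True:
            delta = 0.0
            for s in range(nS):
                new_v = backup(s, pi[s])
                delta = max(delta, abs(new_v - v[s]))
                v[s] = new_v
            if delta < tol:
                break
        # Policy improvement: act greedily with respect to v.
        stable = True
        for s in range(nS):
            best = int(np.argmax([backup(s, a) for a in range(nA)]))
            if best != pi[s]:
                stable = False
            pi[s] = best
        if stable:
            return v, pi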
Example #2
def main():

    MAP = map_DH
    map_size = MAP[1]
    simulator = FrozenLakeEnv(desc=MAP[0])

    epsilon = 0.3  # Select 0.3 or 0.05
    learning_rate = 0.001  # Select 0.001 or 0.1
    learning_episodes = 30000  # 30000 (DH) or 100000 (16)
    test_interval = 100
    do_test = True

    sarsa_agent = SARSAagent(simulator,
                             epsilon,
                             learning_rate,
                             learning_episodes,
                             map_size,
                             test_interval=test_interval,
                             do_test=do_test)
    start_time = time.time()
    total_rewards, q_table = sarsa_agent.learn_and_evaluate()
    run_time['SARSA agent'] = time.time() - start_time
    print("Learning time:\n")
    print(run_time['SARSA agent'])
    if do_test:
        plot_result(total_rewards, sarsa_agent.test_interval,
                    [sarsa_agent.agent_name])
    plot_image(q_table, MAP[0], map_size)
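SARSAagent is defined elsewhere in the project; its constructor arguments are shown above but not its learning rule. The helpers below sketch the tabular update such an agent typically performs; the function names and signatures are illustrative, not the project's API.

import numpy as np

def epsilon_greedy(q_table, s, epsilon, n_actions):
    """Pick a random action with probability epsilon, else the greedy one."""
    if np.random.random() < epsilon:
        return np.random.randint(n_actions)
    return int(np.argmax(q_table[s]))

def sarsa_update(q_table, s, a, r, s_next, a_next, alpha, gamma=0.999):
    """One tabular SARSA step: move Q(s, a) toward the on-policy target
    r + gamma * Q(s', a')."""
    target = r + gamma * q_table[s_next, a_next]
    q_table[s, a] += alpha * (target - q_table[s, a])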
Example #3
def main():

    MAP = map_8
    map_size = MAP[1]

    beta = 0.999
    env = FrozenLakeEnv(desc=MAP[0], is_slippery=True)
    print("Game Map:")
    env.render()

    start_time = time.time()
    v, pi = sync_value_iteration(env, beta=beta)
    v_np, pi_np = np.array(v), np.array(pi)
    end_time = time.time()
    run_time['Sync Value Iteration'] = end_time - start_time
    print("time:", run_time['Sync Value Iteration'])

    print_results(v, pi, map_size, env, beta, 'sync_vi_gs')

    ray.shutdown()
    ray.init(include_webui=False,
             ignore_reinit_error=True,
             redis_max_memory=500000000,
             object_store_memory=5000000000)

    beta = 0.999
    env = FrozenLakeEnv(desc=MAP[0], is_slippery=True)
    print("Game Map:")
    env.render()

    start_time = time.time()
    v, pi = distributed_value_iteration(env, beta=beta, workers_num=4)
    v_np, pi_np = np.array(v), np.array(pi)
    end_time = time.time()
    run_time['Sync distributed VI'] = end_time - start_time
    print("time:", run_time['Sync distributed VI'])
    print_results(v, pi, map_size, env, beta, 'dist_vi')

    from copy import deepcopy
    temp_dict = deepcopy(run_time)
    print("All:")
    for _ in range(len(temp_dict)):
        min_v = float('inf')
        for k, v in temp_dict.items():
            if v is None:
                continue
            if v < min_v:
                min_v = v
                name = k
        temp_dict[name] = float('inf')
        print(name + ": " + str(min_v))
        print()
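Both solvers used in Example #3 come from the project's own module. As a counterpart to the policy-iteration sketch after Example #1, here is an equally rough sketch of a synchronous value-iteration routine, again assuming Gym's env.P transition table rather than the project's actual code.

import numpy as np

def sync_value_iteration(env, beta=0.999, tol=1e-8):
    """Sketch: full Bellman backups over all states until the largest
    per-sweep change drops below tol, then extract the greedy policy."""
    nS, nA = env.nS, env.nA
    v = np.zeros(nS)

    def q_values(s):
        # One-step lookahead values for every action in state s.
        return [sum(p * (r + beta * v[s2] * (not done))
                    for p, s2, r, done in env.P[s][a]) for a in range(nA)]

    while True:
        delta = 0.0
        for s in range(nS):
            best = max(q_values(s))
            delta = max(delta, abs(best - v[s]))
            v[s] = best
        if delta < tol:
            break
    pi = np.array([int(np.argmax(q_values(s))) for s in range(nS)])
    return v, pi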
Example #4
    def learn_and_evaluate(self, MAP):
        workers_id = []

        # Launch the data-collecting workers (fire-and-forget; not awaited here).
        for _ in range(self.cw_num):
            simulator = FrozenLakeEnv(desc=MAP[0])
            collecting_worker.remote(self.server, simulator, self.epsilon)
        # Launch the evaluation workers and block until all of them finish.
        for _ in range(self.ew_num):
            worker_id = evaluation_worker.remote(self.server, simulator)
            workers_id.append(worker_id)

        ray.wait(workers_id, num_returns=len(workers_id))
        return ray.get(self.server.get_results.remote())
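collecting_worker and evaluation_worker are Ray remote functions defined elsewhere in the project. The sketch below shows one plausible shape for the collecting worker; the server methods get_q_table and learn used here are assumptions, not the project's actual interface.

import numpy as np
import ray

@ray.remote
def collecting_worker(server, simulator, epsilon, episodes=100):
    """Sketch: run epsilon-greedy episodes against the current Q-table and
    push the collected transitions back to the learning server."""
    for _ in range(episodes):
        q_table = ray.get(server.get_q_table.remote())   # assumed server method
        s = simulator.reset()
        done = False
        batch = []
        while not done:
            if np.random.random() < epsilon:
                a = simulator.action_space.sample()
            else:
                a = int(np.argmax(q_table[s]))
            s_next, r, done, _ = simulator.step(a)
            batch.append((s, a, r, s_next, done))
            s = s_next
        server.learn.remote(batch)                        # assumed server method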
Example #5
def main():

    ray.shutdown()
    ray.init(include_webui=False,
             ignore_reinit_error=True,
             redis_max_memory=500000000,
             object_store_memory=5000000000,
             temp_dir='~/ray_tmp')

    MAP = map_DH
    map_size = MAP[1]
    simulator = FrozenLakeEnv(desc=MAP[0])

    simulator.reset()
    epsilon = 0.3  # Fix epsilon as 0.3
    learning_rate = 0.001  # Select 0.001 or 0.1
    learning_episodes = 30000  # 30000 (DH) or 100000 (16)
    test_interval = 100
    batch_size = 100
    do_test = True

    start_time = time.time()
    distributed_ql_agent = distributed_QL_agent(simulator,
                                                epsilon,
                                                learning_rate,
                                                learning_episodes,
                                                map_size,
                                                test_interval=test_interval,
                                                batch_size=batch_size,
                                                cw_num=8,
                                                ew_num=4,
                                                do_test=do_test)
    total_rewards, q_table = distributed_ql_agent.learn_and_evaluate(MAP)
    run_time['Distributed Q-learning agent'] = time.time() - start_time
    print("Learning time:\n")
    print(run_time['Distributed Q-learning agent'])
    if do_test:
        plot_result(total_rewards, test_interval,
                    [distributed_ql_agent.agent_name])
    plot_image(q_table, MAP[0], map_size)
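plot_image visualizes the learned Q-table over the map. A small illustrative helper (not part of the project) showing how the greedy policy can be read off the final q_table:

import numpy as np

def greedy_policy_from_q(q_table, map_size):
    """Argmax action per state, reshaped to the grid.
    FrozenLake actions: 0=Left, 1=Down, 2=Right, 3=Up."""
    return np.argmax(np.asarray(q_table), axis=1).reshape(map_size, map_size)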
Example #6
            w_id = VI_worker.remote(VI_server, data_id, finish_worker,
                                    start_state, end_state)
            workers_list.append(w_id)

            # Slide the batch window forward, clamping to the total state count S.
            start_state += batch_size
            if start_state > S:
                start_state = S
            end_state += batch_size
            if end_state > S:
                end_state = S

        error = ray.get(VI_server.get_error_and_update.remote())

    v, pi = ray.get(VI_server.get_value_and_policy.remote())
    return v, pi


beta = 0.999
env = FrozenLakeEnv(desc=MAP[0], is_slippery=True)
print("Game Map:")
env.render()

start_time = time.time()
v, pi = sync_value_iteration_distributed(env, beta=beta, workers_num=4)
v_np, pi_np = np.array(v), np.array(pi)
end_time = time.time()
run_time['Sync distributed VI v2'] = end_time - start_time
print("time:", run_time['Sync distributed VI v2'])
print_results(v, pi, map_size, env, beta, 'dist_vi')
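The fragment in Example #6 is only the driver loop of the distributed value-iteration solver; the VI_server actor and VI_worker task are defined earlier in the same file. The sketch below shows the kind of actor interface the driver relies on; the field names and update logic are assumptions, not the project's code.

import numpy as np
import ray

@ray.remote
class VI_server(object):
    """Sketch: holds the global value function and policy, collects per-batch
    updates from workers, and reports the largest change of the last sweep."""
    def __init__(self, size):
        self.v_current = np.zeros(size)      # values the workers read this sweep
        self.v_new = np.zeros(size)          # values the workers write this sweep
        self.pi = np.zeros(size, dtype=int)

    def get_value_and_policy(self):
        return self.v_current, self.pi

    def update(self, start_state, end_state, values, actions):
        # A worker reports Bellman backups for its slice [start_state, end_state).
        self.v_new[start_state:end_state] = values
        self.pi[start_state:end_state] = actions

    def get_error_and_update(self):
        # Swap in the new values and return the max change for the convergence test.
        error = float(np.max(np.abs(self.v_new - self.v_current)))
        self.v_current = self.v_new.copy()
        return error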