Example #1
import numpy as np
from tqdm import tqdm

# enviroment_choose (and policy_iteration below) come from the surrounding project.
import enviroment_choose


def run_agent(env, tests_moment, gamma=1, theta=1e-8):

    global _ENVIROMENT_CLASS
    _ENVIROMENT_CLASS = enviroment_choose.env_choose(env)
    tmp = policy_iteration(env, gamma=gamma, theta=theta)
    agent_info = {
        "policy": tmp[0],
        "state_action_table": tmp[1]
    }


    '''
    TESTING
    '''
    # Get from the environment the types of test it can return
    type_test_list = _ENVIROMENT_CLASS.type_test()
    tests_result = []
    tmp_tests_result = {}
    n_test = 100
    n_episodes_test = 100

    for type_test in type_test_list:
        tmp_tests_result.update({type_test: []})


    for _ in tqdm(range(n_test)):

        test_iteration_i = {}
        for type_test in type_test_list:
            test_iteration_i.update({type_test: 0})

        # For each test we run 100 "episodes"
        for _ in range(n_episodes_test):

            done = False
            state = _ENVIROMENT_CLASS.reset_env(env)

            while not done:
                action = np.argmax(agent_info["policy"][state]) # Use the best learned action
                test_dict = _ENVIROMENT_CLASS.test_policy(env, action)
                state = test_dict["env_info"]["next_state"]
                done = test_dict["env_info"]["done"]

                for type_test in type_test_list:
                    test_iteration_i[type_test] += test_dict[type_test]

        for type_test in type_test_list:
            test_iteration_i[type_test] = test_iteration_i[type_test] / n_episodes_test

        tests_result.append(test_iteration_i)


    for type_test in tmp_tests_result:
        for test in tests_result:
            tmp_tests_result[type_test].append(test[type_test])


    return {"agent_info": agent_info, "tests_result": tmp_tests_result}

Example #2

def run_agent(env,
              tests_moment,
              n_games,
              n_episodes,
              alpha=0.1,
              gamma=0.6,
              epsilon=0.1,
              n_step=10,
              lambd=0.92):

    global _ENVIROMENT_CLASS
    global _ENV
    global _N_GAMES
    global _N_EPISODES
    global _ALPHA
    global _GAMMA
    global _EPSILON
    global _LAMBDA
    global _N_STEP
    global _ESTIMATOR
    global _TESTS_MOMENT

    _ENVIROMENT_CLASS = enviroment_choose.env_choose(env)
    _ENV = env
    _N_GAMES = n_games
    _N_EPISODES = n_episodes
    _ALPHA = alpha
    _GAMMA = gamma
    _EPSILON = epsilon
    _LAMBDA = lambd
    _N_STEP = n_step
    _ESTIMATOR = QEstimator(env=_ENV,
                            step_size=_ALPHA,
                            num_tilings=_ENVIROMENT_CLASS.num_tilings(),
                            max_size=_ENVIROMENT_CLASS.IHT_max_size(),
                            trace=True)
    _TESTS_MOMENT = tests_moment

    results = sarsa_lambda()

    tests_result_dict = {}

    for type_test in _TYPE_TEST_LIST:
        tests_result_dict.update({type_test: []})

    for type_test in tests_result_dict:
        for test in results["tests_result"]:
            tests_result_dict[type_test].append(test[type_test])

    return {
        "agent_info": results["agent_info"],
        "tests_result": tests_result_dict
    }
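The regrouping loop above reappears unchanged in the n-step variant below; it could be factored into a small helper. A sketch, where group_by_test_type is a hypothetical name rather than part of the project:

def group_by_test_type(tests_result, type_test_list):
    # Turn a list of {test_type: value} dicts into {test_type: [value, value, ...]}.
    grouped = {type_test: [] for type_test in type_test_list}
    for test in tests_result:
        for type_test in type_test_list:
            grouped[type_test].append(test[type_test])
    return grouped

# e.g. tests_result_dict = group_by_test_type(results["tests_result"], _TYPE_TEST_LIST)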

Example #3

def run_agent(env,
              tests_moment,
              n_games,
              n_episodes,
              alpha=0.1,
              gamma=0.6,
              epsilon=0.1,
              n_step=10):

    global _ENVIROMENT_CLASS
    global _ENV
    global _N_GAMES
    global _N_EPISODES
    global _ALPHA
    global _GAMMA
    global _EPSILON
    global _N_STEP
    global _TESTS_MOMENT

    _ENVIROMENT_CLASS = enviroment_choose.env_choose(env)
    _ENV = env
    _N_GAMES = n_games
    _N_EPISODES = n_episodes
    _ALPHA = alpha
    _GAMMA = gamma
    _EPSILON = epsilon
    _N_STEP = n_step
    _TESTS_MOMENT = tests_moment

    results = n_step_sarsa()
    tests_result_dict = {}

    for type_test in _TYPE_TEST_LIST:
        tests_result_dict.update({type_test: []})

    for type_test in tests_result_dict:
        for test in results["tests_result"]:
            tests_result_dict[type_test].append(test[type_test])

    return {
        "agent_info": results["agent_info"],
        "tests_result": tests_result_dict
    }

Example #4

    # Transpose: tmp[j][i] holds the j-th measurement of agent i for this test type.
    for i in range(len(tests_i_agent)):
        for j in range(len(tests_i_agent[i][test_type])):
            tmp[j][i] = tests_i_agent[i][test_type][j]

    # One 90th percentile per measurement index, computed across the agents.
    percentile_90 = []
    for i in range(len(tmp)):
        percentile_90.append(np.percentile(tmp[i], 90))

    return percentile_90
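For context, np.percentile(values, 90) returns the value below which 90% of the data falls (with linear interpolation), and the loop above amounts to taking that percentile column-wise. A self-contained sketch with hypothetical numbers:

import numpy as np

# Three agents, four measurements each (made-up data).
per_agent = [[0.2, 0.4, 0.5, 0.7],
             [0.1, 0.3, 0.6, 0.8],
             [0.3, 0.5, 0.5, 0.9]]

# One 90th percentile per measurement index, taken across the agents.
print(np.percentile(np.array(per_agent), 90, axis=0))   # [0.28 0.48 0.58 0.88]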


if __name__ == '__main__':

    env_name = input("Insert the enviroment name: ")
    env = enviroment_choose.env_choose(env_name)
    tests_moment = input("Select the test type (final, on_run, ten_perc): ")
    how_group_same_agent = input("Select how group the results of same agent \n" + \
        "(Average, 10th percentile, Quartile 1, Median, Quartile 3, 90th percentile): ")
    number_of_agent_for_type = int(
        input("Insert the number of best agents for every type of agent: "))
    base_path = "docs/" + env_name + "/" + tests_moment + "/"

    all_agent_tests = {}
    all_agent_legend = {}
    agent_type_list = [x[1] for x in os.walk(base_path)]

    for agent_type in agent_type_list[0]:

        path = base_path + agent_type