Beispiel #1
0
def try_random_agent(num_episodes=const.num_episodes_test):
    """Play episodes with randomly sampled actions and print the scores.

    For every step, one action per agent is drawn from a standard normal
    distribution and clipped to [-1, 1]. An episode ends as soon as any
    agent reports ``done``. The environment is always closed on exit, even
    if stepping it raises.

    Args:
        num_episodes: Number of episodes to play.
    """
    env = utils_env.Environment()
    try:
        brain_name = env.brain_names[0]

        for i in range(num_episodes):
            # A random policy never looks at observations, so the reset
            # result is not kept.
            env.reset(train_mode=False)
            scores = np.zeros(
                const.num_agents)  # accumulated reward (for each agent)

            while True:
                # NOTE(review): `action_size` is not defined in this function;
                # presumably a module-level name -- confirm.
                actions = np.random.randn(
                    const.num_agents,
                    action_size)  # select an action (for each agent)
                actions = np.clip(actions, -1, 1)  # all actions in [-1, 1]
                env_info = env.step(actions)[
                    brain_name]  # send all actions to the environment
                scores += env_info.rewards  # update the score (for each agent)
                if np.any(env_info.local_done):  # exit loop if episode finished
                    break

            print('Episode {}, score (max over agents): {}'.format(
                i, np.max(scores)))
            print('Episode {}, score of each agent: ['.format(i),
                  '; '.join(['{:.3f}'.format(s) for s in scores]), ']')
    finally:
        env.close()
def grid_search():
    """Evaluate every hyperparameter combination and report the best one.

    Each candidate from the parameter grid is applied to a single
    ``MyNavigator`` instance via ``set_params`` and scored with
    ``fit(index, env)``. All results are printed, followed by the best
    score / candidate and a table of all evaluated candidates (a pivot table
    when exactly two hyperparameters are searched). The environment is
    always closed on exit.
    """
    env = utils_env.Environment()
    try:
        print('=' * 30, 'Grid Search', '=' * 30)

        params = {
            'num_episodes': [200, 250, 500],  # --> 250
            'batch_size': [32, 64, 128, 256],  # --> 32
            'expl_noise': [0.1, 0.3],  # --> 0.3
            'gamma': [0.95, 0.99],  # --> 0.95
            'model_learning_rate': [0.001, 0.0001, 0.00001],  # --> 0.001
            'num_fc_actor': [128, 64, 32],  # --> 128
            'num_fc_critic': [128, 64, 32],  # --> 128
            'memory_size': [20000, 40000],  # --> 40000
        }

        grid = ParameterGrid(params)
        rf = MyNavigator()

        # -inf so the first evaluated candidate always becomes the incumbent,
        # whatever the score range is.
        best_score = float('-inf')
        best_grid = None
        best_grid_index = 0
        result_dict = {}
        key_list = list(params.keys()) + ['score']
        rows = []  # one dict per candidate; turned into a DataFrame once

        for i, g in enumerate(grid):
            rf.set_params(**g)
            score = rf.fit(i, env)
            result_dict[i] = {'score': score, 'grid': g}

            # Copy instead of aliasing `g`, so the stored grid is not
            # polluted with a 'score' entry.
            rows.append(dict(g, score=score))

            print('Evaluated candidate:', i, result_dict[i])
            # save if best (ties go to the later candidate)
            if score >= best_score:
                best_score = score
                best_grid = g
                best_grid_index = i

        for k, v in result_dict.items():
            print(k, v)

        print("==> Best score:", best_score)
        print("==> Best grid:", best_grid_index, best_grid)

        # Build the frame in one go: DataFrame.append was removed in
        # pandas 2.0 and was quadratic when called in a loop.
        df = pd.DataFrame(rows).reindex(columns=key_list)

        if len(key_list) == 3:  # better overview as pivot table (only for 2 hyperparams)
            for c in params:  # if one hyperparam is a list of values
                if df[c].dtype == object:
                    df[c] = df[c].astype(str)

            print(df.pivot(index=key_list[0], columns=key_list[1],
                           values=key_list[2]))
        else:
            print(df)
    finally:
        env.close()  # finally, close the Env
def test_default_algo(use_ref_model: bool = False):
    """Run a test pass of the DRL algorithm with a default-parameter agent.

    Args:
        use_ref_model: When true, announce it and point the agent at the
            ``'ref'`` model path before testing.
    """
    environment = utils_env.Environment()
    drl_agent = agent.DRLAgent()  # default params
    if use_ref_model:
        print('... Test the agent using reference model ...')
        drl_agent.set_model_path('ref')
    algo.DRLAlgo(environment, drl_agent).test()
Beispiel #4
0
def test_default_algo(use_ref_model: bool = False):
    """Run a test pass of the DRL algorithm with two default-parameter agents.

    The agents get the model paths ``'1'`` and ``'2'``, each prefixed with
    ``'ref_'`` when the reference model is requested.

    Args:
        use_ref_model: When true, announce it and use the ``'ref_'``-prefixed
            model paths.
    """
    environment = utils_env.Environment()

    if use_ref_model:
        print('... Test the agent using reference model ...')
        prefix = 'ref_'
    else:
        prefix = ''

    # Build the agents one after the other, each with default params.
    agents = []
    for index in (1, 2):
        member = agent.DRLAgent()
        member.set_model_path(prefix + str(index))
        agents.append(member)

    algo.DRLAlgo(environment, *agents).test()
Beispiel #5
0
def train_two_agents():
    """Train two default-parameter agents together and report the outcome.

    After training, prints the final rolling score, the first agent's memory
    length and the best score. Then plots actor/critic losses, the first
    agent's noise and the actions stored in the first agent's memory, and
    prints the mean/std of those stored actions.
    """
    environment = utils_env.Environment()

    # Both agents use default params; only the model path differs.
    first = agent.DRLAgent()
    first.set_model_path(1)
    second = agent.DRLAgent()
    second.set_model_path(2)

    trainer = algo.DRLAlgo(environment, first, second)
    history, best_e, best_score = trainer.train()

    recent_scores = history[-const.rolling_mean_N:]
    print('\nFinal score: {:.3f}'.format(np.mean(recent_scores)))
    print('Final memory length:', first.memory.get_length())
    print('Best score in {:d} episodes, avg_score: {:.3f}'.format(
        best_e, best_score))

    # Loss curves, kept next to their labels so the two lists cannot drift.
    labelled_losses = [
        ('agent_1_actor', first.actor_loss_list),
        ('agent_2_actor', second.actor_loss_list),
        ('agent_1_critic', first.critic_loss_list),
        ('agent_2_critic', second.critic_loss_list),
    ]
    loss_series = [series for _, series in labelled_losses]
    loss_labels = [label for label, _ in labelled_losses]
    utils_plot.plot_loss(loss_series, loss_labels)

    # Exploration noise recorded by the first agent.
    utils_plot.plot_scatter(first.noise_list,
                            title_text='Noise',
                            fp=const.file_path_img_noise)

    # Element 1 of every stored memory entry is plotted as the action.
    stored_actions = np.array(
        [transition[1] for transition in first.memory.memory])
    utils_plot.plot_scatter(stored_actions,
                            title_text='Actions',
                            fp=const.file_path_img_actions)

    # Per-column statistics; the first two columns are reported for agent_1,
    # the remaining ones for agent_2.
    action_mean = np.mean(stored_actions, axis=0)
    action_std = np.std(stored_actions, axis=0)
    print('Mean/std actions agent_1:', action_mean[:2], action_std[:2])
    print('Mean/std actions agent_2:', action_mean[2:], action_std[2:])
def try_random_agent():
    """Play one episode with randomly sampled actions and print the mean score.

    For every step, one action per agent is drawn from a standard normal
    distribution and clipped to [-1, 1]. The episode ends as soon as any
    agent reports ``done``. The environment is always closed on exit, even
    if stepping it raises.
    """
    env = utils_env.Environment()
    try:
        brain_name = env.brain_names[0]

        # A random policy never looks at observations, so the reset result
        # is not kept.
        env.reset(train_mode=False)
        scores = np.zeros(
            const.num_agents)  # accumulated reward (for each agent)
        while True:
            # NOTE(review): `action_size` is not defined in this function;
            # presumably a module-level name -- confirm.
            actions = np.random.randn(
                const.num_agents, action_size)  # select an action (for each agent)
            actions = np.clip(actions, -1, 1)  # all actions in [-1, 1]
            env_info = env.step(actions)[
                brain_name]  # send all actions to the environment
            scores += env_info.rewards  # update the score (for each agent)
            if np.any(env_info.local_done):  # exit loop if episode finished
                break
        print('Total score (averaged over agents) this episode: {}'.format(
            np.mean(scores)))
    finally:
        env.close()
Beispiel #7
0
def grid_search():
    """Evaluate every hyperparameter combination and report the best one.

    Each candidate from the parameter grid is applied to a single
    ``MyNavigator`` instance via ``set_params`` and scored with
    ``fit(index, env)``. All results are printed, followed by the best
    score / candidate and a table of all evaluated candidates (a pivot table
    when exactly two hyperparameters are searched). The environment is
    always closed on exit.
    """
    env = utils_env.Environment()
    try:
        print('=' * 30, 'Grid Search', '=' * 30)

        params = {
            # 'num_episodes': [5, 10],  # test
            'batch_size': [32, 64, 128],
            'use_double_dqn': [True, False],
            'eps_decay_factor': [0.99, 0.95, 0.9],
            'gamma': [0.95, 0.9],
            'update_target_each_iter': [2, 4, 8, 16],
            'model_learning_rate': [0.001, 0.0001, 0.00001],
            'model_fc1_num': [32, 20],
            'model_fc2_num': [16, 10],
            'num_episodes': [625, 700, 1000, 2000],
            'memory_size': [20000, 40000],
        }

        grid = ParameterGrid(params)
        rf = MyNavigator()

        # -inf so the first evaluated candidate always becomes the incumbent,
        # whatever the score range is.
        best_score = float('-inf')
        best_grid = None
        best_grid_index = 0
        result_dict = {}
        key_list = list(params.keys()) + ['score']
        rows = []  # one dict per candidate; turned into a DataFrame once

        for i, g in enumerate(grid):
            rf.set_params(**g)
            score = rf.fit(i, env)
            result_dict[i] = {'score': score, 'grid': g}

            # Copy instead of aliasing `g`, so the stored grid is not
            # polluted with a 'score' entry.
            rows.append(dict(g, score=score))

            print('Evaluated candidate:', i, result_dict[i])
            # save if best (ties go to the later candidate)
            if score >= best_score:
                best_score = score
                best_grid = g
                best_grid_index = i

        for k, v in result_dict.items():
            print(k, v)

        print("==> Best score:", best_score)
        print("==> Best grid:", best_grid_index, best_grid)

        # Build the frame in one go: DataFrame.append was removed in
        # pandas 2.0 and was quadratic when called in a loop.
        df = pd.DataFrame(rows).reindex(columns=key_list)

        if len(key_list) == 3:  # better overview as pivot table (only for 2 hyperparams)
            for c in params:  # if one hyperparam is a list of values
                if df[c].dtype == object:
                    df[c] = df[c].astype(str)

            print(df.pivot(index=key_list[0], columns=key_list[1],
                           values=key_list[2]))
        else:
            print(df)
    finally:
        env.close()  # finally, close the Env
def get_env_info():
    """Create an environment and call its ``get_info`` method."""
    utils_env.Environment().get_info()
def train_default_algo():
    """Train the DRL algorithm with one default-parameter agent.

    The value returned by ``train()`` is discarded.
    """
    environment = utils_env.Environment()
    # The agent is built with default params, after the environment.
    trainer = algo.DRLAlgo(environment, agent.DRLAgent())
    trainer.train()
def grid_search():
    """Evaluate every hyperparameter combination and report the best one.

    Each candidate from the parameter grid is applied to a single
    ``MyNavigator`` instance via ``set_params`` and scored with
    ``fit(index, env)``. When ``num_fc_1`` is searched, ``num_fc_2`` is
    derived as half of it for every candidate. All results are printed,
    followed by the best score / candidate and a table of all evaluated
    candidates (a pivot table when exactly two hyperparameters are
    searched). The environment is always closed on exit.
    """
    env = utils_env.Environment()
    try:
        print('=' * 30, 'Grid Search', '=' * 30)

        params = {
            'num_episodes': [500, 1000, 1500],  # --> 1500
            'max_action': [0.1, 0.5, 1.0],  # --> 1.0
            'memory_size': [100000, 200000],  # --> 200000
            'gamma': [0.95, 0.99],  # --> 0.99
            'batch_size': [64, 128, 256],  # --> 128
            'tau': [0.01, 0.05, 0.06, 0.07, 0.1],  # -->0.06
            'policy_freq': [1, 2, 3],  # --> 3
            'model_learning_rate': [0.001, 0.0001],  # --> 0.001
            'num_fc_1': [256, 128, 64, 32, 16],  # --> 256
        }

        grid = ParameterGrid(params)
        rf = MyNavigator()

        # -inf so the first evaluated candidate always becomes the incumbent,
        # whatever the score range is.
        best_score = float('-inf')
        best_grid = None
        best_grid_index = 0
        result_dict = {}
        key_list = list(params.keys()) + ['score']
        derive_fc_2 = 'num_fc_1' in key_list  # loop-invariant
        rows = []  # one dict per candidate; turned into a DataFrame once

        for i, g in enumerate(grid):
            if derive_fc_2:
                # second layer is tied to half the size of the first
                g['num_fc_2'] = g['num_fc_1'] // 2
            rf.set_params(**g)
            score = rf.fit(i, env)
            result_dict[i] = {'score': score, 'grid': g}

            # Copy instead of aliasing `g`, so the stored grid is not
            # polluted with a 'score' entry.
            rows.append(dict(g, score=score))

            print('\nEvaluated candidate:', i, result_dict[i])
            # save if best (ties go to the later candidate)
            if score >= best_score:
                best_score = score
                best_grid = g
                best_grid_index = i

        for k, v in result_dict.items():
            print(k, v)

        print("==> Best score:", best_score)
        print("==> Best grid:", best_grid_index, best_grid)

        # Build the frame in one go: DataFrame.append was removed in
        # pandas 2.0 and was quadratic when called in a loop. Searched
        # hyperparameters and the score come first, derived columns
        # (e.g. num_fc_2) after them.
        df = pd.DataFrame(rows)
        extra_columns = [c for c in df.columns if c not in key_list]
        df = df.reindex(columns=key_list + extra_columns)

        if len(key_list) == 3:  # better overview as pivot table (only for 2 hyperparams)
            for c in params:  # if one hyperparam is a list of values
                if df[c].dtype == object:
                    df[c] = df[c].astype(str)

            print(
                df.pivot(index=key_list[0],
                         columns=key_list[1],
                         values=key_list[2]))
        else:
            print(df)
    finally:
        env.close()  # finally, close the Env