Example #1
import numpy as np

# IdentityEnv, MAD3PGAgent, AgentEnvCombination, and FakeRun are project-local
# test helpers and agent classes; import them from this repository's modules.
def test_identity_discrete(agent_number, prioritized_replay):
    """
    Test if the algorithm (with a given policy)
    can learn an identity transformation (i.e. return observation as an action)
    :param model_name: (str) Name of the RL model
    """
    env = IdentityEnv(5, agent_number)

    lr = 0.02 if prioritized_replay else 0.01
    agents = [
        MAD3PGAgent(env.observation_space,
                    env.action_space,
                    idx,
                    batch_size=32,
                    buff_size=10000,
                    lr=lr,
                    num_layer=2,
                    num_units=32,
                    gamma=0.9,
                    tau=0.01,
                    prioritized_replay=prioritized_replay,
                    max_step=50000,
                    _run=FakeRun(),
                    min_val=-25,
                    max_val=0) for idx in range(agent_number)
    ]

    ag_env_comb = AgentEnvCombination(agents, env)

    target_reward = -1.0 * agent_number
    episode_rewards = ag_env_comb.train(15000, 10, target_reward)

    assert np.mean(episode_rewards[-10:]) > target_reward
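Because the test takes its agent count and replay mode as arguments, it pairs naturally with pytest parametrization. A minimal sketch, assuming pytest is the test runner (the parameter combinations shown are illustrative, not from the source):

import pytest

# Exercise the identity test over a few illustrative configurations.
@pytest.mark.parametrize('agent_number, prioritized_replay',
                         [(1, False), (1, True), (2, False), (2, True)])
def test_identity_discrete(agent_number, prioritized_replay):
    ...  # body exactly as above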
Example #2
from typing import List

# MADDPGAgent, MATD3Agent, MAD3PGAgent, MASACAgent, and AbstractAgent are the
# project's agent classes; import them from the corresponding modules.
def get_agents(_run, env, num_adversaries, good_policy, adv_policy, lr,
               batch_size, buff_size, num_units, num_layers, gamma, tau,
               priori_replay, alpha, num_episodes, max_episode_len, beta,
               policy_update_rate, critic_action_noise_stddev, entropy_coeff,
               num_atoms, min_val, max_val) -> List[AbstractAgent]:
    """
    This function generates the agents for the environment. The parameters are meant to be filled
    by sacred, and are therefore documented in the configuration function train_config.

    :returns: (List[AbstractAgent]) the list of instantiated agents
    """
    agents = []
    for agent_idx in range(num_adversaries):
        if adv_policy == 'maddpg':
            agent = MADDPGAgent(env.observation_space,
                                env.action_space,
                                agent_idx,
                                batch_size,
                                buff_size,
                                lr,
                                num_layers,
                                num_units,
                                gamma,
                                tau,
                                priori_replay,
                                alpha=alpha,
                                max_step=num_episodes * max_episode_len,
                                initial_beta=beta,
                                _run=_run)
        elif adv_policy == 'matd3':
            agent = MATD3Agent(
                env.observation_space,
                env.action_space,
                agent_idx,
                batch_size,
                buff_size,
                lr,
                num_layers,
                num_units,
                gamma,
                tau,
                priori_replay,
                alpha=alpha,
                max_step=num_episodes * max_episode_len,
                initial_beta=beta,
                policy_update_freq=policy_update_rate,
                target_policy_smoothing_eps=critic_action_noise_stddev,
                _run=_run)
        elif adv_policy == 'mad3pg':
            agent = MAD3PGAgent(env.observation_space,
                                env.action_space,
                                agent_idx,
                                batch_size,
                                buff_size,
                                lr,
                                num_layers,
                                num_units,
                                gamma,
                                tau,
                                priori_replay,
                                alpha=alpha,
                                max_step=num_episodes * max_episode_len,
                                initial_beta=beta,
                                num_atoms=num_atoms,
                                min_val=min_val,
                                max_val=max_val,
                                _run=_run)
        elif adv_policy == 'masac':
            agent = MASACAgent(env.observation_space,
                               env.action_space,
                               agent_idx,
                               batch_size,
                               buff_size,
                               lr,
                               num_layers,
                               num_units,
                               gamma,
                               tau,
                               priori_replay,
                               alpha=alpha,
                               max_step=num_episodes * max_episode_len,
                               initial_beta=beta,
                               entropy_coeff=entropy_coeff,
                               policy_update_freq=policy_update_rate,
                               _run=_run)
        else:
            raise RuntimeError(f'Unknown adversary policy: {adv_policy}')
        agents.append(agent)
    # Good agents: every index from num_adversaries up to env.n - 1 uses
    # the good_policy algorithm.
    for agent_idx in range(num_adversaries, env.n):
        if good_policy == 'maddpg':
            agent = MADDPGAgent(env.observation_space,
                                env.action_space,
                                agent_idx,
                                batch_size,
                                buff_size,
                                lr,
                                num_layers,
                                num_units,
                                gamma,
                                tau,
                                priori_replay,
                                alpha=alpha,
                                max_step=num_episodes * max_episode_len,
                                initial_beta=beta,
                                _run=_run)
        elif good_policy == 'matd3':
            agent = MATD3Agent(
                env.observation_space,
                env.action_space,
                agent_idx,
                batch_size,
                buff_size,
                lr,
                num_layers,
                num_units,
                gamma,
                tau,
                priori_replay,
                alpha=alpha,
                max_step=num_episodes * max_episode_len,
                initial_beta=beta,
                policy_update_freq=policy_update_rate,
                target_policy_smoothing_eps=critic_action_noise_stddev,
                _run=_run)
        elif good_policy == 'mad3pg':
            agent = MAD3PGAgent(env.observation_space,
                                env.action_space,
                                agent_idx,
                                batch_size,
                                buff_size,
                                lr,
                                num_layers,
                                num_units,
                                gamma,
                                tau,
                                priori_replay,
                                alpha=alpha,
                                max_step=num_episodes * max_episode_len,
                                initial_beta=beta,
                                num_atoms=num_atoms,
                                min_val=min_val,
                                max_val=max_val,
                                _run=_run)
        elif good_policy == 'masac':
            agent = MASACAgent(env.observation_space,
                               env.action_space,
                               agent_idx,
                               batch_size,
                               buff_size,
                               lr,
                               num_layers,
                               num_units,
                               gamma,
                               tau,
                               priori_replay,
                               alpha=alpha,
                               max_step=num_episodes * max_episode_len,
                               initial_beta=beta,
                               entropy_coeff=entropy_coeff,
                               policy_update_freq=policy_update_rate,
                               _run=_run)
        else:
            raise RuntimeError(f'Unknown good policy: {good_policy}')
        agents.append(agent)
    print('Using good policy {} and adv policy {}'.format(
        good_policy, adv_policy))
    return agents
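The docstring says the parameters "are meant to be filled by sacred", i.e. get_agents is a captured function whose arguments come from the experiment config. A minimal, self-contained sketch of that mechanism (the experiment name and config values here are illustrative, not from the source):

from sacred import Experiment

ex = Experiment('multiagent_training')  # hypothetical name

@ex.config
def train_config():
    # Illustrative subset; the real train_config defines every
    # parameter that get_agents expects.
    lr = 1e-2
    batch_size = 1024

@ex.capture
def report(_run, lr, batch_size):
    # sacred fills lr and batch_size from the config and injects _run,
    # exactly as it does for get_agents in this example.
    print(lr, batch_size)

@ex.main
def main():
    report()

if __name__ == '__main__':
    ex.run()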
Example #3
    # Nested helper: this function is defined inside the training routine and
    # closes over agent_idx and the hyperparameters of the enclosing scope.
    def create_agent(alg_name):
        if alg_name == 'maddpg':
            ret_agent = MADDPGAgent(env.observation_space,
                                    env.action_space,
                                    agent_idx,
                                    batch_size,
                                    buff_size,
                                    lr,
                                    num_layers,
                                    num_units,
                                    gamma,
                                    tau,
                                    priori_replay,
                                    alpha=alpha,
                                    max_step=num_episodes * max_episode_len,
                                    initial_beta=beta,
                                    _run=_run)
        elif alg_name == 'matd3':
            ret_agent = MATD3Agent(
                env.observation_space,
                env.action_space,
                agent_idx,
                batch_size,
                buff_size,
                lr,
                num_layers,
                num_units,
                gamma,
                tau,
                priori_replay,
                alpha=alpha,
                max_step=num_episodes * max_episode_len,
                initial_beta=beta,
                policy_update_freq=policy_update_rate,
                target_policy_smoothing_eps=critic_action_noise_stddev,
                _run=_run)
        elif alg_name == 'mad3pg':
            ret_agent = MAD3PGAgent(env.observation_space,
                                    env.action_space,
                                    agent_idx,
                                    batch_size,
                                    buff_size,
                                    lr,
                                    num_layers,
                                    num_units,
                                    gamma,
                                    tau,
                                    priori_replay,
                                    alpha=alpha,
                                    max_step=num_episodes * max_episode_len,
                                    initial_beta=beta,
                                    num_atoms=num_atoms,
                                    min_val=min_val,
                                    max_val=max_val,
                                    _run=_run)
        elif alg_name == 'masac':
            ret_agent = MASACAgent(env.observation_space,
                                   env.action_space,
                                   agent_idx,
                                   batch_size,
                                   buff_size,
                                   lr,
                                   num_layers,
                                   num_units,
                                   gamma,
                                   tau,
                                   priori_replay,
                                   alpha=alpha,
                                   max_step=num_episodes * max_episode_len,
                                   initial_beta=beta,
                                   entropy_coeff=entropy_coeff,
                                   policy_update_freq=policy_update_rate,
                                   _run=_run)
        else:
            raise RuntimeError(f'Unknown algorithm: {alg_name}')

        return ret_agent
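Since create_agent closes over agent_idx and the hyperparameters of its enclosing function, a typical call site just varies agent_idx and the algorithm name. A hypothetical driver loop in that enclosing scope (variable names mirror Example #2; this exact loop is not in the source):

agents = []
for agent_idx in range(env.n):
    # Adversaries use adv_policy; the remaining agents use good_policy.
    agents.append(create_agent(adv_policy if agent_idx < num_adversaries
                               else good_policy))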
Example #4
def test_save_load():
    """
    Tests saving and loading for two agents.
    """
    fp = '/tmp/unittestmaddpg'
    env = IdentityEnv(5, 2)
    agents = [
        MAD3PGAgent(env.observation_space,
                    env.action_space,
                    idx,
                    batch_size=32,
                    buff_size=10000,
                    lr=0.01,
                    num_layer=2,
                    num_units=32,
                    gamma=0.9,
                    tau=0.01,
                    prioritized_replay=True,
                    max_step=5000) for idx in range(2)
    ]

    for idx, agent in enumerate(agents):
        agent.save(fp + str(idx))

    load_agents = [
        MAD3PGAgent(env.observation_space,
                    env.action_space,
                    idx,
                    batch_size=32,
                    buff_size=10000,
                    lr=0.01,
                    num_layer=2,
                    num_units=32,
                    gamma=0.9,
                    tau=0.01,
                    prioritized_replay=True,
                    max_step=5000) for idx in range(2)
    ]

    for idx, agent in enumerate(load_agents):
        agent.load(fp + str(idx))

    def check_for_equal_arrays(list_1, list_2):
        # Element-wise comparison of two lists of weight arrays.
        return all((el1 == el2).all() for el1, el2 in zip(list_1, list_2))

    # Saved and reloaded weights must match for every network of every agent.
    for agent_idx, (loaded, original) in enumerate(zip(load_agents, agents)):
        for net in ('critic', 'critic_target', 'policy', 'policy_target'):
            assert check_for_equal_arrays(
                getattr(loaded, net).model.get_weights(),
                getattr(original, net).model.get_weights()), \
                f'{net} weights differ for agent {agent_idx}'
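The test above writes its checkpoints to a fixed path under /tmp, so repeated or parallel runs can collide. A sketch of the same setup using pytest's tmp_path fixture (assuming pytest; only the path handling changes):

def test_save_load_isolated(tmp_path):
    """Same as test_save_load, but in a per-test temporary directory."""
    fp = str(tmp_path / 'unittestmaddpg')  # tmp_path is a pathlib.Path
    # ... the rest of the test body is unchanged ...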
Example #5
def create_agent(alg_name, index: int, env: MultiAgentEnv, exp):
    conf = exp.config
    if alg_name == 'maddpg':
        ret_agent = MADDPGAgent(env.observation_space,
                                env.action_space,
                                index,
                                conf.batch_size,
                                conf.buff_size,
                                conf.lr,
                                conf.num_layers,
                                conf.num_units,
                                conf.gamma,
                                conf.tau,
                                conf.priori_replay,
                                alpha=conf.alpha,
                                max_step=conf.num_episodes *
                                conf.max_episode_len,
                                initial_beta=conf.beta,
                                _run=exp)
    elif alg_name == 'matd3':
        ret_agent = MATD3Agent(
            env.observation_space,
            env.action_space,
            index,
            conf.batch_size,
            conf.buff_size,
            conf.lr,
            conf.num_layers,
            conf.num_units,
            conf.gamma,
            conf.tau,
            conf.priori_replay,
            alpha=conf.alpha,
            max_step=conf.num_episodes * conf.max_episode_len,
            initial_beta=conf.beta,
            policy_update_freq=conf.policy_update_rate,
            target_policy_smoothing_eps=conf.critic_action_noise_stddev,
            _run=exp)
    elif alg_name == 'mad3pg':
        ret_agent = MAD3PGAgent(env.observation_space,
                                env.action_space,
                                index,
                                conf.batch_size,
                                conf.buff_size,
                                conf.lr,
                                conf.num_layers,
                                conf.num_units,
                                conf.gamma,
                                conf.tau,
                                conf.priori_replay,
                                alpha=conf.alpha,
                                max_step=conf.num_episodes *
                                conf.max_episode_len,
                                initial_beta=conf.beta,
                                num_atoms=conf.num_atoms,
                                min_val=conf.min_val,
                                max_val=conf.max_val,
                                _run=exp)
    elif alg_name == 'masac':
        ret_agent = MASACAgent(env.observation_space,
                               env.action_space,
                               index,
                               conf.batch_size,
                               conf.buff_size,
                               conf.lr,
                               conf.num_layers,
                               conf.num_units,
                               conf.gamma,
                               conf.tau,
                               conf.priori_replay,
                               alpha=conf.alpha,
                               max_step=conf.num_episodes *
                               conf.max_episode_len,
                               initial_beta=conf.beta,
                               entropy_coeff=conf.entropy_coeff,
                               policy_update_freq=conf.policy_update_rate,
                               _run=exp)
    else:
        raise RuntimeError(f'Invalid Class - {alg_name} is unknown')

    return ret_agent
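A hypothetical call site, assuming env.n gives the number of agent slots as in Example #2 and exp carries the experiment config:

# Illustrative only: one MAD3PG agent per environment slot.
agents = [create_agent('mad3pg', index, env, exp) for index in range(env.n)]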