Example #1
def get_ac_agent():
    # Build separate actor and critic networks, then wrap them in a single
    # multi-head model that shares one optimizer option.  The model and algorithm
    # classes are assumed to be imported from MARO's RL toolkit, and agent_config
    # is assumed to be defined at module level.
    actor_net = FullyConnectedBlock(**agent_config["model"]["actor"])
    critic_net = FullyConnectedBlock(**agent_config["model"]["critic"])
    ac_model = SimpleMultiHeadModel(
        {
            "actor": actor_net,
            "critic": critic_net
        },
        optim_option=agent_config["optimization"],
    )
    return ActorCritic(ac_model,
                       ActorCriticConfig(**agent_config["hyper_params"]))
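Example #1 reads everything from a module-level agent_config dictionary. Below is a minimal sketch of its expected shape, inferred only from how it is indexed above; the concrete values and the key names inside the nested dictionaries are illustrative assumptions, not the toolkit's required schema:

agent_config = {
    "model": {
        # per-network kwargs forwarded to FullyConnectedBlock (placeholder values)
        "actor": {"input_dim": 50, "output_dim": 10, "activation": nn.Tanh},
        "critic": {"input_dim": 50, "output_dim": 1, "activation": nn.LeakyReLU}
    },
    # passed straight through as SimpleMultiHeadModel's optim_option
    "optimization": ...,
    # kwargs forwarded to ActorCriticConfig, e.g. the discount factor
    "hyper_params": {"reward_discount": 0.9}
}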
Example #2
def create_po_agents(agent_id_list, config):
    # Create one policy-optimization agent per agent id.  Depending on config.type,
    # each agent runs either actor-critic or plain policy gradient.  NNStack,
    # LearningModel, OptimizerOptions and the algorithm classes are assumed to be
    # imported from MARO's RL toolkit; set_seeds from its utilities.
    input_dim, num_actions = config.input_dim, config.num_actions
    set_seeds(config.seed)
    agent_dict = {}
    for agent_id in agent_id_list:
        # Policy (actor) network, used by both algorithm variants.
        actor_net = NNStack(
            "actor",
            FullyConnectedBlock(
                input_dim=input_dim,
                output_dim=num_actions,
                activation=nn.Tanh,
                is_head=True,
                **config.actor_model
            )
        )

        if config.type == "actor_critic":
            # Actor-critic additionally needs a state-value (critic) network with a scalar output.
            critic_net = NNStack(
                "critic",
                FullyConnectedBlock(
                    input_dim=input_dim,
                    output_dim=1,
                    activation=nn.LeakyReLU,
                    is_head=True,
                    **config.critic_model
                )
            )

            hyper_params = config.actor_critic_hyper_parameters
            hyper_params.update({"reward_discount": config.reward_discount})
            # Separate optimizers for the actor and critic stacks.
            learning_model = LearningModel(
                actor_net, critic_net,
                optimizer_options={
                    "actor": OptimizerOptions(cls=Adam, params=config.actor_optimizer),
                    "critic": OptimizerOptions(cls=RMSprop, params=config.critic_optimizer)
                }
            )
            algorithm = ActorCritic(
                learning_model, ActorCriticConfig(critic_loss_func=nn.SmoothL1Loss(), **hyper_params)
            )
        else:
            # Plain policy gradient uses the actor network only.
            learning_model = LearningModel(
                actor_net,
                optimizer_options=OptimizerOptions(cls=Adam, params=config.actor_optimizer)
            )
            algorithm = PolicyGradient(learning_model, PolicyOptimizationConfig(config.reward_discount))

        agent_dict[agent_id] = POAgent(name=agent_id, algorithm=algorithm)

    return agent_dict
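The config argument above is read by attribute access. Here is a hypothetical shape, built with types.SimpleNamespace purely for illustration; every field name comes from the function body, while the values are placeholders:

from types import SimpleNamespace

config = SimpleNamespace(
    seed=1024,
    type="actor_critic",                      # any other value falls back to plain policy gradient
    input_dim=50,
    num_actions=10,
    reward_discount=0.9,
    actor_model={},                           # extra FullyConnectedBlock kwargs for the actor
    critic_model={},                          # extra FullyConnectedBlock kwargs for the critic
    actor_optimizer={"lr": 0.001},            # params handed to Adam via OptimizerOptions
    critic_optimizer={"lr": 0.001},           # params handed to RMSprop via OptimizerOptions
    actor_critic_hyper_parameters={}          # extra ActorCriticConfig kwargs
)
agents = create_po_agents(["agent_0", "agent_1"], config)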
Example #3
def create_dqn_agents(agent_id_list, config):
    # Create one DQN agent per agent id, each with its own Q-network, experience
    # pool and training-loop parameters.  NNStack, LearningModel, OptimizerOptions,
    # DQN, DQNAgent and ColumnBasedStore are assumed to be imported from MARO's RL toolkit.
    num_actions = config.algorithm.num_actions
    set_seeds(config.seed)
    agent_dict = {}
    for agent_id in agent_id_list:
        # Q-value network mapping a state vector to one value per action.
        q_net = NNStack(
            "q_value",
            FullyConnectedBlock(
                input_dim=config.algorithm.input_dim,
                output_dim=num_actions,
                activation=nn.LeakyReLU,
                is_head=True,
                **config.algorithm.model
            )
        )
        learning_model = LearningModel(
            q_net,
            optimizer_options=OptimizerOptions(cls=RMSprop, params=config.algorithm.optimizer)
        )
        algorithm = DQN(
            learning_model,
            DQNConfig(**config.algorithm.hyper_params, loss_cls=nn.SmoothL1Loss)
        )
        agent_dict[agent_id] = DQNAgent(
            agent_id, algorithm, ColumnBasedStore(**config.experience_pool),
            **config.training_loop_parameters
        )

    return agent_dict
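The DQN factory nests its settings one level deeper under config.algorithm. A hypothetical sketch along the same lines; the experience-pool and training-loop dictionaries are left empty here because the example does not show their keys, so they would need to be filled in to match ColumnBasedStore's and DQNAgent's actual signatures:

from types import SimpleNamespace

config = SimpleNamespace(
    seed=1024,
    algorithm=SimpleNamespace(
        input_dim=50,
        num_actions=10,
        model={},                               # extra FullyConnectedBlock kwargs
        optimizer={"lr": 0.05},                 # params handed to RMSprop via OptimizerOptions
        hyper_params={"reward_discount": 0.9}   # kwargs forwarded to DQNConfig
    ),
    experience_pool={},                         # ColumnBasedStore kwargs (not shown above)
    training_loop_parameters={}                 # extra DQNAgent kwargs (not shown above)
)
agents = create_dqn_agents(["agent_0", "agent_1"], config)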
Example #4
def get_dqn_agent():
    # Single Q-network wrapped in a multi-head model; agent_config is assumed to be
    # defined at module level, as in Example #1.
    q_model = SimpleMultiHeadModel(
        FullyConnectedBlock(**agent_config["model"]),
        optim_option=agent_config["optimization"])
    return DQN(q_model, DQNConfig(**agent_config["hyper_params"]))
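Example #4 relies on the same module-level agent_config pattern as Example #1, but with a single "model" entry because DQN needs only one Q-network. A hypothetical shape, with illustrative key names and values:

agent_config = {
    "model": {"input_dim": 50, "output_dim": 10, "activation": nn.LeakyReLU},  # FullyConnectedBlock kwargs
    "optimization": ...,                          # SimpleMultiHeadModel's optim_option
    "hyper_params": {"reward_discount": 0.9}      # kwargs forwarded to DQNConfig
}

agent = get_dqn_agent()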