def get_ac_agent():
    actor_net = FullyConnectedBlock(**agent_config["model"]["actor"])
    critic_net = FullyConnectedBlock(**agent_config["model"]["critic"])
    ac_model = SimpleMultiHeadModel(
        {"actor": actor_net, "critic": critic_net},
        optim_option=agent_config["optimization"],
    )
    return ActorCritic(ac_model, ActorCriticConfig(**agent_config["hyper_params"]))
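
# A minimal sketch of the agent_config dict that get_ac_agent() above reads from.
# Only the top-level keys ("model" with "actor"/"critic" sub-configs, "optimization",
# "hyper_params") follow from the lookups above; every inner field name and value
# below is a hypothetical placeholder and depends on the actual signatures of
# FullyConnectedBlock, the optimizer option type and ActorCriticConfig in use.
agent_config = {
    "model": {
        "actor": {"input_dim": 32, "output_dim": 10, "hidden_dims": [64, 32]},   # hypothetical
        "critic": {"input_dim": 32, "output_dim": 1, "hidden_dims": [64, 32]}    # hypothetical
    },
    "optimization": {"actor": {"lr": 0.001}, "critic": {"lr": 0.001}},  # hypothetical optimizer settings
    "hyper_params": {"reward_discount": 0.9}                            # hypothetical ActorCriticConfig fields
}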
def create_po_agents(agent_id_list, config):
    input_dim, num_actions = config.input_dim, config.num_actions
    set_seeds(config.seed)
    agent_dict = {}
    for agent_id in agent_id_list:
        # Policy (actor) network, shared by both algorithm variants.
        actor_net = NNStack(
            "actor",
            FullyConnectedBlock(
                input_dim=input_dim,
                output_dim=num_actions,
                activation=nn.Tanh,
                is_head=True,
                **config.actor_model
            )
        )

        if config.type == "actor_critic":
            # Value (critic) network with a single scalar output.
            critic_net = NNStack(
                "critic",
                FullyConnectedBlock(
                    input_dim=config.input_dim,
                    output_dim=1,
                    activation=nn.LeakyReLU,
                    is_head=True,
                    **config.critic_model
                )
            )
            hyper_params = config.actor_critic_hyper_parameters
            hyper_params.update({"reward_discount": config.reward_discount})
            learning_model = LearningModel(
                actor_net, critic_net,
                optimizer_options={
                    "actor": OptimizerOptions(cls=Adam, params=config.actor_optimizer),
                    "critic": OptimizerOptions(cls=RMSprop, params=config.critic_optimizer)
                }
            )
            algorithm = ActorCritic(
                learning_model,
                ActorCriticConfig(critic_loss_func=nn.SmoothL1Loss(), **hyper_params)
            )
        else:
            # Plain policy gradient: actor network only.
            learning_model = LearningModel(
                actor_net,
                optimizer_options=OptimizerOptions(cls=Adam, params=config.actor_optimizer)
            )
            algorithm = PolicyGradient(learning_model, PolicyOptimizationConfig(config.reward_discount))

        agent_dict[agent_id] = POAgent(name=agent_id, algorithm=algorithm)

    return agent_dict
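
# A usage sketch for create_po_agents(). The environment handle and config layout are
# assumptions: "config" must be a dot-accessible object exposing the attributes read
# above (input_dim, num_actions, seed, type, actor_model, actor_optimizer,
# reward_discount, plus critic_model / critic_optimizer / actor_critic_hyper_parameters
# when type == "actor_critic"), e.g. a dot-accessible view of a YAML config file.
agent_id_list = [str(agent_id) for agent_id in env.agent_idx_list]  # hypothetical env attribute
po_agents = create_po_agents(agent_id_list, config)                 # hypothetical config object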
def create_dqn_agents(agent_id_list, config):
    num_actions = config.algorithm.num_actions
    set_seeds(config.seed)
    agent_dict = {}
    for agent_id in agent_id_list:
        # Q-value network that maps a state vector to per-action values.
        q_net = NNStack(
            "q_value",
            FullyConnectedBlock(
                input_dim=config.algorithm.input_dim,
                output_dim=num_actions,
                activation=nn.LeakyReLU,
                is_head=True,
                **config.algorithm.model
            )
        )
        learning_model = LearningModel(
            q_net,
            optimizer_options=OptimizerOptions(cls=RMSprop, params=config.algorithm.optimizer)
        )
        algorithm = DQN(
            learning_model,
            DQNConfig(**config.algorithm.hyper_params, loss_cls=nn.SmoothL1Loss)
        )
        # Each agent owns its own experience pool and training-loop settings.
        agent_dict[agent_id] = DQNAgent(
            agent_id, algorithm, ColumnBasedStore(**config.experience_pool),
            **config.training_loop_parameters
        )

    return agent_dict
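
# A minimal, self-contained sketch of the nested config that create_dqn_agents()
# expects, built with SimpleNamespace purely for illustration; a real setup would
# typically load an equivalent structure from a config file. All field names inside
# "model", "optimizer", "hyper_params", "experience_pool" and
# "training_loop_parameters" are hypothetical placeholders.
from types import SimpleNamespace

config = SimpleNamespace(
    seed=1024,
    algorithm=SimpleNamespace(
        input_dim=32,                            # state vector size (hypothetical)
        num_actions=10,                          # action space size (hypothetical)
        model={"hidden_dims": [64, 32]},         # extra FullyConnectedBlock kwargs (hypothetical)
        optimizer={"lr": 0.05},                  # RMSprop parameters (hypothetical)
        hyper_params={"reward_discount": 0.9}    # DQNConfig fields (hypothetical)
    ),
    experience_pool={"capacity": -1},                              # ColumnBasedStore kwargs (hypothetical)
    training_loop_parameters={"min_experiences_to_train": 1024}    # DQNAgent kwargs (hypothetical)
)
dqn_agents = create_dqn_agents(["0", "1", "2"], config)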
def get_dqn_agent():
    q_model = SimpleMultiHeadModel(
        FullyConnectedBlock(**agent_config["model"]),
        optim_option=agent_config["optimization"]
    )
    return DQN(q_model, DQNConfig(**agent_config["hyper_params"]))
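
# A minimal usage sketch: build the single DQN agent defined above and query it for an
# action. The environment handle and the choose_action call are assumptions about the
# surrounding RL toolkit and are not defined in this snippet.
agent = get_dqn_agent()
state = env.get_state()               # hypothetical environment call
action = agent.choose_action(state)   # assumed agent API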