import argparse

import gym
import numpy as np
import torch.optim as optim

# NOTE: MLP, FlattenMLP, TanhGaussianDistParams, Agent, SACfDAgent,
# LunarLanderContinuousHER, hyper_params, and device come from the
# surrounding project modules; their repository-specific imports are
# assumed to be in place and are not reproduced here.


def get(env: gym.Env, args: argparse.Namespace):
    """Create and return a SAC agent for training or test.

    Args:
        env (gym.Env): OpenAI Gym environment with continuous action space
        args (argparse.Namespace): arguments including training settings

    Returns:
        Agent: SAC agent built from hyper_params, with HER if enabled

    """
    state_dim = env.observation_space.shape[0]
    action_dim = env.action_space.shape[0]
    if hyper_params["USE_HER"]:
        state_dim *= 2

    hidden_sizes_actor = hyper_params["NETWORK"]["ACTOR_HIDDEN_SIZES"]
    hidden_sizes_vf = hyper_params["NETWORK"]["VF_HIDDEN_SIZES"]
    hidden_sizes_qf = hyper_params["NETWORK"]["QF_HIDDEN_SIZES"]

    # target entropy
    target_entropy = -np.prod((action_dim,)).item()  # heuristic

    # create actor
    actor = TanhGaussianDistParams(
        input_size=state_dim,
        output_size=action_dim,
        hidden_sizes=hidden_sizes_actor,
    ).to(device)

    # create v_critic
    vf = MLP(
        input_size=state_dim, output_size=1, hidden_sizes=hidden_sizes_vf
    ).to(device)
    vf_target = MLP(
        input_size=state_dim, output_size=1, hidden_sizes=hidden_sizes_vf
    ).to(device)
    vf_target.load_state_dict(vf.state_dict())

    # create q_critic
    qf_1 = FlattenMLP(
        input_size=state_dim + action_dim,
        output_size=1,
        hidden_sizes=hidden_sizes_qf,
    ).to(device)
    qf_2 = FlattenMLP(
        input_size=state_dim + action_dim,
        output_size=1,
        hidden_sizes=hidden_sizes_qf,
    ).to(device)

    # create optimizers
    actor_optim = optim.Adam(
        actor.parameters(),
        lr=hyper_params["LR_ACTOR"],
        weight_decay=hyper_params["WEIGHT_DECAY"],
    )
    vf_optim = optim.Adam(
        vf.parameters(),
        lr=hyper_params["LR_VF"],
        weight_decay=hyper_params["WEIGHT_DECAY"],
    )
    qf_1_optim = optim.Adam(
        qf_1.parameters(),
        lr=hyper_params["LR_QF1"],
        weight_decay=hyper_params["WEIGHT_DECAY"],
    )
    qf_2_optim = optim.Adam(
        qf_2.parameters(),
        lr=hyper_params["LR_QF2"],
        weight_decay=hyper_params["WEIGHT_DECAY"],
    )

    # make tuples to create an agent
    models = (actor, vf, vf_target, qf_1, qf_2)
    optims = (actor_optim, vf_optim, qf_1_optim, qf_2_optim)

    # HER
    her = LunarLanderContinuousHER() if hyper_params["USE_HER"] else None

    # create an agent
    return Agent(env, args, hyper_params, models, optims, target_entropy, her)
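
# A minimal sketch of the hyper_params entries read by get() above. Only the
# key names are taken from the function body; the values here are illustrative
# assumptions, not the repository's tuned settings.
_example_hyper_params = {
    "USE_HER": False,
    "NETWORK": {
        "ACTOR_HIDDEN_SIZES": [256, 256],
        "VF_HIDDEN_SIZES": [256, 256],
        "QF_HIDDEN_SIZES": [256, 256],
    },
    "LR_ACTOR": 3e-4,
    "LR_VF": 3e-4,
    "LR_QF1": 3e-4,
    "LR_QF2": 3e-4,
    "WEIGHT_DECAY": 0.0,  # passed to every Adam optimizer above
}
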
def run(env: gym.Env, args: argparse.Namespace, state_dim: int, action_dim: int):
    """Run training or test.

    Args:
        env (gym.Env): OpenAI Gym environment with continuous action space
        args (argparse.Namespace): arguments including training settings
        state_dim (int): dimension of states
        action_dim (int): dimension of actions

    """
    hidden_sizes_actor = [256, 256]
    hidden_sizes_vf = [256, 256]
    hidden_sizes_qf = [256, 256]

    # target entropy
    target_entropy = -np.prod((action_dim,)).item()  # heuristic

    # create actor
    actor = TanhGaussianDistParams(
        input_size=state_dim,
        output_size=action_dim,
        hidden_sizes=hidden_sizes_actor,
    ).to(device)

    # create v_critic
    vf = MLP(
        input_size=state_dim, output_size=1, hidden_sizes=hidden_sizes_vf
    ).to(device)
    vf_target = MLP(
        input_size=state_dim, output_size=1, hidden_sizes=hidden_sizes_vf
    ).to(device)
    vf_target.load_state_dict(vf.state_dict())

    # create q_critic
    qf_1 = FlattenMLP(
        input_size=state_dim + action_dim,
        output_size=1,
        hidden_sizes=hidden_sizes_qf,
    ).to(device)
    qf_2 = FlattenMLP(
        input_size=state_dim + action_dim,
        output_size=1,
        hidden_sizes=hidden_sizes_qf,
    ).to(device)

    # create optimizers
    actor_optim = optim.Adam(
        actor.parameters(),
        lr=hyper_params["LR_ACTOR"],
        weight_decay=hyper_params["LAMBDA2"],
    )
    vf_optim = optim.Adam(
        vf.parameters(),
        lr=hyper_params["LR_VF"],
        weight_decay=hyper_params["LAMBDA2"],
    )
    qf_1_optim = optim.Adam(
        qf_1.parameters(),
        lr=hyper_params["LR_QF1"],
        weight_decay=hyper_params["LAMBDA2"],
    )
    qf_2_optim = optim.Adam(
        qf_2.parameters(),
        lr=hyper_params["LR_QF2"],
        weight_decay=hyper_params["LAMBDA2"],
    )

    # make tuples to create an agent
    models = (actor, vf, vf_target, qf_1, qf_2)
    optims = (actor_optim, vf_optim, qf_1_optim, qf_2_optim)

    # create an agent
    agent = SACfDAgent(env, args, hyper_params, models, optims, target_entropy)

    # run
    if args.test:
        agent.test()
    else:
        agent.train()
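
# A minimal sketch of how run() might be invoked from an entry script. The
# LunarLanderContinuous-v2 environment is an assumption suggested by the HER
# wrapper used in get() above; the --test flag mirrors the args.test check in
# run(), and everything else here is illustrative.
if __name__ == "__main__":
    parser = argparse.ArgumentParser(description="SAC example runner")
    parser.add_argument("--test", action="store_true", help="run evaluation only")
    example_args = parser.parse_args()

    example_env = gym.make("LunarLanderContinuous-v2")
    run(
        example_env,
        example_args,
        example_env.observation_space.shape[0],
        example_env.action_space.shape[0],
    )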