def run(env: gym.Env, args: argparse.Namespace, state_dim: int, action_dim: int):
    """Run training or test.

    Args:
        env (gym.Env): OpenAI Gym environment with continuous action space
        args (argparse.Namespace): arguments including training settings
        state_dim (int): dimension of states
        action_dim (int): dimension of actions

    """
    hidden_sizes_actor = [400, 300]
    hidden_sizes_critic = [400, 300]

    # create actor
    actor = MLP(
        input_size=state_dim,
        output_size=action_dim,
        hidden_sizes=hidden_sizes_actor,
        output_activation=torch.tanh,
    ).to(device)
    actor_target = MLP(
        input_size=state_dim,
        output_size=action_dim,
        hidden_sizes=hidden_sizes_actor,
        output_activation=torch.tanh,
    ).to(device)
    actor_target.load_state_dict(actor.state_dict())

    # create critics
    critic1 = FlattenMLP(
        input_size=state_dim + action_dim,
        output_size=1,
        hidden_sizes=hidden_sizes_critic,
    ).to(device)
    critic2 = FlattenMLP(
        input_size=state_dim + action_dim,
        output_size=1,
        hidden_sizes=hidden_sizes_critic,
    ).to(device)
    critic_target1 = FlattenMLP(
        input_size=state_dim + action_dim,
        output_size=1,
        hidden_sizes=hidden_sizes_critic,
    ).to(device)
    critic_target2 = FlattenMLP(
        input_size=state_dim + action_dim,
        output_size=1,
        hidden_sizes=hidden_sizes_critic,
    ).to(device)
    critic_target1.load_state_dict(critic1.state_dict())
    critic_target2.load_state_dict(critic2.state_dict())

    # concat critic parameters to use one optimizer
    critic_parameters = list(critic1.parameters()) + list(critic2.parameters())

    # create optimizers
    actor_optim = optim.Adam(
        actor.parameters(),
        lr=hyper_params["LR_ACTOR"],
        weight_decay=hyper_params["WEIGHT_DECAY"],
    )
    critic_optim = optim.Adam(
        critic_parameters,
        lr=hyper_params["LR_CRITIC"],
        weight_decay=hyper_params["WEIGHT_DECAY"],
    )

    # noise instances to add randomness to actions
    exploration_noise = GaussianNoise(
        action_dim,
        hyper_params["EXPLORATION_NOISE"],
        hyper_params["EXPLORATION_NOISE"],
    )
    target_policy_noise = GaussianNoise(
        action_dim,
        hyper_params["TARGET_POLICY_NOISE"],
        hyper_params["TARGET_POLICY_NOISE"],
    )

    # make tuples to create an agent
    models = (actor, actor_target, critic1, critic2, critic_target1, critic_target2)
    optims = (actor_optim, critic_optim)

    # create an agent
    agent = Agent(
        env, args, hyper_params, models, optims, exploration_noise, target_policy_noise
    )

    # run
    if args.test:
        agent.test()
    else:
        agent.train()
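
# Hedged sketch of what `FlattenMLP` is assumed to do for the critics above:
# concatenate its inputs (state, action) along the last dimension before a
# plain MLP forward pass. The class name and override here are illustrative
# only; the real FlattenMLP implementation may differ.
class FlattenMLPSketch(MLP):
    """MLP that flattens multiple input tensors into one before forwarding."""

    def forward(self, *inputs: torch.Tensor) -> torch.Tensor:
        # e.g. (state, action) -> a single (state_dim + action_dim) input
        flat = torch.cat(inputs, dim=-1)
        return super().forward(flat)
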
def get(env: gym.Env, args: argparse.Namespace) -> Agent:
    """Create and return an agent for training or test.

    Args:
        env (gym.Env): OpenAI Gym environment with continuous action space
        args (argparse.Namespace): arguments including training settings

    """
    state_dim = env.observation_space.shape[0]
    action_dim = env.action_space.shape[0]
    hidden_sizes_actor = hyper_params["NETWORK"]["ACTOR_HIDDEN_SIZES"]
    hidden_sizes_critic = hyper_params["NETWORK"]["CRITIC_HIDDEN_SIZES"]

    # create actor
    actor = MLP(
        input_size=state_dim,
        output_size=action_dim,
        hidden_sizes=hidden_sizes_actor,
        output_activation=torch.tanh,
    ).to(device)
    actor_target = MLP(
        input_size=state_dim,
        output_size=action_dim,
        hidden_sizes=hidden_sizes_actor,
        output_activation=torch.tanh,
    ).to(device)
    actor_target.load_state_dict(actor.state_dict())

    # create critic1
    critic1 = MLP(
        input_size=state_dim + action_dim,
        output_size=1,
        hidden_sizes=hidden_sizes_critic,
    ).to(device)
    critic1_target = MLP(
        input_size=state_dim + action_dim,
        output_size=1,
        hidden_sizes=hidden_sizes_critic,
    ).to(device)
    critic1_target.load_state_dict(critic1.state_dict())

    # create critic2
    critic2 = MLP(
        input_size=state_dim + action_dim,
        output_size=1,
        hidden_sizes=hidden_sizes_critic,
    ).to(device)
    critic2_target = MLP(
        input_size=state_dim + action_dim,
        output_size=1,
        hidden_sizes=hidden_sizes_critic,
    ).to(device)
    critic2_target.load_state_dict(critic2.state_dict())

    # concat critic parameters to use one optimizer
    critic_parameters = list(critic1.parameters()) + list(critic2.parameters())

    # create optimizers
    actor_optim = optim.Adam(
        actor.parameters(),
        lr=hyper_params["LR_ACTOR"],
        weight_decay=hyper_params["WEIGHT_DECAY"],
    )
    critic_optim = optim.Adam(
        critic_parameters,
        lr=hyper_params["LR_CRITIC"],
        weight_decay=hyper_params["WEIGHT_DECAY"],
    )

    # noise instances to add randomness to actions
    exploration_noise = GaussianNoise(
        action_dim,
        min_sigma=hyper_params["EXPLORATION_NOISE"],
        max_sigma=hyper_params["EXPLORATION_NOISE"],
    )
    target_policy_noise = GaussianNoise(
        action_dim,
        min_sigma=hyper_params["TARGET_POLICY_NOISE"],
        max_sigma=hyper_params["TARGET_POLICY_NOISE"],
    )

    # make tuples to create an agent
    models = (actor, actor_target, critic1, critic1_target, critic2, critic2_target)
    optims = (actor_optim, critic_optim)
    noises = (exploration_noise, target_policy_noise)

    # create and return an agent
    return Agent(env, args, hyper_params, models, optims, noises)
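
# Usage sketch for `get` (an assumption: it is wired to the same entry-point
# convention as the `run` variants in this section, which call `agent.test()`
# or `agent.train()` depending on args):
#
#     agent = get(env, args)
#     if args.test:
#         agent.test()
#     else:
#         agent.train()
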
def run(env: gym.Env, args: argparse.Namespace, state_dim: int, action_dim: int):
    """Run training or test.

    Args:
        env (gym.Env): OpenAI Gym environment with continuous action space
        args (argparse.Namespace): arguments including training settings
        state_dim (int): dimension of states
        action_dim (int): dimension of actions

    """
    hidden_sizes_actor = [400, 300]
    hidden_sizes_critic = [400, 300]

    # create actor
    actor = MLP(
        input_size=state_dim,
        output_size=action_dim,
        hidden_sizes=hidden_sizes_actor,
        output_activation=torch.tanh,
    ).to(device)
    actor_target = MLP(
        input_size=state_dim,
        output_size=action_dim,
        hidden_sizes=hidden_sizes_actor,
        output_activation=torch.tanh,
    ).to(device)
    actor_target.load_state_dict(actor.state_dict())

    # create critics
    critic1 = MLP(
        input_size=state_dim + action_dim,
        output_size=1,
        hidden_sizes=hidden_sizes_critic,
    ).to(device)
    critic2 = MLP(
        input_size=state_dim + action_dim,
        output_size=1,
        hidden_sizes=hidden_sizes_critic,
    ).to(device)
    critic_target1 = MLP(
        input_size=state_dim + action_dim,
        output_size=1,
        hidden_sizes=hidden_sizes_critic,
    ).to(device)
    critic_target2 = MLP(
        input_size=state_dim + action_dim,
        output_size=1,
        hidden_sizes=hidden_sizes_critic,
    ).to(device)
    critic_target1.load_state_dict(critic1.state_dict())
    critic_target2.load_state_dict(critic2.state_dict())

    # create optimizers
    actor_optim = optim.Adam(
        actor.parameters(),
        lr=hyper_params["LR_ACTOR"],
        weight_decay=hyper_params["WEIGHT_DECAY"],
    )
    # concat critic parameters to use one optimizer
    critic_parameters = list(critic1.parameters()) + list(critic2.parameters())
    critic_optim = optim.Adam(
        critic_parameters,
        lr=hyper_params["LR_CRITIC_1"],
        weight_decay=hyper_params["WEIGHT_DECAY"],
    )

    # noise instance to add randomness to actions
    noise = GaussianNoise(
        hyper_params["GAUSSIAN_NOISE_MIN_SIGMA"],
        hyper_params["GAUSSIAN_NOISE_MAX_SIGMA"],
        hyper_params["GAUSSIAN_NOISE_DECAY_PERIOD"],
    )

    # make tuples to create an agent
    models = (actor, actor_target, critic1, critic2, critic_target1, critic_target2)
    optims = (actor_optim, critic_optim)

    # create an agent
    agent = Agent(env, args, hyper_params, models, optims, noise)

    # run
    if args.test:
        agent.test()
    else:
        agent.train()
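
# Hedged sketch of the (min_sigma, max_sigma, decay_period) noise used above,
# assuming it anneals sigma linearly over decay_period; the class name, `sample`
# signature, and decay schedule are illustrative, not the real GaussianNoise API.
import numpy as np


class LinearDecayGaussianNoiseSketch:
    """Gaussian action noise whose sigma anneals from max_sigma to min_sigma."""

    def __init__(self, min_sigma: float, max_sigma: float, decay_period: int):
        self.min_sigma = min_sigma
        self.max_sigma = max_sigma
        self.decay_period = decay_period

    def sample(self, action_size: int, t: int = 0) -> np.ndarray:
        # linearly interpolate sigma over decay_period, then hold at min_sigma
        frac = min(1.0, t / self.decay_period)
        sigma = self.max_sigma - (self.max_sigma - self.min_sigma) * frac
        return np.random.normal(0.0, sigma, size=action_size)
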