예제 #1
0
def run(env: gym.Env, args: argparse.Namespace, state_dim: int,
        action_dim: int):
    """Assemble an actor-critic agent with multiple envs and start it.

    Args:
        env (gym.Env): openAI Gym environment with continuous action space;
            passed to the agent as the single (non-vectorized) environment
        args (argparse.Namespace): run settings; `args.test` picks test mode
        state_dim (int): dimension of states (input size of both networks)
        action_dim (int): dimension of actions (output size of the actor)

    """
    # vectorized environments; the count comes from hyper_params["N_WORKERS"]
    env_multi = make_envs(
        env_generator("LunarLanderContinuous-v2", args),
        n_envs=hyper_params["N_WORKERS"],
    )

    # network specifications (separate hidden-size lists per network)
    actor_spec = dict(
        input_size=state_dim,
        output_size=action_dim,
        hidden_sizes=[256, 256],
        hidden_activation=torch.tanh,
    )
    critic_spec = dict(
        input_size=state_dim,
        output_size=1,
        hidden_sizes=[256, 256],
        hidden_activation=torch.tanh,
    )

    # the actor is built before the critic
    policy_net = GaussianDist(**actor_spec).to(device)
    value_net = MLP(**critic_spec).to(device)

    def adam_for(network, lr_key):
        """Return an Adam optimizer over `network` with the configured lr."""
        return optim.Adam(
            network.parameters(),
            lr=hyper_params[lr_key],
            weight_decay=hyper_params["WEIGHT_DECAY"],
        )

    policy_optim = adam_for(policy_net, "LR_ACTOR")
    value_optim = adam_for(value_net, "LR_CRITIC")

    # the agent receives models and optimizers as (actor, critic) tuples
    agent = Agent(
        env,
        env_multi,
        args,
        hyper_params,
        (policy_net, value_net),
        (policy_optim, value_optim),
    )

    # dispatch to evaluation or training
    if not args.test:
        agent.train()
    else:
        agent.test()
예제 #2
0
def run(env: gym.Env, args: argparse.Namespace, state_dim: int,
        action_dim: int):
    """Assemble an actor-critic agent and start training or testing.

    Args:
        env (gym.Env): openAI Gym environment with continuous action space
        args (argparse.Namespace): run settings; `args.test` picks test mode
        state_dim (int): dimension of states (input size of both networks)
        action_dim (int): dimension of actions (output size of the actor)

    """
    # networks: the actor is built first, then the critic
    policy_net = GaussianDist(
        input_size=state_dim,
        output_size=action_dim,
        hidden_sizes=[256, 256],
    ).to(device)
    value_net = MLP(
        input_size=state_dim,
        output_size=1,
        hidden_sizes=[256, 256],
    ).to(device)

    def adam_for(network, lr_key):
        """Return an Adam optimizer over `network` with the configured lr."""
        return optim.Adam(
            network.parameters(),
            lr=hyper_params[lr_key],
            weight_decay=hyper_params["WEIGHT_DECAY"],
        )

    policy_optim = adam_for(policy_net, "LR_ACTOR")
    value_optim = adam_for(value_net, "LR_CRITIC")

    # the agent receives models and optimizers as (actor, critic) tuples
    agent = Agent(
        env,
        args,
        hyper_params,
        (policy_net, value_net),
        (policy_optim, value_optim),
    )

    # dispatch to evaluation or training
    entry_point = agent.test if args.test else agent.train
    entry_point()
예제 #3
0
def run(env: gym.Env, args: argparse.Namespace, state_dim: int,
        action_dim: int):
    """Assemble a TD3Agent (actor, twin critics, targets, noises) and run it.

    Args:
        env (gym.Env): openAI Gym environment with continuous action space
        args (argparse.Namespace): run settings; `args.test` picks test mode
        state_dim (int): dimension of states
        action_dim (int): dimension of actions

    """
    actor_layers = [400, 300]
    critic_layers = [400, 300]

    actor_spec = dict(
        input_size=state_dim,
        output_size=action_dim,
        hidden_sizes=actor_layers,
        output_activation=torch.tanh,
    )
    # critics take the state and the action together as input
    critic_spec = dict(
        input_size=state_dim + action_dim,
        output_size=1,
        hidden_sizes=critic_layers,
    )

    def new_actor():
        """Build one actor network on the configured device."""
        return MLP(**actor_spec).to(device)

    def new_critic():
        """Build one critic network on the configured device."""
        return FlattenMLP(**critic_spec).to(device)

    # actor and its target, the target starting as a copy of the actor
    actor = new_actor()
    actor_target = new_actor()
    actor_target.load_state_dict(actor.state_dict())

    # both online critics are built before both target critics
    critic1, critic2 = new_critic(), new_critic()
    critic_target1, critic_target2 = new_critic(), new_critic()
    for target_net, online_net in ((critic_target1, critic1),
                                   (critic_target2, critic2)):
        target_net.load_state_dict(online_net.state_dict())

    # a single optimizer updates the parameters of both critics
    twin_critic_params = [*critic1.parameters(), *critic2.parameters()]

    actor_optim = optim.Adam(
        actor.parameters(),
        lr=hyper_params["LR_ACTOR"],
        weight_decay=hyper_params["WEIGHT_DECAY"],
    )
    critic_optim = optim.Adam(
        twin_critic_params,
        lr=hyper_params["LR_CRITIC"],
        weight_decay=hyper_params["WEIGHT_DECAY"],
    )

    # each noise gets the same value for its 2nd and 3rd positional argument
    explore_sigma = hyper_params["EXPLORATION_NOISE"]
    exploration_noise = GaussianNoise(action_dim, explore_sigma, explore_sigma)

    policy_sigma = hyper_params["TARGET_POLICY_NOISE"]
    target_policy_noise = GaussianNoise(action_dim, policy_sigma, policy_sigma)

    # the order inside these tuples is what the agent receives
    models = (
        actor,
        actor_target,
        critic1,
        critic2,
        critic_target1,
        critic_target2,
    )
    optims = (actor_optim, critic_optim)

    agent = TD3Agent(
        env,
        args,
        hyper_params,
        models,
        optims,
        exploration_noise,
        target_policy_noise,
    )

    # dispatch to evaluation or training
    if not args.test:
        agent.train()
    else:
        agent.test()
예제 #4
0
def get(env, args):
    """Build and return an agent with an actor, a V critic and twin Q critics.

    Args:
        env (gym.Env): openAI Gym environment with continuous action space
        args (argparse.Namespace): arguments including training settings

    Returns:
        Agent built from the networks, their optimizers, the target entropy
        and an optional HER helper (None when hyper_params["USE_HER"] is off).

    """
    state_dim = env.observation_space.shape[0]
    action_dim = env.action_space.shape[0]

    use_her = hyper_params["USE_HER"]
    if use_her:
        # with HER enabled the network input size is doubled
        state_dim = state_dim * 2

    network_cfg = hyper_params["NETWORK"]
    actor_layers = network_cfg["ACTOR_HIDDEN_SIZES"]
    vf_layers = network_cfg["VF_HIDDEN_SIZES"]
    qf_layers = network_cfg["QF_HIDDEN_SIZES"]

    # heuristic target entropy: minus the action dimension
    action_count = np.prod((action_dim, )).item()
    target_entropy = -action_count

    # actor
    actor = TanhGaussianDistParams(
        input_size=state_dim,
        output_size=action_dim,
        hidden_sizes=actor_layers,
    ).to(device)

    # state-value critic and its target, the target starting as a copy
    vf_spec = dict(input_size=state_dim, output_size=1, hidden_sizes=vf_layers)
    vf = MLP(**vf_spec).to(device)
    vf_target = MLP(**vf_spec).to(device)
    vf_target.load_state_dict(vf.state_dict())

    # two Q critics taking the state and the action together as input
    qf_spec = dict(
        input_size=state_dim + action_dim,
        output_size=1,
        hidden_sizes=qf_layers,
    )
    qf_1 = FlattenMLP(**qf_spec).to(device)
    qf_2 = FlattenMLP(**qf_spec).to(device)

    def adam_for(network, lr_key):
        """Return an Adam optimizer over `network` with the configured lr."""
        return optim.Adam(
            network.parameters(),
            lr=hyper_params[lr_key],
            weight_decay=hyper_params["WEIGHT_DECAY"],
        )

    actor_optim = adam_for(actor, "LR_ACTOR")
    vf_optim = adam_for(vf, "LR_VF")
    qf_1_optim = adam_for(qf_1, "LR_QF1")
    qf_2_optim = adam_for(qf_2, "LR_QF2")

    # the order inside these tuples is what the agent receives
    models = (actor, vf, vf_target, qf_1, qf_2)
    optims = (actor_optim, vf_optim, qf_1_optim, qf_2_optim)

    # HER helper only when enabled
    if use_her:
        her = LunarLanderContinuousHER()
    else:
        her = None

    return Agent(env, args, hyper_params, models, optims, target_entropy, her)
예제 #5
0
def get(env, args):
    """Build and return an agent with a tanh actor and twin critics.

    Args:
        env (gym.Env): openAI Gym environment with continuous action space
        args (argparse.Namespace): arguments including training settings

    Returns:
        Agent built from the networks (each paired with a target copy), the
        two optimizers and the (exploration, target-policy) noise pair.

    """
    state_dim = env.observation_space.shape[0]
    action_dim = env.action_space.shape[0]

    network_cfg = hyper_params["NETWORK"]
    actor_layers = network_cfg["ACTOR_HIDDEN_SIZES"]
    critic_layers = network_cfg["CRITIC_HIDDEN_SIZES"]

    def build_with_target(**spec):
        """Build a network, then a target network holding the same weights."""
        online = MLP(**spec).to(device)
        target = MLP(**spec).to(device)
        target.load_state_dict(online.state_dict())
        return online, target

    # actor and its target
    actor, actor_target = build_with_target(
        input_size=state_dim,
        output_size=action_dim,
        hidden_sizes=actor_layers,
        output_activation=torch.tanh,
    )

    # critics take the state and the action together as input
    critic_spec = dict(
        input_size=state_dim + action_dim,
        output_size=1,
        hidden_sizes=critic_layers,
    )
    critic1, critic1_target = build_with_target(**critic_spec)
    critic2, critic2_target = build_with_target(**critic_spec)

    # a single optimizer updates the parameters of both critics
    twin_critic_params = [*critic1.parameters(), *critic2.parameters()]

    actor_optim = optim.Adam(
        actor.parameters(),
        lr=hyper_params["LR_ACTOR"],
        weight_decay=hyper_params["WEIGHT_DECAY"],
    )
    critic_optim = optim.Adam(
        twin_critic_params,
        lr=hyper_params["LR_CRITIC"],
        weight_decay=hyper_params["WEIGHT_DECAY"],
    )

    # each noise uses the same value for min_sigma and max_sigma
    explore_sigma = hyper_params["EXPLORATION_NOISE"]
    exploration_noise = GaussianNoise(
        action_dim,
        min_sigma=explore_sigma,
        max_sigma=explore_sigma,
    )
    policy_sigma = hyper_params["TARGET_POLICY_NOISE"]
    target_policy_noise = GaussianNoise(
        action_dim,
        min_sigma=policy_sigma,
        max_sigma=policy_sigma,
    )

    # the order inside these tuples is what the agent receives
    models = (
        actor,
        actor_target,
        critic1,
        critic1_target,
        critic2,
        critic2_target,
    )
    optims = (actor_optim, critic_optim)
    noises = (exploration_noise, target_policy_noise)

    return Agent(env, args, hyper_params, models, optims, noises)
예제 #6
0
def run(env: gym.Env, args: argparse.Namespace, state_dim: int,
        action_dim: int):
    """Assemble a SACfDAgent (actor, V critic, twin Q critics) and run it.

    Args:
        env (gym.Env): openAI Gym environment with continuous action space
        args (argparse.Namespace): run settings; `args.test` picks test mode
        state_dim (int): dimension of states
        action_dim (int): dimension of actions

    """
    actor_layers = [256, 256]
    vf_layers = [256, 256]
    qf_layers = [256, 256]

    # heuristic target entropy: minus the action dimension
    action_count = np.prod((action_dim, )).item()
    target_entropy = -action_count

    # actor
    actor = TanhGaussianDistParams(
        input_size=state_dim,
        output_size=action_dim,
        hidden_sizes=actor_layers,
    ).to(device)

    # state-value critic and its target, the target starting as a copy
    vf_spec = dict(input_size=state_dim, output_size=1, hidden_sizes=vf_layers)
    vf = MLP(**vf_spec).to(device)
    vf_target = MLP(**vf_spec).to(device)
    vf_target.load_state_dict(vf.state_dict())

    # two Q critics taking the state and the action together as input
    qf_spec = dict(
        input_size=state_dim + action_dim,
        output_size=1,
        hidden_sizes=qf_layers,
    )
    qf_1 = FlattenMLP(**qf_spec).to(device)
    qf_2 = FlattenMLP(**qf_spec).to(device)

    def adam_for(network, lr_key):
        """Return an Adam optimizer; weight decay is hyper_params["LAMBDA2"]."""
        return optim.Adam(
            network.parameters(),
            lr=hyper_params[lr_key],
            weight_decay=hyper_params["LAMBDA2"],
        )

    actor_optim = adam_for(actor, "LR_ACTOR")
    vf_optim = adam_for(vf, "LR_VF")
    qf_1_optim = adam_for(qf_1, "LR_QF1")
    qf_2_optim = adam_for(qf_2, "LR_QF2")

    # the order inside these tuples is what the agent receives
    models = (actor, vf, vf_target, qf_1, qf_2)
    optims = (actor_optim, vf_optim, qf_1_optim, qf_2_optim)

    agent = SACfDAgent(env, args, hyper_params, models, optims, target_entropy)

    # dispatch to evaluation or training
    entry_point = agent.test if args.test else agent.train
    entry_point()
예제 #7
0
def run(env: gym.Env, args: argparse.Namespace, state_dim: int, action_dim: int):
    """Assemble a BCDDPGAgent (actor, critic, targets, OU noise) and run it.

    Args:
        env (gym.Env): openAI Gym environment with continuous action space
        args (argparse.Namespace): run settings; `args.test` picks test mode
        state_dim (int): dimension of states
        action_dim (int): dimension of actions

    """
    actor_layers = [256, 256]
    critic_layers = [256, 256]

    def build_with_target(**spec):
        """Build a network, then a target network holding the same weights."""
        online = MLP(**spec).to(device)
        target = MLP(**spec).to(device)
        target.load_state_dict(online.state_dict())
        return online, target

    # actor and its target
    actor, actor_target = build_with_target(
        input_size=state_dim,
        output_size=action_dim,
        hidden_sizes=actor_layers,
        output_activation=torch.tanh,
    )

    # critic takes the state and the action together as input
    critic, critic_target = build_with_target(
        input_size=state_dim + action_dim,
        output_size=1,
        hidden_sizes=critic_layers,
    )

    def adam_for(network, lr_key):
        """Return an Adam optimizer over `network` with the configured lr."""
        return optim.Adam(
            network.parameters(),
            lr=hyper_params[lr_key],
            weight_decay=hyper_params["WEIGHT_DECAY"],
        )

    actor_optim = adam_for(actor, "LR_ACTOR")
    critic_optim = adam_for(critic, "LR_CRITIC")

    # exploration noise configured from hyper_params
    noise = OUNoise(
        action_dim,
        theta=hyper_params["OU_NOISE_THETA"],
        sigma=hyper_params["OU_NOISE_SIGMA"],
    )

    # the order inside these tuples is what the agent receives
    models = (actor, actor_target, critic, critic_target)
    optims = (actor_optim, critic_optim)

    # HER helper only when enabled
    if hyper_params["USE_HER"]:
        her = ReacherHER()
    else:
        her = None

    agent = BCDDPGAgent(env, args, hyper_params, models, optims, noise, her)

    # dispatch to evaluation or training
    if not args.test:
        agent.train()
    else:
        agent.test()
예제 #8
0
def run(env: gym.Env, args: argparse.Namespace, state_dim: int, action_dim: int):
    """Assemble a twin-critic agent with Gaussian action noise and run it.

    Args:
        env (gym.Env): openAI Gym environment with continuous action space
        args (argparse.Namespace): run settings; `args.test` picks test mode
        state_dim (int): dimension of states
        action_dim (int): dimension of actions

    """
    actor_layers = [400, 300]
    critic_layers = [400, 300]

    actor_spec = dict(
        input_size=state_dim,
        output_size=action_dim,
        hidden_sizes=actor_layers,
        output_activation=torch.tanh,
    )
    # critics take the state and the action together as input
    critic_spec = dict(
        input_size=state_dim + action_dim,
        output_size=1,
        hidden_sizes=critic_layers,
    )

    def new_net(spec):
        """Build one MLP from `spec` on the configured device."""
        return MLP(**spec).to(device)

    # actor and its target, the target starting as a copy of the actor
    actor = new_net(actor_spec)
    actor_target = new_net(actor_spec)
    actor_target.load_state_dict(actor.state_dict())

    # both online critics are built before both target critics
    critic_1, critic_2 = new_net(critic_spec), new_net(critic_spec)
    critic_target1, critic_target2 = new_net(critic_spec), new_net(critic_spec)
    for target_net, online_net in ((critic_target1, critic_1),
                                   (critic_target2, critic_2)):
        target_net.load_state_dict(online_net.state_dict())

    actor_optim = optim.Adam(
        actor.parameters(),
        lr=hyper_params["LR_ACTOR"],
        weight_decay=hyper_params["WEIGHT_DECAY"],
    )

    # a single optimizer updates both critics, using the LR_CRITIC_1 rate
    twin_critic_params = [*critic_1.parameters(), *critic_2.parameters()]
    critic_optim = optim.Adam(
        twin_critic_params,
        lr=hyper_params["LR_CRITIC_1"],
        weight_decay=hyper_params["WEIGHT_DECAY"],
    )

    # noise is configured positionally: min sigma, max sigma, decay period
    noise_settings = (
        hyper_params["GAUSSIAN_NOISE_MIN_SIGMA"],
        hyper_params["GAUSSIAN_NOISE_MAX_SIGMA"],
        hyper_params["GAUSSIAN_NOISE_DECAY_PERIOD"],
    )
    noise = GaussianNoise(*noise_settings)

    # the order inside these tuples is what the agent receives
    models = (
        actor,
        actor_target,
        critic_1,
        critic_2,
        critic_target1,
        critic_target2,
    )
    optims = (actor_optim, critic_optim)

    agent = Agent(env, args, hyper_params, models, optims, noise)

    # dispatch to evaluation or training
    entry_point = agent.test if args.test else agent.train
    entry_point()
예제 #9
0
def run(env: gym.Env,
        env_name: str,
        args: argparse.Namespace,
        state_dim=75,
        action_dim=17):
    """Run training or test for an A2C agent.

    In test mode the agent is evaluated once per entry of a fixed list of
    instance names, each passed to `agent.test` as 'Glover/<name>.FCTP'.

    Args:
        env (gym.Env): openAI Gym environment handed to the agent
        env_name (str): name of the environment; not used inside this
            function, kept so existing callers keep working
        args (argparse.Namespace): arguments including training settings;
            `args.test` selects test mode
        state_dim (int): dimension of states
        action_dim (int): dimension of actions

    """
    # create models
    actor = CategoricalDist(input_size=state_dim,
                            output_size=action_dim,
                            hidden_sizes=[256, 256, 128]).to(device)

    critic = MLP(input_size=state_dim,
                 output_size=1,
                 hidden_sizes=[256, 256, 128]).to(device)

    # create optimizer
    actor_optim = optim.Adam(
        actor.parameters(),
        lr=hyper_params["LR_ACTOR"],
        weight_decay=hyper_params["WEIGHT_DECAY"],
    )

    critic_optim = optim.Adam(
        critic.parameters(),
        lr=hyper_params["LR_CRITIC"],
        weight_decay=hyper_params["WEIGHT_DECAY"],
    )

    # make tuples to create an agent
    models = (actor, critic)
    optims = (actor_optim, critic_optim)

    # create an agent
    agent = A2CAgent(env, args, hyper_params, models, optims)

    # run
    if args.test:
        all_names = [
            'N3000', 'N3001', 'N3002', 'N3003', 'N3005', 'N3006', 'N3007',
            'N3008', 'N300A', 'N300B', 'N300C', 'N300D', 'N300E', 'N3100',
            'N3101', 'N3102', 'N3103', 'N3105', 'N3106', 'N3107', 'N3108',
            'N310A', 'N310B', 'N310C', 'N310D', 'N310E', 'N3200', 'N3201',
            'N3202', 'N3203', 'N3204', 'N3205', 'N3206', 'N3207', 'N3208',
            'N3209', 'N320A', 'N320B', 'N320C', 'N320D', 'N320E', 'N3300',
            'N3301', 'N3302', 'N3303', 'N3304', 'N3305', 'N3306', 'N3307',
            'N3308', 'N3309', 'N330A', 'N330B', 'N330C', 'N330D', 'N330E',
            'N3400', 'N3401', 'N3402', 'N3403', 'N3405', 'N3406', 'N3407',
            'N3408', 'N340A', 'N340B', 'N340C', 'N340D', 'N340E', 'N3500',
            'N3501', 'N3502', 'N3503', 'N3505', 'N3506', 'N3507', 'N3508',
            'N350A', 'N350B', 'N350C', 'N350D', 'N350E', 'N3600', 'N3601',
            'N3602', 'N3603', 'N3604', 'N3605', 'N3607', 'N3608', 'N360A',
            'N360B', 'N360C', 'N360D', 'N360E', 'N3700', 'N3701', 'N3702',
            'N3703', 'N3705', 'N3706', 'N3707', 'N3708', 'N370A', 'N370B',
            'N370C', 'N370D', 'N370E'
        ]

        # iterate the list defined just above; the original looped over an
        # undefined `names`, which raised NameError in test mode
        for name in all_names:
            agent.test(name='Glover/' + name + '.FCTP')

    else:
        agent.train()