Example #1
def main():
    # env initialization
    env_name = "FCTP-v1"
    env = env_generator_FCTP(env_name, 100000)  # max_episode_steps
    env_utils.set_env(env, args)
    env.set_comp(False)  # True for two component actions, False for one

    # set a random seed
    common_utils.set_random_seed(args.seed, env)

    # run
    module_path = "fctp_v1." + args.algo  # select the agent module named by the algo command-line argument

    example = importlib.import_module(module_path)
    example.run(env, env_name, args)
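The module loaded via importlib only needs to expose a run(env, env_name, args) entry point, as the last line shows. A minimal sketch of such a module, using a hypothetical random-action stand-in (the module and file names below are illustrative, not part of this project):

# hypothetical fctp_v1/random_algo.py -- illustrative only
def run(env, env_name, args):
    """Stand-in training loop: act randomly until the episode ends."""
    state = env.reset()
    done = False
    while not done:
        action = env.action_space.sample()
        state, reward, done, info = env.step(action)
    env.close()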
Example #2
def main():
    """Main."""
    # env initialization
    env = gym.make("LunarLanderContinuous-v2")
    env_utils.set_env(env, args)
    state_dim = env.observation_space.shape[0]
    action_dim = env.action_space.shape[0]

    # set a random seed
    common_utils.set_random_seed(args.seed, env)

    # run
    module_path = "examples.lunarlander_continuous_v2." + args.algo
    example = importlib.import_module(module_path)
    example.run(env, args, state_dim, action_dim)
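Both mains read a module-level args namespace that the snippets never construct. A minimal argparse sketch that would satisfy the fields used above (seed, algo, and test in Example #3); the flag names and defaults are assumptions, not taken from the project:

import argparse

parser = argparse.ArgumentParser(description="run an RL example")
parser.add_argument("--seed", type=int, default=777, help="random seed")  # assumed flag name
parser.add_argument("--algo", type=str, default="dqn", help="algorithm module to import and run")
parser.add_argument("--test", action="store_true", help="evaluate instead of train")
args = parser.parse_args()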
Example #3
def run(env: gym.Env, args: argparse.Namespace):
    """Run training or test.

    Args:
        env (gym.Env): openAI Gym environment with discrete action space
        args (argparse.Namespace): arguments including training settings

    """
    # create multiple envs
    # configure environment so that it works for discrete actions
    env_single = env_utils.set_env(env, args, WRAPPERS)
    env_gen = env_generator("Pong-v0", args, WRAPPERS)
    env_multi = make_envs(env_gen, n_envs=hyper_params["N_WORKERS"])

    # create a model
    action_dim = env.action_space.n
    hidden_sizes = [256, 256]

    def get_cnn_model():
        cnn_model = DuelingCNN(
            cnn_layers=[
                CNNLayer(
                    input_size=4,
                    output_size=32,
                    kernel_size=5,
                    pulling_fn=nn.MaxPool2d(3),
                ),
                CNNLayer(
                    input_size=32,
                    output_size=32,
                    kernel_size=3,
                    pulling_fn=nn.MaxPool2d(3),
                ),
                CNNLayer(
                    input_size=32,
                    output_size=64,
                    kernel_size=2,
                    pulling_fn=nn.MaxPool2d(3),
                ),
            ],
            fc_layers=DuelingMLP(
                input_size=256, output_size=action_dim, hidden_sizes=hidden_sizes
            ),
        ).to(device)
        return cnn_model

    dqn = get_cnn_model()
    dqn_target = get_cnn_model()
    dqn_target.load_state_dict(dqn.state_dict())

    # create optimizer
    dqn_optim = optim.Adam(
        dqn.parameters(),
        lr=hyper_params["LR_DQN"],
        weight_decay=hyper_params["WEIGHT_DECAY"],
    )

    # make tuples to create an agent
    models = (dqn, dqn_target)

    # create an agent
    agent = Agent(env_single, env_multi, args, hyper_params, models, dqn_optim)
    agent.env_name = "Pong-v0"

    # run
    if args.test:
        agent.test()
    else:
        agent.train()
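A quick sanity check on the hard-coded fc input_size=256: assuming the wrappers stack four 84x84 frames (standard Atari preprocessing) and that CNNLayer builds stride-1 convolutions, an equivalent plain PyTorch stack flattens to exactly 256 features:

import torch
import torch.nn as nn

# Mirrors the three CNNLayer blocks above with plain torch modules.
probe = nn.Sequential(
    nn.Conv2d(4, 32, kernel_size=5), nn.MaxPool2d(3),   # 84 -> 80 -> 26
    nn.Conv2d(32, 32, kernel_size=3), nn.MaxPool2d(3),  # 26 -> 24 -> 8
    nn.Conv2d(32, 64, kernel_size=2), nn.MaxPool2d(3),  # 8 -> 7 -> 2
)
out = probe(torch.zeros(1, 4, 84, 84))
print(out.flatten(1).shape[1])  # 256, matching fc input_size above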