Example #1
    np.random.seed(args.seed)

    # create environment
    env = gym.make(args.env)
    env.seed(args.seed)
    env.action_space.seed(args.seed)
    train_tools.EVAL_SEED = args.seed

    obs_dim = env.observation_space.shape[0]
    act_dim = env.action_space.shape[0]
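    # scalar action bound taken from the first action dimension (assumes a symmetric Box action space)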
    act_bound = env.action_space.high[0]

    # create nets
    actor_net = DDPGMLPActor(obs_dim=obs_dim,
                             act_dim=act_dim,
                             act_bound=act_bound,
                             hidden_size=[256, 256],
                             hidden_activation=nn.ReLU)

    critic_net1 = MLPQsaNet(obs_dim=obs_dim,
                            act_dim=act_dim,
                            hidden_size=[256, 256],
                            hidden_activation=nn.ReLU)
    critic_net2 = MLPQsaNet(obs_dim=obs_dim,
                            act_dim=act_dim,
                            hidden_size=[256, 256],
                            hidden_activation=nn.ReLU)

    # create buffer
    if args.show:
        data_buffer = None
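
The excerpt above comes from a training script's main function: it assumes the imports shown in Example #2 below plus numpy, an `args` namespace produced by argparse, and a `train_tools` helper module from the same repository. As a minimal sketch, with flag names inferred from the fields used above (`seed`, `env`, `show`) and defaults that are illustrative rather than taken from the original script, the argument parsing could look like this:

import argparse

parser = argparse.ArgumentParser()
parser.add_argument('--env', type=str, default='Pendulum-v0', help='Gym environment id')
parser.add_argument('--seed', type=int, default=10, help='random seed for NumPy and the environment')
parser.add_argument('--show', action='store_true', help='only visualize a trained policy, so no replay buffer is created')
args = parser.parse_args()
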
Example #2
import gym
import torch
import torch.nn as nn

# ReplayBuffer is used below; its exact module path within the repo is assumed here
from common.buffers import ReplayBuffer
from common.networks import MLPQsaNet, DDPGMLPActor

if __name__ == '__main__':
    # create environment
    env = gym.make("Pendulum-v0")
    # env = gym.make('LunarLanderContinuous-v2')
    # env = gym.make('BipedalWalker-v3')

    obs_dim = env.observation_space.shape[0]
    act_dim = env.action_space.shape[0]
    act_bound = env.action_space.high[0]

    # create nets
    actor_net = DDPGMLPActor(obs_dim=obs_dim,
                             act_dim=act_dim,
                             act_bound=act_bound,
                             hidden_size=[400, 300],
                             hidden_activation=nn.ReLU)

    critic_net = MLPQsaNet(obs_dim=obs_dim,
                           act_dim=act_dim,
                           hidden_size=[400, 300],
                           hidden_activation=nn.ReLU)

    # create optimizer
    actor_optimizer = torch.optim.Adam(actor_net.parameters(), lr=1e-4)
    critic_optimizer = torch.optim.Adam(critic_net.parameters(), lr=1e-3)

    # create buffer
    replay_buffer = ReplayBuffer(obs_dim=obs_dim,
                                 act_dim=act_dim,
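
The implementation of DDPGMLPActor in common.networks is not shown in these examples. Purely as a stand-in, built only from the constructor arguments visible above (obs_dim, act_dim, act_bound, hidden_size, hidden_activation) and assuming the usual DDPG design of an MLP whose tanh output is scaled to the action bound, an actor of this kind could be sketched as:

import torch
import torch.nn as nn

class SimpleDDPGActor(nn.Module):
    """Minimal DDPG-style actor: MLP -> tanh -> scale to [-act_bound, act_bound]."""
    def __init__(self, obs_dim, act_dim, act_bound,
                 hidden_size=(256, 256), hidden_activation=nn.ReLU):
        super().__init__()
        layers = []
        in_dim = obs_dim
        for h in hidden_size:
            layers += [nn.Linear(in_dim, h), hidden_activation()]
            in_dim = h
        layers += [nn.Linear(in_dim, act_dim), nn.Tanh()]
        self.net = nn.Sequential(*layers)
        self.act_bound = act_bound

    def forward(self, obs):
        # tanh output scaled so actions lie in [-act_bound, act_bound]
        return self.act_bound * self.net(obs)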