Example #1
import os
import gym
import torch
import torch.multiprocessing as mp

# Policy, Value, my_optim (SharedAdam), train and test come from the
# surrounding project and are not shown in this excerpt.


class Params:
    def __init__(self):
        # lr and num_processes are referenced below but their original values
        # are truncated from this excerpt; the values here are placeholders.
        self.lr = 1e-4
        self.num_processes = 4
        self.max_episode_length = 10000
        self.seed = 1
        self.env_name = 'Pendulum-v0'


if __name__ == '__main__':
    os.environ['OMP_NUM_THREADS'] = '1'
    params = Params()
    torch.manual_seed(params.seed)
    env = gym.make(params.env_name)
    num_inputs = env.observation_space.shape[0]
    num_outputs = env.action_space.shape[0]

    shared_p = Policy(num_inputs, num_outputs)
    shared_v = Value(num_inputs)
    shared_p.share_memory()
    shared_v.share_memory()
    optimizer_p = my_optim.SharedAdam(shared_p.parameters(), lr=params.lr)
    optimizer_v = my_optim.SharedAdam(shared_v.parameters(), lr=params.lr)

    processes = []
    p = mp.Process(target=test, args=(params.num_processes, params, shared_p))
    p.start()
    processes.append(p)
    for rank in range(0, params.num_processes):
        p = mp.Process(target=train,
                       args=(rank, params, shared_p, shared_v, optimizer_p,
                             optimizer_v))
        p.start()
        processes.append(p)
    for p in processes:
        p.join()
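
The example passes train and test as mp.Process targets, but their definitions lie outside the excerpt. Below is a minimal sketch of the signatures implied by the argument tuples above; the bodies are placeholders, not the original implementation.

import gym
import torch

def train(rank, params, shared_p, shared_v, optimizer_p, optimizer_v):
    # Each worker builds its own environment and uses a rank-dependent seed
    # so that the asynchronous rollouts are decorrelated.
    torch.manual_seed(params.seed + rank)
    env = gym.make(params.env_name)
    # Rollout collection and asynchronous updates of shared_p and shared_v
    # through optimizer_p and optimizer_v would go here.

def test(rank, params, shared_p):
    # The evaluation process periodically reads the shared policy weights
    # and reports episode returns; it does not push gradients.
    torch.manual_seed(params.seed + rank)
    env = gym.make(params.env_name)
    # A greedy evaluation loop using shared_p would go here.
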
Example #2
    # Excerpt from inside a setup function: Env, Policy, partial_load,
    # experience_buffer, Learner and Actor come from the surrounding project
    # and are not shown here.
    # Get the action and observation dimensions from the Env
    env = Env(args, device='cpu', options=options, dummy=True)
    observation_dim = env.observation_dim
    args.action_dim = env.action_dim
    env.close()
    print('Observation Space: {} / Action Dim: {}'.format(
        observation_dim, args.action_dim))

    # Initialize the shared-memory policy (the actor parameters) shared between the workers and the learner
    shared_state_dict = Policy(args.action_dim)
    if args.load_model is not None:
        partial_load(shared_state_dict, args.load_model)
        if args.reset_policy:
            shared_state_dict.policy.weight.data.zero_()
            shared_state_dict.policy.bias.data.zero_()
    shared_state_dict = shared_state_dict.share_memory()

    # Creating learner
    learner = Learner(args, experience_buffer.queue_batch, shared_state_dict)

    # Creating actors
    actors = []
    for i in range(args.n_workers):
        actor_name = 'actor_' + str(i)
        actor = Actor(args,
                      experience_buffer.queue_trace,
                      shared_state_dict,
                      actor_name,
                      rank=i)
        actors.append(actor)
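
Example #2 restores weights through a project-specific helper, partial_load, before optionally zeroing the final policy layer. Below is a minimal sketch of what such a helper commonly does, assuming it copies only the checkpoint entries whose names and shapes match the target model; the real implementation is not shown in the excerpt.

import torch

def partial_load(model, checkpoint_path):
    # Hypothetical helper: keep only the checkpoint entries that match the
    # model's state_dict by name and shape, so a partially compatible
    # checkpoint can still be restored.
    checkpoint = torch.load(checkpoint_path, map_location='cpu')
    model_state = model.state_dict()
    compatible = {k: v for k, v in checkpoint.items()
                  if k in model_state and v.shape == model_state[k].shape}
    model_state.update(compatible)
    model.load_state_dict(model_state)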