# NOTE(review): this chunk was whitespace-mangled (collapsed onto one line);
# indentation below is reconstructed from syntax. The first three assignments
# are the tail of a hyperparameter container (class header not visible in
# this chunk — presumably `Params.__init__`; confirm against full file).
self.max_episode_length = 10000  # hard cap on steps per episode
self.seed = 1                    # RNG seed fed to torch.manual_seed below
self.env_name = 'Pendulum-v0'    # Gym environment id used by gym.make below

if __name__ == '__main__':
    # Restrict intra-op threading so the worker processes don't oversubscribe cores.
    os.environ['OMP_NUM_THREADS'] = '1'
    params = Params()
    torch.manual_seed(params.seed)
    # Build the env once here only to read its observation/action dimensions.
    env = gym.make(params.env_name)
    num_inputs = env.observation_space.shape[0]
    num_outputs = env.action_space.shape[0]
    # Shared policy and value networks: share_memory() moves their parameters
    # into shared memory so every spawned process sees the same tensors.
    shared_p = Policy(num_inputs, num_outputs)
    shared_v = Value(num_inputs)
    shared_p.share_memory()
    shared_v.share_memory()
    # SharedAdam keeps optimizer state in shared memory as well, so all
    # training workers update the same statistics (A3C-style).
    optimizer_p = my_optim.SharedAdam(shared_p.parameters(), lr=params.lr)
    optimizer_v = my_optim.SharedAdam(shared_v.parameters(), lr=params.lr)
    processes = []
    # One evaluation process running `test` against the shared policy.
    p = mp.Process(target=test, args=(params.num_processes, params, shared_p))
    p.start()
    processes.append(p)
    # num_processes training workers, each identified by its rank.
    for rank in range(0, params.num_processes):
        p = mp.Process(target=train, args=(rank, params, shared_p, shared_v, optimizer_p, optimizer_v))
        p.start()
        processes.append(p)
    for p in processes:
        # NOTE(review): loop body is truncated in this chunk — presumably
        # `p.join()` to wait for all workers; confirm against full file.
# Getting action dim and observation dim from Env env = Env(args, device='cpu', options=options, dummy=True) observation_dim = env.observation_dim args.action_dim = env.action_dim env.close() print('Observation Space: {} / Action Dim: {}'.format( observation_dim, args.action_dim)) # Initializing shared memory used between workers and learner that contains the actor parameters shared_state_dict = Policy(args.action_dim) if args.load_model is not None: partial_load(shared_state_dict, args.load_model) if args.reset_policy: shared_state_dict.policy.weight.data.zero_() shared_state_dict.policy.bias.data.zero_() shared_state_dict = shared_state_dict.share_memory() # Creating learner learner = Learner(args, experience_buffer.queue_batch, shared_state_dict) # Creating actors actors = [] for i in range(args.n_workers): actor_name = 'actor_' + str(i) actor = Actor(args, experience_buffer.queue_trace, shared_state_dict, actor_name, rank=i) actors.append(actor)