def finish_actors(self):
    # Signal every actor to stop, then terminate and reap each process.
    for _ in range(self.num_actors):
        self.command_queue.put('end', True)  # block until the message is enqueued
    for pi, p in enumerate(self.processes):
        p.terminate()
        p.join()
        print('process %d closed' % pi)
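# A minimal sketch (an assumption, not code from this repo) of the actor-side
# loop that finish_actors() pairs with: each worker blocks on
# command_queue.get() and exits once it receives the 'end' message.
# rollout_fn is a hypothetical stand-in for the actor's real work.
def _actor_loop_sketch(command_queue, result_queue, rollout_fn):
    while True:
        msg = command_queue.get()       # blocks until the parent sends a command
        if msg == 'end':                # shutdown signal sent by finish_actors()
            break
        result_queue.put(rollout_fn(msg))  # run one unit of work, report result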
# Input/output sizes come from the environment's observation and action spaces.
nb_inputs = env.observation_space.sample().shape[0]
nb_outputs = env.action_space.sample().shape[0] * 2
policy = Policy(nb_inputs, nb_outputs, hp.env_name, hp.normal, args)
# One exploration direction per policy parameter; keep the best half.
hp.nb_directions = int(policy.theta.shape[0] * policy.theta.shape[1])
hp.nb_best_directions = int(hp.nb_directions / 2)
normalizer = Normalizer(nb_inputs)
print("start training")
train(env, policy, normalizer, hp, parentPipes, args)

if args.mp:
    # Tell each worker to shut down, then wait for its process to exit.
    for parentPipe in parentPipes:
        parentPipe.send([_CLOSE, "pay2"])
    for p in processes:
        p.join()

# --------------------------------------------------------------------------------
# STOCH2 Test
# env = sv.StochBulletEnv(render=True, gait='trot')
# hp = HyperParameters()
# nb_inputs = env.observation_space.shape[0]
# nb_outputs = env.action_space.shape[0] * 2
# args = 0
# policy = Policy(nb_inputs, nb_outputs, hp.env_name, 0, args)
# normalizer = Normalizer(nb_inputs)
# deltas = policy.sample_deltas()
# state = env.reset()
# i = 0
# hp.noise = 0.2
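# A minimal sketch, assuming the usual multiprocessing.Pipe/Process pattern,
# of how the parentPipes/processes lists consumed by the args.mp shutdown
# above could be built. num_workers and worker_fn are hypothetical stand-ins
# for this repo's direction count and worker function.
def _make_workers_sketch(num_workers, worker_fn):
    import multiprocessing as mp  # local import so the sketch is self-contained
    parentPipes, processes = [], []
    for i in range(num_workers):
        parentPipe, childPipe = mp.Pipe()
        p = mp.Process(target=worker_fn, args=(i, childPipe))
        p.start()                  # child begins waiting for pipe messages
        parentPipes.append(parentPipe)
        processes.append(p)
    return parentPipes, processes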