def finish_actors(self):
    """Tell every actor worker to stop, then tear down the worker processes.

    Puts one 'end' sentinel on the shared command queue per actor so each
    worker sees a shutdown command, then terminates and joins every process
    in ``self.processes``, reporting each closure.
    """
    # One sentinel per actor; block=True is passed explicitly (the default).
    for _ in range(self.num_actors):
        self.command_queue.put('end', True)
    # Force each process down, wait for it to exit, and report progress.
    idx = 0
    for proc in self.processes:
        proc.terminate()
        proc.join()
        print('processes %d closed' % idx)
        idx += 1
# --- Example no. 2 ---
    # NOTE(review): this span is the tail of a function whose `def` line is
    # above this excerpt — `env`, `hp`, `args`, `parentPipes`, `processes`,
    # and `_CLOSE` are bound earlier, outside this view.

    # Probe the spaces once to learn their flat sizes; assumes both are 1-D.
    nb_inputs = env.observation_space.sample().shape[0]
    # Twice the action dimension — presumably two outputs per action
    # (e.g. a mean/spread pair); TODO confirm against Policy's expectations.
    nb_outputs = env.action_space.sample().shape[0] * 2
    policy = Policy(nb_inputs, nb_outputs, hp.env_name, hp.normal, args)
    # Search-direction counts tied to the policy parameter matrix size,
    # keeping the best half — looks like ARS-style hyperparameters; verify.
    hp.nb_directions = int(policy.theta.shape[0] * policy.theta.shape[1])
    hp.nb_best_directions = int(hp.nb_directions / 2)
    normalizer = Normalizer(nb_inputs)

    print("start training")
    train(env, policy, normalizer, hp, parentPipes, args)

    # With multiprocessing enabled, send each worker a close command over its
    # pipe, then wait for all worker processes to exit.
    if args.mp:
        for parentPipe in parentPipes:
            parentPipe.send([_CLOSE, "pay2"])

        for p in processes:
            p.join()

    # --------------------------------------------------------------------------------
    # STOCH2 Test
    # env = sv.StochBulletEnv(render = True, gait = 'trot')
    # hp = HyperParameters()
    # nb_inputs = env.observation_space.shape[0]
    # nb_outputs = env.action_space.shape[0] * 2
    # args = 0
    # policy = Policy(nb_inputs, nb_outputs, hp.env_name, 0, args)
    # normalizer = Normalizer(nb_inputs)

    # deltas = policy.sample_deltas()
    # state = env.reset()
    # i = 0
    # hp.noise = 0.2