Ejemplo n.º 1
0
    def __init__(self, config, policy_params, env_name, noise,
                 min_task_runtime=0.2):
        """Build the environment, session, policy and noise table for a worker.

        Args:
            config: Configuration object. It is stored on the instance, and
                its ``calc_obstat_prob`` attribute is read by the final
                consistency check.
            policy_params: Mapping of keyword arguments; stored on the
                instance and unpacked into ``policies.MujocoPolicy``.
            env_name: Environment identifier handed to ``gym.make``.
            noise: Value wrapped in a ``SharedNoiseTable``.
            min_task_runtime: Stored unchanged on the instance; this method
                does not use it otherwise.

        Raises:
            AssertionError: If ``policy.needs_ob_stat`` does not equal
                ``config.calc_obstat_prob != 0``.
        """
        # Plain bookkeeping first: keep the constructor arguments around.
        self.config = config
        self.policy_params = policy_params
        self.min_task_runtime = min_task_runtime
        self.noise = SharedNoiseTable(noise)

        environment = gym.make(env_name)
        self.env = environment

        # Order matters below: session, then policy, then initialization.
        # NOTE(review): presumably the policy's variables are created against
        # this session and initialized by tf_util.initialize() -- confirm.
        self.sess = utils.make_session(single_threaded=True)
        policy = policies.MujocoPolicy(
            environment.observation_space,
            environment.action_space,
            **policy_params
        )
        self.policy = policy
        tf_util.initialize()

        # Unseeded generator, private to this instance.
        self.rs = np.random.RandomState()

        # The policy and the config must agree on whether observation
        # statistics are in play.
        assert policy.needs_ob_stat == (config.calc_obstat_prob != 0)
Ejemplo n.º 2
0
    }

    # Create the shared noise table.
    print("Creating shared noise table.")
    noise_array = create_shared_noise()
    noise = SharedNoiseTable(noise_array)

    # Create the workers.
    print("Creating workers.")
    workers = [
        Worker(config, policy_params, env_name, noise_array)
        for _ in range(num_workers)
    ]

    env = gym.make(env_name)
    sess = utils.make_session(single_threaded=False)
    policy = policies.MujocoPolicy(env.observation_space, env.action_space,
                                   **policy_params)
    tf_util.initialize()
    optimizer = optimizers.Adam(policy, stepsize)

    ob_stat = utils.RunningStat(env.observation_space.shape, eps=1e-2)

    episodes_so_far = 0
    timesteps_so_far = 0
    tstart = time.time()

    while True:
        step_tstart = time.time()
        theta = policy.get_trainable_flat()
        assert theta.dtype == np.float32