Ejemplo n.º 1
0
def run_po_rollout_batch(batch_size, rs_seed, noise_std=None):
    """Run a mirrored pair of rollout batches around the shared parameters.

    ``batch_size`` noise indices are sampled with a ``RandomState`` seeded
    from ``rs_seed``.  For each index, the parameter vector returned by
    ``fiber_get_theta()`` is rolled out twice through
    ``interaction_shared.rollout_batch``: once with the scaled noise added
    (stored in column 0 of the results) and once with it subtracted
    (column 1).

    Args:
        batch_size: Number of noise indices to sample; each one is rolled
            out twice (plus and minus).
        rs_seed: Seed for the ``np.random.RandomState`` that is used for
            noise-index sampling and then passed on to both rollouts.
        noise_std: Multiplier applied to every noise vector.  Required in
            practice even though the signature defaults it to ``None``.

    Returns:
        A ``POResult`` whose ``returns``, ``lengths`` and ``final_xpos`` have
        shape ``(batch_size, 2)``, whose ``bcs`` is the stacked pair with its
        first two axes swapped, and whose ``obs_sum`` / ``obs_sq`` /
        ``obs_count`` come from the second (minus-noise) rollout only.
        ``time`` is the wall-clock duration of the call in seconds.

    Raises:
        ValueError: If ``noise_std`` is ``None``.
    """
    # Validate before touching any shared state; unlike ``assert`` this check
    # is not removed when Python runs with -O.
    if noise_std is None:
        raise ValueError('noise_std must be provided')

    t_init = time.time()
    interaction = interaction_shared
    theta = fiber_get_theta()
    obs_mean, obs_std = fiber_get_obs_stats()
    n_params = len(theta)

    # Constructing RandomState with a seed already seeds it, so no separate
    # seed() call is needed.
    random_state = np.random.RandomState(rs_seed)
    noise_inds = np.asarray(
        [noise.sample_index(random_state, n_params)
         for _ in range(batch_size)],
        dtype='int')

    returns = np.zeros((batch_size, 2))
    final_xpos = np.zeros((batch_size, 2))
    lengths = np.zeros((batch_size, 2), dtype='int')
    bcs = [None] * 2

    # Mirror sampling, positive half: theta + noise_std * noise.  The
    # observation statistics of this half are discarded.
    thetas = (theta + noise_std * noise.get(noise_idx, n_params)
              for noise_idx in noise_inds)
    (returns[:, 0], lengths[:, 0], bcs[0], final_xpos[:, 0],
     _, _, _) = interaction.rollout_batch(
         thetas=thetas,
         batch_size=batch_size,
         random_state=random_state,
         obs_mean=obs_mean,
         obs_std=obs_std)

    # Mirror sampling, negative half: theta - noise_std * noise, reusing the
    # same noise indices.  Its observation statistics are the ones returned.
    thetas = (theta - noise_std * noise.get(noise_idx, n_params)
              for noise_idx in noise_inds)
    (returns[:, 1], lengths[:, 1], bcs[1], final_xpos[:, 1],
     obs_sum, obs_sq, obs_count) = interaction.rollout_batch(
         thetas=thetas,
         batch_size=batch_size,
         random_state=random_state,
         obs_mean=obs_mean,
         obs_std=obs_std)

    end = time.time() - t_init
    return POResult(returns=returns,
                    noise_inds=noise_inds,
                    lengths=lengths,
                    # Put the batch axis first and the plus/minus axis second.
                    bcs=np.swapaxes(np.array(bcs), 0, 1),
                    obs_sum=obs_sum,
                    obs_sq=obs_sq,
                    obs_count=obs_count,
                    time=end,
                    final_xpos=final_xpos)
Ejemplo n.º 2
0
def run_po_rollout_batch(batch_size, noise_theta, noise_std=None):
    """Roll out ``batch_size`` evaluations of one perturbed parameter vector.

    The parameter vector returned by ``fiber_get_theta()`` is shifted by
    ``noise_std`` times the noise vector identified by ``noise_theta``, and
    that same perturbed vector is handed to
    ``interaction_shared.rollout_batch`` ``batch_size`` times.

    Args:
        batch_size: Number of rollouts to run with the perturbed parameters.
        noise_theta: Index passed to ``noise.get`` to select the noise
            vector; it is echoed back as ``noise_inds`` in the result.
        noise_std: Multiplier applied to the noise vector.  Required in
            practice even though the signature defaults it to ``None``.

    Returns:
        A ``POResult`` built from the values returned by ``rollout_batch``,
        with ``bcs`` converted to an array and its first two axes swapped,
        and ``time`` set to the wall-clock duration of the call in seconds.

    Raises:
        ValueError: If ``noise_std`` is ``None``.
    """
    # Validate before touching any shared state; unlike ``assert`` this check
    # is not removed when Python runs with -O.
    if noise_std is None:
        raise ValueError('noise_std must be provided')

    t_init = time.time()
    interaction = interaction_shared
    theta = fiber_get_theta()
    obs_mean, obs_std = fiber_get_obs_stats()

    # Unseeded on purpose: each call gets a freshly initialised generator.
    random_state = np.random.RandomState()

    # The perturbation is identical for every rollout, so look it up and
    # scale it once.  Each yielded item is still a fresh array.
    perturbation = noise_std * noise.get(noise_theta, len(theta))
    thetas = (theta + perturbation for _ in range(batch_size))

    (returns, lengths, bcs, final_xpos,
     obs_sum, obs_sq, obs_count) = interaction.rollout_batch(
         thetas=thetas,
         batch_size=batch_size,
         random_state=random_state,
         obs_mean=obs_mean,
         obs_std=obs_std)

    end = time.time() - t_init
    return POResult(returns=returns,
                    noise_inds=noise_theta,
                    lengths=lengths,
                    # NOTE(review): swapaxes(0, 1) needs an array with at
                    # least two dimensions; confirm the shape of the bcs
                    # value returned by rollout_batch.
                    bcs=np.swapaxes(np.array(bcs), 0, 1),
                    obs_sum=obs_sum,
                    obs_sq=obs_sq,
                    obs_count=obs_count,
                    time=end,
                    final_xpos=final_xpos)
Ejemplo n.º 3
0
    def start_step(self, theta):
        """Broadcast the current state and launch one rollout chunk per noise index.

        Args:
            theta: Current parameter vector.  It is broadcast through
                ``self.broadcast_theta`` and used as the centre of the
                perturbations.

        Returns:
            A ``(thetas, training_task)`` tuple.  ``thetas`` is the list of
            perturbed parameter vectors
            ``theta + self.noise_std * noise.get(idx, len(theta))``, one per
            sampled noise index.  ``training_task`` is the concatenation of
            everything ``self.start_chunk`` returned, in the same order.
        """
        self.broadcast_theta(theta)

        rs_seed = np.random.randint(np.int32(2**31 - 1))
        # Constructing RandomState with a seed already seeds it, so no
        # separate seed() call is needed.
        random_state = np.random.RandomState(rs_seed)

        n_params = len(theta)
        n_thetas = (self.batch_size * self.batches_per_step * 2
                    // self.nb_evals + 1)
        noise_inds = np.asarray(
            [noise.sample_index(random_state, n_params)
             for _ in range(n_thetas)],
            dtype='int')

        self.broadcast_obs_stats(self.obs_mean, self.obs_std)

        thetas = [
            theta + self.noise_std * noise.get(noise_id, n_params)
            for noise_id in noise_inds
        ]

        # Workers receive only the noise index (not the perturbed vector) as
        # the second positional argument of run_po_rollout_batch.
        training_task = []
        for noise_id in noise_inds:
            training_task.extend(
                self.start_chunk(run_po_rollout_batch, self.batch_size,
                                 noise_id, self.noise_std))
        return thetas, training_task
Ejemplo n.º 4
0
 def compute_grads(self, noise_inds, fitness, theta):
     """Estimate a gradient from mirrored fitness values.

     Args:
         noise_inds: Iterable of indices passed to ``noise.get`` to fetch
             the noise vector of each sample.
         fitness: Two-column array; column 0 minus column 1 is used as the
             weight of the corresponding noise vector.
         theta: Parameter vector; only its length is used, as the size
             argument of ``noise.get``.

     Returns:
         The weighted sum of the noise vectors divided by ``len(fitness)``,
         and additionally by ``self.noise_std`` when the
         ``divide_gradient_by_noise_std`` optimizer option is truthy.
     """
     mirrored_diff = fitness[:, 0] - fitness[:, 1]
     # Lazy on purpose: noise vectors are fetched as the summation consumes
     # them.
     noise_vectors = (noise.get(noise_idx, len(theta))
                      for noise_idx in noise_inds)
     # The second value returned by batched_weighted_sum is not needed here.
     grads, _ = batched_weighted_sum(mirrored_diff,
                                     noise_vectors,
                                     batch_size=500)
     grads /= len(fitness)

     optimizer_args = self.args['optimizer_args']
     if optimizer_args['divide_gradient_by_noise_std']:
         grads /= self.noise_std
     return grads