Example #1
    def score_goals(self, sampled_ags, info):
        """ Lower is better """
        density_module = getattr(self, self.density_module)
        if not density_module.ready:
            density_module._optimize(force=True)
        interest_module = None
        if hasattr(self, self.interest_module):
            interest_module = getattr(self, self.interest_module)
            if not interest_module.ready:
                interest_module = None

        # sampled_ags is an np.array of shape NUM_ENVS x NUM_SAMPLED_GOALS x GOAL_DIM
        # (the first two dims are arbitrary)
        num_envs, num_sampled_ags = sampled_ags.shape[:2]

        # Score the sampled_ags to get log densities (exponentiation happens inside the softmax below)
        flattened_sampled_ags = sampled_ags.reshape(num_envs * num_sampled_ags,
                                                    -1)
        sampled_ag_scores = density_module.evaluate_log_density(
            flattened_sampled_ags)
        if interest_module:
            # Interest is ~det(feature_transform), so subtracting log interest
            # effectively adds ~det(inverse feature_transform) for coverage (COV).
            sampled_ag_scores -= interest_module.evaluate_log_interest(
                flattened_sampled_ags)
        sampled_ag_scores = sampled_ag_scores.reshape(
            num_envs, num_sampled_ags)  # these are log densities

        # Take softmax of the alpha * log density.
        # If alpha = -1, this gives normalized inverse densities (higher is rarer).
        # If alpha < -1, this skews the distribution further toward low-density samples.
        normalized_inverse_densities = softmax(sampled_ag_scores * self.alpha)
        normalized_inverse_densities *= -1.  # make negative / reverse order so that lower is better.

        return normalized_inverse_densities
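
Below is a minimal standalone sketch of the alpha-softmax scoring used above. It assumes numpy inputs and uses scipy.special.softmax in place of the project's own softmax helper; the numbers are toy values, not output of the actual density module.

import numpy as np
from scipy.special import softmax

log_densities = np.array([[-1.0, -3.0, -5.0]])  # one env, three sampled goals
alpha = -1.0

# softmax(alpha * log p) with alpha = -1 is proportional to 1 / p:
# rarer goals (lower log density) receive larger weights.
scores = -softmax(log_densities * alpha, axis=-1)
print(scores)  # the most negative entry (the rarest goal) is the best pick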
Example #2
  def score_goals(self, sampled_ags, info):
    """ Higher entropy gain is better """
    if not self.ag_kde.ready:
      self.ag_kde._optimize(force=True)

    if not self.bg_kde.ready:
      self.bg_kde._optimize(force=True)

    if not self.bgag_kde.ready:
      self.bgag_kde._optimize(force=True)

    # sampled_ags is an np.array of shape NUM_ENVS x NUM_SAMPLED_GOALS x GOAL_DIM
    # (the first two dims are arbitrary)
    num_envs, num_sampled_ags = sampled_ags.shape[:2]

    # Flatten the sampled ags (e.g., predictions from a mixture density network)
    # to reuse as candidate behavioral goals (bgs)
    candidate_bgs = sampled_ags.reshape(num_envs * num_sampled_ags, -1)

    # Reuse the candidate bgs as potential ags.
    # Note: a sliding window reuses sampled_ags as the potential ags for each bg,
    # with the prior that each bg has one ag identical to itself (i.e., the bg is reached).
    num_ags = 10  # TODO: don't hard-code this
    indexer = np.arange(num_envs * num_sampled_ags).reshape(-1, 1) + np.arange(num_ags).reshape(1, -1)
    indexer %= num_envs * num_sampled_ags  # To wrap around to the beginning
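    # E.g., with num_envs * num_sampled_ags = 4 and num_ags = 2, indexer is
    # [[0, 1], [1, 2], [2, 3], [3, 0]]: row i pairs bg i with itself and its
    # successor, wrapping around at the end.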
    ags_samples = candidate_bgs[indexer]  # Shape num_envs*num_sampled_ags, num_ags, dim

    candidate_bgs_repeat = np.repeat(candidate_bgs[:, np.newaxis, :], num_ags,
                                     axis=1)  # Shape num_envs*num_sampled_ags, num_ags, dim
    joint_candidate_bgags = np.concatenate([candidate_bgs_repeat, ags_samples], axis=-1)
    joint_candidate_bgags = joint_candidate_bgags.reshape(num_envs * num_sampled_ags * num_ags, -1)

    # Score the joint (bg, ag) candidates to get log densities
    joint_candidate_score = self.bgag_kde.evaluate_log_density(joint_candidate_bgags)
    joint_candidate_score = joint_candidate_score.reshape(num_envs * num_sampled_ags,
                                                          num_ags)  # these are log densities

    candidate_bgs_score = self.bg_kde.evaluate_log_density(
        candidate_bgs_repeat.reshape(num_envs * num_sampled_ags * num_ags, -1))
    candidate_bgs_score = candidate_bgs_score.reshape(num_envs * num_sampled_ags, num_ags)  # these are log densities
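    # Joint minus marginal log density gives log p(ag | bg); the softmax over
    # the ag window normalizes these into per-bg conditional weights.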
    cond_candidate_score = joint_candidate_score - candidate_bgs_score
    cond_candidate_score = softmax(cond_candidate_score, axis=1)

    # Compute entropy gain for the predicted achieved goal
    beta = 1 / len(self.replay_buffer.buffer)
    sampled_ag_entr_new = self.ag_kde.evaluate_elementwise_entropy(candidate_bgs, beta=beta)
    sampled_ag_entr_old = self.ag_kde.evaluate_elementwise_entropy(candidate_bgs, beta=0.)
    sampled_ag_entr_gain = sampled_ag_entr_new - sampled_ag_entr_old
    sampled_ag_entr_gain /= beta  # Normalize by beta  # TODO: remove if unnecessary
    sampled_ag_entr_gain = sampled_ag_entr_gain[indexer]  # Shape num_envs*num_sampled_ags, num_ags
    sampled_ag_entr_gain *= cond_candidate_score
    sampled_ag_entr_gain = sampled_ag_entr_gain.mean(axis=1)
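    # Weighting per-ag entropy gains by the conditional weights and averaging
    # yields (up to a constant factor) the expected entropy gain per candidate bg.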

    scores = sampled_ag_entr_gain.reshape(num_envs, num_sampled_ags)
    scores *= -1.  # make negative / reverse order so that lower is better.

    return scores
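
A standalone sketch of the conditional-weighting step above: subtracting the marginal log density from the joint log density gives log p(ag | bg), and a softmax over the ag window normalizes it into per-bg weights. The numbers are toy values, and scipy.special.softmax stands in for the project's softmax helper.

import numpy as np
from scipy.special import softmax

# Toy values: 2 candidate bgs, an ag window of size 2.
log_joint = np.array([[-2.0, -4.0],     # log p(bg_i, ag_j)
                      [-3.0, -3.0]])
log_marginal = np.array([[-1.0, -1.0],  # log p(bg_i), repeated across the window
                         [-2.0, -2.0]])

# log p(bg, ag) - log p(bg) = log p(ag | bg); softmax normalizes per bg.
cond_weights = softmax(log_joint - log_marginal, axis=1)
print(cond_weights)              # each row sums to 1
print(cond_weights.sum(axis=1))  # [1. 1.]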