Ejemplo n.º 1
0
      def dist_fn(dist):
        """Convert a distribution into a deterministic one located at its mode.

        Args:
          dist: Object exposing a `mode()` method.

        Returns:
          The result of `greedy_policy.DeterministicWithLogProb` called with
          `loc` set to the value returned by `dist.mode()`.

        Raises:
          ValueError: If `dist.mode()` raises `NotImplementedError`. The
            original error is attached as the cause of the `ValueError`.
        """
        try:
          greedy_action = dist.mode()
        except NotImplementedError as err:
          # Chain explicitly so the traceback reports the missing `mode` as the
          # direct cause instead of an unrelated error during handling.
          raise ValueError(
              "Your network's distribution does not implement "
              "mode making it incompatible with a greedy policy.") from err

        return greedy_policy.DeterministicWithLogProb(loc=greedy_action)
Ejemplo n.º 2
0
    def deterministic_action_distribution(self, time_step):
        """Build a deterministic distribution located at the table's action.

        The observation carried by `time_step` is passed to
        `self.table_lookup`, and the value it returns becomes the `loc` of a
        `greedy_policy.DeterministicWithLogProb`.
        """
        # Resolve the action for this step's observation from the table.
        table_action = self.table_lookup(time_step.observation)
        return greedy_policy.DeterministicWithLogProb(loc=table_action)