def dist_fn(dist):
    """Turn a distribution into a deterministic one located at its mode.

    Args:
      dist: Object exposing a `mode()` method.

    Returns:
      A `greedy_policy.DeterministicWithLogProb` whose `loc` is the value
      returned by `dist.mode()`.

    Raises:
      ValueError: If `dist.mode()` raises `NotImplementedError`. The
        original error is attached as the explicit cause (`__cause__`).
    """
    try:
        greedy_action = dist.mode()
    except NotImplementedError as err:
        # Chain explicitly so the traceback reports the missing `mode` as the
        # direct cause instead of an error raised while handling another.
        raise ValueError(
            "Your network's distribution does not implement "
            "mode making it incompatible with a greedy policy.") from err
    return greedy_policy.DeterministicWithLogProb(loc=greedy_action)
def deterministic_action_distribution(self, time_step):
    """Build a deterministic distribution located at the table's action.

    Args:
      time_step: Object exposing an `observation` attribute; that
        observation is passed to `self.table_lookup`.

    Returns:
      A `greedy_policy.DeterministicWithLogProb` whose `loc` is the value
      returned by `self.table_lookup` for the observation.
    """
    # Resolve the action first, then wrap it in the deterministic distribution.
    table_action = self.table_lookup(time_step.observation)
    return greedy_policy.DeterministicWithLogProb(loc=table_action)