Example #1
0
    def act(self, state, test=False):
        """Returns action for the current state.

        Args:
            state (State): current environment state.
            test (bool): if True, no teleporting is used and episodes
                are never reset early.

        Returns:
            action (Action)
            justification (Justification)
        """
        # Plan exhausted: either hand control to the explorer, act randomly
        # (test episodes), or end the episode to trigger a reset.
        if not self._plan:
            node = self._graph.get_node(AS.AbstractState(state))
            if (node is not None and node.active()
                    and not self._explorer.active()):
                # This is happening in a separate process, so this shared
                # graph doesn't get updated
                self._graph_updates.append(Visit(node))
                node.visit()
                self._explorer.activate(node)

            if self._explorer.active():
                action, s = self._explorer.act(state)
                return action, Justification([], self._graph, s)
            elif test:  # No resetting on test episodes!
                action = DefaultAction(random.randint(0,
                                                      self._num_actions - 1))
                justification = Justification([], self._graph, "test random")
                return action, justification
            else:
                return EndEpisode(), Justification([], self._graph, "reset")
        # NOTE: the plan is non-empty here (we are in the else of the
        # emptiness check), so no separate len() > 0 guard is needed.
        elif (self._enable_teleporting and not test and not self._teleported
              and self._plan[-1].start.teleport is not None):
            # Teleport at most once per episode, straight to the last
            # planned node; keep only that final plan step.
            self._teleported = True
            self._plan = self._plan[-1:]
            s = "teleport to: {}".format(self._plan[-1].start.uid)
            justification = Justification(self._plan, self._graph, s)
            return self._plan[-1].start.teleport, justification

        # Follow the plan: delegate the low-level action to the worker for
        # the next edge, reporting accumulated worker reward and step count.
        next_edge = self._plan[0]
        # Once any edge in the plan is still training, stop allowing
        # teleport targets to be set for the rest of this episode.
        self._allow_setting_teleport = \
                self._allow_setting_teleport and not next_edge.training()
        action = DefaultAction(
            self._worker.act(state, next_edge, len(self._worker_rewards),
                             sum(self._worker_rewards)))
        s = "{} -> {} step={} [{:.2f}], train={}, [{:.2f}]".format(
            next_edge.start.uid, next_edge.end.uid, len(self._worker_rewards),
            sum(self._worker_rewards), next_edge.train_count,
            next_edge.success_rate)
        justification = Justification(copy.copy(self._plan), self._graph, s)
        return action, justification
Example #2
0
  def act(self, state):
    """Return the next exploration action with a justification string.

    Emits no-op actions while any remain budgeted, then falls back to
    uniformly random actions. Consumes one step of the budget per call.

    Raises:
      ValueError: if exploration is not currently active.
    """
    if not self.active():
      raise ValueError("Exploration not active")

    self._steps_left -= 1
    if self._no_ops_left > 0:
      # Still in the no-op warm-up phase; the count is decremented after
      # the decision so the message reflects the pre-decrement budget.
      self._no_ops_left -= 1
      justification = "{} no-ops at {}: {} / {} visits, steps left {}".format(
          self._no_ops, self._node.uid, self._node.visit_count,
          self._node.min_visit_count, self._steps_left)
      return DefaultAction(0), justification

    chosen = random.randint(0, self._num_actions - 1)
    justification = "uniform random from {}: {} / {} visits, steps left {}".format(
        self._node.uid, self._node.visit_count, self._node.min_visit_count,
        self._steps_left)
    return DefaultAction(chosen), justification
Example #3
0
  def act(self, state):
    """Return the current repeated action, resampling when exhausted.

    Whenever the previous repetition run is used up, samples a fresh
    repeat count and a fresh uniformly random action; always consumes
    one step of the budget.

    Raises:
      ValueError: if the explorer is not currently active.
    """
    if not self.active():
      raise ValueError("RepeatedActionsExplorer not active")

    self._steps_left -= 1
    needs_resample = self._repeat == 0
    if needs_resample:
      self._repeat = self._repeat_sampler()
      self._repeated_action = DefaultAction(
          random.randint(0, self._num_actions - 1))

    # Decrement first: the message reports repetitions remaining.
    self._repeat -= 1
    justification = (
        "repeat {} random from {}: {} / {} visits, steps left {}".format(
            self._repeat, self._node.uid, self._node.visit_count,
            self._node.min_visit_count, self._steps_left))
    return self._repeated_action, justification