def act(self, state, test=False):
    """Pick the next action for ``state`` and explain the choice.

    With an empty plan the explorer decides: it is first activated on the
    node for the current abstract state when that node exists, is active,
    and the explorer is idle. If the explorer is still idle afterwards, a
    test episode gets a uniformly random action and any other episode is
    ended. With a non-empty plan the method either teleports to the start
    of the last planned edge (at most once, never in test mode) or lets
    the worker act on the first planned edge.

    Args:
        state (State): current state
        test (bool): if True, no teleporting is used and the episode is
            never reset from here

    Returns:
        action (Action)
        justification (Justification)
    """
    if len(self._plan) == 0:
        current = self._graph.get_node(AS.AbstractState(state))
        start_exploring = (
            current is not None
            and current.active()
            and not self._explorer.active()
        )
        if start_exploring:
            # This is happening in a separate process, so this shared
            # graph doesn't get updated
            self._graph_updates.append(Visit(current))
            current.visit()
            self._explorer.activate(current)

        if self._explorer.active():
            explore_action, reason = self._explorer.act(state)
            return explore_action, Justification([], self._graph, reason)

        if not test:
            return EndEpisode(), Justification([], self._graph, "reset")

        # No resetting on test episodes!
        random_action = DefaultAction(
            random.randint(0, self._num_actions - 1))
        return random_action, Justification([], self._graph, "test random")

    # The plan is known to be non-empty from here on.
    may_teleport = (
        self._enable_teleporting and not test and not self._teleported)
    if may_teleport and self._plan[-1].start.teleport is not None:
        self._teleported = True
        # Only the final edge remains relevant once we jump to its start.
        self._plan = self._plan[-1:]
        target = self._plan[-1].start
        reason = "teleport to: {}".format(target.uid)
        # NOTE(review): the plan list is passed here without copy.copy,
        # unlike the worker branch below -- confirm the aliasing is
        # intended.
        justification = Justification(self._plan, self._graph, reason)
        return target.teleport, justification

    edge = self._plan[0]
    # Once an edge that is still training has been followed, setting a
    # teleport stays disallowed.
    if self._allow_setting_teleport:
        self._allow_setting_teleport = not edge.training()

    worker_choice = self._worker.act(
        state, edge, len(self._worker_rewards), sum(self._worker_rewards))
    action = DefaultAction(worker_choice)

    reason = "{} -> {} step={} [{:.2f}], train={}, [{:.2f}]".format(
        edge.start.uid,
        edge.end.uid,
        len(self._worker_rewards),
        sum(self._worker_rewards),
        edge.train_count,
        edge.success_rate,
    )
    # Snapshot the plan so the justification is not tied to this list.
    return action, Justification(copy.copy(self._plan), self._graph, reason)
def act(self, state):
    """Return the next exploration action and a description of it.

    While ``self._no_ops_left`` is positive the action is
    ``DefaultAction(0)``; after that the action index is drawn uniformly
    from ``0 .. self._num_actions - 1``. Every call uses up one of the
    remaining exploration steps.

    Args:
        state: not read by this method

    Returns:
        action (DefaultAction)
        s (str): summary of the choice, the node's visit counts and the
            steps left

    Raises:
        ValueError: if the explorer is not active
    """
    if not self.active():
        raise ValueError("Exploration not active")

    self._steps_left -= 1
    node = self._node

    if self._no_ops_left > 0:
        summary = "{} no-ops at {}: {} / {} visits, steps left {}".format(
            self._no_ops,
            node.uid,
            node.visit_count,
            node.min_visit_count,
            self._steps_left,
        )
        self._no_ops_left -= 1
        return DefaultAction(0), summary

    index = random.randint(0, self._num_actions - 1)
    summary = "uniform random from {}: {} / {} visits, steps left {}".format(
        node.uid,
        node.visit_count,
        node.min_visit_count,
        self._steps_left,
    )
    return DefaultAction(index), summary
def act(self, state):
    """Return the currently repeated random action and a description.

    When the repeat counter has run out, a new count is taken from
    ``self._repeat_sampler()`` and a new action index is drawn uniformly
    from ``0 .. self._num_actions - 1``. The counter is then decremented
    and the stored action returned, so each sampled action is returned on
    this call and on later calls until the counter runs out. Every call
    uses up one of the remaining exploration steps.

    Args:
        state: not read by this method

    Returns:
        action (DefaultAction)
        s (str): summary with the remaining repeat count, the node's
            visit counts and the steps left

    Raises:
        ValueError: if the explorer is not active
    """
    if not self.active():
        raise ValueError("RepeatedActionsExplorer not active")

    self._steps_left -= 1

    # `<= 0` rather than `== 0`: if the sampler returns 0 the decrement
    # below leaves the counter at -1, and an equality test would never
    # fire again, repeating one action for the rest of exploration.
    if self._repeat <= 0:
        self._repeat = self._repeat_sampler()
        self._repeated_action = DefaultAction(
            random.randint(0, self._num_actions - 1))
    self._repeat -= 1

    node = self._node
    s = "repeat {} random from {}: {} / {} visits, steps left {}".format(
        self._repeat,
        node.uid,
        node.visit_count,
        node.min_visit_count,
        self._steps_left,
    )
    return self._repeated_action, s