Ejemplo n.º 1
0
 def __init__(self, discount: float):
     """Create a CartPole-v1 environment and initialise episode state.

     Args:
         discount: Forwarded unchanged to the parent class initialiser.
     """
     super().__init__(discount)

     # Build the raw environment, then wrap it. The low/high lists are
     # handed to ScalingObservationWrapper as-is (one entry per
     # observation dimension, presumably -- confirm against the wrapper).
     raw_env = gym.make('CartPole-v1')
     self.env = ScalingObservationWrapper(
         raw_env,
         low=[-2.4, -2.0, -0.42, -3.5],
         high=[2.4, 2.0, 0.42, 3.5])

     # One Action per index of the environment's discrete action space.
     self.actions = [
         Action(index) for index in range(self.env.action_space.n)]

     # The observation history starts with whatever reset() returns.
     self.observations = [self.env.reset()]
     self.done = False
Ejemplo n.º 2
0
 def legal_actions(self) -> List[Action]:
     """Return one Action per move in ``self.board.legal_moves``.

     Each Action is built from the move's position (0, 1, 2, ...) in the
     iteration order of ``self.board.legal_moves``, not from the move
     itself.
     """
     return [
         Action(position)
         for position, _move in enumerate(self.board.legal_moves)
     ]
Ejemplo n.º 3
0
 def recurrent_inference(self, hidden_state, action) -> NetworkOutput:
     """Return a constant NetworkOutput with a uniform per-action mapping.

     Neither ``hidden_state`` nor ``action`` is used. The result carries
     0 in its first two positions, a dict assigning ``1 / action_size``
     to every Action index in ``range(self.action_size)``, and ``None``
     in its last position.
     """
     num_actions = self.action_size
     # Division stays inside the comprehension so an action size of zero
     # yields an empty mapping instead of raising.
     uniform = {
         Action(index): 1 / num_actions for index in range(num_actions)
     }
     return NetworkOutput(0, 0, uniform, None)
Ejemplo n.º 4
0
 def initial_inference(self, image) -> NetworkOutput:
     """Return a constant NetworkOutput with a uniform per-action mapping.

     ``image`` is not used. The result carries 0 in its first two
     positions, a dict assigning ``1 / action_size`` to every Action
     index in ``range(self.action_size)``, and ``None`` in its last
     position.
     """
     size = self.action_size
     uniform_policy = {}
     for action_index in range(size):
         # Computed per entry so a size of zero never divides.
         uniform_policy[Action(action_index)] = 1 / size
     return NetworkOutput(0, 0, uniform_policy, None)
Ejemplo n.º 5
0
 def build_policy_logits(policy_logits):
     """Map each Action index to the matching entry of ``policy_logits[0]``.

     Only the first element of ``policy_logits`` is read (presumably the
     single row of a batch of size one -- confirm against callers).
     """
     first_row = policy_logits[0]
     by_action = {}
     for position, value in enumerate(first_row):
         by_action[Action(position)] = value
     return by_action