Example #1
    def _state_step(
        self, action: D.T_agent[D.T_concurrency[D.T_event]]
    ) -> TransitionOutcome[D.T_state, D.T_agent[TransitionValue[D.T_value]],
                           D.T_agent[D.T_info]]:

        # Get players' moves
        move1, move2 = action['player1'], action['player2']

        # Compute rewards
        r1, r2 = {
            (Move.rock, Move.rock): (0, 0),
            (Move.rock, Move.paper): (-1, 1),
            (Move.rock, Move.scissors): (1, -1),
            (Move.paper, Move.rock): (1, -1),
            (Move.paper, Move.paper): (0, 0),
            (Move.paper, Move.scissors): (-1, 1),
            (Move.scissors, Move.rock): (-1, 1),
            (Move.scissors, Move.paper): (1, -1),
            (Move.scissors, Move.scissors): (0, 0)
        }[move1, move2]

        # Compute num_move increment
        last_state = self._memory
        num_move = last_state.num_move + 1

        return TransitionOutcome(state=State(num_move=num_move),
                                 value={
                                     'player1': TransitionValue(reward=r1),
                                     'player2': TransitionValue(reward=r2)
                                 },
                                 termination=(num_move >= self._max_moves))
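A minimal usage sketch of the joint-action step above; the Move enum values and the domain instance are assumptions, not part of the original example. Each agent contributes one move in the action dict, and the returned value maps each agent to its own TransitionValue.

 # Hypothetical one-round call (domain assumed to be an initialized instance of the class above)
 action = {'player1': Move.rock, 'player2': Move.scissors}
 outcome = domain._state_step(action)
 assert outcome.value['player1'].reward == 1    # rock beats scissors
 assert outcome.value['player2'].reward == -1
 # outcome.termination becomes True once num_move reaches domain._max_moves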
Example #2
 def _get_next_state(
     self,
     memory: D.T_memory[D.T_state],
     action: D.T_agent[D.T_concurrency[D.T_event]],
 ) -> D.T_state:
     env = memory._context[0]
     if self._set_state is None or self._get_state is None:
         # No state getter/setter available: step a deep copy so the stored environment is untouched
         env = deepcopy(env)
     elif memory._context[4] != self._get_state(env):
         # Restore the environment to the state recorded in the memory's context before stepping
         self._set_state(env, memory._context[4])
     self._gym_env = env  # Just in case the simulation environment would be different from the planner's environment...
     obs, reward, done, info = env.step(action)
     outcome = TransitionOutcome(state=obs,
                                 value=Value(reward=reward),
                                 termination=done,
                                 info=info)
     # print('Transition:', str(memory._state), ' -> ', str(action), ' -> ', str(outcome.state))
     return GymDomainStateProxy(
         state=outcome.state,
         context=[
             env,
             memory._state,
             action,
             outcome,
             self._get_state(env) if
             (self._get_state is not None
              and self._set_state is not None) else None,
         ],
     )
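For reference, the context list assembled above packs five slots in a fixed order; a sketch of how a caller might unpack it (variable names are illustrative only):

 # proxy: a GymDomainStateProxy returned by _get_next_state above
 env, previous_raw_state, last_action, last_outcome, saved_env_state = proxy._context
 # saved_env_state is None when no state getter/setter pair was provided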
Example #3
 def _state_step(
     self, action: D.T_agent[D.T_concurrency[D.T_event]]
 ) -> TransitionOutcome[D.T_state, D.T_agent[Value[D.T_value]],
                        D.T_agent[D.T_predicate], D.T_agent[D.T_info], ]:
     o = super()._state_step(action)
     self._current_depth += 1
     self._cumulated_reward += o.value.reward
     # self._cumulated_dist_to_start += math.exp(-math.fabs(self._gym_env.sim.get_property_value(prp.position_distance_from_start_mag_mt)))
     self._cumulated_dist_to_start = self._gym_env.sim.get_property_value(
         prp.position_distance_from_start_mag_mt)
     self._cumulated_dist_to_line += math.exp(-math.fabs(
         self._gym_env.sim.get_property_value(prp.shortest_dist)))
     return TransitionOutcome(
         state=GymDomainStateProxy(
             state=o.state._state,
             context=(
                 self._current_depth,
                 self._cumulated_reward,
                 self._cumulated_dist_to_start,
                 self._cumulated_dist_to_line,
             ),
         ),
         value=o.value,
         termination=o.termination,
         info=o.info,
     )
Example #4
 def _state_step(
     self, action: D.T_agent[D.T_concurrency[D.T_event]]
 ) -> TransitionOutcome[D.T_state, D.T_agent[Value[D.T_value]],
                        D.T_agent[D.T_predicate], D.T_agent[D.T_info], ]:
     obs, reward, done, info = self._gym_env.step(action)
     return TransitionOutcome(state=obs,
                              value=Value(reward=reward),
                              termination=done,
                              info=info)
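The wrapper above targets the classic Gym step API, which returns a 4-tuple. Under the newer Gymnasium API, where step returns separate terminated and truncated flags, the same pattern would look roughly like this sketch:

 def _state_step(self, action):
     # Gymnasium-style 5-tuple step return (sketch, not part of the original example)
     obs, reward, terminated, truncated, info = self._gym_env.step(action)
     return TransitionOutcome(state=obs,
                              value=Value(reward=reward),
                              termination=terminated or truncated,
                              info=info)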
Example #5
 def _state_step(
     self, action: D.T_agent[D.T_concurrency[D.T_event]]
 ) -> TransitionOutcome[D.T_state, D.T_agent[TransitionValue[D.T_value]],
                        D.T_agent[D.T_info]]:
     o = super()._state_step(action)
     return TransitionOutcome(state=o.state,
                              value=TransitionValue(reward=o.value.reward - 1),
                              termination=o.termination,
                              info=o.info)
Example #6
 def _state_step(
     self, action: D.T_agent[D.T_concurrency[D.T_event]]
 ) -> TransitionOutcome[D.T_state, D.T_agent[TransitionValue[D.T_value]],
                        D.T_agent[D.T_info]]:
     o = super()._state_step(action)
     return TransitionOutcome(
         state=GymDomainStateProxy(state=normalize_and_round(o.state._state),
                                   context=o.state._context),
         value=TransitionValue(reward=o.value.reward - 1),
         termination=o.termination,
         info=o.info)
Example #7
 def _state_step(
     self, action: D.T_agent[D.T_concurrency[D.T_event]]
 ) -> TransitionOutcome[D.T_state, D.T_agent[Value[D.T_value]],
                        D.T_agent[D.T_predicate], D.T_agent[D.T_info], ]:
     # Restore the wrapped environment to the tracked state before stepping
     self._gym_env.set_state(self._current_state)
     o = super()._state_step(action)
     # Remember the new environment state for the next call
     self._current_state = self._gym_env.get_state()
     return TransitionOutcome(
         state=o.state,
         value=Value(reward=o.value.reward - 1),
         termination=o.termination,
         info=o.info,
     )
Example #8
 def _state_step(
     self, action: D.T_agent[D.T_concurrency[D.T_event]]
 ) -> TransitionOutcome[D.T_state, D.T_agent[Value[D.T_value]],
                        D.T_agent[D.T_predicate], D.T_agent[D.T_info], ]:
     obs, reward, done, info = self._gym_env.step(action)
     if self._set_state is not None and self._get_state is not None:
         # Env state can be saved/restored: carry the initial env state in the proxy's context
         state = GymDomainStateProxy(state=obs,
                                     context=self._initial_env_state)
     else:
         # Otherwise carry the initial environment object itself in the context
         state = GymDomainStateProxy(state=obs, context=self._init_env)
     return TransitionOutcome(state=state,
                              value=Value(reward=reward),
                              termination=done,
                              info=info)
Example #9
 def decode(outcome):
     return TransitionOutcome(
         state=MyShmProxy.StateProxy.decode(outcome[0]),
         value=MyShmProxy.TransitionValueProxy.decode(outcome[1:3]),
         termination=MyShmProxy.BoolProxy.decode(outcome[3]))
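Read together with the indices used above, the flat record consumed by decode is assumed to hold the encoded state in slot 0, the two-slot transition value in slots 1-2, and the termination flag in slot 3; a matching encoder would have to write the fields in that same order. The info field is not reconstructed here.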