Example #1
def step(self, action, debug=0):
    # accept either a discrete action index or a vector of per-action scores
    if isinstance(action, np.ndarray):
        action = np.argmax(action)
    is_jump = action == 1
    move = self.innerAI.select_move(self.observation,
                                    is_jump=is_jump,
                                    debug=debug)
    if move[0] == 'skip':
        # the selected move is a skip: end the episode with a penalty
        return self.observation, -10, True, {'nothing': 'there'}
    self.observation, mv_error = apply_move(move, AGENT_ID,
                                            self.observation)
    if debug:
        print(self.state)
    # Greedy AI processing: the opponent takes its turn
    move = self.enemyAI.select_move(self.observation, debug)
    self.observation, _ = apply_move(move, OPPENENT_ID, self.observation)
    done = is_done(self.observation)
    self.reward = winner(AGENT_ID, self.observation)
    if mv_error:
        # an illegal agent move ends the episode with a penalty
        done = True
        self.reward = -20
    if debug:
        print(done)
    # self.action_space = moves(1, self.state) - THIS SHOULD BE STATIC I BELIEVE
    return self.observation, self.reward, done, {'nothing': 'there'}
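The first branch above lets step() accept either a discrete action index or a raw policy-network output: an ndarray of per-action scores is collapsed to a single action with argmax, and action 1 is interpreted as a jump. A small runnable sketch of that conversion (the score values are made up for illustration):

import numpy as np

policy_output = np.array([0.2, 0.8])   # hypothetical scores for [move, jump]
action = np.argmax(policy_output)      # -> 1
is_jump = action == 1                  # -> True, same check as in step() above
print(action, is_jump)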
Example #2
def step(self, action, debug=0):
    if debug:
        print(self.state)
    # translate the discrete action index into a board move
    move = self.action_to_move(action)
    self.observation, mv_error = apply_move(move, AGENT_ID,
                                            self.observation)
    if debug:
        print(self.state)
    # Greedy AI processing: the opponent takes its turn
    move = self.enemyAI.select_move(self.observation, debug)
    self.observation, _ = apply_move(move, OPPENENT_ID, self.observation)
    reward = score(AGENT_ID, self.observation)
    done = is_done(self.observation)
    if mv_error:
        # an illegal agent move ends the episode with a penalty
        done = True
        reward = WRONG_MOVE_PUNISHMENT
    if debug:
        print(done)
    # self.action_space = moves(1, self.state) - THIS SHOULD BE STATIC I BELIEVE
    return self.observation, reward, done, {}  # Gym expects an info dict here
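Both step() variants follow the Gym-style (observation, reward, done, info) return contract, so either can be driven by the usual episode loop. A minimal driver sketch; the BoardGameEnv constructor, reset() and action_space.sample() are assumptions about the surrounding class, only the 4-tuple returned by step() is taken from the code above:

# Hedged driver sketch: BoardGameEnv, reset() and action_space.sample()
# are assumed; step() returns the (obs, reward, done, info) tuple shown above.
env = BoardGameEnv()
observation = env.reset()
done, total_reward = False, 0
while not done:
    action = env.action_space.sample()
    observation, reward, done, info = env.step(action)
    total_reward += reward
print(total_reward)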
Example #3
    def minimax(self, observation, depth=-1):
        player = env.current_player(observation)
        # stop at terminal positions or at the depth cutoff and score the node;
        # the default depth=-1 never reaches 0, i.e. search to terminal states
        if env.is_done(observation) or depth == 0:
            return env.score(self.player, observation)
        else:
            actions = env.legal_moves(player, observation)
            scores = []
            for a in actions:
                self.states_evaluated += 1
                s, _ = env.apply_move(a, player, observation)
                score = self.minimax(s, depth=depth - 1)
                scores.append(score)

            # our own turn maximizes the score, the opponent's turn minimizes it
            if player == self.player:
                maximum = np.argmax(scores)
                return scores[maximum]
            else:
                minimum = np.argmin(scores)
                return scores[minimum]
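The recursion above bottoms out at terminal positions (or the depth cutoff) and alternates between maximizing on the agent's turns and minimizing on the opponent's. A self-contained toy version of the same idea, with leaf scores written out by hand instead of produced by env, just to show the alternation:

# Toy minimax on a hand-written two-ply game tree; the leaf scores are made up
# and stand in for env.score(self.player, observation) at terminal positions.
def toy_minimax(node, maximizing=True):
    if isinstance(node, (int, float)):   # a leaf: return its score directly
        return node
    child_scores = [toy_minimax(child, not maximizing) for child in node]
    return max(child_scores) if maximizing else min(child_scores)

tree = [[3, 5], [2, 9]]     # two of our moves, each answered by two replies
print(toy_minimax(tree))    # -> 3: the opponent minimizes, we take the better branch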
Example #4
    def look_forward(self, observation):
        # evaluate every legal root move for this player with depth-limited minimax
        all_my_moves = env.legal_moves(self.player, observation)
        actions = []

        for m in all_my_moves:
            self.states_evaluated += 1
            new_observation, _ = env.apply_move(m, self.player, observation)
            score = self.minimax(new_observation, self.ply - 1)
            actions.append({
                'action': m,
                'score': score,
                'immediate_score': env.score(self.player, new_observation)
            })
        # prefer the best minimax score, breaking ties by the immediate score
        best_action = max(actions,
                          key=lambda p: (p['score'], p['immediate_score']))
        return best_action['action']
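look_forward() is the entry point of the search: it scores every legal root move with minimax and returns the best one, breaking ties by the immediate position score. A hedged sketch of the surrounding agent; only player, ply and states_evaluated are read by the methods above, the class name and constructor are assumptions:

# Hedged sketch of the enclosing agent class; the three attributes are the ones
# minimax()/look_forward() above actually use, everything else is assumed.
class MinimaxAgent:
    def __init__(self, player, ply=3):
        self.player = player           # the id this agent plays as
        self.ply = ply                 # search depth handed to minimax
        self.states_evaluated = 0      # incremented once per evaluated state

    # minimax() and look_forward() from the examples above would live here.

# agent = MinimaxAgent(AGENT_ID, ply=3)
# move = agent.look_forward(observation)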