コード例 #1
0
    def start(self, verbose=False):
        """Play one game to completion and return its outcome.

        Players are looked up in ``self.players`` by the integer value of
        their mark; the player stored under ``1`` moves first and turns
        then alternate via ``mark.opponent().to_int()``.

        Args:
            verbose: When true, print the board before every move and the
                final position / outcome when the game ends.

        Returns:
            The winning player's mark, or ``Mark(Empty())`` when the game
            ends in a draw.
        """
        state = State()
        current_player_mark = 1
        result = None
        while True:
            current_player = self.players[current_player_mark]
            mark = current_player.mark
            if verbose:
                print("%s" % (state.to_array()))
                # NOTE(review): output() is wrapped in print here but called
                # bare in the end-of-game branches below; kept as found --
                # confirm whether output() prints or returns the board.
                print(state.output())
                print("-" * 5)

            # Let the current player pick a cell, then apply the move.
            index = current_player.select_index(state)
            state = state.set(index, mark)

            # A reward that depends on the intermediate state could be
            # evaluated here; in tic-tac-toe it stays 0 until the game is
            # decided.
            current_player.learn(0)

            if state.is_win(mark):
                result = mark
                # Reward for the winner (the extra True presumably flags the
                # final update of the episode -- confirm against learn()).
                current_player.learn(1, True)
                # Penalty for the loser.
                self.players[result.opponent().to_int()].learn(-1, True)
                if verbose:
                    print("%s" % (state.to_array()))
                    print("-" * 5)
                    state.output()
                    print("-" * 5)
                    print("%s win!!!" % (mark.to_string()))
                break

            if state.is_draw():
                result = Mark(Empty())
                # values() works on both Python 2 and 3, unlike itervalues().
                for player in self.players.values():
                    player.learn(0, True)
                if verbose:
                    state.output()
                    print("draw.")
                break

            # Hand the turn over to the opponent.
            current_player_mark = mark.opponent().to_int()

        return result
コード例 #2
0
    def start(self, verbose=False):
        """Run a single match until it is won or drawn and return the result.

        ``self.players`` maps the integer form of a mark to the player using
        it. The player under key ``1`` opens the game; after each move the
        turn passes to ``mark.opponent().to_int()``.

        Args:
            verbose: When true, print the board before each move and the
                final position / outcome once the match is over.

        Returns:
            The mark of the winning player, or ``Mark(Empty())`` if the
            match is drawn.
        """
        state = State()
        current_player_mark = 1
        result = None
        while True:
            current_player = self.players[current_player_mark]
            mark = current_player.mark
            if verbose:
                print("%s" % (state.to_array()))
                # NOTE(review): output() is wrapped in print here but called
                # bare in the end-of-game branches below; kept as found --
                # confirm whether output() prints or returns the board.
                print(state.output())
                print("-" * 5)

            # The current player selects a cell and the move is applied.
            index = current_player.select_index(state)
            state = state.set(index, mark)

            # If the intermediate state itself carried a reward it would be
            # computed here; for tic-tac-toe the reward is 0 until the match
            # is decided.
            current_player.learn(0)

            if state.is_win(mark):
                result = mark
                # Winner's reward (the extra True presumably marks the final
                # update of the episode -- confirm against learn()).
                current_player.learn(1, True)
                # Loser's reward.
                self.players[result.opponent().to_int()].learn(-1, True)
                if verbose:
                    print("%s" % (state.to_array()))
                    print("-" * 5)
                    state.output()
                    print("-" * 5)
                    print("%s win!!!" % (mark.to_string()))
                break

            if state.is_draw():
                result = Mark(Empty())
                # values() works on both Python 2 and 3, unlike itervalues().
                for player in self.players.values():
                    player.learn(0, True)
                if verbose:
                    state.output()
                    print("draw.")
                break

            # Switch to the other player for the next turn.
            current_player_mark = mark.opponent().to_int()

        return result