Beispiel #1
0
    def start(self, verbose=False):
        '''
        対戦の開始
        '''
        state = State()
        current_player_mark = 1
        result = None
        while (True):
            #print("="*30)
            current_player = self.players[current_player_mark]
            if verbose:
                print("%s" % (state.to_array()))
                print state.output()
                print("-" * 5)
            # プレイヤーの行動の選択
            index = current_player.select_index(state)
            #print("%s selected %i" % (self.players[current_player_mark].mark.to_string(), index))
            state = state.set(index, self.players[current_player_mark].mark)

            # この時点のstateで報酬が発生する場合はここでrewardを判定して学習できる
            # tic_tac_toeでは勝負が決まるまで報酬は0
            current_player.learn(0)

            if state.is_win(self.players[current_player_mark].mark):
                result = self.players[current_player_mark].mark
                # 勝者の報酬
                current_player.learn(1, True)
                # 敗者の報酬
                self.players[result.opponent().to_int()].learn(-1, True)
                if verbose:
                    print("%s" % (state.to_array()))
                    print("-" * 5)
                    state.output()
                    print("-" * 5)
                    print("%s win!!!" %
                          (self.players[current_player_mark].mark.to_string()))
                break
            elif state.is_draw():
                result = Mark(Empty())
                for player in self.players.itervalues():
                    player.learn(0, True)
                if verbose:
                    state.output()
                    print("draw.")
                break
            current_player_mark = self.players[
                current_player_mark].mark.opponent().to_int()
        arguments:
            状態
            報酬
            事後状態から行動を行った後の状態
        '''
        if next_state is None:
            next_state_value = 0.0
        else:
            #状態価値テーブルから価値を取得
            next_state_value = self.value[next_state]
        # 行動価値の更新式
        self.value[state] += self.step_size * (reward + next_state_value - self.value[state])
        #print("value[state] %f", self.value[state])

if __name__ == '__main__':
    value = Value()
    state = State()
    #print value.get_value(state)
    #print value.get_max_action(state, Mark(Maru()))
    new_state = state.set(3, Mark(Maru()))
    new_state1 = state.set(4, Mark(Maru()))
    new_state2 = new_state.set(2, Mark(Batsu())) 
    value.update(state, 10, new_state)
    value.update(new_state, 10, new_state2)
    value.update(new_state, 10, new_state2)
    value.update(new_state1, 10, new_state2)
    value.update(new_state1, 100, new_state2)
    #value.update(state, -3, new_state2)
    print value.get_max_action(state, Mark(Maru()))

Beispiel #3
0
            状態
            報酬
            事後状態から行動を行った後の状態
        '''
        if next_state is None:
            next_state_value = 0.0
        else:
            #状態価値テーブルから価値を取得
            next_state_value = self.value[next_state]
        # 行動価値の更新式
        self.value[state] += self.step_size * (reward + next_state_value -
                                               self.value[state])
        #print("value[state] %f", self.value[state])


if __name__ == '__main__':
    value = Value()
    state = State()
    #print value.get_value(state)
    #print value.get_max_action(state, Mark(Maru()))
    new_state = state.set(3, Mark(Maru()))
    new_state1 = state.set(4, Mark(Maru()))
    new_state2 = new_state.set(2, Mark(Batsu()))
    value.update(state, 10, new_state)
    value.update(new_state, 10, new_state2)
    value.update(new_state, 10, new_state2)
    value.update(new_state1, 10, new_state2)
    value.update(new_state1, 100, new_state2)
    #value.update(state, -3, new_state2)
    print value.get_max_action(state, Mark(Maru()))
    def start(self, verbose=False):
        '''
        対戦の開始
        '''
        state = State()
        current_player_mark = 1
        result = None
        while(True):
            #print("="*30)
            current_player = self.players[current_player_mark]
            if verbose:
                print("%s" % (state.to_array()))
                print state.output()
                print("-"*5)
            # プレイヤーの行動の選択
            index = current_player.select_index(state)
            #print("%s selected %i" % (self.players[current_player_mark].mark.to_string(), index))
            state = state.set(index, self.players[current_player_mark].mark)

            # この時点のstateで報酬が発生する場合はここでrewardを判定して学習できる
            # tic_tac_toeでは勝負が決まるまで報酬は0
            current_player.learn(0)

            if state.is_win(self.players[current_player_mark].mark):
                result = self.players[current_player_mark].mark
                # 勝者の報酬
                current_player.learn(1, True)
                # 敗者の報酬
                self.players[result.opponent().to_int()].learn(-1, True)
                if verbose:
                    print("%s" % (state.to_array()))
                    print("-"*5)
                    state.output()
                    print("-"*5)
                    print("%s win!!!" % (self.players[current_player_mark].mark.to_string()))
                break
            elif state.is_draw():
                result = Mark(Empty())
                for player in self.players.itervalues():
                    player.learn(0, True)
                if verbose:
                    state.output()
                    print("draw.")
                break
            current_player_mark = self.players[current_player_mark].mark.opponent().to_int()