コード例 #1
0
ファイル: test.py プロジェクト: siolag161/algorithms
    def TestCase(self):
        """Check the result lists returned by KMP.search_MP for two inputs."""
        matcher = KMP()
        # Each case is (text, pattern, expected result of search_MP).
        cases = (
            ("abcabcabcabc", "cabc", [2, 5, 8]),
            ("ABC ABCDAB ABCDABCDABDE", "ABCDABD", [15]),
        )
        for text, pattern, expected in cases:
            self.assertListEqual(matcher.search_MP(text, pattern), expected)
コード例 #2
0
ファイル: test.py プロジェクト: siolag161/algorithms
    def TestCase(self):
        """Run KMP.search_MP on two text/pattern pairs and compare the results."""
        searcher = KMP()

        # Overlapping occurrences of a short pattern in a periodic text.
        found = searcher.search_MP("abcabcabcabc", "cabc")
        expected_periodic = [2, 5, 8]
        self.assertListEqual(found, expected_periodic)

        # A single occurrence of the pattern in the text.
        found = searcher.search_MP("ABC ABCDAB ABCDABCDABDE", "ABCDABD")
        expected_single = [15]
        self.assertListEqual(found, expected_single)
コード例 #3
0
 def __init__(self, p):
     """Store the pattern and build the row and column matchers.

     p is the pattern (described by the original author as m by m in size).
     It is copied into a list; its length is kept as ``self.m``.
     """
     self.p = list(p)
     self.m = len(p)

     # construct() returns a pair:
     #   - a mapping whose keys are the pattern rows and whose values are
     #     distinct row numbers,
     #   - the pattern re-expressed as a sequence of those row numbers.
     distinct_rows, encoded_pattern = self.construct()
     self.dr = distinct_rows
     self.pp = encoded_pattern

     # Aho-Corasick instance over the distinct rows (row matching).
     self.ac = AC(distinct_rows.keys())
     # KMP instance over the encoded pattern (column matching).
     self.kmp = KMP(encoded_pattern)
コード例 #4
0
    def do_kmp(spam_text, data_resp):
        """Match ``spam_text`` against the text of every entry in ``data_resp``.

        For each entry, the result of ``KMP.match_string(entry['text'],
        spam_text)`` is extended with the entry's ``profile_img``, ``name``
        and ``screen_name`` values. The extended results are returned as a
        list, in input order.
        """
        copied_fields = ('profile_img', 'name', 'screen_name')
        matches = []

        for entry in data_resp:
            match = KMP.match_string(entry['text'], spam_text)
            # Carry the author metadata over onto the match result.
            for field in copied_fields:
                match[field] = entry[field]
            matches.append(match)

        return matches
コード例 #5
0
ファイル: baker_bird.py プロジェクト: slaysd/python-bakerbird
    def __init__(self, stream, pattern_len):
        '''
        Initialise the Baker-Bird matcher from a stream of pattern rows.

        :param stream: source of pattern rows; must provide next() and
            set_seek() (presumably set_seek() rewinds to the first row --
            confirm against the stream class)
        :param pattern_len: number of pattern rows to read from the stream
        '''
        # Pass 1: register every pattern row with the Aho-Corasick automaton.
        self.ac = AhoCorasick()
        for _ in range(pattern_len):
            self.ac.add_patterns(stream.next())
        self.ac.build()

        # Pass 2: give each distinct row a label "1", "2", ... in order of
        # first appearance.
        self.r = {}
        stream.set_seek()
        for _ in range(pattern_len):
            row = stream.next()
            if row in self.r:
                continue
            self.r[row] = str(len(self.r) + 1)

        # Pass 3: the pattern rewritten as the string of its row labels is
        # the keyword handed to KMP.
        stream.set_seek()
        labels = [self.r[stream.next()] for _ in range(pattern_len)]
        self.kmp = KMP("".join(labels))
コード例 #6
0
ファイル: baker_bird.py プロジェクト: slaysd/python-bakerbird
class BakerBird(object):
    """Baker-Bird style 2D matcher: Aho-Corasick on rows, KMP down columns."""

    def __init__(self, stream, pattern_len):
        '''
        Initialise the Baker-Bird algorithm from a stream of pattern rows.

        :param stream: source of pattern rows; must provide next() and
            set_seek() (presumably set_seek() rewinds to the first row --
            confirm against the stream class)
        :param pattern_len: number of pattern rows to read from the stream
        '''
        # Pass 1: register every pattern row with the Aho-Corasick automaton.
        self.ac = AhoCorasick()
        for _ in range(pattern_len):
            line = stream.next()
            self.ac.add_patterns(line)
        self.ac.build()

        # Pass 2: label each distinct row "1", "2", ... in order of first
        # appearance. "0" is never used as a label, so __call__ can use it
        # as the "no row matched here" marker.
        self.r = {}
        idx = 1

        stream.set_seek()
        for _ in range(pattern_len):
            row = stream.next()
            if row not in self.r:
                self.r[row] = str(idx)
                idx += 1

        # Pass 3: the pattern rewritten as its row labels is the KMP keyword.
        stream.set_seek()
        self.kmp = KMP("".join([str(self.r[stream.next()]) for _ in range(pattern_len)]))

    def __call__(self, stream, text_len):
        '''
        Run the Baker-Bird algorithm over ``text_len`` rows of text.

        To save extra space, this does not build the full 2D array of
        Aho-Corasick results and then scan it with KMP. Instead it runs
        Aho-Corasick on one row at a time, keeps a single per-column array of
        KMP states, and advances KMP one step per row.

        :param stream: source of text rows; must provide next()
        :param text_len: number of rows read, and number of columns tracked
            per row
        :return: list of (row, column) tuples giving the end coordinates of
            each match
        '''
        ret = []
        # position[c] is the current KMP state for column c.
        position = [0] * text_len
        pattern_rows = len(self.kmp.keyword)
        for i in range(text_len):
            row = stream.next()
            # Label of the pattern row ending at each column, "0" if none.
            row_R = ["0"] * text_len
            for _start, end, keyword in self.ac(row):
                row_R[end - 1] = self.r[keyword]

            for idx, R in enumerate(row_R):
                position[idx] = self.kmp.step(R, position[idx])
                # Compare by value: identity (`is`) only happens to work for
                # CPython's cached small integers and misses taller patterns.
                if position[idx] == pattern_rows:
                    ret.append((i, idx))
                    # Fall back via the failure table so overlapping matches
                    # in the same column are still found.
                    position[idx] = self.kmp.pi[position[idx] - 1]
        return ret
コード例 #7
0
    # Tail of a timing comparison: each section records a start time in
    # `tick`, runs one search implementation over every text yielded by
    # text_iterator(), records the end time in `tock`, and prints the
    # elapsed seconds. The search results are not used in the visible code.
    tock = time.time()
    print('re search time:', tock - tick)

    # Brute-force search: one bf_search call per (text, keyword) pair.
    tick = time.time()
    for text in text_iterator():
        for keyword in keywords:
            result = bf_search(text.strip(), keyword)
            # if result:
            #     print(keyword, result)
        # NOTE(review): unlike the KMP and AC sections below, `tock` is set
        # inside the outer loop. The printed time is the same when at least
        # one text is yielded; with no texts, `tock` keeps the value from the
        # previous section. Confirm whether this indentation is intended.
        tock = time.time()
    print('bf search time:', tock - tick)

    # KMP search: a new KMP() instance is created for every
    # (text, keyword) pair.
    tick = time.time()
    for text in text_iterator():
        for keyword in keywords:
            result = KMP().search(text.strip(), keyword)
            # if result:
            #     print(keyword, result)
    tock = time.time()
    print('kmp search time:', tock - tick)

    # ac_automation search: a single search call per text, with no
    # per-keyword loop.
    tick = time.time()
    for text in text_iterator():
        result = ac_automation.search(text.strip())
    tock = time.time()
    # word2pos = defaultdict(list)
    # for start, end in result:
    #     word2pos[line[start: end+1]].append((start, end))
    # for word, pos in word2pos.items():
    #     print(word, pos)
    print('ac search time:', tock - tick)
コード例 #8
0
from kmp import KMP
from fa import AUTOMATA
import sys

# Command-line driver.
# Usage: <script> <matcher> <pattern> <text-file>
#   matcher   -- "KMP" selects the KMP matcher; any other value selects the
#                finite-automaton matcher
#   pattern   -- the pattern to search for
#   text-file -- path of the file whose contents are searched
matcher = sys.argv[1]
pattern = sys.argv[2]

# Read the whole file and concatenate its lines without line breaks.
# The context manager guarantees the handle is closed, and str.join avoids
# rebuilding the string once per line.
with open(sys.argv[3], "r") as file:
    text = "".join(file.read().splitlines())

if matcher == "KMP":

    print("wyszukiwanie wzorca za pomoca KMP")
    print("=================================")
    kmp = KMP(text, pattern)
    kmp.kmp()

else:

    print("wyszukiwanie wzorca za pomoca FA")
    print("================================")
    finite = AUTOMATA(text, pattern)
    finite.automata_matcher()
コード例 #9
0
def kmp_array(fasta_list):
    """Build the KMP table for the first entry of ``fasta_list``.

    The value at index 1 of the first entry is passed to
    ``KMP.build_kmp_table`` with ``print_table=True``. Nothing is returned.
    """
    matcher = KMP()
    build_table = matcher.build_kmp_table
    build_table(fasta_list[0][1], print_table=True)
コード例 #10
0
 def fifthw(self):
     """Show the input-rules dialog, then create the KMP demo object.

     The dialog text (in Chinese) tells the user to enter two strings
     separated by a half-width comma: the first 6 to 20 characters long, the
     second at least one character and no longer than the first, with Enter
     finishing the input.
     """
     dialog_title = "规则"
     dialog_text = "请输入 用半角逗号间隔的字符串 第一串最少6个字符最多20个 第二串最少一个 第二串字符数不超过第一串 回车完成输入"
     QMessageBox.information(self, dialog_title, dialog_text, QMessageBox.Yes)

     # Imported here, after the dialog has been shown, as in the original.
     from kmp import KMP
     self.demo5 = KMP()