Example #1
0
def test_1():
    """Exercise ``DFA.accepts`` and benchmark ``DFA.predict`` on the word list.

    Steps:
      1. Load an automaton from ``automata/mydata.json``.
      2. Call ``accepts`` on three fixed sentences and on every non-blank
         line of ``sensitive/sensitive.txt`` prefixed with ``'我喜欢'``.
      3. Time ``predict`` over the same prefixed lines and print the total
         time, the per-line average, and how many predictions compare equal
         to ``True`` / ``False`` plus the resulting ratio.

    Raises:
        ValueError: if the word file contains no non-blank lines, since the
            per-line average and the accuracy ratio would be undefined.
    """
    sensitive_path = 'sensitive/sensitive.txt'

    # NOTE(review): this first instance is discarded right after loading the
    # word list; kept in case load_sensitive has side effects that the rest
    # of the test relies on -- confirm and drop if it does not.
    dfa = DFA()
    dfa.load_sensitive(sensitive_path)

    dfa = DFA()
    # NOTE(review): the path has a .json suffix but goes through the pickle
    # loader -- confirm the file format, and only load trusted files.
    dfa.load_from_pickle('automata/mydata.json')

    for sample in ('我爱北京天安门\n', '我喜欢做假证\n', '我喜欢做嗳\n'):
        dfa.accepts(sample)

    with open(sensitive_path, 'r', encoding='UTF-8') as f:
        lines = [line for line in f.readlines() if line != '\n']
    if not lines:
        raise ValueError(
            'no non-blank lines found in {!r}; nothing to benchmark'.format(
                sensitive_path))

    # Build the inputs once so the timed loop measures only predict().
    texts = ['我喜欢' + line for line in lines]

    for text in texts:
        print(text)
        dfa.accepts(text)

    # perf_counter is monotonic and high-resolution, unlike time.time().
    start_time = time.perf_counter()
    result = [dfa.predict(text) for text in texts]
    during_time = time.perf_counter() - start_time

    print(r'耗时:', during_time)
    print(r'平均z耗时:', during_time / len(lines))

    result = np.array(result)
    # Element-wise comparison is intentional: it counts entries equal to
    # True / False whatever type predict() returns.
    correct = len(result[result == True])
    wrong = len(result[result == False])
    print(r'正确个数:', correct)
    print(r'错误个数:', wrong)
    print(r'正确率:', correct / len(result))
Example #2
0
def _time_sensitive_scan(dfa, lines, repeat):
    """Time ``dfa.get_sensitive_words`` over every line repeated *repeat* times.

    Each input is ``('我喜欢' + line) * repeat``. The inputs are built before
    the timer starts so only the scans are measured. Prints the total time
    and the average time per line. *lines* must be non-empty.
    """
    texts = [('我喜欢' + line) * repeat for line in lines]
    # perf_counter is monotonic and high-resolution, unlike time.time().
    start_time = time.perf_counter()
    for text in texts:
        dfa.get_sensitive_words(text)
    during_time = time.perf_counter() - start_time
    print(r'耗时:', during_time)
    print(r'平均z耗时:', during_time / len(lines))


def test_2():
    """Benchmark ``DFA.get_sensitive_words`` on inputs of growing length.

    Loads the automaton from ``automata/mydata.json``, calls ``accepts`` on
    three fixed sentences, then times ``get_sensitive_words`` over every
    non-blank line of ``sensitive/sensitive.txt`` (prefixed with ``'我喜欢'``)
    repeated 2, 4, 8 and 16 times. Between the 2x and 4x rounds it also times
    one scan of the last line's 2x text repeated a further 200 times and
    prints the words found.

    Every scan is recorded as ``True`` without checking its output, so the
    final summary always reports a ratio of 1.0; this test measures speed,
    not correctness.

    Raises:
        ValueError: if the word file contains no non-blank lines, since the
            per-line averages would be undefined.
    """
    sensitive_path = 'sensitive/sensitive.txt'

    dfa = DFA()
    # NOTE(review): the path has a .json suffix but goes through the pickle
    # loader -- confirm the file format, and only load trusted files.
    dfa.load_from_pickle('automata/mydata.json')

    for sample in ('我爱北京天安门\n', '我喜欢做假证\n', '我喜欢做嗳\n'):
        dfa.accepts(sample)

    with open(sensitive_path, 'r', encoding='UTF-8') as f:
        lines = [line for line in f.readlines() if line != '\n']
    if not lines:
        raise ValueError(
            'no non-blank lines found in {!r}; nothing to benchmark'.format(
                sensitive_path))

    result = []

    _time_sensitive_scan(dfa, lines, 2)
    result.extend([True] * len(lines))

    # One very long input: the last line's 2x text repeated 200 more times.
    last_text = ('我喜欢' + lines[-1]) * 2
    print(last_text)
    long_text = last_text * 200
    start_time = time.perf_counter()
    found = dfa.get_sensitive_words(long_text)
    during_time = time.perf_counter() - start_time
    print(found)
    print(r'耗时:', during_time)

    for repeat in (4, 8, 16):
        _time_sensitive_scan(dfa, lines, repeat)
        result.extend([True] * len(lines))

    result = np.array(result)
    correct = len(result[result == True])
    wrong = len(result[result == False])
    print(r'正确个数:', correct)
    print(r'错误个数:', wrong)
    print(r'正确率:', correct / len(result))