def test_names():
    """Round-trip an automaton through file and string serialization.

    A reloaded automaton must reproduce the original's items, prefixes,
    and match spans exactly.
    """
    auto = Automaton()
    auto.add_all(NAMES)
    auto.update_automaton()
    auto_matches = [(m.start, m.end) for m in auto.get_matches(TEXT)]

    # File round-trip.
    with TemporaryDirectory() as tmpdir:
        fnm = os.path.join(tmpdir, 'test.aca')
        auto.save_to_file(fnm)
        auto2 = Automaton()
        auto2.load_from_file(fnm)
        auto2_matches = [(m.start, m.end) for m in auto2.get_matches(TEXT)]
        assert list(auto.items()) == list(auto2.items())
        assert list(auto.prefixes()) == list(auto2.prefixes())
        assert auto_matches == auto2_matches

    # String round-trip (no filesystem needed).
    auto3 = Automaton()
    auto3.load_from_string(auto2.save_to_string())
    auto3_matches = [(m.start, m.end) for m in auto3.get_matches(TEXT)]
    # BUG FIX: these two asserts previously compared against auto2 again,
    # leaving the string round-trip's contents unverified.
    assert list(auto.items()) == list(auto3.items())
    assert list(auto.prefixes()) == list(auto3.prefixes())
    assert auto_matches == auto3_matches
def test_with_updating():
    """Entries added after a query take effect on subsequent queries."""
    automaton = Automaton()
    automaton.add('hers')
    assert len(automaton.get_matches('ushers')) == 1
    # Adding 'us' makes a second (non-overlapping) match available.
    automaton.add('us')
    assert len(automaton.get_matches('ushers')) == 2
def test_automaton_with_words():
    """Overlap handling: all matches vs. the non-overlapping subset.

    In 'ushers': 'she' spans (1, 4), 'he' spans (2, 4), 'hers' spans (2, 6).
    With ``exclude_overlaps=True`` only the maximal set survives.
    """
    auto = Automaton()
    for token in ['he', 'she', 'his', 'hers']:
        auto.add(token)

    expected_all_matches = [
        Match(1, 4, 'Y'),
        Match(2, 4, 'Y'),
        Match(2, 6, 'Y'),
    ]
    # Removed a leftover debug print of all_matches.
    all_matches = auto.get_matches('ushers', exclude_overlaps=False)
    assert expected_all_matches == all_matches

    expected_nonoverlap_matches = [Match(2, 6, 'Y')]
    nonoverlap_matches = auto.get_matches('ushers', exclude_overlaps=True)
    assert expected_nonoverlap_matches == nonoverlap_matches
def test_names_with_tokens():
    """Token-list matching recovers every name from NAMES in TEXT.

    NOTE(review): renamed from ``test_names`` — this module defines
    ``test_names`` twice, so one definition shadowed the other and pytest
    collected only one of them.
    """
    auto = Automaton(NAMES)
    # NOTE(review): NAMES is passed to the constructor *and* added again
    # here — presumably idempotent; confirm whether one call is redundant.
    auto.add_all(NAMES)
    print(auto)
    matches = set(' '.join(match.elems) for match in auto.get_matches(TEXT.split()))
    names = set(' '.join(name) for name in NAMES)
    assert names == matches
def test_with_words():
    """Multi-token entries match token spans of a tokenized text."""
    auto = Automaton()
    for entry in (['funderbeam'], ['mattermark'], ['500', 'startups']):
        auto.add(entry)
    tokens = 'funderbeam and mattermark along with 500 startups'.split()
    # Spans are token indices: [start, end).
    expected = [Match(0, 1, 'Y'), Match(2, 3, 'Y'), Match(5, 7, 'Y')]
    assert expected == auto.get_matches(tokens)
def test_lemmas():
    """Labelled single-token entries are found in a lemma sequence."""
    auto = Automaton()
    for word in ('sunlabob', 'renewable'):
        auto.add([word], 'CO')
    lemmas = [
        'sunlabob', 'renewable', 'energy', 'receive',
        '$', '2.1', 'million', 'investment',
    ]
    print(auto.str())
    assert len(auto.get_matches(lemmas)) == 2
def test_map_interface():
    """The automaton supports mapping syntax: setitem, getitem, contains."""
    auto = Automaton()
    auto['us'] = 'USA'
    auto['her'] = 'EUROPE'

    assert auto['us'] == 'USA'
    assert auto['her'] == 'EUROPE'

    # Labels come back on matches, in match order.
    matches = auto.get_matches('usher')
    assert len(matches) == 2
    assert [m.label for m in matches] == ['USA', 'EUROPE']

    assert 'us' in auto
    assert 'his' not in auto
# -*- coding: utf-8 -*-
from __future__ import unicode_literals, print_function, absolute_import

# create a new AC automaton
from aca import Automaton

automaton = Automaton()

# dictionary entries may be token lists rather than plain strings
names = [
    (['Yuri', 'Artyukhin'], 'developer'),
    (['Tom', 'Anderson', 'Jr'], 'designer'),
]
automaton.add_all(names)

# a single entry can also be added through the mapping interface
automaton[['Tom', 'Anderson']] = 'manager'

# when entries are token lists, the query text must be tokenized the same way
text = 'Tom Anderson Jr and Yuri Artyukhin work on my project'.split()

print('matches that maximize the number of matched words')
for match in automaton.get_matches(text):
    print(match.start, match.end, match.elems, match.label)

print('all matches')
for match in automaton.get_matches(text, exclude_overlaps=False):
    print(match.start, match.end, match.elems, match.label)
# -*- coding: utf-8 -*-
from __future__ import unicode_literals, print_function, absolute_import

# create a new AC automaton
from aca import Automaton

automaton = Automaton()

# load a word dictionary into the automaton
painkillers = ['paracetamol', 'ibuprofen', 'hydrocloride']
automaton.add_all(painkillers)

# scan a text against the dictionary and report each match span
text = 'paracetamol and hydrocloride are a medications to relieve pain and fever. paracetamol is less efficient than ibuprofen'
for match in automaton.get_matches(text):
    print(match.start, match.end, match.elems)