def test_names():
    """Round-trip an automaton through file and string serialization.

    Builds an automaton over NAMES, then verifies that matches, items
    and prefixes are identical after save_to_file/load_from_file and
    after save_to_string/load_from_string.
    """
    auto = Automaton()
    auto.add_all(NAMES)
    auto.update_automaton()
    auto_matches = [(m.start, m.end) for m in auto.get_matches(TEXT)]
    with TemporaryDirectory() as tmpdir:
        fnm = os.path.join(tmpdir, 'test.aca')
        auto.save_to_file(fnm)

        # file round-trip
        auto2 = Automaton()
        auto2.load_from_file(fnm)
        auto2_matches = [(m.start, m.end) for m in auto2.get_matches(TEXT)]
        assert list(auto.items()) == list(auto2.items())
        assert list(auto.prefixes()) == list(auto2.prefixes())
        assert auto_matches == auto2_matches

        # string round-trip
        auto3 = Automaton()
        auto3.load_from_string(auto2.save_to_string())
        auto3_matches = [(m.start, m.end) for m in auto3.get_matches(TEXT)]
        # BUG FIX: the original asserted auto vs auto2 again here, so the
        # string round-trip's items/prefixes were never actually checked.
        assert list(auto.items()) == list(auto3.items())
        assert list(auto.prefixes()) == list(auto3.prefixes())
        assert auto_matches == auto3_matches
def __init__(self, service_type, label_conf_dict_path):
    """Load level-1 keyword rules for *service_type* and build one
    Aho-Corasick automaton per rule line.

    Reads ``<label_conf_dict_path>/<service_type>.rule.dat``, where each
    line has the form ``kw1 kw2 ...:tag1,tag2,...``.  Blank lines and
    lines starting with ``#`` are skipped.  For every rule line, the
    keyword list, tag list and a matching Automaton are appended to the
    three parallel instance lists (same index = same rule).
    """
    self.__level1_keywords__ = []   # list of keyword lists, one per rule line
    self.__level1_tag__ = []        # list of tag lists, parallel to keywords
    self.__level1_automaton__ = []  # one Automaton per rule line
    label_file = "%s.rule.dat" % (service_type)
    level1_keywords_map_file_path = os.path.join(label_conf_dict_path, label_file)
    # BUG FIX: the original called line.decode("utf-8") on lines read from
    # a text-mode file; Python 3 str has no .decode(), so any non-empty
    # line raised AttributeError.  Open with an explicit encoding instead.
    # The redundant pre-strip emptiness check is also gone: lines yielded
    # by file iteration always contain at least '\n'.
    with open(level1_keywords_map_file_path, encoding="utf-8") as level1_f:
        for line in level1_f:
            line = line.strip()
            if not line or line.startswith("#"):
                continue
            line_arr = line.split(":")
            keywords_list = line_arr[0].split(" ")
            level1_list = line_arr[1].split(",")
            self.__level1_keywords__.append(keywords_list)
            self.__level1_tag__.append(level1_list)
            automaton = Automaton()
            automaton.add_all(keywords_list)
            self.__level1_automaton__.append(automaton)
def test_items():
    """The automaton enumerates its (key, value) items in sorted key order."""
    auto = Automaton()
    auto.add_all(names)
    expected_keys, expected_values = zip(*sorted(names))
    got = list(auto.items())
    got_keys = [''.join(key) for key, _ in got]
    got_values = [value for _, value in got]
    assert got_keys == list(expected_keys)
    assert got_values == list(expected_values)
def test_names():
    """Every token-list entry in NAMES is found when matching tokenized TEXT."""
    auto = Automaton(NAMES)
    auto.add_all(NAMES)
    print(auto)
    found = set(' '.join(m.elems) for m in auto.get_matches(TEXT.split()))
    expected = set(' '.join(entry) for entry in NAMES)
    assert expected == found
# -*- coding: utf-8 -*-
from __future__ import unicode_literals, print_function, absolute_import

# Demo: an AC automaton keyed on token lists rather than plain strings.
from aca import Automaton

automaton = Automaton()

# Each entry pairs a token list with a label.
names = [
    (['Yuri', 'Artyukhin'], 'developer'),
    (['Tom', 'Anderson', 'Jr'], 'designer'),
]
automaton.add_all(names)

# Single entries can also be assigned with index syntax.
automaton[['Tom', 'Anderson']] = 'manager'

# With token-list keys, the text must be tokenized the same way.
text = 'Tom Anderson Jr and Yuri Artyukhin work on my project'.split()

print('matches that maximize the number of matched words')
for match in automaton.get_matches(text):
    print(match.start, match.end, match.elems, match.label)

print('all matches')
for match in automaton.get_matches(text, exclude_overlaps=False):
    print(match.start, match.end, match.elems, match.label)
# Import the library and initiate the automaton
from aca import Automaton

ac = Automaton()

# Register the entities, then finalize the automaton before matching.
ac.add_all(['Funderbeam', 'Funderbeam Data', 'Funderbeam Markets'])
ac.update_automaton()

text = 'Funderbeam Data and Funderbeam Markets are two different products of Funderbeam'

# First pass: report every match, overlapping ones included.
for hit in ac.get_matches(text, exclude_overlaps=False):
    print(hit.start, hit.end, hit.elems)

# Second pass: keep only a non-overlapping subset of matches.
for hit in ac.get_matches(text, exclude_overlaps=True):
    print(hit.start, hit.end, hit.elems)
# -*- coding: utf-8 -*-
from __future__ import unicode_literals, print_function, absolute_import

# Demo: match a plain word dictionary against a text with an AC automaton.
from aca import Automaton

ac = Automaton()

# The dictionary of entities to search for.
painkillers = ['paracetamol', 'ibuprofen', 'hydrocloride']
ac.add_all(painkillers)

# Scan the text and print the span of every dictionary hit.
text = 'paracetamol and hydrocloride are a medications to relieve pain and fever. paracetamol is less efficient than ibuprofen'
for hit in ac.get_matches(text):
    print(hit.start, hit.end, hit.elems)
def test_prefixes():
    """prefixes() enumerates every distinct key prefix in lexicographic order."""
    auto = Automaton()
    auto.add_all(['jaanus', 'janek', 'janis'])
    found = [''.join(chars) for chars, _ in auto.prefixes()]
    expected = ['', 'j', 'ja', 'jaa', 'jaan', 'jaanu', 'jaanus',
                'jan', 'jane', 'janek', 'jani', 'janis']
    assert found == expected