コード例 #1
0
def test_alternation_right_kleene():
    """Check DFA search results for the expression ``'a|b|c*'``.

    The empty string, ``'ccccc'``, ``'a'`` and ``'b'`` must each yield exactly
    one result whose first element equals ``accept``; ``'bb'``, ``'aa'`` and
    ``'ca'`` must yield no result at all.
    """
    graph = nfa.Nfa()
    const.ConstructionState(graph, 'a|b|c*', accept).construct()
    graph.remove_empty_transitions()
    dfa_graph = conv.NfaToDfaConverter(graph).start_conversion()

    # Inputs that must produce a single accepting result.
    for text in ('', 'ccccc', 'a', 'b'):
        matches = list(dfa_graph.search(text))
        assert len(matches) == 1
        assert matches[0][0] == accept

    # Inputs that must not produce any result.
    for text in ('bb', 'aa', 'ca'):
        matches = list(dfa_graph.search(text))
        assert len(matches) == 0
コード例 #2
0
def test_registers_escape():
    """A single backslash step sets ``escape_next`` and leaves ``starts`` empty."""
    automaton = nfa.Nfa()
    state = c.ConstructionState(automaton, '\\', accept)
    state._set_up()

    # Process the only character of the expression (index 0).
    state._perform_step(0)

    assert automaton.starts == []
    assert state.escape_next
コード例 #3
0
def test_handles_multiple_alternations():
    """Constructing ``'a|b|c'`` marks the last three states as accepting.

    Also checks that the graph's start list is ``[0]`` afterwards.
    """
    automaton = nfa.Nfa()
    c.ConstructionState(automaton, 'a|b|c', accept).construct()

    assert automaton.starts == [0]
    # One accepting state is expected per alternative, at the end of the list.
    for offset in (-1, -2, -3):
        assert automaton.states[offset].accepts == [accept]
コード例 #4
0
def test_handles_escaped_symbol(mocker):
    """With ``escape_next`` set, ``'?'`` is passed to ``_process_symbol``.

    ``mocker`` is the pytest-mock fixture used to spy on the method.
    """
    automaton = nfa.Nfa()
    state = c.ConstructionState(automaton, '?', accept)
    state._set_up()
    assert automaton.starts == []

    # Pretend the previous character was an escape before stepping.
    state.escape_next = True
    symbol_spy = mocker.spy(state, "_process_symbol")
    state._perform_step(0)

    symbol_spy.assert_called_once_with('?')
コード例 #5
0
def test_adds_acceptance():
    """Constructing ``"a|b"`` yields the expected accepts and transitions.

    Verifies the start list, the ``accepts`` of the last two states, the
    non-word-character transitions of states 3, 5 and 1, and the empty
    transitions of state 1.
    """
    automaton = nfa.Nfa()
    c.ConstructionState(automaton, "a|b", accept).construct()

    assert automaton.starts == [0]
    states = automaton.states
    assert states[-1].accepts == [accept]
    assert states[-2].accepts == [accept]

    # State index -> expected non-word-character transition targets.
    expected_non_word = {3: {7}, 5: {6}, 1: set()}
    for index, targets in expected_non_word.items():
        assert states[index].non_word_char_transitions == targets

    assert states[1].empty_transitions == {2, 4}
コード例 #6
0
    def _init(self):
        """Populate ``concept_map_``, ``dfa_`` and ``pipeline_`` from the graph.

        Concepts and sub-thesauri are extracted from ``self.graph`` (excluding
        deprecated entries), every retrieved concept label is run through the
        configured expansion, case and plural functions and handed to
        ``_handle_construction`` together with a ``ConstructionState`` on a
        shared NFA, and that NFA is finally converted into a DFA.
        """
        deprecated = set(t.extract_deprecated(self.graph))
        concepts = set(
            t.extract_by_type_uri(
                self.graph, self.concept_type_uri, remove=deprecated))
        thesauri = set(
            t.extract_by_type_uri(
                self.graph, self.sub_thesaurus_type_uri, remove=deprecated))

        # Map the string form of each concept to its position in iteration
        # order of the concept set.
        self.concept_map_ = {
            str(concept): position
            for position, concept in enumerate(concepts)
        }

        thesaurus_features = ThesaurusFeatureTransformation(
            self.graph, concepts, thesauri, self.thesaurus_relation_type_uri,
            self.thesaurus_relation_is_specialisation)
        labels = t.retrieve_concept_labels(
            self.graph, allowed=concepts, langs=self.langs)

        nfautomat = nfa.Nfa()
        case_handler = (case_handlers.title_case_handler
                        if self.handle_title_case
                        else case_handlers.sentence_case_handler)
        expansion_funs = expansion.collect_expansion_functions(
            extract_upper_case_from_braces=self.extract_upper_case_from_braces,
            extract_any_case_from_braces=self.extract_any_case_from_braces,
            expand_ampersand_with_spaces=self.expand_ampersand_with_spaces,
            expand_abbreviation_with_punctuation=(
                self.expand_abbreviation_with_punctuation),
        )

        def plural_fun(x):
            # Identity fallback used when plural rules are disabled.
            return x

        if self.simple_english_plural_rules:
            plural_fun = expansion.simple_english_plural_fun

        for concept, label in labels:
            # Apply every expansion function in sequence to the raw label.
            expanded = label
            for expand in expansion_funs:
                expanded = expand(expanded)
            pattern = plural_fun(case_handler(expanded))
            state = construction.ConstructionState(
                nfautomat, pattern, str(concept))
            _handle_construction(state, concept, label)

        nfautomat.remove_empty_transitions()
        self.dfa_ = conversion.NfaToDfaConverter(nfautomat).start_conversion()

        # Column 0 feeds the thesaurus features, column 1 the text features.
        combined_features = ColumnTransformer([
            ("Thesaurus Features", thesaurus_features, 0),
            ("Text Features", mk_text_features(), 1),
        ])
        classifier = DecisionTreeClassifier(
            min_samples_leaf=25, max_leaf_nodes=100)
        self.pipeline_ = Pipeline([
            ("Combined Features", combined_features),
            ("Classifier", classifier),
        ])
コード例 #7
0
def test_init_with_existing_graph(input_graph):
    """``_set_up`` on the ``input_graph`` fixture yields the expected state.

    Checks the resulting graph (nine states, unchanged start list, transitions
    on states 6 and 7) and the initial bookkeeping of the construction.
    """
    state = c.ConstructionState(input_graph, expression, accept)
    state._set_up()

    # Effects on the graph itself.
    nodes = input_graph.states
    assert len(nodes) == 9
    assert input_graph.starts == [0, 1]
    assert nodes[6].non_word_char_transitions == {7}
    assert nodes[7].empty_transitions == {8}

    # Bookkeeping held by the construction state.
    assert state.append_to == [8]
    assert state.expression == expression
    assert state.before_braces == [[7], [8]]
    assert state.dangling_alternations.stack == []
    assert not state.escape_next
    assert state.accept == accept
コード例 #8
0
def test_handles_alternation(input_graph):
    """A ``'|'`` step stacks the current ``append_to`` and restarts it.

    After the step, ``append_to`` must hold only the index of the last graph
    state and the top of the dangling-alternations stack must equal the
    previous ``append_to``.
    """
    pending = [2, 4, 5]
    state = c.ConstructionState(input_graph, '|', accept)
    state._set_up()

    # Overwrite the bookkeeping with a known configuration.
    state.append_to = list(pending)
    state.before_braces = [[0], [1, 3]]
    state.after_braces = [6, 7]
    state._perform_step(0)

    assert input_graph.starts == [0, 1]
    last_index = len(state.graph.states) - 1
    assert state.append_to == [last_index]
    assert state.dangling_alternations.stack[-1] == pending
コード例 #9
0
def test_init_with_empty_graph():
    """``_set_up`` on a fresh ``Nfa`` creates three states and no start.

    Also checks the transitions of states 0 and 1 and the initial bookkeeping
    of the construction.
    """
    automaton = nfa.Nfa()
    state = c.ConstructionState(automaton, expression, accept)
    state._set_up()

    # Effects on the graph itself.
    nodes = automaton.states
    assert len(nodes) == 3
    assert automaton.starts == []
    assert nodes[0].non_word_char_transitions == {1}
    assert nodes[1].empty_transitions == {2}

    # Bookkeeping held by the construction state.
    assert state.append_to == [2]
    assert state.expression == expression
    assert state.before_braces == [[1], [2]]
    assert state.dangling_alternations.stack == []
    assert not state.escape_next
    assert state.accept == accept
コード例 #10
0
def test_handles_optional(input_graph):
    """A ``'?'`` step links the last brace group to every ``append_to`` state.

    Each state listed in the last ``before_braces`` entry must have an empty
    transition to every index in ``append_to`` after the step.
    """
    state = c.ConstructionState(input_graph, '?', accept)
    state._set_up()

    # Overwrite the bookkeeping with a known configuration.
    state.append_to = [2, 4, 5]
    state.before_braces = [[0], [1, 3]]
    state.after_braces = [6, 7]
    state._perform_step(0)

    assert input_graph.starts == [0, 1]
    for target in state.append_to:
        for source in state.before_braces[-1]:
            assert target in input_graph.states[source].empty_transitions
コード例 #11
0
def regression_test_graph():
    """Build and return a DFA from six label/concept-id pairs.

    Every pair is constructed into one shared NFA, empty transitions are
    removed, and the result of the NFA-to-DFA conversion is returned.
    """
    automaton = nfa.Nfa()
    labelled_concepts = (
        (label_global, id_global),
        (label_economic, id_economic),
        (label_crisis, id_crisis),
        (label_global_economic, id_global_economic),
        (label_economic_crisis, id_economic_crisis),
        (label_global_economic_crisis, id_global_economic_crisis),
    )
    for label, concept_id in labelled_concepts:
        builder = const.ConstructionState(automaton, label, concept_id)
        builder.construct()
    automaton.remove_empty_transitions()
    return conv.NfaToDfaConverter(automaton).start_conversion()
コード例 #12
0
def test_handles_kleene_closure(input_graph):
    """A ``'*'`` step adds the expected empty transitions.

    Every former ``append_to`` state must point back to each state of the last
    ``before_braces`` entry, and each of those states must point to the last
    state of the graph.
    """
    pending = [2, 4, 5]
    state = c.ConstructionState(input_graph, '*', accept)
    state._set_up()

    # Overwrite the bookkeeping with a known configuration.
    state.append_to = list(pending)
    state.before_braces = [[0], [1, 3]]
    state._perform_step(0)

    assert input_graph.starts == [0, 1]
    for source in pending:
        for group_start in state.before_braces[-1]:
            assert group_start in input_graph.states[source].empty_transitions
    for group_start in state.before_braces[-1]:
        last_index = len(input_graph.states) - 1
        assert last_index in input_graph.states[group_start].empty_transitions
コード例 #13
0
def test_handles_closing_brace(input_graph, mocker):
    """A ``')'`` step extends ``append_to`` and drops the last brace group.

    ``dangling_alternations.pop`` is patched to return a fixed list, which
    must end up appended to the previous ``append_to``.
    """
    pending = [2, 4, 5]
    group_starts = [[0], [1, 3]]
    popped = [12, 14]
    state = c.ConstructionState(input_graph, ')', accept)
    state._set_up()
    mocker.patch.object(state.dangling_alternations, "pop", lambda: popped)

    # Overwrite the bookkeeping with a known configuration.
    state.append_to = list(pending)
    state.before_braces = list(group_starts)
    state._perform_step(0)

    assert input_graph.starts == [0, 1]
    split = len(pending)
    assert state.append_to[:split] == pending
    assert state.append_to[split:] == popped
    assert state.before_braces == group_starts[:-1]
コード例 #14
0
def test_handles_opening_brace(input_graph):
    """A ``'('`` step opens a new group rooted at the last graph state.

    Checks the popped dangling-alternations entry, the updated
    ``before_braces`` and ``append_to``, and the empty transitions from every
    former ``append_to`` state to the last state of the graph.
    """
    pending = [2, 4, 5]
    group_starts = [[0], [1, 3]]
    state = c.ConstructionState(input_graph, '(', accept)
    state._set_up()

    # Overwrite the bookkeeping with a known configuration.
    state.append_to = list(pending)
    state.before_braces = list(group_starts)
    state._perform_step(0)

    assert input_graph.starts == [0, 1]
    opened = len(input_graph.states) - 1
    assert state.dangling_alternations.pop() == []
    assert state.before_braces[:-2] == group_starts[:-1]
    assert state.before_braces[-2] == pending
    assert state.before_braces[-1] == [opened]
    assert state.append_to == [opened]
    for source in pending:
        assert opened in input_graph.states[source].empty_transitions