예제 #1
0
def test_conversion(input_graph, mocker):
    """Convert the fixture NFA and verify the structure of the resulting DFA.

    The focus is on the final graph. The DFA's mutating methods are
    nevertheless spied on so that unexpected additional elements show up
    as a wrong call count.
    """
    converter = c.NfaToDfaConverter(input_graph)
    spy_add_state = mocker.spy(converter.dfa, "add_state")
    spy_symbol = mocker.spy(converter.dfa, "set_symbol_transition")
    spy_non_word_char = mocker.spy(
        converter.dfa, "set_non_word_char_transition")
    spy_acceptances = mocker.spy(converter.dfa, "add_acceptances")

    result = converter.start_conversion()

    # Bookkeeping of the converter once the work queue has been drained.
    assert converter.queue.qsize() == 0
    assert len(converter.state_cache) == 6
    assert len(converter.state_represents) == 6

    # ``add_state`` is called one time less than there are states in the
    # graph, because the initial state was added during construction.
    expected_call_counts = (
        (spy_add_state, 5),
        (spy_non_word_char, 1),
        (spy_symbol, 7),
        (spy_acceptances, 1),
    )
    for spy, expected in expected_call_counts:
        assert spy.call_count == expected

    def dfa_id(*nfa_ids):
        # Look up the DFA state that stands for the given NFA state ids.
        return converter.state_cache[frozenset(nfa_ids)]

    def target(source, symbol):
        # Follow a symbol transition in the converted graph.
        return result.states[source].symbol_transitions[symbol]

    id01 = dfa_id(0, 1)
    id02 = dfa_id(0, 2)
    assert target(id01, symbol0) == id02
    id3 = dfa_id(3)
    assert target(id01, symbol1) == id3
    assert target(id02, symbol1) == id3
    id24 = dfa_id(2, 4)
    assert target(id02, symbol0) == id24
    id5 = dfa_id(5)
    assert target(id24, symbol1) == id5
    id4 = dfa_id(4)
    assert target(id24, symbol0) == id4
    assert target(id4, symbol1) == id5
    assert result.states[id5].accepts == [accept]
예제 #2
0
def test_alternation_right_kleene():
    """Search with the DFA built from the pattern ``a|b|c*``.

    The empty string, a run of ``c`` and the single symbols ``a`` and ``b``
    must each yield exactly one result carrying ``accept``; the remaining
    inputs must yield no result at all.
    """
    graph = nfa.Nfa()
    const.ConstructionState(graph, 'a|b|c*', accept).construct()
    graph.remove_empty_transitions()
    dfa_graph = conv.NfaToDfaConverter(graph).start_conversion()

    for text in ('', 'ccccc', 'a', 'b'):
        matches = list(dfa_graph.search(text))
        assert len(matches) == 1
        assert matches[0][0] == accept

    for text in ('bb', 'aa', 'ca'):
        matches = list(dfa_graph.search(text))
        assert len(matches) == 0
예제 #3
0
def test_transition_collection(input_graph):
    """Check ``_collect_nfa_transitions`` in both directions.

    First, everything reachable from the selected NFA states has to be part
    of the collected result. Second, every collected element has to stem
    from at least one of the selected NFA states.
    """
    converter = c.NfaToDfaConverter(input_graph)
    state_set = {0, 1, 3, 5}
    collected = converter._collect_nfa_transitions(state_set)
    symbol_transitions, non_word_char_transitions, accepts = collected
    nfa_states = [input_graph.states[state_id] for state_id in state_set]

    # Direction 1: nothing from the NFA states is missing in the result.
    for nfa_state in nfa_states:
        for symbol, targets in nfa_state.symbol_transitions.items():
            for target in targets:
                assert target in symbol_transitions[symbol]
        for target in nfa_state.non_word_char_transitions:
            assert target in non_word_char_transitions
        for acceptance in nfa_state.accepts:
            assert acceptance in accepts

    # Direction 2: the result contains nothing that no NFA state provides.
    for symbol, targets in symbol_transitions.items():
        for target in targets:
            assert any(
                target in nfa_state.symbol_transitions.get(symbol, [])
                for nfa_state in nfa_states)
    for target in non_word_char_transitions:
        assert any(
            target in nfa_state.non_word_char_transitions
            for nfa_state in nfa_states)
    for acceptance in accepts:
        assert any(
            acceptance in nfa_state.accepts for nfa_state in nfa_states)
예제 #4
0
def test_retrieves_existing_state(input_graph):
    """Requesting the start set again must not create a second DFA state.

    After the call, the DFA, the work queue, the state cache and the
    representation mapping are all expected to hold exactly one entry.
    """
    converter = c.NfaToDfaConverter(input_graph)
    converter._get_or_create_dfa_state(frozenset(input_graph.starts))

    observed_sizes = (
        len(converter.dfa.states),
        converter.queue.qsize(),
        len(converter.state_cache),
        len(converter.state_represents),
    )
    for size in observed_sizes:
        assert size == 1
예제 #5
0
def test_initialization(input_graph):
    """A fresh converter holds exactly one DFA state: the NFA start set.

    That state has the id 0, is queued for processing and is registered in
    both the cache and the representation mapping.
    """
    converter = c.NfaToDfaConverter(input_graph)
    start_set = frozenset(input_graph.starts)

    observed_sizes = (
        len(converter.dfa.states),
        converter.queue.qsize(),
        len(converter.state_cache),
        len(converter.state_represents),
    )
    for size in observed_sizes:
        assert size == 1

    assert converter.state_represents[0] == list(start_set)
    assert converter.state_cache[start_set] == 0
예제 #6
0
def test_creates_new_state(input_graph):
    """An unknown NFA state set leads to a new DFA state with the id 1.

    Together with the state created during construction, every bookkeeping
    structure of the converter is expected to contain two entries.
    """
    converter = c.NfaToDfaConverter(input_graph)
    unseen_set = frozenset((0, 2))
    converter._get_or_create_dfa_state(unseen_set)

    observed_sizes = (
        len(converter.dfa.states),
        converter.queue.qsize(),
        len(converter.state_cache),
        len(converter.state_represents),
    )
    for size in observed_sizes:
        assert size == 2

    assert converter.state_represents[1] == list(unseen_set)
    assert converter.state_cache[unseen_set] == 1
예제 #7
0
    def _init(self):
        """Prepare the concept map, the label DFA and the pipeline.

        Reads the graph and the configuration attributes of ``self`` and
        stores the results in ``self.concept_map_``, ``self.dfa_`` and
        ``self.pipeline_``.
        """
        deprecated = set(t.extract_deprecated(self.graph))
        concepts = set(
            t.extract_by_type_uri(
                self.graph, self.concept_type_uri, remove=deprecated))
        thesauri = set(
            t.extract_by_type_uri(
                self.graph, self.sub_thesaurus_type_uri, remove=deprecated))

        # Map the string form of every concept to a consecutive index.
        self.concept_map_ = {
            str(item): index for index, item in enumerate(concepts)}

        thesaurus_features = ThesaurusFeatureTransformation(
            self.graph, concepts, thesauri, self.thesaurus_relation_type_uri,
            self.thesaurus_relation_is_specialisation)
        labels = t.retrieve_concept_labels(
            self.graph, allowed=concepts, langs=self.langs)

        automaton = nfa.Nfa()
        case_handler = (
            case_handlers.title_case_handler
            if self.handle_title_case
            else case_handlers.sentence_case_handler)
        expansion_funs = expansion.collect_expansion_functions(
            extract_upper_case_from_braces=self.extract_upper_case_from_braces,
            extract_any_case_from_braces=self.extract_any_case_from_braces,
            expand_ampersand_with_spaces=self.expand_ampersand_with_spaces,
            expand_abbreviation_with_punctuation=(
                self.expand_abbreviation_with_punctuation),
        )
        # Without the plural rules the expression is passed on unchanged.
        plural_fun = (
            expansion.simple_english_plural_fun
            if self.simple_english_plural_rules
            else (lambda x: x))

        for concept, label in labels:
            # Apply the expansion functions one after another, then the
            # case handling and finally the plural handling.
            pattern = label
            for expand in expansion_funs:
                pattern = expand(pattern)
            pattern = plural_fun(case_handler(pattern))
            builder = construction.ConstructionState(
                automaton, pattern, str(concept))
            _handle_construction(builder, concept, label)

        automaton.remove_empty_transitions()
        self.dfa_ = conversion.NfaToDfaConverter(automaton).start_conversion()

        combined_features = ColumnTransformer([
            ("Thesaurus Features", thesaurus_features, 0),
            ("Text Features", mk_text_features(), 1),
        ])
        classifier = DecisionTreeClassifier(
            min_samples_leaf=25, max_leaf_nodes=100)
        self.pipeline_ = Pipeline([
            ("Combined Features", combined_features),
            ("Classifier", classifier),
        ])
def regression_test_graph():
    """Return a DFA that contains the six regression-test labels.

    Every label is added to one shared NFA together with its concept id.
    Empty transitions are removed before the NFA is converted.
    """
    automaton = nfa.Nfa()
    labelled_concepts = (
        (label_global, id_global),
        (label_economic, id_economic),
        (label_crisis, id_crisis),
        (label_global_economic, id_global_economic),
        (label_economic_crisis, id_economic_crisis),
        (label_global_economic_crisis, id_global_economic_crisis),
    )
    for label, concept_id in labelled_concepts:
        builder = const.ConstructionState(automaton, label, concept_id)
        builder.construct()
    automaton.remove_empty_transitions()
    return conv.NfaToDfaConverter(automaton).start_conversion()
예제 #9
0
def test_transition_creation(input_graph):
    """Create transitions for DFA state 0 and inspect that state afterwards.

    The targets of the symbol and non-word-character transitions have to be
    the DFA states that the converter cached for the given NFA state sets.
    """
    converter = c.NfaToDfaConverter(input_graph)
    targets_symbol0 = frozenset((1, 3))
    targets_symbol1 = frozenset((5,))
    targets_non_word_char = frozenset((2, 4))

    # The frozensets are kept for the cache lookups below; the converter
    # receives mutable copies.
    converter._create_dfa_transitions(
        0,
        {symbol0: set(targets_symbol0), symbol1: set(targets_symbol1)},
        set(targets_non_word_char),
        {accept},
    )

    dfa_state = converter.dfa.states[0]
    cache = converter.state_cache
    assert dfa_state.accepts == [accept]
    assert dfa_state.non_word_char_transition == cache[targets_non_word_char]
    assert dfa_state.symbol_transitions[symbol0] == cache[targets_symbol0]
    assert dfa_state.symbol_transitions[symbol1] == cache[targets_symbol1]