def test_alternation_right_kleene():
    """Search a DFA built from ``a|b|c*`` with several inputs.

    ``''``, ``'ccccc'``, ``'a'`` and ``'b'`` must each produce exactly one
    search result whose first element equals ``accept``; ``'bb'``, ``'aa'``
    and ``'ca'`` must produce no result.
    """
    automaton = nfa.Nfa()
    const.ConstructionState(automaton, 'a|b|c*', accept).construct()
    automaton.remove_empty_transitions()
    dfa_graph = conv.NfaToDfaConverter(automaton).start_conversion()

    # Inputs expected to yield a single accepting result.
    for text in ('', 'ccccc', 'a', 'b'):
        matches = list(dfa_graph.search(text))
        assert len(matches) == 1
        assert matches[0][0] == accept

    # Inputs expected to yield nothing.
    for text in ('bb', 'aa', 'ca'):
        matches = list(dfa_graph.search(text))
        assert len(matches) == 0
def test_registers_escape():
    """Step over a single backslash and check ``escape_next`` gets set.

    ``graph.starts`` is also expected to be an empty list afterwards.
    """
    empty_nfa = nfa.Nfa()
    state = c.ConstructionState(empty_nfa, '\\', accept)
    state._set_up()
    state._perform_step(0)

    assert empty_nfa.starts == []
    assert state.escape_next
def test_handles_multiple_alternations():
    """Construct ``a|b|c`` and check the last three states accept.

    After construction ``graph.starts`` must equal ``[0]`` and each of the
    final three states must have ``accepts == [accept]``.
    """
    automaton = nfa.Nfa()
    c.ConstructionState(automaton, 'a|b|c', accept).construct()

    assert automaton.starts == [0]
    for offset in (-1, -2, -3):
        assert automaton.states[offset].accepts == [accept]
def test_handles_escaped_symbol(mocker):
    """With ``escape_next`` set, a step over ``?`` goes to ``_process_symbol``.

    The spy on ``_process_symbol`` must record exactly one call, made with
    ``'?'``.
    """
    empty_nfa = nfa.Nfa()
    state = c.ConstructionState(empty_nfa, '?', accept)
    state._set_up()
    assert empty_nfa.starts == []

    # Pretend the previous character was an escape before stepping.
    state.escape_next = True
    symbol_spy = mocker.spy(state, "_process_symbol")
    state._perform_step(0)

    symbol_spy.assert_called_once_with('?')
def test_adds_acceptance():
    """Construct ``a|b`` and check accepting states and selected transitions.

    The last two states must accept ``accept``; states 3, 5 and 1 are checked
    for their ``non_word_char_transitions`` and state 1 for its
    ``empty_transitions``.
    """
    automaton = nfa.Nfa()
    c.ConstructionState(automaton, "a|b", accept).construct()

    assert automaton.starts == [0]
    for offset in (-1, -2):
        assert automaton.states[offset].accepts == [accept]

    # (state index, expected non-word-char transition targets)
    expected_non_word = ((3, {7}), (5, {6}), (1, set()))
    for state_idx, targets in expected_non_word:
        assert automaton.states[state_idx].non_word_char_transitions == targets

    assert automaton.states[1].empty_transitions == {2, 4}
def _init(self):
    """Build the concept map, the label DFA and the classifier pipeline.

    Reads ``self.graph`` and the configuration attributes on ``self`` and
    stores the results in ``self.concept_map_``, ``self.dfa_`` and
    ``self.pipeline_``.
    """
    # Deprecated entries are excluded from both concepts and sub-thesauri.
    deprecated = set(t.extract_deprecated(self.graph))
    concepts = set(
        t.extract_by_type_uri(
            self.graph, self.concept_type_uri, remove=deprecated))
    thesauri = set(
        t.extract_by_type_uri(
            self.graph, self.sub_thesaurus_type_uri, remove=deprecated))

    # Map each concept's string form to a running index.
    self.concept_map_ = {
        str(concept): index for index, concept in enumerate(concepts)
    }

    thesaurus_features = ThesaurusFeatureTransformation(
        self.graph,
        concepts,
        thesauri,
        self.thesaurus_relation_type_uri,
        self.thesaurus_relation_is_specialisation)
    concept_labels = t.retrieve_concept_labels(
        self.graph, allowed=concepts, langs=self.langs)

    label_nfa = nfa.Nfa()
    case_handler = (
        case_handlers.title_case_handler
        if self.handle_title_case
        else case_handlers.sentence_case_handler
    )
    expanders = expansion.collect_expansion_functions(
        extract_upper_case_from_braces=self.extract_upper_case_from_braces,
        extract_any_case_from_braces=self.extract_any_case_from_braces,
        expand_ampersand_with_spaces=self.expand_ampersand_with_spaces,
        expand_abbreviation_with_punctuation=(
            self.expand_abbreviation_with_punctuation),
    )

    def _unchanged(x):
        return x

    plural_fun = (
        expansion.simple_english_plural_fun
        if self.simple_english_plural_rules
        else _unchanged
    )

    # Add every label to the shared NFA: expand, case-handle, pluralise.
    for concept, label in concept_labels:
        pattern = label
        for expand in expanders:
            pattern = expand(pattern)
        pattern = plural_fun(case_handler(pattern))
        state = construction.ConstructionState(
            label_nfa, pattern, str(concept))
        _handle_construction(state, concept, label)

    label_nfa.remove_empty_transitions()
    self.dfa_ = conversion.NfaToDfaConverter(label_nfa).start_conversion()

    combined_features = ColumnTransformer([
        ("Thesaurus Features", thesaurus_features, 0),
        ("Text Features", mk_text_features(), 1),
    ])
    classifier = DecisionTreeClassifier(
        min_samples_leaf=25, max_leaf_nodes=100)
    self.pipeline_ = Pipeline([
        ("Combined Features", combined_features),
        ("Classifier", classifier),
    ])
def test_init_with_existing_graph(input_graph):
    """Run ``_set_up`` on the ``input_graph`` fixture and check the result.

    Verifies the size, starts and two transitions of the graph, then the
    initial bookkeeping attributes of the construction state.
    """
    state = c.ConstructionState(input_graph, expression, accept)
    state._set_up()

    # Graph after set-up.
    assert len(input_graph.states) == 9
    assert input_graph.starts == [0, 1]
    assert input_graph.states[6].non_word_char_transitions == {7}
    assert input_graph.states[7].empty_transitions == {8}

    # Construction state after set-up.
    assert state.append_to == [8]
    assert state.expression == expression
    assert state.before_braces == [[7], [8]]
    assert state.dangling_alternations.stack == []
    assert not state.escape_next
    assert state.accept == accept
def test_handles_alternation(input_graph):
    """Step over ``|`` and check ``append_to`` and the dangling stack.

    Afterwards ``append_to`` must hold only the index of the last graph state
    and the top of ``dangling_alternations.stack`` must equal the
    ``append_to`` list that was in place before the step.
    """
    initial_append_to = [2, 4, 5]
    initial_before_braces = [[0], [1, 3]]
    initial_after_braces = [6, 7]

    state = c.ConstructionState(input_graph, '|', accept)
    state._set_up()
    # Override the bookkeeping with shallow copies of the prepared lists.
    state.append_to = list(initial_append_to)
    state.before_braces = list(initial_before_braces)
    state.after_braces = list(initial_after_braces)
    state._perform_step(0)

    assert input_graph.starts == [0, 1]
    assert state.append_to == [len(state.graph.states) - 1]
    assert state.dangling_alternations.stack[-1] == initial_append_to
def test_init_with_empty_graph():
    """Run ``_set_up`` on a fresh ``nfa.Nfa`` and check the result.

    Verifies the size, starts and two transitions of the graph, then the
    initial bookkeeping attributes of the construction state.
    """
    empty_nfa = nfa.Nfa()
    state = c.ConstructionState(empty_nfa, expression, accept)
    state._set_up()

    # Graph after set-up.
    assert len(empty_nfa.states) == 3
    assert empty_nfa.starts == []
    assert empty_nfa.states[0].non_word_char_transitions == {1}
    assert empty_nfa.states[1].empty_transitions == {2}

    # Construction state after set-up.
    assert state.append_to == [2]
    assert state.expression == expression
    assert state.before_braces == [[1], [2]]
    assert state.dangling_alternations.stack == []
    assert not state.escape_next
    assert state.accept == accept
def test_handles_optional(input_graph):
    """Step over ``?`` and check the resulting empty transitions.

    Every index in ``append_to`` must appear in the ``empty_transitions`` of
    every state listed in the last ``before_braces`` entry.
    """
    initial_append_to = [2, 4, 5]
    initial_before_braces = [[0], [1, 3]]
    initial_after_braces = [6, 7]

    state = c.ConstructionState(input_graph, '?', accept)
    state._set_up()
    # Override the bookkeeping with shallow copies of the prepared lists.
    state.append_to = list(initial_append_to)
    state.before_braces = list(initial_before_braces)
    state.after_braces = list(initial_after_braces)
    state._perform_step(0)

    assert input_graph.starts == [0, 1]
    assert all(
        target in input_graph.states[source].empty_transitions
        for target in state.append_to
        for source in state.before_braces[-1]
    )
def regression_test_graph():
    """Build and return a DFA from six module-level label/id pairs.

    Each pair is added to one shared NFA through ``ConstructionState``; empty
    transitions are then removed and the NFA is converted to a DFA.
    """
    shared_nfa = nfa.Nfa()
    labelled_concepts = (
        (label_global, id_global),
        (label_economic, id_economic),
        (label_crisis, id_crisis),
        (label_global_economic, id_global_economic),
        (label_economic_crisis, id_economic_crisis),
        (label_global_economic_crisis, id_global_economic_crisis),
    )
    for label, concept_id in labelled_concepts:
        state = const.ConstructionState(shared_nfa, label, concept_id)
        state.construct()

    shared_nfa.remove_empty_transitions()
    return conv.NfaToDfaConverter(shared_nfa).start_conversion()
def test_handles_kleene_closure(input_graph):
    """Step over ``*`` and check the resulting empty transitions.

    Each state that was in ``append_to`` before the step must have an empty
    transition to every state in the last ``before_braces`` entry, and each
    of those states must have an empty transition to the last graph state.
    """
    initial_append_to = [2, 4, 5]
    initial_before_braces = [[0], [1, 3]]

    state = c.ConstructionState(input_graph, '*', accept)
    state._set_up()
    # Override the bookkeeping with shallow copies of the prepared lists.
    state.append_to = list(initial_append_to)
    state.before_braces = list(initial_before_braces)
    state._perform_step(0)

    assert input_graph.starts == [0, 1]

    # Transitions back from the old append points.
    assert all(
        loop_start in input_graph.states[source].empty_transitions
        for source in initial_append_to
        for loop_start in state.before_braces[-1]
    )

    # Transitions forward to the last state.
    last_state_idx = len(input_graph.states) - 1
    for loop_start in state.before_braces[-1]:
        assert last_state_idx in input_graph.states[loop_start].empty_transitions
def test_handles_closing_brace(input_graph, mocker):
    """Step over ``)`` with ``dangling_alternations.pop`` patched.

    ``append_to`` must end up as its previous content followed by the list
    the patched ``pop`` returns, and ``before_braces`` must have lost its
    last entry.
    """
    initial_append_to = [2, 4, 5]
    initial_before_braces = [[0], [1, 3]]
    popped = [12, 14]
    split_at = len(initial_append_to)

    state = c.ConstructionState(input_graph, ')', accept)
    state._set_up()
    # Make the dangling-alternation stack hand back a known list.
    mocker.patch.object(state.dangling_alternations, "pop", lambda: popped)
    state.append_to = list(initial_append_to)
    state.before_braces = list(initial_before_braces)
    state._perform_step(0)

    assert input_graph.starts == [0, 1]
    kept_part = state.append_to[:split_at]
    assert kept_part == initial_append_to
    added_part = state.append_to[split_at:]
    assert added_part == popped
    assert state.before_braces == initial_before_braces[:-1]
def test_handles_opening_brace(input_graph):
    """Step over ``(`` and check the bookkeeping and new transitions.

    The step must leave an empty list on the dangling-alternations stack,
    add the old ``append_to`` and the last graph state's index to
    ``before_braces``, set ``append_to`` to that index, and give each old
    append point an empty transition to it.
    """
    initial_append_to = [2, 4, 5]
    initial_before_braces = [[0], [1, 3]]

    state = c.ConstructionState(input_graph, '(', accept)
    state._set_up()
    # Override the bookkeeping with shallow copies of the prepared lists.
    state.append_to = list(initial_append_to)
    state.before_braces = list(initial_before_braces)
    state._perform_step(0)

    assert input_graph.starts == [0, 1]
    opened_idx = len(input_graph.states) - 1

    assert state.dangling_alternations.pop() == []
    assert state.before_braces[:-2] == initial_before_braces[:-1]
    assert state.before_braces[-2] == initial_append_to
    assert state.before_braces[-1] == [opened_idx]
    assert state.append_to == [opened_idx]
    assert all(
        opened_idx in input_graph.states[source].empty_transitions
        for source in initial_append_to
    )