def test_registers_escape():
    """A lone backslash expression flags ``escape_next`` after one step.

    The graph must still have no start states at that point.
    """
    automaton = nfa.Nfa()
    state = c.ConstructionState(automaton, '\\', accept)
    state._set_up()
    # Step 0 handles the backslash (presumably the index into the expression).
    state._perform_step(0)

    assert automaton.starts == []
    assert state.escape_next
def test_alternation_left_kleene():
    """Search the DFA built from ``a*|b|c`` with matching and non-matching text.

    Matching inputs must yield exactly one result whose first element is
    ``accept``; the remaining inputs must yield no result at all.
    """
    automaton = nfa.Nfa()
    const.ConstructionState(automaton, 'a*|b|c', accept).construct()
    automaton.remove_empty_transitions()
    dfa_graph = conv.NfaToDfaConverter(automaton).start_conversion()

    # Inputs expected to produce a single accepting result.
    for text in ('', 'aaaaa', 'c', 'b'):
        hits = list(dfa_graph.search(text))
        assert len(hits) == 1
        assert hits[0][0] == accept

    # Inputs expected to produce no result.
    for text in ('bb', 'cc', 'ac'):
        hits = list(dfa_graph.search(text))
        assert len(hits) == 0
def test_handles_multiple_alternations():
    """Constructing ``a|b|c`` makes the last three states accept ``accept``.

    State 0 must be the only start state.
    """
    automaton = nfa.Nfa()
    c.ConstructionState(automaton, 'a|b|c', accept).construct()

    assert automaton.starts == [0]
    # The final three states each carry the acceptance, one per alternative.
    for offset in (-1, -2, -3):
        assert automaton.states[offset].accepts == [accept]
def test_handles_escaped_symbol(mocker):
    """With ``escape_next`` set, ``?`` is passed on to ``_process_symbol``.

    A spy on ``_process_symbol`` verifies it is called exactly once with the
    raw character.
    """
    automaton = nfa.Nfa()
    state = c.ConstructionState(automaton, '?', accept)
    state._set_up()
    assert automaton.starts == []

    # Simulate a preceding escape character before stepping over '?'.
    state.escape_next = True
    symbol_spy = mocker.spy(state, "_process_symbol")
    state._perform_step(0)

    symbol_spy.assert_called_once_with('?')
def test_adds_acceptance():
    """Constructing ``a|b`` adds acceptance and the expected transitions.

    Checks the start state, the acceptance of the last two states, and
    selected non-word-character and empty transitions.
    """
    automaton = nfa.Nfa()
    c.ConstructionState(automaton, "a|b", accept).construct()

    assert automaton.starts == [0]
    states = automaton.states
    for offset in (-1, -2):
        assert states[offset].accepts == [accept]

    # (state index, expected targets of its non-word-character transitions)
    expected_non_word = ((3, {7}), (5, {6}), (1, set()))
    for index, targets in expected_non_word:
        assert states[index].non_word_char_transitions == targets

    assert states[1].empty_transitions == {2, 4}
def _init(self):
    """Build the label-matching DFA and the (unfitted) classification pipeline.

    Reads concepts, sub-thesauri and labels from ``self.graph``, compiles
    every (expanded, case-handled, optionally pluralised) label into one
    shared NFA, converts that NFA to a DFA, and stores the results in
    ``self.concept_map_``, ``self.dfa_`` and ``self.pipeline_``.
    """
    deprecated = set(t.extract_deprecated(self.graph))
    concepts = set(t.extract_by_type_uri(
        self.graph, self.concept_type_uri, remove=deprecated))
    thesauri = set(t.extract_by_type_uri(
        self.graph, self.sub_thesaurus_type_uri, remove=deprecated))

    # Give each concept (by its string form) a running integer index.
    self.concept_map_ = {
        str(concept): index for index, concept in enumerate(concepts)
    }

    thesaurus_features = ThesaurusFeatureTransformation(
        self.graph,
        concepts,
        thesauri,
        self.thesaurus_relation_type_uri,
        self.thesaurus_relation_is_specialisation,
    )
    labels = t.retrieve_concept_labels(
        self.graph, allowed=concepts, langs=self.langs)

    nfautomat = nfa.Nfa()

    case_handler = (
        case_handlers.title_case_handler
        if self.handle_title_case
        else case_handlers.sentence_case_handler
    )

    expansion_funs = expansion.collect_expansion_functions(
        extract_upper_case_from_braces=self.extract_upper_case_from_braces,
        extract_any_case_from_braces=self.extract_any_case_from_braces,
        expand_ampersand_with_spaces=self.expand_ampersand_with_spaces,
        expand_abbreviation_with_punctuation=(
            self.expand_abbreviation_with_punctuation),
    )

    def _unchanged(x):
        # Used in place of the plural rules when they are disabled.
        return x

    plural_fun = (
        expansion.simple_english_plural_fun
        if self.simple_english_plural_rules
        else _unchanged
    )

    for concept, label in labels:
        # Apply the expansion functions one after another to the raw label.
        expanded = label
        for expand in expansion_funs:
            expanded = expand(expanded)
        pattern = plural_fun(case_handler(expanded))
        _handle_construction(
            construction.ConstructionState(nfautomat, pattern, str(concept)),
            concept,
            label,
        )

    nfautomat.remove_empty_transitions()
    self.dfa_ = conversion.NfaToDfaConverter(nfautomat).start_conversion()

    # Column 0 goes to the thesaurus features, column 1 to the text features.
    combined_features = ColumnTransformer([
        ("Thesaurus Features", thesaurus_features, 0),
        ("Text Features", mk_text_features(), 1),
    ])
    classifier = DecisionTreeClassifier(
        min_samples_leaf=25, max_leaf_nodes=100)
    self.pipeline_ = Pipeline([
        ("Combined Features", combined_features),
        ("Classifier", classifier),
    ])
def epsilon_tree():
    """Return a 17-state NFA laid out as a complete binary tree.

    Each of the states 0..7 is linked to its two children (``2*i+1`` and
    ``2*i+2``) by an empty transition, a ``symbol0`` transition and a
    non-word-character transition.
    """
    tree = nfa.Nfa()
    for _ in range(17):
        tree.add_state()

    for parent in range(8):
        left = 2 * parent + 1
        right = 2 * parent + 2
        tree.add_empty_transition(parent, left)
        tree.add_empty_transition(parent, right)
        tree.add_symbol_transition(parent, left, symbol0)
        tree.add_symbol_transition(parent, right, symbol0)
        tree.add_non_word_char_transition(parent, left)
        tree.add_non_word_char_transition(parent, right)

    return tree
def test_init_with_empty_graph():
    """``_set_up`` on an empty graph creates three states and initial bookkeeping.

    Verifies both the graph contents and the construction state's fields
    straight after set-up.
    """
    automaton = nfa.Nfa()
    state = c.ConstructionState(automaton, expression, accept)
    state._set_up()

    # Graph contents after set-up.
    assert len(automaton.states) == 3
    assert automaton.starts == []
    assert automaton.states[0].non_word_char_transitions == {1}
    assert automaton.states[1].empty_transitions == {2}

    # Bookkeeping held by the construction state.
    assert state.append_to == [2]
    assert state.expression == expression
    assert state.before_braces == [[1], [2]]
    assert state.dangling_alternations.stack == []
    assert not state.escape_next
    assert state.accept == accept
def regression_test_graph():
    """Return a DFA built from the six module-level label/id pairs.

    Every label is constructed into one shared NFA, empty transitions are
    removed, and the NFA is then converted to a DFA.
    """
    labels = (
        label_global,
        label_economic,
        label_crisis,
        label_global_economic,
        label_economic_crisis,
        label_global_economic_crisis,
    )
    concept_ids = (
        id_global,
        id_economic,
        id_crisis,
        id_global_economic,
        id_economic_crisis,
        id_global_economic_crisis,
    )

    shared_nfa = nfa.Nfa()
    for label, concept_id in zip(labels, concept_ids):
        const.ConstructionState(shared_nfa, label, concept_id).construct()
    shared_nfa.remove_empty_transitions()

    return conv.NfaToDfaConverter(shared_nfa).start_conversion()
def input_graph():
    """Return a six-state NFA with two start states and one accepting state.

    States 0 and 1 are start states and state 5 accepts ``accept``. The
    states are linked by ``symbol0``/``symbol1`` transitions plus one
    non-word-character transition from state 1 to state 5.
    """
    automaton = nfa.Nfa()
    for _ in range(6):
        automaton.add_state()

    for start in (0, 1):
        automaton.add_start(start)

    # (source, target, symbol) — kept in the original insertion order.
    symbol_edges = (
        (0, 2, symbol0),
        (1, 0, symbol0),
        (0, 3, symbol1),
        (1, 3, symbol1),
        (2, 4, symbol0),
        (4, 5, symbol1),
    )
    for source, target, symbol in symbol_edges:
        automaton.add_symbol_transition(source, target, symbol)

    automaton.add_non_word_char_transition(1, 5)
    automaton.add_acceptance(5, accept)
    return automaton
def two_state_graph():
    """Return an NFA containing two states and nothing else."""
    automaton = nfa.Nfa()
    for _ in range(2):
        automaton.add_state()
    return automaton