def test_conversion(input_graph, mocker):
    """Convert the fixture NFA and verify the structure of the resulting DFA.

    The DFA-building methods are spied on so that unexpected extra states,
    transitions or acceptances show up as wrong call counts. Afterwards every
    expected symbol transition and the accepting state are checked.
    """
    converter = c.NfaToDfaConverter(input_graph)

    # The end result is what matters most; the spies only guard against
    # unknown elements being created along the way.
    dfa = converter.dfa
    add_state_spy = mocker.spy(dfa, "add_state")
    symbol_spy = mocker.spy(dfa, "set_symbol_transition")
    non_word_char_spy = mocker.spy(dfa, "set_non_word_char_transition")
    accept_spy = mocker.spy(dfa, "add_acceptances")

    result = converter.start_conversion()

    assert converter.queue.qsize() == 0
    assert len(converter.state_cache) == 6
    assert len(converter.state_represents) == 6

    # The initial state is added during construction, so add_state is
    # called one time fewer than there are states in the graph.
    assert add_state_spy.call_count == 5
    assert non_word_char_spy.call_count == 1
    assert symbol_spy.call_count == 7
    assert accept_spy.call_count == 1

    def dfa_state(*nfa_states):
        # DFA state id cached for the given set of NFA state ids.
        return converter.state_cache[frozenset(nfa_states)]

    # (source NFA set, symbol, target NFA set)
    expected_transitions = (
        ((0, 1), symbol0, (0, 2)),
        ((0, 1), symbol1, (3,)),
        ((0, 2), symbol1, (3,)),
        ((0, 2), symbol0, (2, 4)),
        ((2, 4), symbol1, (5,)),
        ((2, 4), symbol0, (4,)),
        ((4,), symbol1, (5,)),
    )
    for source, symbol, target in expected_transitions:
        source_state = result.states[dfa_state(*source)]
        assert source_state.symbol_transitions[symbol] == dfa_state(*target)

    assert result.states[dfa_state(5)].accepts == [accept]
def test_alternation_right_kleene():
    """Check searching with the DFA built from the expression ``a|b|c*``.

    The empty string, a run of ``c`` and the single letters ``a`` and ``b``
    must each yield exactly one result carrying ``accept``; the other inputs
    must yield no result at all.
    """
    graph = nfa.Nfa()
    const.ConstructionState(graph, 'a|b|c*', accept).construct()
    graph.remove_empty_transitions()
    dfa_graph = conv.NfaToDfaConverter(graph).start_conversion()

    for text in ('', 'ccccc', 'a', 'b'):
        matches = list(dfa_graph.search(text))
        assert len(matches) == 1
        assert matches[0][0] == accept

    for text in ('bb', 'aa', 'ca'):
        matches = list(dfa_graph.search(text))
        assert len(matches) == 0
def test_transition_collection(input_graph):
    """Check that collected transitions match the selected NFA states.

    Two directions are verified for symbol transitions, non-word-char
    transitions and acceptances:

    * every entry of a selected NFA state appears in the collected result;
    * every collected entry originates from at least one selected state.
    """
    converter = c.NfaToDfaConverter(input_graph)
    state_set = {0, 1, 3, 5}
    (symbol_transitions,
     non_word_char_transitions,
     accepts) = converter._collect_nfa_transitions(state_set)

    # Look the selected states up once instead of on every membership check.
    nfa_states = [input_graph.states[state_id] for state_id in state_set]

    # Nothing from the selected states may be missing in the result.
    for state in nfa_states:
        for symbol, targets in state.symbol_transitions.items():
            for target in targets:
                assert target in symbol_transitions[symbol]
        for target in state.non_word_char_transitions:
            assert target in non_word_char_transitions
        for accepted in state.accepts:
            assert accepted in accepts

    # Nothing in the result may come from outside the selected states.
    for symbol, targets in symbol_transitions.items():
        for target in targets:
            assert any(
                target in state.symbol_transitions.get(symbol, [])
                for state in nfa_states)
    for target in non_word_char_transitions:
        assert any(
            target in state.non_word_char_transitions
            for state in nfa_states)
    for accepted in accepts:
        assert any(accepted in state.accepts for state in nfa_states)
def test_retrieves_existing_state(input_graph):
    """Request the NFA start set again and check that nothing new is added.

    After the call the DFA, the work queue and both lookup tables must each
    still contain exactly one entry.
    """
    converter = c.NfaToDfaConverter(input_graph)
    start_set = frozenset(input_graph.starts)

    converter._get_or_create_dfa_state(start_set)

    expected_size = 1
    assert len(converter.dfa.states) == expected_size
    assert converter.queue.qsize() == expected_size
    assert len(converter.state_cache) == expected_size
    assert len(converter.state_represents) == expected_size
def test_initialization(input_graph):
    """Check the converter's bookkeeping directly after construction.

    Exactly one DFA state must exist and be queued, and that state (id 0)
    must be registered for the set of NFA start states in both lookup
    tables.
    """
    converter = c.NfaToDfaConverter(input_graph)
    nfa_starts = frozenset(input_graph.starts)

    expected_size = 1
    assert len(converter.dfa.states) == expected_size
    assert converter.queue.qsize() == expected_size
    assert len(converter.state_cache) == expected_size
    assert len(converter.state_represents) == expected_size

    initial_id = 0
    assert converter.state_represents[initial_id] == list(nfa_starts)
    assert converter.state_cache[nfa_starts] == initial_id
def test_creates_new_state(input_graph):
    """Request an NFA state set not seen before and check it gets registered.

    The DFA, the work queue and both lookup tables must grow to two entries,
    and the new set must be mapped to DFA state id 1 in both directions.
    """
    converter = c.NfaToDfaConverter(input_graph)
    unseen_set = frozenset([0, 2])

    converter._get_or_create_dfa_state(unseen_set)

    expected_size = 2
    assert len(converter.dfa.states) == expected_size
    assert converter.queue.qsize() == expected_size
    assert len(converter.state_cache) == expected_size
    assert len(converter.state_represents) == expected_size

    new_id = 1
    assert converter.state_represents[new_id] == list(unseen_set)
    assert converter.state_cache[unseen_set] == new_id
def _init(self):
    """Build the concept map, the label-matching DFA and the pipeline.

    Concepts and sub-thesauri are extracted from ``self.graph``, with the
    deprecated entries passed as ``remove``. Each concept label is run
    through the configured expansion functions, the case handler and the
    plural function, then added to an NFA that is finally converted to a
    DFA.

    Sets ``self.concept_map_``, ``self.dfa_`` and ``self.pipeline_``.
    """
    deprecated = set(t.extract_deprecated(self.graph))
    concepts = set(t.extract_by_type_uri(
        self.graph, self.concept_type_uri, remove=deprecated))
    thesauri = set(t.extract_by_type_uri(
        self.graph, self.sub_thesaurus_type_uri, remove=deprecated))

    # Assign every concept (by its string form) a running index.
    self.concept_map_ = {
        str(concept): index for index, concept in enumerate(concepts)}

    thesaurus_features = ThesaurusFeatureTransformation(
        self.graph,
        concepts,
        thesauri,
        self.thesaurus_relation_type_uri,
        self.thesaurus_relation_is_specialisation,
    )
    labels = t.retrieve_concept_labels(
        self.graph, allowed=concepts, langs=self.langs)

    automaton = nfa.Nfa()

    case_handler = (
        case_handlers.title_case_handler
        if self.handle_title_case
        else case_handlers.sentence_case_handler
    )

    expansion_funs = expansion.collect_expansion_functions(
        extract_upper_case_from_braces=self.extract_upper_case_from_braces,
        extract_any_case_from_braces=self.extract_any_case_from_braces,
        expand_ampersand_with_spaces=self.expand_ampersand_with_spaces,
        expand_abbreviation_with_punctuation=(
            self.expand_abbreviation_with_punctuation),
    )

    def unchanged(text):
        # Used when no plural rules are requested.
        return text

    plural_fun = (
        expansion.simple_english_plural_fun
        if self.simple_english_plural_rules
        else unchanged
    )

    for concept, label in labels:
        # Apply the expansion functions one after another.
        expanded = label
        for expand in expansion_funs:
            expanded = expand(expanded)
        state = construction.ConstructionState(
            automaton, plural_fun(case_handler(expanded)), str(concept))
        _handle_construction(state, concept, label)

    automaton.remove_empty_transitions()
    self.dfa_ = conversion.NfaToDfaConverter(automaton).start_conversion()

    combined_features = ColumnTransformer([
        ("Thesaurus Features", thesaurus_features, 0),
        ("Text Features", mk_text_features(), 1),
    ])
    classifier = DecisionTreeClassifier(
        min_samples_leaf=25, max_leaf_nodes=100)
    self.pipeline_ = Pipeline([
        ("Combined Features", combined_features),
        ("Classifier", classifier),
    ])
def regression_test_graph():
    """Return a DFA built from the six global/economic/crisis labels.

    Each label is constructed into one shared NFA under its concept id;
    empty transitions are removed before the NFA is converted.
    """
    labels = (
        label_global,
        label_economic,
        label_crisis,
        label_global_economic,
        label_economic_crisis,
        label_global_economic_crisis,
    )
    concept_ids = (
        id_global,
        id_economic,
        id_crisis,
        id_global_economic,
        id_economic_crisis,
        id_global_economic_crisis,
    )

    automaton = nfa.Nfa()
    for label, concept_id in zip(labels, concept_ids):
        const.ConstructionState(automaton, label, concept_id).construct()
    automaton.remove_empty_transitions()

    return conv.NfaToDfaConverter(automaton).start_conversion()
def test_transition_creation(input_graph):
    """Create transitions for DFA state 0 and check what was stored on it.

    The state must carry the acceptance, and each transition must point to
    the DFA state that the converter cached for the respective NFA target
    set.
    """
    converter = c.NfaToDfaConverter(input_graph)

    symbol0_targets = frozenset([1, 3])
    non_word_char_targets = frozenset([2, 4])
    symbol1_targets = frozenset([5])

    # The converter receives plain sets; the frozensets above are kept for
    # the cache lookups below.
    converter._create_dfa_transitions(
        0,
        {symbol0: set(symbol0_targets), symbol1: set(symbol1_targets)},
        set(non_word_char_targets),
        {accept},
    )

    dfa_state = converter.dfa.states[0]
    cache = converter.state_cache
    assert dfa_state.accepts == [accept]
    assert dfa_state.non_word_char_transition == cache[non_word_char_targets]
    assert dfa_state.symbol_transitions[symbol0] == cache[symbol0_targets]
    assert dfa_state.symbol_transitions[symbol1] == cache[symbol1_targets]