def _init(self):
    """Build the concept map, the label-matching DFA and the pipeline.

    Reads ``self.graph`` and stores three attributes on the instance:

    * ``concept_map_`` -- maps ``str(concept)`` to a running integer index.
    * ``dfa_`` -- the result of converting the NFA that was populated from
      the expanded concept labels.
    * ``pipeline_`` -- a ``Pipeline`` combining thesaurus features
      (column 0) and text features (column 1) with a decision tree
      classifier. It is only constructed here, not fitted.
    """
    # Everything reported as deprecated is removed from both extractions.
    deprecated = set(t.extract_deprecated(self.graph))
    concepts = set(t.extract_by_type_uri(
        self.graph, self.concept_type_uri, remove=deprecated))
    thesauri = set(t.extract_by_type_uri(
        self.graph, self.sub_thesaurus_type_uri, remove=deprecated))

    self.concept_map_ = {
        str(uri): index for index, uri in enumerate(concepts)
    }

    thesaurus_features = ThesaurusFeatureTransformation(
        self.graph,
        concepts,
        thesauri,
        self.thesaurus_relation_type_uri,
        self.thesaurus_relation_is_specialisation,
    )
    concept_labels = t.retrieve_concept_labels(
        self.graph, allowed=concepts, langs=self.langs)

    automaton = nfa.Nfa()
    normalise_case = (
        case_handlers.title_case_handler
        if self.handle_title_case
        else case_handlers.sentence_case_handler
    )
    expanders = expansion.collect_expansion_functions(
        extract_upper_case_from_braces=self.extract_upper_case_from_braces,
        extract_any_case_from_braces=self.extract_any_case_from_braces,
        expand_ampersand_with_spaces=self.expand_ampersand_with_spaces,
        expand_abbreviation_with_punctuation=(
            self.expand_abbreviation_with_punctuation),
    )

    def _unchanged(value):
        # Used when plural handling is switched off.
        return value

    pluralise = (
        expansion.simple_english_plural_fun
        if self.simple_english_plural_rules
        else _unchanged
    )

    for concept, label in concept_labels:
        # Apply the expansion functions one after another, then the case
        # handler and finally the plural function.
        processed = label
        for expand in expanders:
            processed = expand(processed)
        state = construction.ConstructionState(
            automaton, pluralise(normalise_case(processed)), str(concept))
        _handle_construction(state, concept, label)

    automaton.remove_empty_transitions()
    self.dfa_ = conversion.NfaToDfaConverter(automaton).start_conversion()

    combined_features = ColumnTransformer([
        ("Thesaurus Features", thesaurus_features, 0),
        ("Text Features", mk_text_features(), 1),
    ])
    classifier = DecisionTreeClassifier(
        min_samples_leaf=25, max_leaf_nodes=100)
    self.pipeline_ = Pipeline([
        ("Combined Features", combined_features),
        ("Classifier", classifier),
    ])
def test_integration(typed_label_graph, concept_set):
    """Check label retrieval restricted to ``concept_set`` and English.

    The English preferred and alternative labels of the printed concept
    must be returned, its German preferred label must not, and
    ``c.thsys_ref_print`` must not appear as the first element of any
    returned tuple.
    """
    result = list(t.retrieve_concept_labels(
        typed_label_graph,
        allowed=concept_set,
        langs={"en"},
    ))

    assert (
        c.concept_ref_printed, c.concept_prefLabel_printed_en.value
    ) in result
    assert (
        c.concept_ref_printed, c.concept_altLabel_printed_en.value
    ) in result
    assert (
        c.concept_ref_printed, c.concept_prefLabel_printed_de.value
    ) not in result

    # First element of every returned tuple.
    returned_refs = (entry[0] for entry in result)
    assert c.thsys_ref_print not in returned_refs
def test_prefix_option(label_graph, concept_set, patch_module):
    """Passing a concept set must trigger the subject filter.

    ``patch_module`` is presumably the fixture that replaces the helpers
    in ``t`` with mocks -- confirm against its definition.
    """
    t.retrieve_concept_labels(label_graph, concept_set)

    subject_filter = t.filter_subject_tuples_from_set
    subject_filter.assert_called()
def test_none_prefix_option(label_graph, patch_module):
    """With ``allowed=None`` the subject filter must not be invoked.

    ``patch_module`` is presumably the fixture that replaces the helpers
    in ``t`` with mocks -- confirm against its definition.
    """
    t.retrieve_concept_labels(label_graph, allowed=None)

    subject_filter = t.filter_subject_tuples_from_set
    subject_filter.assert_not_called()
def test_language_option(label_graph, patch_module):
    """Passing ``langs`` must trigger the language filter.

    ``patch_module`` is presumably the fixture that replaces the helpers
    in ``t`` with mocks -- confirm against its definition.
    """
    t.retrieve_concept_labels(label_graph, langs={"en"})

    language_filter = t._filter_by_langs
    language_filter.assert_called()