Ejemplo n.º 1
0
    def _init(self):
        """Build the concept map, the label-matching DFA and the pipeline.

        Reads the graph and the configuration attributes from ``self`` and
        stores three results on the instance:

        * ``concept_map_`` -- maps ``str(concept)`` to a running index.
        * ``dfa_`` -- result of converting the NFA built from the concept
          labels.
        * ``pipeline_`` -- a two-step pipeline: combined thesaurus/text
          features followed by a decision tree classifier.
        """
        deprecated = set(t.extract_deprecated(self.graph))

        # Deprecated entries are handed to the extraction helper through
        # ``remove`` for both the concepts and the sub-thesauri.
        concepts = set(
            t.extract_by_type_uri(
                self.graph, self.concept_type_uri, remove=deprecated))
        thesauri = set(
            t.extract_by_type_uri(
                self.graph, self.sub_thesaurus_type_uri, remove=deprecated))

        # Indices follow the iteration order of the ``concepts`` set.
        self.concept_map_ = {
            str(item): index for index, item in enumerate(concepts)
        }

        thesaurus_features = ThesaurusFeatureTransformation(
            self.graph,
            concepts,
            thesauri,
            self.thesaurus_relation_type_uri,
            self.thesaurus_relation_is_specialisation,
        )
        concept_labels = t.retrieve_concept_labels(
            self.graph, allowed=concepts, langs=self.langs)

        automaton = nfa.Nfa()
        normalize_case = (
            case_handlers.title_case_handler
            if self.handle_title_case
            else case_handlers.sentence_case_handler
        )
        expanders = expansion.collect_expansion_functions(
            extract_upper_case_from_braces=self.extract_upper_case_from_braces,
            extract_any_case_from_braces=self.extract_any_case_from_braces,
            expand_ampersand_with_spaces=self.expand_ampersand_with_spaces,
            expand_abbreviation_with_punctuation=(
                self.expand_abbreviation_with_punctuation),
        )

        def _unchanged(x):
            # Fallback used when no plural rules are requested.
            return x

        pluralize = (
            expansion.simple_english_plural_fun
            if self.simple_english_plural_rules
            else _unchanged
        )

        # Every label goes through all expansion functions in order, then
        # the case handler, then the plural function, before being added to
        # the automaton under its concept's string form.
        for concept, label in concept_labels:
            expanded = label
            for expand in expanders:
                expanded = expand(expanded)
            state = construction.ConstructionState(
                automaton, pluralize(normalize_case(expanded)), str(concept))
            _handle_construction(state, concept, label)

        automaton.remove_empty_transitions()
        self.dfa_ = conversion.NfaToDfaConverter(automaton).start_conversion()

        # Thesaurus features are assigned column 0, text features column 1.
        feature_columns = ColumnTransformer([
            ("Thesaurus Features", thesaurus_features, 0),
            ("Text Features", mk_text_features(), 1),
        ])
        classifier = DecisionTreeClassifier(
            min_samples_leaf=25, max_leaf_nodes=100)
        self.pipeline_ = Pipeline([
            ("Combined Features", feature_columns),
            ("Classifier", classifier),
        ])
Ejemplo n.º 2
0
def test_integration(typed_label_graph, concept_set):
    """Check ``retrieve_concept_labels`` with ``allowed`` and ``langs`` set.

    With ``langs={"en"}`` and ``allowed=concept_set`` the result must
    contain the English preferred and alternative label of the printed
    concept, must not contain its German preferred label, and must not list
    ``c.thsys_ref_print`` as the first element of any entry.
    """
    labels = list(
        t.retrieve_concept_labels(
            typed_label_graph, allowed=concept_set, langs={"en"}))

    english_pref = (c.concept_ref_printed,
                    c.concept_prefLabel_printed_en.value)
    assert english_pref in labels

    english_alt = (c.concept_ref_printed,
                   c.concept_altLabel_printed_en.value)
    assert english_alt in labels

    german_pref = (c.concept_ref_printed,
                   c.concept_prefLabel_printed_de.value)
    assert german_pref not in labels

    # Only the first element of each entry is compared here.
    assert c.thsys_ref_print not in (entry[0] for entry in labels)
Ejemplo n.º 3
0
def test_prefix_option(label_graph, concept_set, patch_module):
    """Passing a concept set must invoke ``filter_subject_tuples_from_set``.

    ``patch_module`` is requested only as a fixture; presumably it replaces
    the helper on ``t`` with a mock (the ``assert_called`` check relies on
    that) -- confirm against the fixture definition.
    """
    t.retrieve_concept_labels(label_graph, concept_set)

    subject_filter = t.filter_subject_tuples_from_set
    subject_filter.assert_called()
Ejemplo n.º 4
0
def test_none_prefix_option(label_graph, patch_module):
    """With ``allowed=None`` the subject filter must not be invoked.

    ``patch_module`` is requested only as a fixture; presumably it replaces
    the helper on ``t`` with a mock (the ``assert_not_called`` check relies
    on that) -- confirm against the fixture definition.
    """
    t.retrieve_concept_labels(label_graph, allowed=None)

    subject_filter = t.filter_subject_tuples_from_set
    subject_filter.assert_not_called()
Ejemplo n.º 5
0
def test_language_option(label_graph, patch_module):
    """Passing ``langs`` must invoke the ``_filter_by_langs`` helper.

    ``patch_module`` is requested only as a fixture; presumably it replaces
    the helper on ``t`` with a mock (the ``assert_called`` check relies on
    that) -- confirm against the fixture definition.
    """
    t.retrieve_concept_labels(label_graph, langs={"en"})

    language_filter = t._filter_by_langs
    language_filter.assert_called()