Beispiel #1
0
def test_combination_0010():
    """Check the functions collected for the config string ``"00100"``.

    Two functions are expected: ``e.base_expansion`` first, then one whose
    ``__name__`` equals ``_name_ampersand_fun``.
    """
    collected = e.collect_expansion_functions(**_create_config_map("00100"))
    assert len(collected) == 2
    base, ampersand = collected
    assert base == e.base_expansion
    assert ampersand.__name__ == _name_ampersand_fun
Beispiel #2
0
def test_combination_1000():
    """Check the functions collected for the config string ``"10000"``.

    Two functions are expected: ``e.base_expansion`` first, then one whose
    ``__name__`` equals ``_name_replacer``.
    """
    kwargs = _create_config_map("10000")
    collected = e.collect_expansion_functions(**kwargs)
    assert len(collected) == 2
    base, replacer = collected
    assert base == e.base_expansion
    assert replacer.__name__ == _name_replacer
    # With this configuration the replacer must return the input unchanged.
    assert replacer(lower_braces_string) == lower_braces_string
Beispiel #3
0
def test_all_lower_braces():
    """Run every function collected for ``"11110"`` over a braces label.

    The functions are applied in order, each one receiving the previous
    result; the final value must be ``"lower"``.
    """
    text = "(lower) lower case explanation"
    pipeline = e.collect_expansion_functions(**_create_config_map("11110"))
    for expand in pipeline:
        text = expand(text)
    assert text == "lower"
Beispiel #4
0
def test_all_ampersand_from_braces():
    """Run every function collected for ``"11110"`` over an ampersand label.

    The functions are applied in order, each one receiving the previous
    result; the final value must be the pattern ``"R ?& ?D"``.
    """
    text = "(R&D) research and discovery"
    pipeline = e.collect_expansion_functions(**_create_config_map("11110"))
    for expand in pipeline:
        text = expand(text)
    assert text == "R ?& ?D"
Beispiel #5
0
def test_all_abbreviation_from_braces():
    """Run every function collected for ``"11110"`` over an abbreviation label.

    The functions are applied in order, each one receiving the previous
    result; the final value must be the abbreviation pattern asserted below.
    """
    text = "GDP (gross domestic product)"
    pipeline = e.collect_expansion_functions(**_create_config_map("11110"))
    for expand in pipeline:
        text = expand(text)
    assert text == "G\\.?D\\.?P\\.?"
Beispiel #6
0
def test_combination_0101():
    """Check the functions collected for the config string ``"01010"``.

    Three functions are expected, in this order: ``e.base_expansion``, one
    named ``_name_replacer`` and one named ``_name_abbreviation_fun``.
    """
    kwargs = _create_config_map("01010")
    collected = e.collect_expansion_functions(**kwargs)
    assert len(collected) == 3
    base, replacer, abbreviation = collected
    assert base == e.base_expansion
    assert replacer.__name__ == _name_replacer
    assert abbreviation.__name__ == _name_abbreviation_fun
    # With this configuration the replacer must map the braces string to
    # the braces content.
    assert replacer(lower_braces_string) == lower_braces_content
Beispiel #7
0
    def _init(self):
        """Prepare the concept map, the automaton and the classifier pipeline.

        Reads the graph and the configuration flags stored on ``self`` and
        sets three attributes: ``concept_map_``, ``dfa_`` and ``pipeline_``.
        """
        # Deprecated entries are removed from both concepts and sub-thesauri.
        deprecated = set(t.extract_deprecated(self.graph))
        concepts = set(t.extract_by_type_uri(
            self.graph, self.concept_type_uri, remove=deprecated))
        thesauri = set(t.extract_by_type_uri(
            self.graph, self.sub_thesaurus_type_uri, remove=deprecated))

        # Map each concept (as a string) to its position in iteration order.
        self.concept_map_ = {
            str(concept): index for index, concept in enumerate(concepts)
        }

        thesaurus_features = ThesaurusFeatureTransformation(
            self.graph,
            concepts,
            thesauri,
            self.thesaurus_relation_type_uri,
            self.thesaurus_relation_is_specialisation,
        )
        labels = t.retrieve_concept_labels(
            self.graph, allowed=concepts, langs=self.langs)

        automaton = nfa.Nfa()
        case_handler = (
            case_handlers.title_case_handler
            if self.handle_title_case
            else case_handlers.sentence_case_handler
        )

        expansion_options = {
            "extract_upper_case_from_braces":
                self.extract_upper_case_from_braces,
            "extract_any_case_from_braces":
                self.extract_any_case_from_braces,
            "expand_ampersand_with_spaces":
                self.expand_ampersand_with_spaces,
            "expand_abbreviation_with_punctuation":
                self.expand_abbreviation_with_punctuation,
        }
        expanders = expansion.collect_expansion_functions(**expansion_options)

        def _unchanged(term):
            # Used in place of the plural rules when they are disabled.
            return term

        pluralise = (
            expansion.simple_english_plural_fun
            if self.simple_english_plural_rules
            else _unchanged
        )

        # Every label is run through all expansion functions in order, then
        # case handling and plural handling, before it is added to the NFA.
        for concept, label in labels:
            pattern = label
            for expand in expanders:
                pattern = expand(pattern)
            state = construction.ConstructionState(
                automaton, pluralise(case_handler(pattern)), str(concept))
            _handle_construction(state, concept, label)

        automaton.remove_empty_transitions()
        self.dfa_ = conversion.NfaToDfaConverter(automaton).start_conversion()

        # Input column 0 goes to the thesaurus features, column 1 to the
        # text features.
        combined_features = ColumnTransformer([
            ("Thesaurus Features", thesaurus_features, 0),
            ("Text Features", mk_text_features(), 1),
        ])
        classifier = DecisionTreeClassifier(
            min_samples_leaf=25, max_leaf_nodes=100)
        self.pipeline_ = Pipeline([
            ("Combined Features", combined_features),
            ("Classifier", classifier),
        ])
Beispiel #8
0
def test_combination_0000():
    """Config string ``"00000"`` must yield only ``e.base_expansion``."""
    collected = e.collect_expansion_functions(**_create_config_map("00000"))
    expected = [e.base_expansion]
    assert collected == expected