Example #1
0
def _get_parent_to_category_set_mapping() -> dict:
    """Return the module-level category-set mapping, creating it on first use.

    On the first call the value is obtained from
    ``util.load_or_create_cache`` under the cache id
    ``'dbpedia_category_sets'``, with ``_compute_category_sets`` passed as the
    second argument. The result is stored in the module global
    ``__CATEGORY_SETS__`` and returned as-is on every later call.
    """
    global __CATEGORY_SETS__
    if '__CATEGORY_SETS__' in globals():
        return __CATEGORY_SETS__

    __CATEGORY_SETS__ = util.load_or_create_cache(
        'dbpedia_category_sets', _compute_category_sets)
    return __CATEGORY_SETS__
Example #2
0
def get_statistics(category: str) -> dict:
    """Return the stored type/property amount and frequency information for `category`.

    The per-category statistics are fetched once via
    ``util.load_or_create_cache`` (cache id ``'dbpedia_category_statistics'``,
    second argument ``_compute_category_statistics``) and kept in the module
    global ``__CATEGORY_STATISTICS__``. The entry for `category` is read with
    plain indexing, so the behaviour for an unknown category is whatever the
    cached mapping does on a missing key.
    """
    global __CATEGORY_STATISTICS__
    cache_is_missing = '__CATEGORY_STATISTICS__' not in globals()
    if cache_is_missing:
        __CATEGORY_STATISTICS__ = util.load_or_create_cache(
            'dbpedia_category_statistics', _compute_category_statistics)

    statistics_by_category = __CATEGORY_STATISTICS__
    return statistics_by_category[category]
Example #3
0
def get_inverse_lexicalisations(text: str) -> dict:
    """Return the inverse-lexicalisation entry stored for `text`, or an empty dict.

    The lookup is case-insensitive on the caller's side: `text` is lower-cased
    before it is used as the key. The underlying mapping is fetched once via
    ``util.load_or_create_cache`` (cache id
    ``'dbpedia_resource_inverse_lexicalisations'``, second argument
    ``_compute_inverse_lexicalisations``) and kept in the module global
    ``__RESOURCE_INVERSE_LEXICALISATIONS__``.

    :param text: the surface form to look up.
    :return: the cached value for ``text.lower()``; a fresh ``{}`` when the
        key is not present.
    """
    global __RESOURCE_INVERSE_LEXICALISATIONS__
    if '__RESOURCE_INVERSE_LEXICALISATIONS__' not in globals():
        __RESOURCE_INVERSE_LEXICALISATIONS__ = util.load_or_create_cache(
            'dbpedia_resource_inverse_lexicalisations',
            _compute_inverse_lexicalisations)

    # Lower-case once and do a single lookup instead of a membership test
    # followed by a second indexing operation.
    return __RESOURCE_INVERSE_LEXICALISATIONS__.get(text.lower(), {})
Example #4
0
def is_functional(dbp_predicate: str) -> bool:
    """Return the stored "functional" flag for `dbp_predicate`, defaulting to False.

    The predicate-to-flag mapping is fetched once via
    ``util.load_or_create_cache`` (cache id
    ``'dbpedia_functional_predicates'``, second argument
    ``_create_functional_predicate_dict``) and kept in the module global
    ``__PREDICATE_FUNCTIONAL__``.

    :param dbp_predicate: the predicate to look up.
    :return: the cached value for the predicate, or ``False`` when the
        predicate has no entry.
    """
    global __PREDICATE_FUNCTIONAL__
    if '__PREDICATE_FUNCTIONAL__' not in globals():
        __PREDICATE_FUNCTIONAL__ = util.load_or_create_cache(
            'dbpedia_functional_predicates', _create_functional_predicate_dict)

    # One lookup with a default replaces the membership test plus indexing.
    return __PREDICATE_FUNCTIONAL__.get(dbp_predicate, False)
Example #5
0
def _get_label_mapping() -> dict:
    """Return the module-level resource-label mapping, creating it on first use.

    On the first call the mapping is requested from
    ``util.load_or_create_cache`` under the cache id
    ``'dbpedia_resource_labels'``. The builder handed to that helper calls
    ``rdf_util.create_single_val_dict_from_rdf`` on the
    ``'files.dbpedia.labels'`` data file with ``rdf_util.PREDICATE_LABEL``.
    The result is kept in ``__RESOURCE_LABEL_MAPPING__`` for later calls.
    """
    global __RESOURCE_LABEL_MAPPING__
    if '__RESOURCE_LABEL_MAPPING__' in globals():
        return __RESOURCE_LABEL_MAPPING__

    def build_label_mapping():
        # The data file is resolved only if the cache helper invokes the builder.
        label_files = [util.get_data_file('files.dbpedia.labels')]
        return rdf_util.create_single_val_dict_from_rdf(
            label_files, rdf_util.PREDICATE_LABEL)

    __RESOURCE_LABEL_MAPPING__ = util.load_or_create_cache(
        'dbpedia_resource_labels', build_label_mapping)
    return __RESOURCE_LABEL_MAPPING__
Example #6
0
def get_categories() -> set:
    """Return the module-level set of categories, creating it on first use.

    On the first call the set is requested from
    ``util.load_or_create_cache`` under the cache id ``'dbpedia_categories'``.
    The builder handed to that helper turns the result of
    ``rdf_util.create_single_val_dict_from_rdf`` (run on the
    ``'files.dbpedia.categories'`` data file with ``rdf_util.PREDICATE_TYPE``)
    into a ``set``. The result is kept in ``__CATEGORIES__`` for later calls.
    """
    global __CATEGORIES__
    if '__CATEGORIES__' in globals():
        return __CATEGORIES__

    def build_category_set():
        # The data file is resolved only if the cache helper invokes the builder.
        category_files = [util.get_data_file('files.dbpedia.categories')]
        typed_resources = rdf_util.create_single_val_dict_from_rdf(
            category_files, rdf_util.PREDICATE_TYPE)
        return set(typed_resources)

    __CATEGORIES__ = util.load_or_create_cache('dbpedia_categories',
                                               build_category_set)
    return __CATEGORIES__
Example #7
0
def get_children(category: str) -> set:
    """Return the children stored for `category`, excluding `category` itself.

    The child mapping is requested once from ``util.load_or_create_cache``
    under the cache id ``'dbpedia_category_children'``. The builder handed to
    that helper calls ``rdf_util.create_multi_val_dict_from_rdf`` on the
    ``'files.dbpedia.categories'`` data file with
    ``rdf_util.PREDICATE_BROADER`` and ``reverse_key=True``. The mapping is
    kept in ``__CHILDREN__``.

    The entry is read with plain indexing and the result is a new set, so the
    cached entry is not modified by removing the self-reference.
    """
    global __CHILDREN__
    if '__CHILDREN__' not in globals():

        def build_children_mapping():
            category_files = [util.get_data_file('files.dbpedia.categories')]
            return rdf_util.create_multi_val_dict_from_rdf(
                category_files,
                rdf_util.PREDICATE_BROADER,
                reverse_key=True)

        __CHILDREN__ = util.load_or_create_cache('dbpedia_category_children',
                                                 build_children_mapping)

    stored_children = __CHILDREN__[category]
    return stored_children.difference({category})
Example #8
0
def get_resources(category: str) -> set:
    """Return the resources stored for `category`.

    The category-to-resources mapping is requested once from
    ``util.load_or_create_cache`` under the cache id
    ``'dbpedia_category_resources'``. The builder handed to that helper calls
    ``rdf_util.create_multi_val_dict_from_rdf`` on the
    ``'files.dbpedia.article_categories'`` data file with
    ``rdf_util.PREDICATE_SUBJECT`` and ``reverse_key=True``. The mapping is
    kept in ``__CATEGORY_RESOURCES__``.

    The entry is read with plain indexing and returned without copying.
    """
    global __CATEGORY_RESOURCES__
    if '__CATEGORY_RESOURCES__' not in globals():

        def build_resource_mapping():
            article_category_files = [
                util.get_data_file('files.dbpedia.article_categories')
            ]
            return rdf_util.create_multi_val_dict_from_rdf(
                article_category_files,
                rdf_util.PREDICATE_SUBJECT,
                reverse_key=True)

        __CATEGORY_RESOURCES__ = util.load_or_create_cache(
            'dbpedia_category_resources', build_resource_mapping)

    resources_by_category = __CATEGORY_RESOURCES__
    return resources_by_category[category]
Example #9
0
def get_resource_property_mapping() -> dict:
    """Return the module-level resource-property mapping, creating it on first use.

    On the first call the mapping is requested from
    ``util.load_or_create_cache`` under the cache id
    ``'dbpedia_resource_properties'``. The builder handed to that helper calls
    ``rdf_util.create_dict_from_rdf`` on the
    ``'files.dbpedia.mappingbased_literals'`` and
    ``'files.dbpedia.mappingbased_objects'`` data files. The result is kept in
    ``__RESOURCE_PROPERTY_MAPPING__`` for later calls.
    """
    global __RESOURCE_PROPERTY_MAPPING__
    if '__RESOURCE_PROPERTY_MAPPING__' in globals():
        return __RESOURCE_PROPERTY_MAPPING__

    # Both data files are resolved up front, before the cache helper is
    # called, regardless of whether the builder ends up being invoked.
    literals_file = util.get_data_file('files.dbpedia.mappingbased_literals')
    objects_file = util.get_data_file('files.dbpedia.mappingbased_objects')
    source_files = [literals_file, objects_file]

    def build_property_mapping():
        return rdf_util.create_dict_from_rdf(source_files)

    __RESOURCE_PROPERTY_MAPPING__ = util.load_or_create_cache(
        'dbpedia_resource_properties', build_property_mapping)
    return __RESOURCE_PROPERTY_MAPPING__
Example #10
0
def _get_resource_type_mapping() -> dict:
    """Return the module-level resource-type mapping, creating it on first use.

    On the first call the mapping is requested from
    ``util.load_or_create_cache`` under the cache id
    ``'dbpedia_resource_type_mapping'``. The builder handed to that helper
    calls ``rdf_util.create_multi_val_dict_from_rdf`` with
    ``rdf_util.PREDICATE_TYPE`` on the ``'files.dbpedia.instance_types'`` and
    ``'files.dbpedia.transitive_instance_types'`` data files. The result is
    kept in ``__RESOURCE_TYPE_MAPPING__`` for later calls.
    """
    global __RESOURCE_TYPE_MAPPING__
    if '__RESOURCE_TYPE_MAPPING__' in globals():
        return __RESOURCE_TYPE_MAPPING__

    # Both data files are resolved up front, before the cache helper is
    # called, regardless of whether the builder ends up being invoked.
    direct_types_file = util.get_data_file('files.dbpedia.instance_types')
    transitive_types_file = util.get_data_file(
        'files.dbpedia.transitive_instance_types')
    source_files = [direct_types_file, transitive_types_file]

    def build_type_mapping():
        return rdf_util.create_multi_val_dict_from_rdf(
            source_files, rdf_util.PREDICATE_TYPE)

    __RESOURCE_TYPE_MAPPING__ = util.load_or_create_cache(
        'dbpedia_resource_type_mapping', build_type_mapping)
    return __RESOURCE_TYPE_MAPPING__
Example #11
0
def resolve_redirect(dbp_resource: str, visited=None) -> str:
    """Return the resource to which `dbp_resource` redirects (if any) or `dbp_resource` itself.

    Redirects are followed transitively until a resource without a redirect
    entry is reached, or until a resource that was already visited comes up
    again (a redirect cycle), in which case that resource is returned.

    The redirect mapping is requested once from ``util.load_or_create_cache``
    under the cache id ``'dbpedia_resource_redirects'`` and kept in the module
    global ``__REDIRECTS__``.

    :param dbp_resource: the resource to resolve.
    :param visited: optional collection of resources that count as already
        visited; it is copied and never modified.
    :return: the final redirect target, or `dbp_resource` if it has none.
    """
    global __REDIRECTS__
    if '__REDIRECTS__' not in globals():
        initializer = lambda: rdf_util.create_single_val_dict_from_rdf([
            util.get_data_file('files.dbpedia.redirects')
        ], rdf_util.PREDICATE_REDIRECTS)
        __REDIRECTS__ = util.load_or_create_cache('dbpedia_resource_redirects',
                                                  initializer)

    # Follow the chain with a loop rather than one recursive call per hop, so
    # that long chains can neither exhaust the interpreter's recursion limit
    # nor copy the visited set on every step.
    seen = set(visited) if visited else set()
    current = dbp_resource
    while current in __REDIRECTS__ and current not in seen:
        seen.add(current)
        current = __REDIRECTS__[current]
    return current
Example #12
0
File: nlp.py Project: nheist/Cat2Ax
def parse(text: str, disable_normalization=False, skip_cache=False) -> Doc:
    """Parse `text` with the module-level `parser`, memoising results in a module cache.

    Unless `disable_normalization` is set, the first character of `text` is
    lower-cased before parsing. The original casing is kept only when the text
    has more than one space-separated token and either its second character is
    upper-case or its second token is title-cased. Texts of fewer than two
    characters are never modified.

    :param text: the text to parse.
    :param disable_normalization: if true, parse `text` exactly as given.
    :param skip_cache: if true, call `parser` directly and neither read nor
        write the cache.
    :return: the value returned by `parser` for the (possibly normalised)
        text; on a cache hit, the previously stored value.
    """
    global __NLP_CACHE__, __NLP_CACHE_CHANGED__

    # Check the length before indexing: a single space splits into two empty
    # tokens, so indexing text[1] without this guard raises IndexError.
    if not disable_normalization and len(text) > 1:
        tokens = text.split(' ')
        keep_casing = len(tokens) > 1 and (text[1].isupper()
                                           or tokens[1].istitle())
        if not keep_casing:
            text = text[0].lower() + text[1:]

    if skip_cache:
        return parser(text)

    if '__NLP_CACHE__' not in globals():
        __NLP_CACHE__ = util.load_or_create_cache(SPACY_CACHE_ID, dict)
        __NLP_CACHE_CHANGED__ = False

    # Cache entries are keyed by the MD5 digest of the UTF-8 encoded text.
    text_hash = hashlib.md5(text.encode('utf-8')).digest()
    if text_hash in __NLP_CACHE__:
        return __NLP_CACHE__[text_hash]

    parsed_text = parser(text)
    __NLP_CACHE__[text_hash] = parsed_text
    # Record that the in-memory cache now holds an entry added by this call
    # (presumably consulted elsewhere to decide whether to persist it).
    __NLP_CACHE_CHANGED__ = True
    return parsed_text