def _get_parent_to_category_set_mapping() -> dict:
    """Return the category-set mapping, loading it on the first call.

    The mapping lives in the module global ``__CATEGORY_SETS__``. When that
    global does not exist yet, it is filled via ``util.load_or_create_cache``
    with the cache name 'dbpedia_category_sets' and ``_compute_category_sets``
    as the initializer. Every later call returns the same object.
    """
    global __CATEGORY_SETS__
    if '__CATEGORY_SETS__' in globals():
        return __CATEGORY_SETS__
    __CATEGORY_SETS__ = util.load_or_create_cache(
        'dbpedia_category_sets', _compute_category_sets)
    return __CATEGORY_SETS__
def get_statistics(category: str) -> dict:
    """Look up the type/property frequency statistics stored for `category`.

    The statistics table is held in the module global
    ``__CATEGORY_STATISTICS__`` and is loaded on first use through
    ``util.load_or_create_cache`` (cache name 'dbpedia_category_statistics',
    initializer ``_compute_category_statistics``).

    The table is indexed directly, so a category without an entry raises
    whatever the table raises for a missing key (``KeyError`` for a plain dict).
    """
    global __CATEGORY_STATISTICS__
    cache_missing = '__CATEGORY_STATISTICS__' not in globals()
    if cache_missing:
        __CATEGORY_STATISTICS__ = util.load_or_create_cache(
            'dbpedia_category_statistics', _compute_category_statistics)
    stats_by_category = __CATEGORY_STATISTICS__
    return stats_by_category[category]
def get_inverse_lexicalisations(text: str) -> dict:
    """Return the inverse-lexicalisation entry stored for `text`.

    The lookup key is `text` converted to lower case. If the key has no entry,
    a new empty dict is returned instead of raising.

    The table is kept in the module global
    ``__RESOURCE_INVERSE_LEXICALISATIONS__`` and is loaded on first use through
    ``util.load_or_create_cache`` (cache name
    'dbpedia_resource_inverse_lexicalisations', initializer
    ``_compute_inverse_lexicalisations``).
    """
    global __RESOURCE_INVERSE_LEXICALISATIONS__
    if '__RESOURCE_INVERSE_LEXICALISATIONS__' not in globals():
        __RESOURCE_INVERSE_LEXICALISATIONS__ = util.load_or_create_cache(
            'dbpedia_resource_inverse_lexicalisations', _compute_inverse_lexicalisations)

    lookup_key = text.lower()
    if lookup_key in __RESOURCE_INVERSE_LEXICALISATIONS__:
        return __RESOURCE_INVERSE_LEXICALISATIONS__[lookup_key]
    return {}
def is_functional(dbp_predicate: str) -> bool:
    """Return the functional flag stored for `dbp_predicate`.

    Predicates without an entry in the table are reported as ``False``.

    The table is kept in the module global ``__PREDICATE_FUNCTIONAL__`` and is
    loaded on first use through ``util.load_or_create_cache`` (cache name
    'dbpedia_functional_predicates', initializer
    ``_create_functional_predicate_dict``).
    """
    global __PREDICATE_FUNCTIONAL__
    if '__PREDICATE_FUNCTIONAL__' not in globals():
        __PREDICATE_FUNCTIONAL__ = util.load_or_create_cache(
            'dbpedia_functional_predicates', _create_functional_predicate_dict)

    if dbp_predicate in __PREDICATE_FUNCTIONAL__:
        return __PREDICATE_FUNCTIONAL__[dbp_predicate]
    return False
def _get_label_mapping() -> dict:
    """Return the resource-label mapping, loading it on the first call.

    The mapping is kept in the module global ``__RESOURCE_LABEL_MAPPING__``.
    On first use it is obtained from ``util.load_or_create_cache`` under the
    cache name 'dbpedia_resource_labels'. The initializer passed along calls
    ``rdf_util.create_single_val_dict_from_rdf`` on the
    'files.dbpedia.labels' data file with ``rdf_util.PREDICATE_LABEL``.
    """
    global __RESOURCE_LABEL_MAPPING__
    if '__RESOURCE_LABEL_MAPPING__' in globals():
        return __RESOURCE_LABEL_MAPPING__

    def _load_labels():
        # The data file path is resolved only if the initializer is invoked.
        label_files = [util.get_data_file('files.dbpedia.labels')]
        return rdf_util.create_single_val_dict_from_rdf(label_files, rdf_util.PREDICATE_LABEL)

    __RESOURCE_LABEL_MAPPING__ = util.load_or_create_cache('dbpedia_resource_labels', _load_labels)
    return __RESOURCE_LABEL_MAPPING__
def get_categories() -> set:
    """Return the set of categories, loading it on the first call.

    The set is kept in the module global ``__CATEGORIES__``. On first use it
    is obtained from ``util.load_or_create_cache`` under the cache name
    'dbpedia_categories'. The initializer passed along calls
    ``rdf_util.create_single_val_dict_from_rdf`` on the
    'files.dbpedia.categories' data file with ``rdf_util.PREDICATE_TYPE`` and
    wraps the result in ``set()`` (for a dict result that is the set of keys).
    """
    global __CATEGORIES__
    if '__CATEGORIES__' in globals():
        return __CATEGORIES__

    def _load_categories():
        # The data file path is resolved only if the initializer is invoked.
        category_files = [util.get_data_file('files.dbpedia.categories')]
        typed_entries = rdf_util.create_single_val_dict_from_rdf(
            category_files, rdf_util.PREDICATE_TYPE)
        return set(typed_entries)

    __CATEGORIES__ = util.load_or_create_cache('dbpedia_categories', _load_categories)
    return __CATEGORIES__
def get_children(category: str) -> set:
    """Return the children stored for `category`, excluding `category` itself.

    The children table is kept in the module global ``__CHILDREN__``. On first
    use it is obtained from ``util.load_or_create_cache`` under the cache name
    'dbpedia_category_children'. The initializer passed along calls
    ``rdf_util.create_multi_val_dict_from_rdf`` on the
    'files.dbpedia.categories' data file with ``rdf_util.PREDICATE_BROADER``
    and ``reverse_key=True``.

    The stored entry is not modified; the result is a new collection. The
    table is indexed directly, so a category without an entry raises whatever
    the table raises for a missing key.
    """
    global __CHILDREN__
    if '__CHILDREN__' not in globals():

        def _load_children():
            # The data file path is resolved only if the initializer is invoked.
            category_files = [util.get_data_file('files.dbpedia.categories')]
            return rdf_util.create_multi_val_dict_from_rdf(
                category_files, rdf_util.PREDICATE_BROADER, reverse_key=True)

        __CHILDREN__ = util.load_or_create_cache('dbpedia_category_children', _load_children)

    stored_children = __CHILDREN__[category]
    # Drop a possible self-reference so a category is never its own child.
    return stored_children.difference({category})
def get_resources(category: str) -> set:
    """Return the resources stored for `category`.

    The table is kept in the module global ``__CATEGORY_RESOURCES__``. On
    first use it is obtained from ``util.load_or_create_cache`` under the
    cache name 'dbpedia_category_resources'. The initializer passed along
    calls ``rdf_util.create_multi_val_dict_from_rdf`` on the
    'files.dbpedia.article_categories' data file with
    ``rdf_util.PREDICATE_SUBJECT`` and ``reverse_key=True``.

    The stored entry itself is returned (no copy). The table is indexed
    directly, so a category without an entry raises whatever the table raises
    for a missing key.
    """
    global __CATEGORY_RESOURCES__
    if '__CATEGORY_RESOURCES__' not in globals():

        def _load_category_resources():
            # The data file path is resolved only if the initializer is invoked.
            article_category_files = [util.get_data_file('files.dbpedia.article_categories')]
            return rdf_util.create_multi_val_dict_from_rdf(
                article_category_files, rdf_util.PREDICATE_SUBJECT, reverse_key=True)

        __CATEGORY_RESOURCES__ = util.load_or_create_cache(
            'dbpedia_category_resources', _load_category_resources)

    resources_by_category = __CATEGORY_RESOURCES__
    return resources_by_category[category]
def get_resource_property_mapping() -> dict:
    """Return the resource-property mapping, loading it on the first call.

    The mapping is kept in the module global
    ``__RESOURCE_PROPERTY_MAPPING__``. On first use it is obtained from
    ``util.load_or_create_cache`` under the cache name
    'dbpedia_resource_properties'. The initializer passed along calls
    ``rdf_util.create_dict_from_rdf`` on the 'mappingbased_literals' and
    'mappingbased_objects' data files.
    """
    global __RESOURCE_PROPERTY_MAPPING__
    if '__RESOURCE_PROPERTY_MAPPING__' in globals():
        return __RESOURCE_PROPERTY_MAPPING__

    # Both data file paths are resolved here, before the cache is consulted.
    # NOTE(review): other loaders in this file resolve their data files inside
    # the initializer instead - confirm whether the eager lookup is intended.
    literals_file = util.get_data_file('files.dbpedia.mappingbased_literals')
    objects_file = util.get_data_file('files.dbpedia.mappingbased_objects')
    property_files = [literals_file, objects_file]

    def _load_properties():
        return rdf_util.create_dict_from_rdf(property_files)

    __RESOURCE_PROPERTY_MAPPING__ = util.load_or_create_cache(
        'dbpedia_resource_properties', _load_properties)
    return __RESOURCE_PROPERTY_MAPPING__
def _get_resource_type_mapping() -> dict:
    """Return the resource-type mapping, loading it on the first call.

    The mapping is kept in the module global ``__RESOURCE_TYPE_MAPPING__``.
    On first use it is obtained from ``util.load_or_create_cache`` under the
    cache name 'dbpedia_resource_type_mapping'. The initializer passed along
    calls ``rdf_util.create_multi_val_dict_from_rdf`` on the 'instance_types'
    and 'transitive_instance_types' data files with
    ``rdf_util.PREDICATE_TYPE``.
    """
    global __RESOURCE_TYPE_MAPPING__
    if '__RESOURCE_TYPE_MAPPING__' in globals():
        return __RESOURCE_TYPE_MAPPING__

    # Both data file paths are resolved here, before the cache is consulted.
    # NOTE(review): other loaders in this file resolve their data files inside
    # the initializer instead - confirm whether the eager lookup is intended.
    direct_types_file = util.get_data_file('files.dbpedia.instance_types')
    transitive_types_file = util.get_data_file('files.dbpedia.transitive_instance_types')
    type_files = [direct_types_file, transitive_types_file]

    def _load_types():
        return rdf_util.create_multi_val_dict_from_rdf(type_files, rdf_util.PREDICATE_TYPE)

    __RESOURCE_TYPE_MAPPING__ = util.load_or_create_cache(
        'dbpedia_resource_type_mapping', _load_types)
    return __RESOURCE_TYPE_MAPPING__
def resolve_redirect(dbp_resource: str, visited=None) -> str:
    """Return the resource to which `dbp_resource` redirects (if any) or `dbp_resource` itself.

    Redirects are followed transitively until a resource without a redirect
    entry is reached. If the chain runs into a resource that was already
    followed (a redirect cycle or a self-redirect), that resource is returned.

    The chain is walked iteratively, so its length is not limited by the
    interpreter's recursion depth and no set is copied per hop.

    Args:
        dbp_resource: The resource to resolve.
        visited: Optional collection of resources to treat as already
            followed. It is copied, never mutated.

    Returns:
        The final resource of the redirect chain.
    """
    global __REDIRECTS__
    if '__REDIRECTS__' not in globals():

        def _load_redirects():
            # The data file path is resolved only if the initializer is invoked.
            redirect_files = [util.get_data_file('files.dbpedia.redirects')]
            return rdf_util.create_single_val_dict_from_rdf(
                redirect_files, rdf_util.PREDICATE_REDIRECTS)

        __REDIRECTS__ = util.load_or_create_cache('dbpedia_resource_redirects', _load_redirects)

    seen = set(visited) if visited else set()
    current = dbp_resource
    # Stop at the first resource that has no redirect or was already followed.
    while current in __REDIRECTS__ and current not in seen:
        seen.add(current)
        current = __REDIRECTS__[current]
    return current
def parse(text: str, disable_normalization=False, skip_cache=False) -> Doc:
    """Parse `text` with the module-level `parser`, optionally via a cache.

    Normalization (unless `disable_normalization` is set): the first character
    is lower-cased when the text is longer than one character and either
    consists of a single space-separated token, or its second character is not
    upper-case and its second token is not title-cased. Multi-token texts that
    look like a name or acronym ("New York", "USA today") are left untouched.
    NOTE(review): single-token acronyms are still lower-cased ("USA" becomes
    "uSA") - confirm this is intended.

    Caching (unless `skip_cache` is set): results are stored in the module
    global ``__NLP_CACHE__`` keyed by the MD5 digest of the normalized text.
    The cache is loaded on first use through ``util.load_or_create_cache``
    under ``SPACY_CACHE_ID``. ``__NLP_CACHE_CHANGED__`` is set to True
    whenever a new entry is added.

    Args:
        text: The text to parse.
        disable_normalization: If true, `text` is parsed exactly as given.
        skip_cache: If true, the cache is neither read nor written.

    Returns:
        Whatever `parser` returns for the (normalized) text, or the cached
        result of an earlier call.
    """
    global __NLP_CACHE__, __NLP_CACHE_CHANGED__

    # The length check comes first: whitespace-only input such as " " splits
    # into ['', ''] and reading text[1] would otherwise raise IndexError.
    if not disable_normalization and len(text) > 1:
        tokens = text.split(' ')
        looks_like_name = len(tokens) > 1 and (text[1].isupper() or tokens[1].istitle())
        if not looks_like_name:
            text = text[0].lower() + text[1:]

    if skip_cache:
        return parser(text)

    if '__NLP_CACHE__' not in globals():
        __NLP_CACHE__ = util.load_or_create_cache(SPACY_CACHE_ID, dict)
        __NLP_CACHE_CHANGED__ = False

    # MD5 is used only as a compact cache key, not for security.
    text_hash = hashlib.md5(text.encode('utf-8')).digest()
    if text_hash in __NLP_CACHE__:
        return __NLP_CACHE__[text_hash]

    parsed_text = parser(text)
    __NLP_CACHE__[text_hash] = parsed_text
    __NLP_CACHE_CHANGED__ = True
    return parsed_text