def _get_training_data(graph) -> tuple:
    """Return the subject-entity training data, memoised in a module global.

    On the first call the value is obtained from `utils.load_or_create_cache`
    under the key 'subject_entity_training_data'; the callable handed to it runs
    `_retrieve_training_data(graph)`. Once the global exists it is returned
    directly and `graph` is not used.
    """
    global __SUBJECT_ENTITY_TRAINING_DATA__
    if '__SUBJECT_ENTITY_TRAINING_DATA__' in globals():
        return __SUBJECT_ENTITY_TRAINING_DATA__

    def _build_training_data():
        return _retrieve_training_data(graph)

    __SUBJECT_ENTITY_TRAINING_DATA__ = utils.load_or_create_cache(
        'subject_entity_training_data', _build_training_data)
    return __SUBJECT_ENTITY_TRAINING_DATA__
def _get_subject_entity_predictions(graph) -> dict:
    """Return the subject-entity predictions, memoised in `__SUBJECT_ENTITY_PREDICTIONS__`.

    The first call fetches them through `utils.load_or_create_cache` (key
    'subject_entity_predictions'), passing a callable that runs
    `_make_subject_entity_predictions(graph)`. Subsequent calls return the
    stored object without touching `graph`.
    """
    global __SUBJECT_ENTITY_PREDICTIONS__
    already_loaded = '__SUBJECT_ENTITY_PREDICTIONS__' in globals()
    if not already_loaded:
        def _predict():
            return _make_subject_entity_predictions(graph)

        __SUBJECT_ENTITY_PREDICTIONS__ = utils.load_or_create_cache('subject_entity_predictions', _predict)
    return __SUBJECT_ENTITY_PREDICTIONS__
def get_disjoint_types(dbp_type) -> set:
    """Return the entry stored for `dbp_type` in the heuristic disjoint-types cache.

    The mapping is obtained once from `utils.load_or_create_cache` (key
    'dbpedia_heuristic_disjoint_types', initializer `_compute_disjoint_types`)
    and kept in `__DISJOINT_TYPES__`. The result is a plain index into that
    mapping, so a missing type behaves however the cached mapping behaves.
    """
    global __DISJOINT_TYPES__
    if '__DISJOINT_TYPES__' in globals():
        return __DISJOINT_TYPES__[dbp_type]

    __DISJOINT_TYPES__ = utils.load_or_create_cache('dbpedia_heuristic_disjoint_types', _compute_disjoint_types)
    return __DISJOINT_TYPES__[dbp_type]
def get_inverse_resource_property_mapping() -> dict:
    """Return a mapping from DBpedia resources to a dict containing property-value assignments
    (containing inverted facts of DBpedia).

    Loaded once through `utils.load_or_create_cache` (key 'dbpedia_inverse_resource_properties')
    and memoised in `__INVERSE_RESOURCE_PROPERTY_MAPPING__`.
    """
    global __INVERSE_RESOURCE_PROPERTY_MAPPING__
    if '__INVERSE_RESOURCE_PROPERTY_MAPPING__' in globals():
        return __INVERSE_RESOURCE_PROPERTY_MAPPING__

    def _load_inverted_facts():
        # The data file is only resolved when the initializer is actually invoked.
        object_facts_file = utils.get_data_file('files.dbpedia.mappingbased_objects')
        return rdf_util.create_dict_from_rdf([object_facts_file], reverse_key=True)

    __INVERSE_RESOURCE_PROPERTY_MAPPING__ = utils.load_or_create_cache(
        'dbpedia_inverse_resource_properties', _load_inverted_facts)
    return __INVERSE_RESOURCE_PROPERTY_MAPPING__
def get_page_entities(graph) -> dict:
    """Return the listing-page entities, memoised in a module global.

    The first call goes through `utils.load_or_create_cache` with the key
    'listing_page_entities' and a callable that runs
    `extract.extract_page_entities(graph)`; later calls return the stored
    value and do not use `graph`.
    """
    # NOTE: the global's name is spelled "ENTITES" in this module; it is kept as-is.
    global __PAGE_ENTITES__
    if '__PAGE_ENTITES__' in globals():
        return __PAGE_ENTITES__

    def _extract():
        return extract.extract_page_entities(graph)

    __PAGE_ENTITES__ = utils.load_or_create_cache('listing_page_entities', _extract)
    return __PAGE_ENTITES__
def get_resources(category: str) -> set:
    """Return all resources of the given category.

    The category -> resources mapping is read once via `utils.load_or_create_cache`
    (key 'dbpedia_category_resources') and memoised in `__CATEGORY_RESOURCES__`;
    the result is a plain index into that mapping.
    """
    global __CATEGORY_RESOURCES__
    if '__CATEGORY_RESOURCES__' not in globals():
        def _load_category_resources():
            article_file = utils.get_data_file('files.dbpedia.category_articles')
            # reverse_key=True: same file as get_resource_categories, keyed the other way round.
            return rdf_util.create_multi_val_dict_from_rdf(
                [article_file], rdf_util.PREDICATE_SUBJECT, reverse_key=True)

        __CATEGORY_RESOURCES__ = utils.load_or_create_cache('dbpedia_category_resources', _load_category_resources)
    return __CATEGORY_RESOURCES__[category]
def get_resource_categories(dbp_resource: str) -> set:
    """Return all categories the given resource is contained in.

    The resource -> categories mapping is read once via `utils.load_or_create_cache`
    (key 'dbpedia_resource_categories') and memoised in `__RESOURCE_CATEGORIES__`;
    the result is a plain index into that mapping.
    """
    global __RESOURCE_CATEGORIES__
    if '__RESOURCE_CATEGORIES__' in globals():
        return __RESOURCE_CATEGORIES__[dbp_resource]

    def _load_resource_categories():
        article_file = utils.get_data_file('files.dbpedia.category_articles')
        return rdf_util.create_multi_val_dict_from_rdf([article_file], rdf_util.PREDICATE_SUBJECT)

    __RESOURCE_CATEGORIES__ = utils.load_or_create_cache('dbpedia_resource_categories', _load_resource_categories)
    return __RESOURCE_CATEGORIES__[dbp_resource]
def get_cyclefree_wikitaxonomy_listgraph() -> ListGraph:
    """Retrieve list graph with filtered edges and resolved cycles.

    Built from `get_wikitaxonomy_listgraph()` followed by `append_unconnected()`, obtained
    through `utils.load_or_create_cache` (key 'listgraph_cyclefree') and memoised in a
    module global.
    """
    # NOTE(review): the only step applied here is append_unconnected(); no explicit
    # cycle-resolution call is visible in this function - confirm where cycles are resolved.
    global __CYCLEFREE_WIKITAXONOMY_LISTGRAPH__
    if '__CYCLEFREE_WIKITAXONOMY_LISTGRAPH__' in globals():
        return __CYCLEFREE_WIKITAXONOMY_LISTGRAPH__

    def _build_graph():
        wikitaxonomy_graph = get_wikitaxonomy_listgraph()
        return wikitaxonomy_graph.append_unconnected()

    __CYCLEFREE_WIKITAXONOMY_LISTGRAPH__ = utils.load_or_create_cache('listgraph_cyclefree', _build_graph)
    return __CYCLEFREE_WIKITAXONOMY_LISTGRAPH__
def get_wikitaxonomy_listgraph() -> ListGraph:
    """Retrieve list graph with filtered edges.

    Built from `get_base_listgraph()` followed by `remove_unrelated_edges()`, obtained
    through `utils.load_or_create_cache` (key 'listgraph_wikitaxonomy') and memoised in
    `__WIKITAXONOMY_LISTGRAPH__`.
    """
    global __WIKITAXONOMY_LISTGRAPH__
    if '__WIKITAXONOMY_LISTGRAPH__' not in globals():
        def _build_graph():
            base_graph = get_base_listgraph()
            return base_graph.remove_unrelated_edges()

        __WIKITAXONOMY_LISTGRAPH__ = utils.load_or_create_cache('listgraph_wikitaxonomy', _build_graph)
    return __WIKITAXONOMY_LISTGRAPH__
def get_resource_property_mapping() -> dict:
    """Return a mapping from DBpedia resources to a dict containing property-value assignments
    (containing facts of DBpedia).

    Loaded once through `utils.load_or_create_cache` (key 'dbpedia_resource_properties') and
    memoised in `__RESOURCE_PROPERTY_MAPPING__`.
    """
    global __RESOURCE_PROPERTY_MAPPING__
    if '__RESOURCE_PROPERTY_MAPPING__' in globals():
        return __RESOURCE_PROPERTY_MAPPING__

    # Both data files are resolved up front, before the cache is consulted
    # (literals first, then objects).
    literal_facts_file = utils.get_data_file('files.dbpedia.mappingbased_literals')
    object_facts_file = utils.get_data_file('files.dbpedia.mappingbased_objects')
    property_files = [literal_facts_file, object_facts_file]

    def _load_facts():
        return rdf_util.create_dict_from_rdf(property_files)

    __RESOURCE_PROPERTY_MAPPING__ = utils.load_or_create_cache('dbpedia_resource_properties', _load_facts)
    return __RESOURCE_PROPERTY_MAPPING__
def get_range(dbp_predicate: str) -> Optional[str]:
    """Return the range of `dbp_predicate`, falling back to the heuristic ranges.

    The value reported by `dbp_store.get_range` is used whenever it is truthy. Otherwise
    the entry from the 'dbpedia_heuristic_ranges' cache (initializer `_compute_ranges`)
    is returned, or None when that cache has no entry for the predicate either.

    :param dbp_predicate: predicate to look up.
    :return: the range, or None if neither source knows one.
    """
    global __RANGES__
    if '__RANGES__' not in globals():
        heuristic_ranges = utils.load_or_create_cache('dbpedia_heuristic_ranges', _compute_ranges)
        # Kept as a defaultdict so that any code indexing __RANGES__ directly still gets None on a miss.
        __RANGES__ = defaultdict(lambda: None, heuristic_ranges)
    # .get() rather than indexing: indexing a defaultdict stores a None entry for every
    # unknown predicate, so a pure lookup would silently grow the in-memory mapping.
    return dbp_store.get_range(dbp_predicate) or __RANGES__.get(dbp_predicate)
def get_cyclefree_wikitaxonomy_graph() -> CategoryGraph:
    """Retrieve the cycle-free category graph with filtered categories and edges.

    Built from `get_wikitaxonomy_graph()` followed by `append_unconnected()`, obtained
    through `utils.load_or_create_cache` (key 'catgraph_cyclefree') and memoised in a
    module global.
    """
    # NOTE(review): the only step applied here is append_unconnected(); no explicit
    # cycle-resolution call is visible in this function - confirm where cycles are resolved.
    global __CYCLEFREE_WIKITAXONOMY_GRAPH__
    if '__CYCLEFREE_WIKITAXONOMY_GRAPH__' in globals():
        return __CYCLEFREE_WIKITAXONOMY_GRAPH__

    def _build_graph():
        wikitaxonomy_graph = get_wikitaxonomy_graph()
        return wikitaxonomy_graph.append_unconnected()

    __CYCLEFREE_WIKITAXONOMY_GRAPH__ = utils.load_or_create_cache('catgraph_cyclefree', _build_graph)
    return __CYCLEFREE_WIKITAXONOMY_GRAPH__
def get_base_graph() -> CaLiGraph:
    """Retrieve graph created from categories and lists.

    Built with `CaLiGraph.build_graph()` followed by `append_unconnected()`, obtained
    through `utils.load_or_create_cache` (key 'caligraph_base') and memoised in `__BASE_GRAPH__`.
    """
    global __BASE_GRAPH__
    if '__BASE_GRAPH__' not in globals():
        def _build_graph():
            fresh_graph = CaLiGraph.build_graph()
            return fresh_graph.append_unconnected()

        __BASE_GRAPH__ = utils.load_or_create_cache('caligraph_base', _build_graph)
    return __BASE_GRAPH__
def get_axiom_graph() -> CaLiGraph:
    """Retrieve CaLiGraph enriched with axioms from the Cat2Ax approach.

    Built from `get_merged_ontology_graph()` by applying `remove_transitive_edges()` and then
    `compute_axioms()`; obtained through `utils.load_or_create_cache` (key
    'caligraph_axiomatized') and memoised in `__AXIOM_GRAPH__`.
    """
    global __AXIOM_GRAPH__
    if '__AXIOM_GRAPH__' in globals():
        return __AXIOM_GRAPH__

    def _build_graph():
        merged_graph = get_merged_ontology_graph()
        reduced_graph = merged_graph.remove_transitive_edges()
        return reduced_graph.compute_axioms()

    __AXIOM_GRAPH__ = utils.load_or_create_cache('caligraph_axiomatized', _build_graph)
    return __AXIOM_GRAPH__
def get_merged_ontology_graph() -> CaLiGraph:
    """Retrieve base graph joined with DBpedia ontology.

    Built from a `copy()` of `get_base_graph()` by applying `merge_ontology()` and then
    `append_unconnected()`; obtained through `utils.load_or_create_cache` (key
    'caligraph_merged_ontology') and memoised in `__MERGED_ONTOLOGY_GRAPH__`.
    """
    global __MERGED_ONTOLOGY_GRAPH__
    if '__MERGED_ONTOLOGY_GRAPH__' not in globals():
        def _build_graph():
            # Work on a copy so the memoised base graph object itself is not the receiver.
            working_copy = get_base_graph().copy()
            merged_graph = working_copy.merge_ontology()
            return merged_graph.append_unconnected()

        __MERGED_ONTOLOGY_GRAPH__ = utils.load_or_create_cache('caligraph_merged_ontology', _build_graph)
    return __MERGED_ONTOLOGY_GRAPH__
def get_merged_listgraph() -> ListGraph:
    """Retrieve list graph with filtered edges, resolved cycles, and merged lists.

    Built from `get_cyclefree_wikitaxonomy_listgraph()` by applying, in order,
    `merge_nodes()`, `remove_leaf_listcategories()` and `remove_transitive_edges()`;
    obtained through `utils.load_or_create_cache` (key 'listgraph_merged') and memoised
    in `__MERGED_LISTGRAPH__`.
    """
    global __MERGED_LISTGRAPH__
    if '__MERGED_LISTGRAPH__' in globals():
        return __MERGED_LISTGRAPH__

    def _build_graph():
        cyclefree_graph = get_cyclefree_wikitaxonomy_listgraph()
        merged_graph = cyclefree_graph.merge_nodes()
        pruned_graph = merged_graph.remove_leaf_listcategories()
        return pruned_graph.remove_transitive_edges()

    __MERGED_LISTGRAPH__ = utils.load_or_create_cache('listgraph_merged', _build_graph)
    return __MERGED_LISTGRAPH__
def get_base_listgraph() -> ListGraph:
    """Retrieve basic list graph without any modifications.

    Built with `ListGraph.create_from_dbpedia()` followed by `append_unconnected()`;
    obtained through `utils.load_or_create_cache` (key 'listgraph_base') and memoised in
    `__BASE_LISTGRAPH__`.
    """
    global __BASE_LISTGRAPH__
    if '__BASE_LISTGRAPH__' not in globals():
        def _build_graph():
            dbpedia_graph = ListGraph.create_from_dbpedia()
            return dbpedia_graph.append_unconnected()

        __BASE_LISTGRAPH__ = utils.load_or_create_cache('listgraph_base', _build_graph)
    return __BASE_LISTGRAPH__
def get_conceptual_category_graph() -> CategoryGraph:
    """Retrieve category graph with filtered categories.

    Built with `CategoryGraph.create_from_dbpedia()` followed by `make_conceptual()` and
    `append_unconnected()`; obtained through `utils.load_or_create_cache` (key
    'catgraph_conceptual') and memoised in `__CONCEPTUAL_CATEGORY_GRAPH__`.
    """
    global __CONCEPTUAL_CATEGORY_GRAPH__
    if '__CONCEPTUAL_CATEGORY_GRAPH__' in globals():
        return __CONCEPTUAL_CATEGORY_GRAPH__

    def _build_graph():
        dbpedia_graph = CategoryGraph.create_from_dbpedia()
        conceptual_graph = dbpedia_graph.make_conceptual()
        return conceptual_graph.append_unconnected()

    __CONCEPTUAL_CATEGORY_GRAPH__ = utils.load_or_create_cache('catgraph_conceptual', _build_graph)
    return __CONCEPTUAL_CATEGORY_GRAPH__
def get_merged_graph() -> CategoryGraph:
    """Retrieve the cycle-free category graph with filtered+merged categories and filtered edges.

    Built from `get_cyclefree_wikitaxonomy_graph()` by applying `merge_nodes()` and then
    `remove_transitive_edges()`; obtained through `utils.load_or_create_cache` (key
    'catgraph_merged') and memoised in `__MERGED_GRAPH__`.
    """
    global __MERGED_GRAPH__
    if '__MERGED_GRAPH__' not in globals():
        def _build_graph():
            cyclefree_graph = get_cyclefree_wikitaxonomy_graph()
            merged_graph = cyclefree_graph.merge_nodes()
            return merged_graph.remove_transitive_edges()

        __MERGED_GRAPH__ = utils.load_or_create_cache('catgraph_merged', _build_graph)
    return __MERGED_GRAPH__
def get_wikitaxonomy_graph() -> CategoryGraph:
    """Retrieve category graph with filtered categories and edges.

    Built from `get_conceptual_category_graph()` followed by `remove_unrelated_edges()`;
    obtained through `utils.load_or_create_cache` (key 'catgraph_wikitaxonomy') and memoised
    in `__WIKITAXONOMY_CATEGORY_GRAPH__`.
    """
    global __WIKITAXONOMY_CATEGORY_GRAPH__
    if '__WIKITAXONOMY_CATEGORY_GRAPH__' in globals():
        return __WIKITAXONOMY_CATEGORY_GRAPH__

    def _build_graph():
        conceptual_graph = get_conceptual_category_graph()
        return conceptual_graph.remove_unrelated_edges()

    __WIKITAXONOMY_CATEGORY_GRAPH__ = utils.load_or_create_cache('catgraph_wikitaxonomy', _build_graph)
    return __WIKITAXONOMY_CATEGORY_GRAPH__
def resolve_redirect(dbp_resource: str, visited=None) -> str:
    """Return the resource to which `dbp_resource` redirects (if any) or `dbp_resource` itself.

    Redirects are followed transitively. The walk stops at the first resource that either
    has no redirect entry or has already been visited (redirect cycle); that resource is
    returned.

    :param dbp_resource: resource to resolve.
    :param visited: optional collection of resources to treat as already visited; it is
        copied and never modified.
    :return: the final redirect target, or `dbp_resource` if it has no redirect.
    """
    global __REDIRECTS__
    if '__REDIRECTS__' not in globals():
        def _load_redirects():
            redirects_file = utils.get_data_file('files.dbpedia.redirects')
            return rdf_util.create_single_val_dict_from_rdf([redirects_file], rdf_util.PREDICATE_REDIRECTS)

        __REDIRECTS__ = utils.load_or_create_cache('dbpedia_resource_redirects', _load_redirects)

    # Iterative walk instead of recursion: no RecursionError on long redirect chains and no
    # fresh copy of the visited set on every hop.
    seen = set(visited) if visited else set()
    current = dbp_resource
    while current in __REDIRECTS__ and current not in seen:
        seen.add(current)
        current = __REDIRECTS__[current]
    return current
def _get_page_data() -> dict:
    """Return the subject-entity page data, memoised in `__SUBJECT_ENTITY_PAGE_DATA__`.

    The first call fetches it through `utils.load_or_create_cache` with the key
    'subject_entity_page_data' and `_retrieve_page_data` as initializer.
    """
    global __SUBJECT_ENTITY_PAGE_DATA__
    if '__SUBJECT_ENTITY_PAGE_DATA__' in globals():
        return __SUBJECT_ENTITY_PAGE_DATA__

    __SUBJECT_ENTITY_PAGE_DATA__ = utils.load_or_create_cache('subject_entity_page_data', _retrieve_page_data)
    return __SUBJECT_ENTITY_PAGE_DATA__
def _get_raw_markup_from_xml() -> dict:
    """Return the raw Wikipedia markup as a `defaultdict(str)`.

    The underlying mapping comes from `utils.load_or_create_cache` (key 'wikipedia_raw_markup',
    initializer `_parse_raw_markup_from_xml`). The result is not memoised in this function:
    every call wraps the loaded mapping in a new defaultdict whose missing keys yield ''.
    """
    raw_markup = utils.load_or_create_cache('wikipedia_raw_markup', _parse_raw_markup_from_xml)
    return defaultdict(str, raw_markup)
def get_type_frequency(dbp_type: str) -> float:
    """Return the amount of resources having `dbp_type` as type.

    The frequencies are obtained once from `utils.load_or_create_cache` (key
    'dbpedia_resource_type_frequency', initializer `_compute_type_frequency`) and memoised
    in `__TYPE_FREQUENCY__`.

    :param dbp_type: type to look up.
    :return: the stored frequency, or 0 if the type is unknown.
    """
    global __TYPE_FREQUENCY__
    if '__TYPE_FREQUENCY__' not in globals():
        frequencies = utils.load_or_create_cache('dbpedia_resource_type_frequency', _compute_type_frequency)
        # Kept as a defaultdict so any code indexing __TYPE_FREQUENCY__ directly still gets 0 on a miss.
        __TYPE_FREQUENCY__ = defaultdict(int, frequencies)
    # .get() rather than indexing: indexing a defaultdict inserts a 0 entry for every unknown
    # type, so a read-only lookup would mutate and grow the shared mapping.
    return __TYPE_FREQUENCY__.get(dbp_type, 0)
def is_functional(dbp_predicate: str) -> bool:
    """Return True, if the predicate is functional (i.e. a resource has at most one value for the given predicate).

    The flags are obtained once from `utils.load_or_create_cache` (key
    'dbpedia_functional_predicates', initializer `_create_functional_predicate_dict`) and
    memoised in `__PREDICATE_FUNCTIONAL__`.

    :param dbp_predicate: predicate to look up.
    :return: the stored flag, or False if the predicate is unknown.
    """
    global __PREDICATE_FUNCTIONAL__
    if '__PREDICATE_FUNCTIONAL__' not in globals():
        functional_flags = utils.load_or_create_cache(
            'dbpedia_functional_predicates', _create_functional_predicate_dict)
        # Kept as a defaultdict so any code indexing __PREDICATE_FUNCTIONAL__ directly still gets False on a miss.
        __PREDICATE_FUNCTIONAL__ = defaultdict(bool, functional_flags)
    # .get() rather than indexing: indexing a defaultdict inserts a False entry for every
    # unknown predicate, so a read-only lookup would mutate and grow the shared mapping.
    return __PREDICATE_FUNCTIONAL__.get(dbp_predicate, False)
def get_disambiguation_mapping() -> dict:
    """Return the disambiguation mapping as a `defaultdict(set)`, memoised in `__DISAMBIGUATIONS__`.

    The underlying mapping is obtained once through `utils.load_or_create_cache` (key
    'dbpedia_resource_disambiguations'); its initializer reads the
    'files.dbpedia.disambiguations' data file with `rdf_util.PREDICATE_DISAMBIGUATES`.
    Because the returned object is a defaultdict, indexing it with a missing key yields
    (and stores) an empty set.
    """
    global __DISAMBIGUATIONS__
    if '__DISAMBIGUATIONS__' in globals():
        return __DISAMBIGUATIONS__

    def _load_disambiguations():
        disambiguation_file = utils.get_data_file('files.dbpedia.disambiguations')
        return rdf_util.create_multi_val_dict_from_rdf([disambiguation_file], rdf_util.PREDICATE_DISAMBIGUATES)

    loaded_mapping = utils.load_or_create_cache('dbpedia_resource_disambiguations', _load_disambiguations)
    __DISAMBIGUATIONS__ = defaultdict(set, loaded_mapping)
    return __DISAMBIGUATIONS__
def extract_parent_categories() -> dict:
    """Return the parent categories obtained via the 'wikipedia_parent_categories' cache.

    The initializer handed to `utils.load_or_create_cache` runs
    `_extract_parent_categories_from_markup` on the result of
    `_get_raw_categories_and_templates_from_xml()`. Nothing is memoised in this function
    itself; every call goes through `utils.load_or_create_cache`.
    """
    def _extract_from_markup():
        raw_markup = _get_raw_categories_and_templates_from_xml()
        return _extract_parent_categories_from_markup(raw_markup)

    return utils.load_or_create_cache('wikipedia_parent_categories', _extract_from_markup)
def _get_label_mapping() -> dict:
    """Return the resource label mapping, memoised in `__RESOURCE_LABEL_MAPPING__`.

    Obtained once through `utils.load_or_create_cache` (key 'dbpedia_resource_labels'); the
    initializer reads the 'files.dbpedia.labels' data file with `rdf_util.PREDICATE_LABEL`.
    """
    global __RESOURCE_LABEL_MAPPING__
    if '__RESOURCE_LABEL_MAPPING__' not in globals():
        def _load_labels():
            labels_file = utils.get_data_file('files.dbpedia.labels')
            return rdf_util.create_single_val_dict_from_rdf([labels_file], rdf_util.PREDICATE_LABEL)

        __RESOURCE_LABEL_MAPPING__ = utils.load_or_create_cache('dbpedia_resource_labels', _load_labels)
    return __RESOURCE_LABEL_MAPPING__
def get_parsed_articles() -> dict:
    """Return the parsed Wikipedia articles as a defaultdict whose missing keys yield None.

    The underlying mapping comes from `utils.load_or_create_cache` (key
    'wikipedia_parsed_articles'); its initializer runs `_parse_articles` on the result of
    `_get_raw_articles_from_xml()`. Nothing is memoised in this function itself: every call
    wraps the loaded mapping in a new defaultdict.
    """
    def _parse_from_xml():
        raw_articles = _get_raw_articles_from_xml()
        return _parse_articles(raw_articles)

    parsed_articles = utils.load_or_create_cache('wikipedia_parsed_articles', _parse_from_xml)
    return defaultdict(lambda: None, parsed_articles)
def get_inverse_lexicalisations(text: str) -> dict:
    """Return all resources that fit to the given lexicalisation.

    The lookup key is `text.lower()`. The mapping is obtained once from
    `utils.load_or_create_cache` (key 'dbpedia_resource_inverse_lexicalisations', initializer
    `_compute_inverse_lexicalisations`) and memoised in `__RESOURCE_INVERSE_LEXICALISATIONS__`.

    :param text: lexicalisation to look up; it is lower-cased before the lookup.
    :return: the stored dict for the lexicalisation, or a new empty dict if there is none.
    """
    global __RESOURCE_INVERSE_LEXICALISATIONS__
    if '__RESOURCE_INVERSE_LEXICALISATIONS__' not in globals():
        lexicalisations = utils.load_or_create_cache(
            'dbpedia_resource_inverse_lexicalisations', _compute_inverse_lexicalisations)
        # Kept as a defaultdict so any code indexing the global directly still gets {} on a miss.
        __RESOURCE_INVERSE_LEXICALISATIONS__ = defaultdict(dict, lexicalisations)
    # .get() rather than indexing: indexing a defaultdict stores an empty dict for every unseen
    # text, so the mapping would grow without bound over arbitrary query strings.
    return __RESOURCE_INVERSE_LEXICALISATIONS__.get(text.lower(), {})