Example #1
0
def get_resource_property_mapping() -> dict:
    """Return the lazily initialised DBpedia resource property mapping.

    On first use the mapping is obtained from `util.load_or_create_cache`
    under the cache name 'dbpedia_resource_properties'; the initializer handed
    to it builds the mapping with `rdf_util.create_dict_from_rdf` from the
    mapping-based literals and objects files. The result is stored in the
    module-level global `__RESOURCE_PROPERTY_MAPPING__` and that same object
    is returned by every later call.
    """
    global __RESOURCE_PROPERTY_MAPPING__
    if '__RESOURCE_PROPERTY_MAPPING__' in globals():
        return __RESOURCE_PROPERTY_MAPPING__

    # Both data files are resolved eagerly, before the cache helper is asked.
    literals_file = util.get_data_file('files.dbpedia.mappingbased_literals')
    objects_file = util.get_data_file('files.dbpedia.mappingbased_objects')

    def _build_mapping():
        return rdf_util.create_dict_from_rdf([literals_file, objects_file])

    __RESOURCE_PROPERTY_MAPPING__ = util.load_or_create_cache(
        'dbpedia_resource_properties', _build_mapping)
    return __RESOURCE_PROPERTY_MAPPING__
Example #2
0
def _get_resource_type_mapping() -> dict:
    """Return the lazily initialised DBpedia resource type mapping.

    On first use the mapping is obtained from `util.load_or_create_cache`
    under the cache name 'dbpedia_resource_type_mapping'; the initializer
    builds it with `rdf_util.create_multi_val_dict_from_rdf` over the
    instance-types and transitive-instance-types files for
    `rdf_util.PREDICATE_TYPE`. The result is kept in the module-level global
    `__RESOURCE_TYPE_MAPPING__` and reused by later calls.
    """
    global __RESOURCE_TYPE_MAPPING__
    if '__RESOURCE_TYPE_MAPPING__' in globals():
        return __RESOURCE_TYPE_MAPPING__

    # Both data files are resolved eagerly, before the cache helper is asked.
    instance_types_file = util.get_data_file('files.dbpedia.instance_types')
    transitive_types_file = util.get_data_file(
        'files.dbpedia.transitive_instance_types')

    def _build_type_mapping():
        return rdf_util.create_multi_val_dict_from_rdf(
            [instance_types_file, transitive_types_file],
            rdf_util.PREDICATE_TYPE)

    __RESOURCE_TYPE_MAPPING__ = util.load_or_create_cache(
        'dbpedia_resource_type_mapping', _build_type_mapping)
    return __RESOURCE_TYPE_MAPPING__
Example #3
0
def get_disjoint_types(dbp_type: str) -> set:
    """Return all types that are disjoint with `dbp_type` (excluding the wrong disjointness Agent<->Place).

    The disjointness mapping is built on first use and stored in the
    module-level global `__DISJOINT_TYPE_MAPPING__`:

    1. Read the disjointness statements from the taxonomy file.
    2. Drop every pair listed in `REMOVED_DISJOINTNESS_AXIOMS` and add every
       pair from `ADDED_DISJOINTNESS_AXIOMS` in both directions.
    3. Replace the disjoint types of each type by the union of
       `get_transitive_subtypes(dt)` over those disjoint types `dt`.

    The mapping is assembled in local variables and only published to the
    global once it is complete, so an exception during construction cannot
    leave a half-built mapping cached for all later calls.

    :param dbp_type: the type whose disjoint types are requested.
    :return: the cached set for `dbp_type` (the set object itself, not a copy).
    """
    global __DISJOINT_TYPE_MAPPING__
    if '__DISJOINT_TYPE_MAPPING__' not in globals():
        declared_disjointness = rdf_util.create_multi_val_dict_from_rdf(
            [util.get_data_file('files.dbpedia.taxonomy')],
            rdf_util.PREDICATE_DISJOINT_WITH,
            reflexive=True)

        # add/remove custom axioms
        adjusted_disjointness = defaultdict(
            set, {
                t: {
                    dt
                    for dt in disjoint_types
                    if {t, dt} not in REMOVED_DISJOINTNESS_AXIOMS
                }
                for t, disjoint_types in declared_disjointness.items()
            })
        for a, b in ADDED_DISJOINTNESS_AXIOMS:
            adjusted_disjointness[a].add(b)
            adjusted_disjointness[b].add(a)

        # expand every disjoint type to the subtypes reported for it
        # NOTE(review): whether a disjoint type itself stays in the result
        # depends on what get_transitive_subtypes returns -- confirm.
        completed_disjointness = defaultdict(
            set, {
                t: {
                    st
                    for dt in disjoint_types
                    for st in get_transitive_subtypes(dt)
                }
                for t, disjoint_types in adjusted_disjointness.items()
            })

        # publish only the fully built mapping
        __DISJOINT_TYPE_MAPPING__ = completed_disjointness

    return __DISJOINT_TYPE_MAPPING__[dbp_type]
Example #4
0
def _save_to_file(netgen_output, problem_num, density):
    """Write the filtered `netgen_output` to the '<density>_<problem_num>.txt' data file.

    The output is split on newline characters; empty lines and lines starting
    with 'c' are skipped. The remaining lines are written separated by single
    newlines, without a trailing newline.
    """
    with open(util.get_data_file('%s_%d.txt' % (density, problem_num)), 'w') as out_file:
        kept_lines = [
            row for row in netgen_output.split('\n')
            if row and not row.startswith('c')
        ]
        out_file.write('\n'.join(kept_lines))
Example #5
0
def get_maintenance_categories() -> set:
    """Return the lazily initialised set of maintenance categories.

    On first use the maintenance-categories file is read with
    `rdf_util.create_single_val_dict_from_rdf` for `rdf_util.PREDICATE_TYPE`
    and the keys of the resulting mapping are stored as a set in the
    module-level global `__MAINTENANCE_CATS__`, which later calls return.
    """
    global __MAINTENANCE_CATS__
    if '__MAINTENANCE_CATS__' in globals():
        return __MAINTENANCE_CATS__

    typed_categories = rdf_util.create_single_val_dict_from_rdf(
        [util.get_data_file('files.dbpedia.maintenance_categories')],
        rdf_util.PREDICATE_TYPE)
    # set() over the mapping keeps its keys only
    __MAINTENANCE_CATS__ = set(typed_categories)
    return __MAINTENANCE_CATS__
Example #6
0
def get_equivalent_types(dbp_type: str) -> set:
    """Return `dbp_type` together with the types recorded as equivalent to it.

    The equivalence mapping is read on first use from the taxonomy file with
    `rdf_util.create_multi_val_dict_from_rdf` for
    `rdf_util.PREDICATE_EQUIVALENT_CLASS` (with `reflexive=True`) and kept in
    the module-level global `__EQUIVALENT_TYPE_MAPPING__`.

    :param dbp_type: the type to look up.
    :return: a new set that always contains `dbp_type` itself.
    """
    global __EQUIVALENT_TYPE_MAPPING__
    if '__EQUIVALENT_TYPE_MAPPING__' not in globals():
        taxonomy_file = util.get_data_file('files.dbpedia.taxonomy')
        __EQUIVALENT_TYPE_MAPPING__ = rdf_util.create_multi_val_dict_from_rdf(
            [taxonomy_file],
            rdf_util.PREDICATE_EQUIVALENT_CLASS,
            reflexive=True)

    # Build a fresh set so the cached entry is never modified.
    equivalents = {dbp_type}
    equivalents.update(__EQUIVALENT_TYPE_MAPPING__[dbp_type])
    return equivalents
Example #7
0
def get_range(dbp_predicate: str) -> Optional[str]:
    """Return the range recorded for `dbp_predicate`, or None if there is none.

    The predicate-to-range mapping is read on first use from the taxonomy file
    with `rdf_util.create_single_val_dict_from_rdf` for
    `rdf_util.PREDICATE_RANGE` and kept in the module-level global
    `__PREDICATE_RANGE__`.
    """
    global __PREDICATE_RANGE__
    if '__PREDICATE_RANGE__' not in globals():
        taxonomy_file = util.get_data_file('files.dbpedia.taxonomy')
        __PREDICATE_RANGE__ = rdf_util.create_single_val_dict_from_rdf(
            [taxonomy_file], rdf_util.PREDICATE_RANGE)

    # .get yields None for unknown predicates without touching the mapping
    return __PREDICATE_RANGE__.get(dbp_predicate)
Example #8
0
def get_label(category: str) -> str:
    """Return the label of `category`.

    The label mapping is read on first use from the categories file with
    `rdf_util.create_single_val_dict_from_rdf` for
    `rdf_util.PREDICATE_SKOS_LABEL` and kept in the module-level global
    `__CATEGORY_LABELS__`. Categories without an entry in that mapping fall
    back to `cat_util.category2name(category)`.
    """
    global __CATEGORY_LABELS__
    if '__CATEGORY_LABELS__' not in globals():
        categories_file = util.get_data_file('files.dbpedia.categories')
        __CATEGORY_LABELS__ = rdf_util.create_single_val_dict_from_rdf(
            [categories_file], rdf_util.PREDICATE_SKOS_LABEL)

    if category in __CATEGORY_LABELS__:
        return __CATEGORY_LABELS__[category]
    # no stored label: fall back to the name computed by cat_util
    return cat_util.category2name(category)
Example #9
0
def _get_label_mapping() -> dict:
    """Return the lazily initialised DBpedia resource label mapping.

    On first use the mapping is obtained from `util.load_or_create_cache`
    under the cache name 'dbpedia_resource_labels'; the initializer reads the
    labels file with `rdf_util.create_single_val_dict_from_rdf` for
    `rdf_util.PREDICATE_LABEL`. The result is kept in the module-level global
    `__RESOURCE_LABEL_MAPPING__` and reused by later calls.
    """
    global __RESOURCE_LABEL_MAPPING__
    if '__RESOURCE_LABEL_MAPPING__' in globals():
        return __RESOURCE_LABEL_MAPPING__

    def _read_labels():
        # the data file is only resolved when the initializer is invoked
        return rdf_util.create_single_val_dict_from_rdf(
            [util.get_data_file('files.dbpedia.labels')],
            rdf_util.PREDICATE_LABEL)

    __RESOURCE_LABEL_MAPPING__ = util.load_or_create_cache(
        'dbpedia_resource_labels', _read_labels)
    return __RESOURCE_LABEL_MAPPING__
Example #10
0
def get_children(category: str) -> set:
    """Return the children recorded for `category`, never including `category` itself.

    The children mapping is obtained on first use from
    `util.load_or_create_cache` under the cache name
    'dbpedia_category_children'; the initializer reads the categories file
    with `rdf_util.create_multi_val_dict_from_rdf` for
    `rdf_util.PREDICATE_BROADER` with `reverse_key=True`. The mapping is kept
    in the module-level global `__CHILDREN__`.
    """
    global __CHILDREN__
    if '__CHILDREN__' not in globals():

        def _read_children():
            # the data file is only resolved when the initializer is invoked
            return rdf_util.create_multi_val_dict_from_rdf(
                [util.get_data_file('files.dbpedia.categories')],
                rdf_util.PREDICATE_BROADER,
                reverse_key=True)

        __CHILDREN__ = util.load_or_create_cache('dbpedia_category_children',
                                                 _read_children)

    # a new set is returned; the cached entry stays untouched
    return __CHILDREN__[category] - {category}
Example #11
0
def get_resources(category: str) -> set:
    """Return the resources recorded for `category`.

    The category-to-resources mapping is obtained on first use from
    `util.load_or_create_cache` under the cache name
    'dbpedia_category_resources'; the initializer reads the article-categories
    file with `rdf_util.create_multi_val_dict_from_rdf` for
    `rdf_util.PREDICATE_SUBJECT` with `reverse_key=True`. The mapping is kept
    in the module-level global `__CATEGORY_RESOURCES__`; the returned value is
    the cached entry itself, not a copy.
    """
    global __CATEGORY_RESOURCES__
    if '__CATEGORY_RESOURCES__' in globals():
        return __CATEGORY_RESOURCES__[category]

    def _read_category_resources():
        # the data file is only resolved when the initializer is invoked
        return rdf_util.create_multi_val_dict_from_rdf(
            [util.get_data_file('files.dbpedia.article_categories')],
            rdf_util.PREDICATE_SUBJECT,
            reverse_key=True)

    __CATEGORY_RESOURCES__ = util.load_or_create_cache(
        'dbpedia_category_resources', _read_category_resources)
    return __CATEGORY_RESOURCES__[category]
Example #12
0
def get_label(dbp_object: str) -> str:
    """Return the label of `dbp_object`.

    The label dictionary is built on first use as a copy of
    `_get_label_mapping()` updated with the labels read from the taxonomy file
    (`rdf_util.PREDICATE_LABEL`); taxonomy labels therefore win on key
    clashes. It is kept in the module-level global `__RESOURCE_LABELS__`.

    The dictionary is assembled in a local variable and published to the
    global only once both sources have been merged, so a failure while
    reading the taxonomy cannot leave an incomplete dictionary cached for all
    later calls.

    :param dbp_object: the object whose label is requested.
    :return: the stored label, or `dbp_util.object2name(dbp_object)` when no
        label is stored.
    """
    global __RESOURCE_LABELS__
    if '__RESOURCE_LABELS__' not in globals():
        # copy first so the mapping owned by _get_label_mapping is not modified
        merged_labels = dict(_get_label_mapping())
        merged_labels.update(
            rdf_util.create_single_val_dict_from_rdf(
                [util.get_data_file('files.dbpedia.taxonomy')],
                rdf_util.PREDICATE_LABEL))
        # publish only the fully merged dictionary
        __RESOURCE_LABELS__ = merged_labels

    if dbp_object in __RESOURCE_LABELS__:
        return __RESOURCE_LABELS__[dbp_object]
    return dbp_util.object2name(dbp_object)
Example #13
0
def get_categories() -> set:
    """Return the lazily initialised set of categories.

    On first use the set is obtained from `util.load_or_create_cache` under
    the cache name 'dbpedia_categories'; the initializer reads the categories
    file with `rdf_util.create_single_val_dict_from_rdf` for
    `rdf_util.PREDICATE_TYPE` and keeps the keys of that mapping as a set. The
    result is stored in the module-level global `__CATEGORIES__` and reused by
    later calls.
    """
    global __CATEGORIES__
    if '__CATEGORIES__' in globals():
        return __CATEGORIES__

    def _read_categories():
        # the data file is only resolved when the initializer is invoked
        return set(
            rdf_util.create_single_val_dict_from_rdf(
                [util.get_data_file('files.dbpedia.categories')],
                rdf_util.PREDICATE_TYPE))

    __CATEGORIES__ = util.load_or_create_cache('dbpedia_categories',
                                               _read_categories)
    return __CATEGORIES__
Example #14
0
def _get_type_graph() -> nx.DiGraph:
    """Return the initialised graph of DBpedia types.

    The graph is built on first use from the taxonomy data file and stored in
    the module-level global `__TYPE_GRAPH__`; later calls return that same
    object. Every edge points from a type to one of its subtypes.
    """
    global __TYPE_GRAPH__
    if '__TYPE_GRAPH__' not in globals():
        # Mapping of type -> set of subtypes.
        # NOTE(review): presumably reverse_key=True is what turns the
        # "subclass of" statements into supertype -> subtypes -- confirm
        # against rdf_util.
        subtype_mapping = rdf_util.create_multi_val_dict_from_rdf(
            [util.get_data_file('files.dbpedia.taxonomy')],
            rdf_util.PREDICATE_SUBCLASS_OF,
            reverse_key=True)
        # add missing types (i.e. those, that do not have subclasses at all)
        all_types = rdf_util.create_set_from_rdf(
            [util.get_data_file('files.dbpedia.taxonomy')],
            rdf_util.PREDICATE_TYPE, rdf_util.CLASS_OWL_CLASS)
        # Every declared type and each of its equivalent types gets an (empty)
        # entry unless it already has one.
        subtype_mapping.update({
            et: set()
            for t in all_types for et in get_equivalent_types(t)
            if et not in subtype_mapping
        })
        # completing subtypes with subtypes of equivalent types
        # For each type, collect the subtypes of all its equivalent types and
        # expand each of those subtypes to its own equivalent types.
        # The keys are iterated via a snapshot (`set(subtype_mapping)`).
        # NOTE(review): presumably because `subtype_mapping[et]` may insert
        # new keys if the mapping is a defaultdict -- confirm.
        subtype_mapping = {
            t: {
                est
                for et in get_equivalent_types(t) for st in subtype_mapping[et]
                for est in get_equivalent_types(st)
            }
            for t in set(subtype_mapping)
        }
        # remove non-dbpedia types from ontology
        # Both keys and values are filtered; owl:Thing is the only type that
        # is kept without passing dbp_util.is_dbp_type.
        subtype_mapping = {
            t: {
                st
                for st in sts
                if dbp_util.is_dbp_type(st) or st == rdf_util.CLASS_OWL_THING
            }
            for t, sts in subtype_mapping.items()
            if dbp_util.is_dbp_type(t) or t == rdf_util.CLASS_OWL_THING
        }
        # The graph is created from (type, subtype) edges only.
        # NOTE(review): a type that has no subtypes and is nobody's subtype in
        # subtype_mapping therefore does not become a node -- confirm this is
        # intended.
        __TYPE_GRAPH__ = nx.DiGraph(
            incoming_graph_data=[(t, st) for t, sts in subtype_mapping.items()
                                 for st in sts])

    return __TYPE_GRAPH__
Example #15
0
def _retrieve_plaintexts() -> Tuple[str, str]:
    """Return an iterator over DBpedia resources and their Wikipedia plaintexts.

    This is a generator: the bz2-compressed NIF context file is read
    completely and parsed as turtle, then one `(resource_uri, plaintext)`
    pair is yielded per context in the collection.

    The resource URI is the context's `original_uri` with everything from the
    last '?' onwards removed; a URI without any '?' is passed through
    unchanged. The plaintext is the context's `mention` with line breaks
    replaced by spaces and passed through `_remove_parentheses_content`.
    """
    with bz2.open(util.get_data_file('files.dbpedia.nif_context'),
                  mode='rb') as nif_file:
        nif_collection = pynif.NIFCollection.loads(nif_file.read(),
                                                   format='turtle')
        for nif_context in nif_collection.contexts:
            # Cut at the last '?'. rsplit keeps the URI intact when it holds
            # no '?', whereas slicing with rfind's -1 result would silently
            # drop the final character of the URI.
            resource_uri = nif_context.original_uri.rsplit('?', 1)[0]
            # remove parentheses and line breaks from text for easier parsing
            resource_plaintext = _remove_parentheses_content(
                nif_context.mention.replace('\n', ' '))
            yield resource_uri, resource_plaintext
Example #16
0
def resolve_redirect(dbp_resource: str, visited=None) -> str:
    """Return the resource to which `dbp_resource` redirects (if any) or `dbp_resource` itself.

    Redirects are followed transitively. The redirect mapping is obtained on
    first use from `util.load_or_create_cache` under the cache name
    'dbpedia_resource_redirects' and kept in the module-level global
    `__REDIRECTS__`.

    The chain is walked iteratively, so arbitrarily long redirect chains are
    resolved without hitting Python's recursion limit.

    :param dbp_resource: the resource to resolve.
    :param visited: optional collection of resources that count as already
        seen; the walk stops at the first resource contained in it. The
        passed collection is never modified.
    :return: the first resource on the chain that has no redirect, or the
        first resource that was already seen (which ends a redirect cycle).
    """
    global __REDIRECTS__
    if '__REDIRECTS__' not in globals():
        initializer = lambda: rdf_util.create_single_val_dict_from_rdf([
            util.get_data_file('files.dbpedia.redirects')
        ], rdf_util.PREDICATE_REDIRECTS)
        __REDIRECTS__ = util.load_or_create_cache('dbpedia_resource_redirects',
                                                  initializer)

    # work on a private copy so the caller's collection stays untouched
    seen = set(visited) if visited else set()
    current = dbp_resource
    while current in __REDIRECTS__ and current not in seen:
        seen.add(current)
        current = __REDIRECTS__[current]
    return current
Example #17
0
def _compute_inverse_lexicalisations():
    """Build the inverse lexicalisation dict with redirects resolved.

    The dict is read from the anchor-texts file with
    `rdf_util.create_multi_val_freq_dict_from_rdf` for
    `rdf_util.PREDICATE_ANCHOR_TEXT` with `reverse_key=True`. For every
    entry, each resource that `resolve_redirect` maps to a different resource
    is removed and its value is added to (or stored as) the value of the
    redirect target.

    :return: the dict after all redirecting resources have been merged into
        their targets.
    """
    lex_to_resources = rdf_util.create_multi_val_freq_dict_from_rdf(
        [util.get_data_file('files.dbpedia.anchor_texts')],
        rdf_util.PREDICATE_ANCHOR_TEXT,
        reverse_key=True)
    for resource_counts in lex_to_resources.values():
        # iterate over a snapshot of the keys: entries are moved in the loop
        for resource in set(resource_counts):
            target = resolve_redirect(resource)
            if target == resource:
                continue
            if target in resource_counts:
                resource_counts[target] += resource_counts[resource]
            else:
                resource_counts[target] = resource_counts[resource]
            del resource_counts[resource]
    return lex_to_resources
Example #18
0
 def _run_maxflow(self, maxflow_func, data_file):
     """Create the flow network for `data_file` and time `maxflow_func` on it.

     `data_file` is resolved through `util.get_data_file`, the network is
     created by `DIMACSGraphFactory.create`, and the result of
     `self.measure_execution_time(maxflow_func, network)` is returned.
     """
     network_path = util.get_data_file(data_file)
     network = DIMACSGraphFactory.create(network_path)
     return self.measure_execution_time(maxflow_func, network)