Ejemplo n.º 1
0
def get_entity_relations():
    """Collect every resource-to-resource relation triple of the knowledge graph.

    DBpedia and CaLiGraph share the same base set of triples, so the triples
    are read directly from the DBpedia resource property mapping.

    :return: a DataFrame with the columns 'sub', 'pred' and 'obj'. Subject and
        object are converted with `dbp_util.resource2name`; triples whose
        object is not a DBpedia resource are left out.
    """
    property_mapping = dbp_store.get_resource_property_mapping()
    triples = []
    for subject, predicates in property_mapping.items():
        for predicate, objects in predicates.items():
            for target in objects:
                # Only relations that point to another resource are kept.
                if not dbp_util.is_dbp_resource(target):
                    continue
                triples.append((
                    dbp_util.resource2name(subject),
                    predicate,
                    dbp_util.resource2name(target),
                ))
    return pd.DataFrame(triples, columns=['sub', 'pred', 'obj'])
Ejemplo n.º 2
0
def is_object_property(dbp_predicate: str) -> bool:
    """Return True, if the predicate always has a resource as object.

    The decision is cached per predicate in the module-level
    `__OBJECT_PROPERTY__` mapping:

    * If the predicate has a range, the predicate is an object property
      exactly when that range is a DBpedia type.
    * Otherwise one value of the first resource that uses the predicate is
      inspected and the predicate is an object property exactly when that
      value is a DBpedia resource.
    * If neither is available, the result is False (the default of the cache).

    :param dbp_predicate: the predicate to classify.
    :return: True if the predicate is considered an object property.
    """
    global __OBJECT_PROPERTY__
    if '__OBJECT_PROPERTY__' not in globals():
        __OBJECT_PROPERTY__ = defaultdict(bool)
    if dbp_predicate not in __OBJECT_PROPERTY__:
        predicate_range = get_range(dbp_predicate)
        if predicate_range:
            __OBJECT_PROPERTY__[dbp_predicate] = dbp_util.is_dbp_type(predicate_range)
        else:
            for props in get_resource_property_mapping().values():
                if dbp_predicate not in props:
                    continue
                values = props[dbp_predicate]
                if not values:
                    # Nothing to inspect for this resource; try the next one.
                    continue
                # Peek at one value without removing it: `.pop()` would delete
                # the value from the shared resource property mapping.
                sample_value = next(iter(values))
                __OBJECT_PROPERTY__[dbp_predicate] = dbp_util.is_dbp_resource(sample_value)
                break
    # For predicates that could not be decided, the defaultdict stores and
    # returns False here.
    return __OBJECT_PROPERTY__[dbp_predicate]
Ejemplo n.º 3
0
 def compute_axioms(self):
     """Compute the axioms of every node in the graph and store them.

     The axioms returned by `clg_axioms.extract_axioms` are converted to
     CaLiGraph properties/resources and added to `self._node_axioms`.
     Afterwards every axiom that is already reported by `get_axioms` for a
     parent of the node is removed from the node again.

     :return: the graph itself.
     """
     logger = utils.get_logger()
     logger.info('CaLiGraph: Computing Cat2Ax axioms for CaLiGraph..')

     extracted_axioms = clg_axioms.extract_axioms(self)
     for node, axioms in extracted_axioms.items():
         for axiom in axioms:
             clg_prop = clg_util.dbp_type2clg_type(axiom[1])
             clg_val = axiom[2]
             # Only resource values are converted; other values stay as they are.
             if dbp_util.is_dbp_resource(clg_val):
                 clg_val = clg_util.dbp_resource2clg_resource(clg_val)
             self._node_axioms[node].add((clg_prop, clg_val))

     # Drop axioms that a node can already infer from its parents.
     for node in self.nodes:
         inherited_axioms = set()
         for parent in self.parents(node):
             inherited_axioms.update(self.get_axioms(parent))
         own_axioms = self._node_axioms[node]
         self._node_axioms[node] = own_axioms.difference(inherited_axioms)
     return self
Ejemplo n.º 4
0
def _get_lines_dbpedia_instance_relations(graph) -> list:
    """Serialize the facts about DBpedia resources that are new compared to DBpedia.

    For every axiom of every node, the axiom is applied to all resources of
    the node. A resulting triple (in the DBpedia namespace) counts as new if
    the resource is unknown to `dbp_store`, or the resource does not have the
    property, or the property does not have that value.

    :param graph: the graph providing `nodes`, `get_axioms` and `get_resources`.
    :return: one serialized line per new triple; object triples for resource
        values, literal triples otherwise.
    """
    fresh_triples = set()
    for node in graph.nodes:
        for prop, val in graph.get_axioms(node):
            dbp_prop = clg_util.clg_type2dbp_type(prop)
            if clg_util.is_clg_resource(val):
                dbp_val = clg_util.clg_resource2dbp_resource(val)
            else:
                dbp_val = val
            for res in graph.get_resources(node):
                dbp_res = clg_util.clg_resource2dbp_resource(res)
                already_known = (
                    dbp_res in dbp_store.get_resources()
                    and dbp_prop in dbp_store.get_properties(dbp_res)
                    and dbp_val in dbp_store.get_properties(dbp_res)[dbp_prop]
                )
                if not already_known:
                    fresh_triples.add((dbp_res, dbp_prop, dbp_val))

    return [
        serialize_util.as_object_triple(sub, pred, obj)
        if dbp_util.is_dbp_resource(obj)
        else serialize_util.as_literal_triple(sub, pred, obj)
        for sub, pred, obj in fresh_triples
    ]
Ejemplo n.º 5
0
def is_possible_resource(obj: str) -> bool:
    """Return True, if the given object is a potential DBpedia resource.

    The object has to be a DBpedia resource and must be neither a file
    resource, nor a category, nor a list page, nor a lists page.
    """
    is_resource = dbp_util.is_dbp_resource(obj)
    if not is_resource:
        return is_resource
    is_excluded = (
        dbp_util.is_file_resource(obj)
        or cat_util.is_category(obj)
        or list_util.is_listpage(obj)
        or list_util.is_listspage(obj)
    )
    return not is_excluded
Ejemplo n.º 6
0
Archivo: cdf.py Proyecto: nheist/Cat2Ax
def _extract_axioms_with_rules(cat_dfs: dict) -> set:
    """Return axioms generated by applying C-DF rules.

    The rules are mined and applied in three steps:

    1. Candidate generation: whenever the label of a non-type fact of a
       category occurs in the category label, the words in front of and
       behind it form a word pattern. The predicate of the fact, one
       independent type of its value and the remaining facts of the category
       form the axiom pattern counted for that word pattern.
    2. Filtering: for every word pattern, an axiom pattern is valid if its
       support reaches `cdf.min_support` and its share of the total support of
       the word pattern reaches `cdf.beta`. The first valid pattern is used.
    3. Application: the rules are applied to all usable categories.

    :param cat_dfs: mapping of a category to a pair whose first element is the
        collection of facts of the category (the second element is not used).
    :return: everything `_apply_rules` yields for the usable categories.
    """
    # generate rule candidates by extracting shared pre-/postfixes
    cdf_rule_candidates = defaultdict(lambda: defaultdict(int))
    for cat, (df, _) in cat_dfs.items():
        cat_label = cat_store.get_label(cat)
        facts = set(df)
        for f in facts:
            pred, val = f[0], f[1]
            if pred == rdf_util.PREDICATE_TYPE:
                continue

            val_is_resource = dbp_util.is_dbp_resource(val)
            if val_is_resource:
                label_mapping = dbp_store._get_label_mapping()
                f_label = label_mapping[val] if val in label_mapping else dbp_util.object2name(val)
            else:
                f_label = val
            if f_label not in cat_label:
                continue

            label_start = cat_label.index(f_label)
            first_words = cat_label[:label_start].strip()
            first_words = tuple(first_words.split(' ')) if first_words else tuple()
            last_words = cat_label[label_start + len(f_label):].strip()
            last_words = tuple(last_words.split(' ')) if last_words else tuple()
            if not (first_words or last_words):
                continue

            f_types = dbp_store.get_independent_types(dbp_store.get_types(val)) if val_is_resource else set()
            # Peek instead of `.pop()` so that the set returned by the store is not modified.
            f_type = next(iter(f_types)) if f_types else None
            # The remaining facts are part of the dictionary key. Sorting them gives equal
            # fact sets the same key; a plain `tuple(set)` depends on set iteration order
            # and could split the support of one pattern over several keys.
            remaining_facts = tuple(sorted(facts.difference({f}), key=repr))
            cdf_rule_candidates[(first_words, last_words)][((pred, f_type), remaining_facts)] += 1

    # filter rules using the threshold parameters min_support and beta
    cdf_rules = {}
    min_support = util.get_config('cdf.min_support')
    beta = util.get_config('cdf.beta')
    for word_patterns, pattern_supports in cdf_rule_candidates.items():
        total_support = sum(pattern_supports.values())
        valid_axiom_patterns = [
            pattern for pattern, support in pattern_supports.items()
            if support >= min_support and (support / total_support) >= beta
        ]
        if valid_axiom_patterns:
            cdf_rules[word_patterns] = valid_axiom_patterns[0]

    # apply the patterns to all categories in order to extract axioms
    # (the rules are applied individually depending on whether the pattern is at the front, back,
    # or front+back in order to reduce computational complexity)
    cdf_front_patterns = {wp: ap for wp, ap in cdf_rules.items() if wp[0] and not wp[1]}
    cdf_back_patterns = {wp: ap for wp, ap in cdf_rules.items() if not wp[0] and wp[1]}
    cdf_enclosing_patterns = {wp: ap for wp, ap in cdf_rules.items() if wp[0] and wp[1]}
    pattern_dicts = [
        _build_cdf_pattern_dict(rules)
        for rules in (cdf_front_patterns, cdf_back_patterns, cdf_enclosing_patterns)
    ]

    rule_axioms = set()
    for cat in cat_store.get_usable_cats():
        for pattern_dict in pattern_dicts:
            rule_axioms.update(_apply_rules(pattern_dict, cat))
    return rule_axioms


def _build_cdf_pattern_dict(rules: dict) -> dict:
    """Build the lookup structure that `_apply_rules` consumes for a group of C-DF rules.

    For every rule, `_fill_dict` is called with the front words and a callback
    that continues with the reversed back words and the axiom pattern.

    :param rules: mapping of (front words, back words) to the axiom pattern.
    :return: the dictionary filled by `_fill_dict`.
    """
    pattern_dict = {}
    for (front_pattern, back_pattern), axiom_pattern in rules.items():
        # Bind the loop variables as defaults so the callback keeps the values
        # of this iteration regardless of when it is invoked.
        def fill_back(d, back_pattern=back_pattern, axiom_pattern=axiom_pattern):
            return _fill_dict(d, list(reversed(back_pattern)), axiom_pattern)

        _fill_dict(pattern_dict, list(front_pattern), fill_back)
    return pattern_dict