def get_entity_relations():
    """Retrieve all existing relation triples in the knowledge graph.

    As DBpedia and CaLiGraph have the same base set of triples, we can
    retrieve it directly from DBpedia.

    :return: DataFrame with columns ['sub', 'pred', 'obj'], one row per
             triple whose object is itself a DBpedia resource.
    """
    rpm = dbp_store.get_resource_property_mapping()
    triples = []
    for sub, pred_map in rpm.items():
        for pred, objects in pred_map.items():
            for obj in objects:
                # literal objects are skipped; only resource-valued triples are kept
                if dbp_util.is_dbp_resource(obj):
                    triples.append((dbp_util.resource2name(sub), pred, dbp_util.resource2name(obj)))
    return pd.DataFrame(triples, columns=['sub', 'pred', 'obj'])
def is_object_property(dbp_predicate: str) -> bool:
    """Return True, if the predicate always has a resource as object.

    The result is memoised in the module-level `__OBJECT_PROPERTY__` cache.
    If the predicate has a declared range, the decision is made from the range
    type; otherwise the first observed object value of the predicate is used
    as a sample.
    """
    global __OBJECT_PROPERTY__
    if '__OBJECT_PROPERTY__' not in globals():
        __OBJECT_PROPERTY__ = defaultdict(bool)
    if dbp_predicate not in __OBJECT_PROPERTY__:
        predicate_range = get_range(dbp_predicate)  # hoisted: avoid computing the range twice
        if predicate_range:
            __OBJECT_PROPERTY__[dbp_predicate] = dbp_util.is_dbp_type(predicate_range)
        else:
            for props in get_resource_property_mapping().values():
                if dbp_predicate in props:
                    # use next(iter(..)) instead of set.pop(): pop() would destructively
                    # remove the sampled value from the (shared, cached) property mapping
                    sample_value = next(iter(props[dbp_predicate]))
                    __OBJECT_PROPERTY__[dbp_predicate] = dbp_util.is_dbp_resource(sample_value)
                    break
    return __OBJECT_PROPERTY__[dbp_predicate]
def compute_axioms(self):
    """Compute axioms for all nodes in the graph.

    Extracted Cat2Ax axioms are converted to the CaLiGraph namespace and
    stored per node; afterwards, axioms already implied by a node's parents
    are removed. Returns self for chaining.
    """
    utils.get_logger().info('CaLiGraph: Computing Cat2Ax axioms for CaLiGraph..')
    for node, axioms in clg_axioms.extract_axioms(self).items():
        for ax in axioms:
            clg_pred = clg_util.dbp_type2clg_type(ax[1])
            if dbp_util.is_dbp_resource(ax[2]):
                clg_val = clg_util.dbp_resource2clg_resource(ax[2])
            else:
                clg_val = ax[2]  # literal values are kept as-is
            self._node_axioms[node].add((clg_pred, clg_val))
    # filter out axioms that can be inferred from parents
    for node in self.nodes:
        inherited = set()
        for parent in self.parents(node):
            inherited.update(self.get_axioms(parent))
        self._node_axioms[node] = self._node_axioms[node].difference(inherited)
    return self
def _get_lines_dbpedia_instance_relations(graph) -> list:
    """Serialize new facts for DBpedia resources in DBpedia namespace.

    Collects (subject, predicate, object) triples implied by node axioms
    that are not already present in DBpedia, then serializes each as an
    object triple or a literal triple depending on the object's kind.
    """
    new_instance_relations = set()
    for node in graph.nodes:
        for prop, val in graph.get_axioms(node):
            dbp_pred = clg_util.clg_type2dbp_type(prop)
            if clg_util.is_clg_resource(val):
                dbp_obj = clg_util.clg_resource2dbp_resource(val)
            else:
                dbp_obj = val
            for res in graph.get_resources(node):
                dbp_sub = clg_util.clg_resource2dbp_resource(res)
                # fact already known iff subject exists, has the predicate, and lists the object
                already_known = (dbp_sub in dbp_store.get_resources()
                                 and dbp_pred in dbp_store.get_properties(dbp_sub)
                                 and dbp_obj in dbp_store.get_properties(dbp_sub)[dbp_pred])
                if not already_known:
                    new_instance_relations.add((dbp_sub, dbp_pred, dbp_obj))
    serialized_lines = []
    for s, p, o in new_instance_relations:
        if dbp_util.is_dbp_resource(o):
            serialized_lines.append(serialize_util.as_object_triple(s, p, o))
        else:
            serialized_lines.append(serialize_util.as_literal_triple(s, p, o))
    return serialized_lines
def is_possible_resource(obj: str) -> bool:
    """Return True, if the given object is a potential DBpedia resource (i.e. valid URI, no file, no category, ..)."""
    if not dbp_util.is_dbp_resource(obj):
        return False
    # any of these checks disqualifies the object; generator keeps short-circuit evaluation
    disqualifiers = (dbp_util.is_file_resource, cat_util.is_category, list_util.is_listpage, list_util.is_listspage)
    return not any(check(obj) for check in disqualifiers)
def _extract_axioms_with_rules(cat_dfs: dict) -> set:
    """Return axioms generated by applying C-DF rules.

    Rule candidates are mined from shared pre-/postfixes between category
    labels and fact labels, filtered via the `cdf.min_support` and `cdf.beta`
    thresholds, and finally applied to all usable categories.
    """
    # generate rule candidates by extracting shared pre-/postfixes
    cdf_rule_candidates = defaultdict(lambda: defaultdict(lambda: 0))
    for cat, (df, _) in cat_dfs.items():
        cat_label = cat_store.get_label(cat)
        for f in {f for f in df if f[0] != rdf_util.PREDICATE_TYPE}:
            if dbp_util.is_dbp_resource(f[1]):
                # hoisted: fetch the label mapping once instead of twice per fact
                label_mapping = dbp_store._get_label_mapping()
                f_label = label_mapping[f[1]] if f[1] in label_mapping else dbp_util.object2name(f[1])
            else:
                f_label = f[1]
            if f_label in cat_label:
                first_words = cat_label[:cat_label.index(f_label)].strip()
                first_words = tuple(first_words.split(' ')) if first_words else tuple()
                last_words = cat_label[cat_label.index(f_label) + len(f_label):].strip()
                last_words = tuple(last_words.split(' ')) if last_words else tuple()
                if first_words or last_words:
                    f_types = dbp_store.get_independent_types(dbp_store.get_types(f[1])) if dbp_util.is_dbp_resource(f[1]) else set()
                    f_type = f_types.pop() if f_types else None
                    cdf_rule_candidates[(first_words, last_words)][((f[0], f_type), tuple(set(df).difference({f})))] += 1

    # filter rules using the threshold parameters min_support and beta
    cdf_rules = {}
    min_support = util.get_config('cdf.min_support')
    beta = util.get_config('cdf.beta')
    for word_patterns in cdf_rule_candidates:
        total_support = sum(cdf_rule_candidates[word_patterns].values())
        valid_axiom_patterns = [pattern for pattern, support in cdf_rule_candidates[word_patterns].items()
                                if support >= min_support and (support / total_support) >= beta]
        if len(valid_axiom_patterns) > 0:
            cdf_rules[word_patterns] = valid_axiom_patterns[0]

    # apply the patterns to all categories in order to extract axioms
    # (the rules are applied individually depending on whether the pattern is at the front, back, or front+back in order to reduce computational complexity)
    cdf_front_pattern_dict = _build_cdf_pattern_dict(
        {wp: ap for wp, ap in cdf_rules.items() if wp[0] and not wp[1]})
    cdf_back_pattern_dict = _build_cdf_pattern_dict(
        {wp: ap for wp, ap in cdf_rules.items() if not wp[0] and wp[1]})
    cdf_enclosing_pattern_dict = _build_cdf_pattern_dict(
        {wp: ap for wp, ap in cdf_rules.items() if wp[0] and wp[1]})

    rule_axioms = set()
    for cat in cat_store.get_usable_cats():
        rule_axioms.update(_apply_rules(cdf_front_pattern_dict, cat))
        rule_axioms.update(_apply_rules(cdf_back_pattern_dict, cat))
        rule_axioms.update(_apply_rules(cdf_enclosing_pattern_dict, cat))
    return rule_axioms


def _build_cdf_pattern_dict(cdf_patterns: dict) -> dict:
    """Index axiom patterns in a nested dict keyed by front words, then reversed back words.

    Extracted helper: the original code built this structure three times
    (front-only, back-only, enclosing patterns) with identical logic.
    """
    pattern_dict = {}
    for (front_pattern, back_pattern), axiom_patterns in cdf_patterns.items():
        _fill_dict(pattern_dict, list(front_pattern),
                   lambda d: _fill_dict(d, list(reversed(back_pattern)), axiom_patterns))
    return pattern_dict