def create_kg(triples, label_predicates):
    """Build a KnowledgeGraph from (subject, predicate, object) triples.

    Triples whose predicate is contained in ``label_predicates`` are
    skipped. Every remaining triple contributes three vertices (subject,
    predicate, object) and two edges: subject -> predicate and
    predicate -> object.

    Args:
        triples: Iterable of 3-item ``(s, p, o)`` sequences.
        label_predicates: Container of predicates to leave out of the graph.

    Returns:
        The populated KnowledgeGraph.
    """
    graph = KnowledgeGraph()
    for subj, pred, obj in tqdm(triples):
        if pred in label_predicates:
            continue

        subj_vertex = Vertex(str(subj))
        obj_vertex = Vertex(str(obj))
        # The predicate is modelled as its own vertex that records both of
        # its endpoints.
        pred_vertex = Vertex(
            str(pred), predicate=True, _from=subj_vertex, _to=obj_vertex
        )

        for vertex in (subj_vertex, pred_vertex, obj_vertex):
            graph.add_vertex(vertex)
        graph.add_edge(subj_vertex, pred_vertex)
        graph.add_edge(pred_vertex, obj_vertex)
    return graph
def extract(self, graph, instances):
    """Extract canonical walks, dropping hops that occur in too few walks.

    All walks returned by ``self.extract_random_walks`` for every instance
    are gathered first. For each value in ``self.freq_thresholds``, a hop
    name is treated as uninformative when the fraction of gathered walks
    containing it is below that threshold. Each walk is then rewritten:
    the first hop keeps its name, uninformative hops are removed, and every
    other hop is replaced by ``str()`` of the first 8 bytes of the MD5
    digest of its name.

    Args:
        graph: Graph handed to ``self.extract_random_walks``.
        instances: Iterable of instances; each is wrapped as
            ``Vertex(str(instance))`` to serve as the walk root.

    Returns:
        Set of tuples, accumulated over all thresholds.
    """
    gathered = []
    for instance in instances:
        gathered.extend(
            self.extract_random_walks(graph, Vertex(str(instance)))
        )

    # hop name -> indices of the walks in which that name appears
    walks_containing = defaultdict(set)
    for walk_idx, walk in enumerate(gathered):
        for hop in walk:
            walks_containing[hop.name].add(walk_idx)

    canonical_walks = set()
    for freq_threshold in self.freq_thresholds:
        rare_names = {
            name
            for name, walk_ids in walks_containing.items()
            if len(walk_ids) / len(gathered) < freq_threshold
        }

        for walk in gathered:
            rewritten = []
            for pos, hop in enumerate(walk):
                if pos == 0:
                    # The root is always kept verbatim.
                    rewritten.append(hop.name)
                elif hop.name not in rare_names:
                    rewritten.append(
                        str(md5(hop.name.encode()).digest()[:8])
                    )
            canonical_walks.add(tuple(rewritten))
    return canonical_walks
def extract(self, graph, instances):
    """Extract (root, hop) name pairs from the random walks of each instance.

    For every walk returned by ``self.extract_random_walks``, one 2-tuple
    ``(walk[0].name, walk[n].name)`` is produced for each position
    ``n >= 1`` of the walk.

    Args:
        graph: Graph handed to ``self.extract_random_walks``.
        instances: Iterable of instances; each is wrapped as
            ``Vertex(str(instance))`` to serve as the walk root.

    Returns:
        Set of 2-tuples of names.
    """
    canonical_walks = set()
    for instance in instances:
        root = Vertex(str(instance))
        for walk in self.extract_random_walks(graph, root):
            # walk[0] is only touched when the walk has at least two hops.
            canonical_walks.update(
                (walk[0].name, walk[pos].name)
                for pos in range(1, len(walk))
            )
    return canonical_walks
def extract(self, graph, instances):
    """Extract walks plus variants with some hops replaced by a wildcard.

    Every walk returned by ``self.extract_random_walks`` is added as a
    tuple of hop names. In addition, for each count in ``self.wildcards``,
    every combination of that many non-root positions is masked with the
    wildcard and the resulting tuple is added as well.

    Args:
        graph: Graph handed to ``self.extract_random_walks``.
        instances: Iterable of instances; each is wrapped as
            ``Vertex(str(instance))`` to serve as the walk root.

    Returns:
        Set of tuples of names.
    """
    canonical_walks = set()
    for instance in instances:
        walks = self.extract_random_walks(graph, Vertex(str(instance)))
        for walk in walks:
            canonical_walks.add(tuple(hop.name for hop in walk))

            # Treat a missing wildcard configuration as "no wildcards"
            # instead of failing on iteration over None.
            if self.wildcards is None:
                continue

            for wildcard in self.wildcards:
                # Position 0 (the root) is never masked.
                for idx in itertools.combinations(
                        range(1, len(walk)), wildcard):
                    # Store the wildcard's *name*, not the Vertex object,
                    # so masked positions hold the same kind of value as
                    # every other hop in the tuple.
                    canonical_walks.add(tuple(
                        Vertex('*').name if ix in idx else hop.name
                        for ix, hop in enumerate(walk)
                    ))
    return canonical_walks
def extract(self, graph, instances):
    """Extract n-gram walks, optionally with wildcard-masked variants.

    Every walk returned by ``self.extract_random_walks`` is passed through
    ``self._take_n_grams`` and stored as a tuple. When ``self.wildcards``
    is not ``None``, each count in it selects every combination of that
    many non-root positions; those positions are replaced by
    ``Vertex('*')`` in a copy of the walk and the n-grams of the copy are
    stored too.

    Args:
        graph: Graph handed to ``self.extract_random_walks``.
        instances: Iterable of instances; each is wrapped as
            ``Vertex(str(instance))`` to serve as the walk root.

    Returns:
        Set of tuples built from the output of ``self._take_n_grams``.
    """
    canonical_walks = set()
    for instance in instances:
        root = Vertex(str(instance))
        for walk in self.extract_random_walks(graph, root):
            canonical_walks.add(tuple(self._take_n_grams(walk)))

            # Introduce wildcards and re-calculate the n-grams.
            if self.wildcards is not None:
                for wildcard in self.wildcards:
                    # Position 0 (the root) is never masked.
                    maskable = range(1, len(walk))
                    for masked in itertools.combinations(maskable, wildcard):
                        variant = list(walk)
                        for pos in masked:
                            variant[pos] = Vertex('*')
                        canonical_walks.add(
                            tuple(self._take_n_grams(variant))
                        )
    return canonical_walks
def extract(self, graph, instances):
    """Extract walks in which non-root hops are replaced by position indices.

    For every walk returned by ``self.extract_random_walks``, the first hop
    keeps its name. Every later hop is replaced by ``str()`` of the index
    at which its name first occurs in the walk.

    Args:
        graph: Graph handed to ``self.extract_random_walks``.
        instances: Iterable of instances; each is wrapped as
            ``Vertex(str(instance))`` to serve as the walk root.

    Returns:
        Set of tuples of strings.
    """
    canonical_walks = set()
    for instance in instances:
        root = Vertex(str(instance))
        for walk in self.extract_random_walks(graph, root):
            names = [hop.name for hop in walk]
            canonical_walks.add(tuple(
                name if pos == 0 else str(names.index(name))
                for pos, name in enumerate(names)
            ))
    return canonical_walks
def extract(self, graph, instances):
    """Extract walks with the hops at even, non-zero positions hashed.

    For every walk returned by ``self.extract_random_walks``, the hop at
    position 0 and the hops at odd positions keep their name. Hops at the
    remaining (even, non-zero) positions are replaced by ``str()`` of the
    first 8 bytes of the MD5 digest of their name.

    Args:
        graph: Graph handed to ``self.extract_random_walks``.
        instances: Iterable of instances; each is wrapped as
            ``Vertex(str(instance))`` to serve as the walk root.

    Returns:
        Set of tuples of strings.
    """
    canonical_walks = set()
    for instance in instances:
        root = Vertex(str(instance))
        for walk in self.extract_random_walks(graph, root):
            canonical_walks.add(tuple(
                str(md5(hop.name.encode()).digest()[:8])
                if pos != 0 and pos % 2 != 1
                else hop.name
                for pos, hop in enumerate(walk)
            ))
    return canonical_walks
def extract(self, graph, instances):
    """Extract walks relabelled with the entries of ``self._label_map``.

    ``self._weisfeiler_lehman(graph)`` is run once up front. Then, for each
    instance and each iteration number ``n`` in
    ``0..self.wl_iterations`` (inclusive), every walk returned by
    ``self.extract_random_walks`` is rewritten: the hop at position 0 and
    the hops at odd positions keep their name, and each remaining hop is
    replaced by ``self._label_map[hop][n]``.

    Args:
        graph: Graph handed to ``self._weisfeiler_lehman`` and
            ``self.extract_random_walks``.
        instances: Iterable of instances; each is wrapped as
            ``Vertex(str(instance))`` to serve as the walk root.

    Returns:
        Set of tuples, accumulated over all iteration numbers.
    """
    # NOTE(review): presumably this call is what fills self._label_map;
    # confirm against its definition.
    self._weisfeiler_lehman(graph)

    canonical_walks = set()
    for instance in instances:
        walks = self.extract_random_walks(graph, Vertex(str(instance)))
        for iteration in range(self.wl_iterations + 1):
            for walk in walks:
                relabelled = []
                for pos, hop in enumerate(walk):
                    keeps_name = pos == 0 or pos % 2 == 1
                    relabelled.append(
                        hop.name if keeps_name
                        else self._label_map[hop][iteration]
                    )
                canonical_walks.add(tuple(relabelled))
    return canonical_walks