def extract(self, graph, instances):
    """Extracts walks rooted at the provided instances and adds
    wildcard-masked variants of each walk.

    Every walk returned by ``self.extract_random_walks`` is stored as a
    tuple of hop names. In addition, for every value ``k`` in
    ``self.wildcards`` and every combination of ``k`` non-root positions,
    a copy of the walk is stored in which those positions are replaced by
    the string ``"*"``.

    Args:
        graph (graph.KnowledgeGraph): The knowledge graph from which the
            walks are extracted.
        instances (array-like): The instances used as roots of the walks.
            Each one is converted with ``str`` before building its vertex.

    Returns:
        set: The unique walks, each one a tuple of strings.

    """
    canonical_walks = set()
    for instance in instances:
        walks = self.extract_random_walks(graph, Vertex(str(instance)))
        for walk in walks:
            names = [hop.name for hop in walk]
            canonical_walks.add(tuple(names))

            for wildcard in self.wildcards:
                # Position 0 is the root and is never masked.
                for idx in itertools.combinations(
                    range(1, len(walk)), wildcard
                ):
                    # Use the plain "*" string (not a Vertex object) so
                    # that every element of a walk tuple is a string.
                    canonical_walks.add(
                        tuple(
                            "*" if ix in idx else name
                            for ix, name in enumerate(names)
                        )
                    )
    return canonical_walks
def create_kg(triples, label_predicates):
    """Builds a knowledge graph from (subject, predicate, object) triples.

    Triples whose predicate appears in ``label_predicates`` are skipped.
    For every remaining triple, three vertices are created (subject,
    object, and a predicate vertex linked to both through ``vprev`` and
    ``vnext``) and two edges are added: subject -> predicate and
    predicate -> object.

    Args:
        triples (list): The triples, where each item must be an iterable
            (e.g., tuple, list) of three elements.
        label_predicates (list): The predicates to leave out of the graph
            to avoid leakage.

    Returns:
        graph.KnowledgeGraph: The knowledge graph.

    """
    graph = KnowledgeGraph()
    for subj, pred, obj in tqdm(triples):
        if pred in label_predicates:
            # Label-carrying predicate: keep it out of the graph.
            continue

        subj_vertex = Vertex(str(subj))
        obj_vertex = Vertex(str(obj))
        pred_vertex = Vertex(
            str(pred), predicate=True, vprev=subj_vertex, vnext=obj_vertex
        )

        for vertex in (subj_vertex, pred_vertex, obj_vertex):
            graph.add_vertex(vertex)
        for source, target in (
            (subj_vertex, pred_vertex),
            (pred_vertex, obj_vertex),
        ):
            graph.add_edge(source, target)
    return graph
def extract(self, graph, instances):
    """Extracts walks rooted at the provided instances and hashes part of
    their hops.

    For every walk returned by ``self.extract_random_walks``, the root hop
    (position 0) and every hop at an odd position keep their name. Each
    remaining hop is replaced by ``str()`` of the first 8 bytes of the MD5
    digest of its name.

    Args:
        graph (graph.KnowledgeGraph): The knowledge graph from which the
            walks are extracted.
        instances (array-like): The instances used as roots of the walks.
            Each one is converted with ``str`` before building its vertex.

    Returns:
        set: The unique canonical walks, each one a tuple of strings.

    """
    unique_walks = set()
    for entity in instances:
        for walk in self.extract_random_walks(graph, Vertex(str(entity))):
            unique_walks.add(
                tuple(
                    hop.name
                    if (pos == 0 or pos % 2 == 1)
                    else str(md5(hop.name.encode()).digest()[:8])
                    for pos, hop in enumerate(walk)
                )
            )
    return unique_walks
def extract(self, graph, instances):
    """Extracts walks rooted at the provided instances and converts them
    with ``self._take_n_grams``.

    Every walk returned by ``self.extract_random_walks`` is passed through
    ``self._take_n_grams`` and stored as a tuple. When ``self.wildcards``
    is not ``None``, then for every value ``k`` it contains and every
    combination of ``k`` non-root positions, a copy of the walk with those
    positions replaced by ``Vertex("*")`` is converted and stored as well.

    Args:
        graph (graph.KnowledgeGraph): The knowledge graph from which the
            walks are extracted.
        instances (array-like): The instances used as roots of the walks.
            Each one is converted with ``str`` before building its vertex.

    Returns:
        set: The unique canonical walks, each one a tuple.

    """
    collected = set()
    for entity in instances:
        for walk in self.extract_random_walks(graph, Vertex(str(entity))):
            collected.add(tuple(self._take_n_grams(walk)))

            if self.wildcards is not None:
                # Mask hops with wildcards and recompute the n-grams.
                for n_masked in self.wildcards:
                    # Position 0 is the root and is never masked.
                    for positions in itertools.combinations(
                        range(1, len(walk)), n_masked
                    ):
                        masked_walk = [
                            Vertex("*") if pos in positions else hop
                            for pos, hop in enumerate(walk)
                        ]
                        collected.add(
                            tuple(self._take_n_grams(masked_walk))
                        )
    return collected
def extract(self, graph, instances):
    """Extracts walks rooted at the provided instances and drops hops
    whose relative frequency is below a threshold.

    All walks of all instances are gathered first. For each hop name, the
    set of walks containing it is recorded. Then, for every threshold in
    ``self.freq_thresholds``, hop names that occur in a fraction of the
    walks smaller than the threshold are dropped from every walk (the root
    hop at position 0 is always kept, by name). Each kept non-root hop is
    replaced by ``str()`` of the first 8 bytes of the MD5 digest of its
    name.

    Args:
        graph (graph.KnowledgeGraph): The knowledge graph from which the
            walks are extracted.
        instances (array-like): The instances used as roots of the walks.
            Each one is converted with ``str`` before building its vertex.

    Returns:
        set: The unique canonical walks, each one a tuple of strings.

    """
    gathered = []
    for entity in instances:
        gathered.extend(
            self.extract_random_walks(graph, Vertex(str(entity)))
        )

    # Map every hop name to the indices of the walks it appears in.
    walks_by_hop = defaultdict(set)
    for walk_idx, walk in enumerate(gathered):
        for hop in walk:
            walks_by_hop[hop.name].add(walk_idx)

    unique_walks = set()
    n_walks = len(gathered)
    for threshold in self.freq_thresholds:
        # NOTE: only a lower bound is applied; an upper-bound check based
        # on ``self.ub_freq_threshold`` was present but commented out.
        rare_hops = {
            name
            for name, walk_ids in walks_by_hop.items()
            if len(walk_ids) / n_walks < threshold
        }
        for walk in gathered:
            unique_walks.add(
                tuple(
                    hop.name
                    if pos == 0
                    else str(md5(hop.name.encode()).digest()[:8])
                    for pos, hop in enumerate(walk)
                    if pos == 0 or hop.name not in rare_hops
                )
            )
    return unique_walks
def extract(self, graph, instances):
    """Extracts walks rooted at the provided instances and reduces them to
    (root, hop) pairs.

    For every walk returned by ``self.extract_random_walks``, one pair is
    stored per non-root hop: the name of the first hop of the walk
    together with the name of that hop.

    Args:
        graph (graph.KnowledgeGraph): The knowledge graph from which the
            walks are extracted.
        instances (array-like): The instances used as roots of the walks.
            Each one is converted with ``str`` before building its vertex.

    Returns:
        set: The unique pairs, each one a 2-tuple of hop names.

    """
    pairs = set()
    for entity in instances:
        for walk in self.extract_random_walks(graph, Vertex(str(entity))):
            pairs.update(
                (walk[0].name, walk[pos].name)
                for pos in range(1, len(walk))
            )
    return pairs
def test_extract_random_walks(self):
    """Checks that ``RandomWalker.extract_random_walks`` returns a list."""
    walker = RandomWalker(4, float("inf"))
    # NOTE(review): the whole return value of generate_entities() is
    # stringified to name the root vertex -- confirm this is intended.
    root = Vertex(str(generate_entities()))
    walks = walker.extract_random_walks(KG, root)
    # isinstance is the idiomatic type check and also accepts subclasses.
    assert isinstance(walks, list)