Ejemplo n.º 1
0
    def _bfs(
        self, kg: KG, entity: Vertex, is_reverse: bool = False
    ) -> List[Walk]:
        """Extracts the walks of up to ``max_depth`` hops for an entity based
        on Knowledge Graph using the Breadth-First Search (BFS) algorithm.

        Every hop returned by the Knowledge Graph is followed, so the result
        is exhaustive rather than sampled. Each level only extends the walks
        created by the previous level: a walk that could not be extended is
        stored as finished and its end vertex is not queried again on deeper
        levels.

        Args:
            kg: The Knowledge Graph.
            entity: The root node to extract walks.
            is_reverse: True to get the parent neighbors instead of the child
                neighbors, False otherwise.
                Defaults to False.

        Returns:
            The list of unique walks for the provided entity. The order of
            the list is not defined, as the walks are collected in sets.

        """
        # Walks that reached a vertex without any further hop.
        finished: Set[Walk] = set()
        # Walks created by the previous level, still to be extended.
        frontier: Set[Walk] = {(entity,)}
        for _ in range(self.max_depth):
            next_frontier: Set[Walk] = set()
            for walk in frontier:
                if is_reverse:
                    # Reverse walks grow at the front: (parent, pred) + walk.
                    hops = kg.get_hops(walk[0], True)
                    extended = [(obj, pred) + walk for pred, obj in hops]
                else:
                    hops = kg.get_hops(walk[-1])
                    extended = [walk + (pred, obj) for pred, obj in hops]

                if extended:
                    # The shorter walk is replaced by its extensions.
                    next_frontier.update(extended)
                else:
                    finished.add(walk)
            frontier = next_frontier
            if not frontier:
                # Every walk is a dead end: deeper levels cannot add anything.
                break
        return list(finished | frontier)
Ejemplo n.º 2
0
    def _bfs(
        self, kg: KG, root: Vertex, is_reverse: bool = False
    ) -> List[Walk]:
        """Collects the walks rooted at a node with Breadth-first search.

        The search runs for ``max_depth`` levels. On each level, every walk
        collected so far is extended by each hop of its end vertex (its first
        vertex when walking in reverse); a walk that was extended is dropped
        in favor of its extensions, a walk without hops is kept unchanged.

        Args:
            kg: The Knowledge Graph.
            root: The root node to extract walks.
            is_reverse: True to get the parent neighbors instead of the child
                neighbors, False otherwise.
                Defaults to False.

        Returns:
            The list of walks for the root node.

        """
        collected: Set[Walk] = {(root,)}
        for _ in range(self.max_depth):
            # Iterate over a snapshot, as the set is modified in the loop.
            for current in list(collected):
                if is_reverse:
                    neighbors = kg.get_hops(current[0], True)
                    # Parents are prepended: (parent, predicate) + walk.
                    grown = [
                        (vertex, edge) + current for edge, vertex in neighbors
                    ]
                else:
                    neighbors = kg.get_hops(current[-1])
                    grown = [
                        current + (edge, vertex) for edge, vertex in neighbors
                    ]
                collected.update(grown)

                if len(neighbors) > 0:
                    collected.remove(current)
        return list(collected)
Ejemplo n.º 3
0
    def _extract(
        self, kg: KG, instance: rdflib.URIRef
    ) -> Dict[Any, Tuple[Tuple[str, ...], ...]]:
        """Extracts the walks rooted at the provided instance and rewrites
        them with the labels stored in the label map.

        For every iteration index from 0 to ``wl_iterations`` included, each
        walk is rewritten as follows: the root and the hops at odd positions
        are converted to strings, the other hops are replaced by the entry of
        the label map for that hop and iteration index.

        Args:
            kg: The Knowledge Graph.

                The graph from which the neighborhoods are extracted for the
                provided instance.
            instance: The instance to be extracted from the Knowledge Graph.

        Returns:
            A dictionary with the instance as single key and the tuple of its
            unique canonical walks as value. The order of the walks is not
            defined, as they are de-duplicated through a set.

        """
        walks = self.extract_random_walks(kg, str(instance))
        for walk in walks:
            # The result is discarded: presumably this call only makes the
            # KG load the hops of the last vertex before the relabeling.
            # NOTE(review): confirm against KG.get_hops.
            kg.get_hops(walk[-1])

        self._weisfeiler_lehman(kg)

        def canonical(walk, iteration):
            # Root (position 0) and odd positions keep their own string.
            return tuple(
                str(hop)
                if pos == 0 or pos % 2 == 1
                else self._label_map[hop][iteration]
                for pos, hop in enumerate(walk)
            )

        canonical_walks = {
            canonical(walk, iteration)
            for iteration in range(self.wl_iterations + 1)
            for walk in walks
        }
        return {instance: tuple(canonical_walks)}
Ejemplo n.º 4
0
    def sample_neighbor(self, kg: KG, walk, last):
        """Samples a not yet tagged hop that extends the provided walk.

        The candidates are the hops of the last vertex of the walk that are
        not stored in ``self.visited`` for the current walk length. One of
        them is drawn with a probability proportional to its weight.

        Args:
            kg: The Knowledge Graph.
            walk: The walk to extend; its last element is the vertex whose
                hops are sampled.
            last: True if the sampled hop is the last one of the walk, in
                which case it is tagged as visited for this walk length.

        Returns:
            The sampled hop, or None if every hop of the last vertex is
            already tagged (or if it has no hop at all).

        """
        depth = len(walk)
        not_tag_neighbors = [
            hop for hop in kg.get_hops(walk[-1])
            if (hop, depth) not in self.visited
        ]

        # If there are no untagged neighbors, then tag the hop that led to
        # this vertex (when the walk contains one) and return None.
        if not not_tag_neighbors:
            if depth > 2:
                self.visited.add(((walk[-2], walk[-1]), depth - 2))
            return None

        weights = [self.get_weight(hop) for hop in not_tag_neighbors]
        if self.inverse:
            # Mirror the weights inside [min, max]; the bounds are computed
            # once instead of once per element.
            highest, lowest = max(weights), min(weights)
            weights = [highest - (weight - lowest) for weight in weights]
        if self.split:
            # Divide each weight by the degree stored for the hop's object.
            weights = [
                weight / self.degrees[hop[1]]
                for weight, hop in zip(weights, not_tag_neighbors)
            ]
        # Normalize to a probability distribution; the total is computed
        # once instead of once per element.
        total = sum(weights)
        weights = [weight / total for weight in weights]

        # Sample a random neighbor and add them to visited if needed.
        rand_ix = np.random.choice(len(not_tag_neighbors), p=weights)
        if last:
            self.visited.add((not_tag_neighbors[rand_ix], depth))
        return not_tag_neighbors[rand_ix]
Ejemplo n.º 5
0
    def _bfs(self,
             kg: KG,
             root: Vertex,
             is_reverse: bool = False) -> List[Walk]:
        """Extracts the walks rooted in root with Breadth-first search, with
        random hops to the community of the visited vertices.

        The search runs for ``max_depth`` levels. Whenever a visited vertex
        belongs to a community, an extra walk ending (or starting, in reverse
        mode) with a randomly chosen label of that community is added with
        probability ``hop_prob``.

        Args:
            kg: The Knowledge Graph.

                The graph from which the neighborhoods are extracted for the
                provided entities.
            root: The root node to extract walks.
            is_reverse: True to get the parent neighbors instead of the child
                neighbors, False otherwise.
                Defaults to False.

        Returns:
            The list of walks for the root node.

        """
        # A single generator for the whole call. Building a new
        # RandomState(self.random_state) for each draw, with an integer
        # seed, yields the same number every time: the community hop would
        # then be taken for every vertex or for none of them.
        rng = np.random.RandomState(self.random_state)

        walks: Set[Walk] = {(root, )}
        for _ in range(self.max_depth):
            for walk in walks.copy():
                if is_reverse:
                    hops = kg.get_hops(walk[0], True)
                    for pred, obj in hops:
                        walks.add((obj, pred) + walk)
                        if (obj in self.communities
                                and rng.random() < self.hop_prob):
                            community = self.communities[obj]
                            label = rng.choice(
                                self.labels_per_community[community])
                            walks.add((label, ) + walk)
                else:
                    hops = kg.get_hops(walk[-1])
                    for pred, obj in hops:
                        walks.add(walk + (pred, obj))
                        if (obj in self.communities
                                and rng.random() < self.hop_prob):
                            community = self.communities[obj]
                            label = rng.choice(
                                self.labels_per_community[community])
                            walks.add(walk + (label, ))
                # A walk that was extended is replaced by its extensions.
                if len(hops) > 0:
                    walks.remove(walk)
        return list(walks)
Ejemplo n.º 6
0
    def fit(self, kg: KG) -> None:
        """Fits the sampling strategy: runs the parent fitting, then computes
        the PageRank of the non-predicate vertices of the provided KG.

        A directed graph is built with one node per vertex whose
        ``predicate`` flag is falsy, and one edge per hop of such a vertex,
        named after the first element of the hop. The PageRank scores,
        computed with ``alpha`` as damping, are stored in ``_pageranks``.

        Args:
            kg: The Knowledge Graph.

        """
        super().fit(kg)

        graph = nx.DiGraph()
        # Materialized before the loop, as the original does, so that the
        # hop lookups below never run while the vertices are iterated.
        entities = list(filter(lambda v: not v.predicate, kg._vertices))
        for entity in entities:
            graph.add_node(entity.name, vertex=entity)
            # hop[0] is used for the edge name, hop[1] for the target node.
            graph.add_edges_from(
                (entity.name, hop[1].name, {"name": hop[0].name})
                for hop in kg.get_hops(entity)
            )

        self._pageranks = nx.pagerank(graph, alpha=self.alpha)
Ejemplo n.º 7
0
    def sample_hop(
        self, kg: KG, walk: Walk, is_last_hop: bool, is_reverse: bool = False
    ) -> Optional[Hop]:
        """Draws one untagged hop for a walk, in the (predicate, object)
        form, according to the weights of the candidate hops.

        Candidates are the hops of the end vertex of the walk (its first
        vertex in reverse mode) that are not stored in ``self.visited`` for
        the current walk length. When no candidate is left and the walk
        contains at least one hop, the hop that led to the end vertex is
        tagged as visited for the previous walk length.

        Args:
            kg: The Knowledge Graph.
            walk: The walk with one or several vertices.
            is_last_hop: True if the next hop to be visited is the last
                one for the desired depth, False otherwise.
            is_reverse: True to get the parent neighbors instead of the child
                neighbors, False otherwise.
                Defaults to False.

        Returns:
            An unvisited hop in the (predicate, object) form, or None if no
            untagged hop is available.

        """
        depth = len(walk)
        anchor = walk[0] if is_reverse else walk[-1]

        candidates = []
        for hop in kg.get_hops(anchor, is_reverse):
            if (hop, depth) in self.visited:
                continue
            candidates.append(hop)

        if len(candidates) != 0:
            # NOTE(review): the generator is rebuilt on every call, so an
            # integer seed gives the same draw for identical weights;
            # confirm this is intended.
            rng = np.random.RandomState(self._random_state)
            picked = rng.choice(
                range(len(candidates)),
                p=self.get_weights(candidates),
            )
            chosen = candidates[picked]
            if is_last_hop:
                self.visited.add((chosen, depth))
            return chosen

        # Dead end: tag the hop that brought the walk to this vertex.
        if depth > 2:
            if is_reverse:
                incoming = (walk[1], walk[0])
            else:
                incoming = (walk[-2], walk[-1])
            self.visited.add((incoming, depth - 2))
        return None
Ejemplo n.º 8
0
    def extract_random_walks_bfs(self, kg: KG, root: str):
        """Enumerates the walks starting at a root with Breadth-first search.

        The search runs for ``depth`` levels. On each level, every walk
        collected so far is replaced by one longer walk per hop of its last
        vertex; a walk whose last vertex has no hop is kept unchanged.

        Args:
            kg: The Knowledge Graph.

                The graph from which the neighborhoods are extracted for the
                provided entities.
            root: The root node.

        Returns:
            The list of the walks.

        """
        paths = {(root, )}
        for _ in range(self.depth):
            # Iterate over a snapshot, as the set is modified in the loop.
            for path in tuple(paths):
                outgoing = kg.get_hops(path[-1])
                if len(outgoing) > 0:
                    paths.remove(path)
                paths.update(path + (pred, obj) for (pred, obj) in outgoing)
        return list(paths)