コード例 #1
0
 def setup(self):
     """Populate ``LOCAL_KG`` with one walk per row of ``GRAPH``.

     Each row is read by position: index 0 is the subject, index 1 the
     predicate and index 2 the object.
     """
     for triple in GRAPH:
         head = Vertex(f"{URL}#{triple[0]}")
         tail = Vertex(f"{URL}#{triple[2]}")
         edge = Vertex(
             f"{URL}#{triple[1]}", predicate=True, vprev=head, vnext=tail
         )
         LOCAL_KG.add_walk(head, edge, tail)
コード例 #2
0
    def test_get_neighbors(self):
        """Check Alice's hops on the local graph.

        The stringified predicates must be exactly ``knows`` and both Bob and
        Dean must be among the reached objects.
        """
        # Only the local graph is exercised here; no remote graph is built.
        for kg in (LOCAL_KG,):
            hops = kg.get_hops(f"{URL}#Alice")

            predicate_names = {str(hop[0]) for hop in hops}
            assert predicate_names == {f"{URL}#knows"}

            reached = [hop[1] for hop in hops]
            assert Vertex(f"{URL}#Bob") in reached
            assert Vertex(f"{URL}#Dean") in reached
コード例 #3
0
ファイル: test_graph.py プロジェクト: sw478/pyRDF2Vec
    def test_get_neighbors(self):
        """Verify Alice's hops on both the local and the remote graph.

        For each graph, the set of stringified predicates must equal
        ``{knows}`` and both Bob and Dean must appear among the objects.
        """
        for kg in (LOCAL_KG, REMOTE_KG):
            alice_hops = kg.get_hops(f"{URL}#Alice")

            seen_predicates = {str(pred) for pred, *_ in
                               ((hop[0],) for hop in alice_hops)}
            assert seen_predicates == {f"{URL}#knows"}

            targets = [hop[1] for hop in alice_hops]
            assert Vertex(f"{URL}#Bob") in targets
            assert Vertex(f"{URL}#Dean") in targets
コード例 #4
0
 def setup(self):
     """Fill ``KG_LOOP`` from ``LOOP`` and ``KG_CHAIN`` from ``LONG_CHAIN``.

     Every row is read by position (subject, predicate, object) and added
     to the matching graph as a single walk.
     """
     for rows, target in ((LOOP, KG_LOOP), (LONG_CHAIN, KG_CHAIN)):
         for triple in rows:
             head = Vertex(f"{URL}#{triple[0]}")
             tail = Vertex(f"{URL}#{triple[2]}")
             edge = Vertex(
                 f"{URL}#{triple[1]}",
                 predicate=True,
                 vprev=head,
                 vnext=tail,
             )
             target.add_walk(head, edge, tail)
コード例 #5
0
 def test_extract(
     self,
     setup,
     kg,
     root,
     max_depth,
     max_walks,
     with_reverse,
     wl_iterations,
 ):
     """Check the walks extracted by ``WLWalker`` for a root entity.

     Without reverse walks, the number of walks is bounded (when
     ``max_walks`` is set) and every walk must start at the root.
     """
     root_iri = f"{URL}#{root}"
     wl_walker = WLWalker(
         max_depth,
         max_walks,
         with_reverse=with_reverse,
         random_state=42,
         wl_iterations=wl_iterations,
     )
     wl_walker._weisfeiler_lehman(kg)
     extracted = wl_walker._extract(kg, Vertex(root_iri))[root_iri]

     forward_only = not with_reverse
     if max_walks is not None and forward_only:
         upper_bound = (max_walks * wl_iterations) + max(
             max_depth, max_walks)
         assert len(extracted) <= upper_bound
     for walk in extracted:
         if forward_only:
             assert walk[0] == root_iri
コード例 #6
0
ファイル: ngram.py プロジェクト: Qawasmeh-omar/pyRDF2Vec
    def _extract(self, kg: KG, instance: Vertex) -> EntityWalks:
        """Extracts the walks rooted at an instance and converts each of them
        with ``_take_n_grams``, optionally adding wildcard variants.

        Args:
            kg: The Knowledge Graph.
            instance: The instance to be extracted from the Knowledge Graph.

        Returns:
            A dictionary having the name of the instance as key and the list
            of its unique converted walks (tuples) as value.

        """
        canonical_walks: Set[SWalk] = set()
        for walk in self.extract_walks(kg, instance):
            canonical_walks.add(tuple(self._take_n_grams(walk)))

            # Introduce wild-cards and re-calculate n-grams
            if self.wildcards is None:
                continue

            for wildcard in self.wildcards:
                # Position 0 is never part of a combination, so the first
                # element of the walk is never replaced by a wildcard.
                for idx in itertools.combinations(range(1, len(walk)),
                                                  wildcard):
                    # ``list(walk)`` already yields a fresh list; no extra
                    # ``.copy()`` is needed before mutating it.
                    new_walk = list(walk)
                    for ix in idx:
                        new_walk[ix] = Vertex("*")
                    canonical_walks.add(
                        tuple(self._take_n_grams(new_walk))  # type: ignore
                    )
        return {instance.name: list(canonical_walks)}
コード例 #7
0
    def _extract(self, kg: KG, entity: Vertex) -> EntityWalks:
        """Extracts the walks of an entity and converts them with
        ``_take_n_grams``, adding wildcard variants when configured.

        Args:
            kg: The Knowledge Graph.
            entity: The root node to extract walks.

        Returns:
            A dictionary having the name of the entity as key and a list of
            tuples as value corresponding to the extracted walks.

        """
        unique_walks: Set[SWalk] = set()
        for walk in self.extract_walks(kg, entity):
            unique_walks.add(tuple(self._take_n_grams(walk)))

            # Wildcard variants are only produced when wildcards are set.
            if self.wildcards is not None:
                for n_masked in self.wildcards:
                    # Index 0 is excluded, so the first hop is never masked.
                    for masked in itertools.combinations(
                            range(1, len(walk)), n_masked):
                        variant = list(walk)
                        for position in masked:
                            variant[position] = Vertex("*")
                        unique_walks.add(
                            tuple(self._take_n_grams(variant))  # type: ignore
                        )
        return {entity.name: list(unique_walks)}
コード例 #8
0
 def test_weight(
     self, setup, kg, root, is_reverse, sampler, is_inverse, is_split
 ):
     """Check that an ``ObjFreqSampler`` never weights a hop above 4.

     The sampler class is instantiated with ``is_inverse`` and ``is_split``
     and fitted on ``kg``; other sampler types are fitted but not asserted.
     """
     fitted = sampler(is_inverse, is_split)
     fitted.fit(kg)
     start = Vertex(f"{URL}#{root}")
     for hop in kg.get_hops(start, is_reverse=is_reverse):
         if not isinstance(fitted, ObjFreqSampler):
             continue
         assert fitted.get_weight(hop) <= 4
コード例 #9
0
 def test_weight(self, setup, kg, root):
     """Check that ``WideSampler`` gives each hop a positive float weight."""
     wide = WideSampler()
     wide.fit(kg)
     start = Vertex(f"{URL}#{root}")
     for hop in kg.get_hops(start):
         hop_weight = wide.get_weight(hop)
         assert hop_weight > 0
         assert isinstance(hop_weight, float)
コード例 #10
0
 def test_weight(
     self, setup, kg, root, is_reverse, alpha, is_inverse, is_split
 ):
     """Check that ``PageRankSampler`` weights never exceed ``alpha``."""
     page_rank = PageRankSampler(
         alpha=alpha, inverse=is_inverse, split=is_split
     )
     page_rank.fit(kg)
     start = Vertex(f"{URL}#{root}")
     for hop in kg.get_hops(start, is_reverse=is_reverse):
         hop_weight = page_rank.get_weight(hop)
         assert hop_weight <= alpha
コード例 #11
0
 def test_dfs(self, setup, kg, root, max_depth, max_walks, is_reverse):
     """Check length and anchoring of the walks from ``RandomWalker._dfs``.

     A walk holds at most ``max_depth * 2 + 1`` elements. It ends at the
     root when reversed and starts at the root otherwise.
     """
     root_iri = f"{URL}#{root}"
     walker = RandomWalker(max_depth, max_walks, random_state=42)
     # The root sits at the tail of reversed walks, at the head otherwise.
     root_position = -1 if is_reverse else 0
     for walk in walker._dfs(kg, Vertex(root_iri), is_reverse):
         assert len(walk) <= (max_depth * 2) + 1
         assert walk[root_position].name == root_iri
コード例 #12
0
 def test_extract(
     self, setup, kg, root, max_depth, max_walks, with_reverse
 ):
     """Check that ``WalkletWalker`` yields walks of one or two elements.

     Without reverse walks, each walk must also start at the root.
     """
     root_iri = f"{URL}#{root}"
     walklet = WalkletWalker(
         max_depth, max_walks, with_reverse=with_reverse
     )
     extracted = walklet._extract(kg, Vertex(root_iri))[root_iri]
     for walk in extracted:
         assert len(walk) in (1, 2)
         if not with_reverse:
             assert walk[0] == root_iri
コード例 #13
0
 def test_dfs(self, setup, kg, root, max_depth, max_walks, is_reverse):
     """Check length and anchoring of walks from ``CommunityWalker._dfs``.

     Community detection is run on ``kg`` before the walks are extracted.
     """
     root_iri = f"{URL}#{root}"
     community = CommunityWalker(max_depth, max_walks, random_state=42)
     community._community_detection(kg)
     for walk in community._dfs(kg, Vertex(root_iri), is_reverse):
         assert len(walk) <= (max_depth * 2) + 1
         # Reversed walks end at the root; forward walks start at it.
         anchor = walk[-1] if is_reverse else walk[0]
         assert anchor.name == root_iri
コード例 #14
0
    def _proc(self, entity: str) -> EntityWalks:
        """Extracts the walks of a single entity; executed by each process.

        The Knowledge Graph is read from the module-level ``kg`` variable
        rather than passed as an argument.

        Args:
            entity: The entity to be extracted from the Knowledge Graph.

        Returns:
            The extraction of walk by the process.

        """
        global kg
        root = Vertex(entity)
        return self._extract(kg, root)  # type: ignore
コード例 #15
0
    def test_get_hops(self, setup):
        """Check the forward hops of Alice and the reverse hops of Bob.

        Alice is expected to have two outgoing ``knows`` hops (to Bob and
        Dean); Bob is expected to have one reverse hop leading to Alice.
        """
        forward = LOCAL_KG.get_hops(Vertex(f"{URL}#Alice"))
        forward_predicates = [hop[0] for hop in forward]
        forward_objects = [hop[1] for hop in forward]

        assert len(forward) == 2
        assert len(forward_predicates) == 2
        assert len(forward_objects) == 2

        predicate_names = {pred.name for pred in forward_predicates}
        assert predicate_names == {f"{URL}#knows"}
        assert Vertex(f"{URL}#Bob") in forward_objects
        assert Vertex(f"{URL}#Dean") in forward_objects

        backward = LOCAL_KG.get_hops(Vertex(f"{URL}#Bob"), is_reverse=True)
        backward_predicates = [hop[0] for hop in backward]
        backward_objects = [hop[1] for hop in backward]

        assert len(backward) == 1
        assert len(backward_predicates) == 1
        assert len(backward_objects) == 1
        assert Vertex(f"{URL}#Alice") in backward_objects
コード例 #16
0
    def test_get_weights(
        self, setup, kg, root, sampler, is_reverse, is_inverse, is_split
    ):
        """Check that ``get_weights`` returns a list of values not above 1.

        ``UniformSampler`` is built without arguments; every other sampler
        class receives ``is_inverse`` and ``is_split``.
        """
        takes_no_args = "UniformSampler" in str(sampler)
        fitted = sampler() if takes_no_args else sampler(is_inverse, is_split)
        fitted.fit(kg)

        hops = kg.get_hops(Vertex(f"{URL}#{root}"), is_reverse=is_reverse)
        weights = fitted.get_weights(hops)
        assert isinstance(weights, list)
        # An empty list simply yields no iterations.
        for weight in weights:
            assert weight <= 1
コード例 #17
0
 def test_res2hops(self, setup):
     """Check that ``_res2hops`` turns one result binding into one hop.

     The binding carries a literal object ``obj`` and a literal predicate
     ``pred``; the expected hop is the (predicate, object) pair.
     """

     def english_literal(value):
         # Builds one literal binding entry tagged as English.
         return {"type": "literal", "xml:lang": "en", "value": value}

     source = Vertex("subj")
     target = Vertex("obj")
     binding = {
         "o": english_literal("obj"),
         "p": english_literal("pred"),
     }
     hops = LOCAL_KG._res2hops(source, [binding])

     assert isinstance(hops, list)
     assert len(hops) == 1
     edge = Vertex("pred", predicate=True, vprev=source, vnext=target)
     assert hops == [(edge, target)]
コード例 #18
0
    def test_remove_edge(self, setup):
        """Check ``remove_edge`` on the edges leaving Alice.

        The test expects a ``ValueError`` while the graph is flagged as
        remote, ``True`` for each of Alice's two existing edges, and
        ``False`` for a predicate that is not part of the graph.
        """
        vtx_alice = Vertex(f"{URL}#Alice")

        neighbors = LOCAL_KG.get_hops(vtx_alice)
        assert len(neighbors) == 2

        predicates = [
            vertex for hops in neighbors for vertex in hops
            if vertex.predicate is True
        ]

        # LOCAL_KG is shared between tests: restore the flag even when the
        # expected ValueError is not raised, otherwise later tests would run
        # against a graph that is still marked as remote.
        LOCAL_KG._is_remote = True
        try:
            with pytest.raises(ValueError):
                LOCAL_KG.remove_edge(vtx_alice, predicates[0])
        finally:
            LOCAL_KG._is_remote = False

        assert LOCAL_KG.remove_edge(vtx_alice, predicates[0]) is True
        assert len(LOCAL_KG.get_hops(vtx_alice)) == 1

        assert LOCAL_KG.remove_edge(vtx_alice, predicates[1]) is True
        assert len(LOCAL_KG.get_hops(vtx_alice)) == 0

        assert (LOCAL_KG.remove_edge(vtx_alice, Vertex(f"{URL}#Unknown")) is
                False)
0
ファイル: rdf2vec.py プロジェクト: Qawasmeh-omar/pyRDF2Vec
    def get_walks(self, kg: KG, entities: Entities) -> List[List[SWalk]]:
        """Extracts the walks of the given entities with every walker.

        The deduplicated entities and the extracted walks are also passed to
        ``self._update`` together with ``self._entities`` and ``self._walks``.

        Args:
            kg: The Knowledge Graph.
            entities: The entities including test entities to create the
                embeddings. Since RDF2Vec is unsupervised, there is no label
                leakage.

        Returns:
            The walks for the given entities.

        Raises:
            ValueError: If the Knowledge Graph is not remote and at least one
                of the provided entities is not among its vertices.

        """
        if not kg._is_remote:
            # Membership can only be checked on a locally stored graph.
            found = [Vertex(entity) in kg._vertices for entity in entities]
            if not all(found):
                raise ValueError(
                    "The provided entities must be in the Knowledge Graph.")

        # Avoids duplicate entities for unnecessary walk extractions.
        entities = list(set(entities))

        if self.verbose == 2:
            print(kg)
            print(self.walkers[0])

        walks: List[List[SWalk]] = []
        started = time.perf_counter()
        for walker in self.walkers:
            walks += walker.extract(kg, entities, self.verbose)
        elapsed_end = time.perf_counter()

        self._update(self._entities, entities)
        self._update(self._walks, walks)

        if self.verbose >= 1:
            tic, toc = started, elapsed_end
            n_walks = sum([len(entity_walks) for entity_walks in walks])
            print(f"Extracted {n_walks} walks " +
                  f"for {len(entities)} entities ({toc - tic:0.4f}s)")

        should_close = (kg._is_remote and kg.mul_req
                        and not self._is_extract_walks_literals)
        if should_close:
            asyncio.run(kg.connector.close())
        return walks
コード例 #20
0
ファイル: rdf2vec.py プロジェクト: rememberYou/pyRDF2Vec
    def fit(self,
            kg: KG,
            entities: List[rdflib.URIRef],
            verbose: bool = False) -> "RDF2VecTransformer":
        """Extracts walks for the entities and fits the embedder on them.

        Args:
            kg: The Knowledge Graph.
                The graph from which the neighborhoods are extracted for the
                provided entities.
            entities: The entities to create the embedding.
                The test entities should be passed to the fit method as well.

                Due to RDF2Vec being unsupervised, there is no label leakage.
            verbose: If true, display a progress bar for the extraction of the
                walks and display the number of these extracted walks for the
                number of entities with the extraction time.
                Defaults to False.

        Returns:
            The RDF2VecTransformer.

        Raises:
            ValueError: If the Knowledge Graph is not remote and at least one
                of the provided entities is not among its vertices.

        """
        if not kg.is_remote:
            # Membership can only be checked on a locally stored graph.
            found = [
                Vertex(str(entity)) in kg._vertices for entity in entities
            ]
            if not all(found):
                raise ValueError(
                    "The provided entities must be in the Knowledge Graph.")

        if verbose:
            print(self.walkers[0].info())  # type: ignore

        tic = time.perf_counter()
        for walker in self.walkers:  # type: ignore
            extracted = walker.extract(kg, entities, verbose)
            self.walks_ += list(extracted)
        toc = time.perf_counter()

        # The embedder is fed string tokens, so every hop is stringified.
        corpus = [[str(hop) for hop in walk] for walk in self.walks_]

        if verbose:
            print(f"Extracted {len(self.walks_)} walks " +
                  f"for {len(entities)} entities! ({toc - tic:0.4f}s)")

        self.embedder.fit(corpus)
        return self
コード例 #21
0
 def test_add_walk(self):
     """Check ``add_walk`` on a graph configured with a skipped predicate.

     Every row of ``GRAPH`` must be accepted, while a walk whose predicate
     is listed in ``skip_predicates`` must be rejected.
     """
     graph = KG(skip_predicates={f"{URL}#predicate"})
     for triple in GRAPH:
         head = Vertex(f"{URL}#{triple[0]}")
         tail = Vertex(f"{URL}#{triple[2]}")
         edge = Vertex(
             f"{URL}#{triple[1]}", predicate=True, vprev=head, vnext=tail
         )
         accepted = graph.add_walk(head, edge, tail)
         assert accepted is True

     first_row = GRAPH[0]
     head = Vertex(f"{URL}#{first_row[0]}")
     tail = Vertex(f"{URL}#{first_row[2]}")
     skipped = Vertex(f"{URL}#predicate")
     assert graph.add_walk(head, skipped, tail) is False
コード例 #22
0
ファイル: rdf2vec.py プロジェクト: sw478/pyRDF2Vec
    def fit(
        self,
        kg: KG,
        entities: List[rdflib.URIRef],
        verbose: bool = False,
    ) -> "RDF2VecTransformer":
        """Extracts walks for the entities and fits the embedder on them.

        Args:
            kg: The Knowledge Graph.
                The graph from which the neighborhoods are extracted for the
                provided entities.
            entities: The entities to create the embedding.
                The test entities should be passed to the fit method as well.

                Due to RDF2Vec being unsupervised, there is no label leakage.
            verbose: If true, display the number of extracted walks for the
                number of entities. Defaults to false.

        Returns:
            The RDF2VecTransformer.

        Raises:
            ValueError: If ``kg.is_remote`` is ``False`` and at least one of
                the provided entities is not among the vertices of the graph.

        """
        if kg.is_remote is False:
            # Membership can only be checked on a locally stored graph.
            found = [
                Vertex(str(entity)) in kg._vertices for entity in entities
            ]
            if not all(found):
                raise ValueError(
                    "The provided entities must be in the Knowledge Graph."
                )

        for walker in self.walkers:
            extracted = walker.extract(kg, entities)
            self.walks_ += list(extracted)

        # The embedder is fed string tokens, so every hop is stringified.
        corpus = [[str(hop) for hop in walk] for walk in self.walks_]

        if verbose:
            print(
                f"Extracted {len(self.walks_)} walks "
                + f"for {len(entities)} entities!"
            )

        self.embedder.fit(corpus)
        return self
コード例 #23
0
ファイル: test_halk.py プロジェクト: Qawasmeh-omar/pyRDF2Vec
 def test_extract(
     self, setup, kg, root, max_depth, max_walks, with_reverse
 ):
     """Check the walks extracted by ``HALKWalker`` for a root entity.

     When ``max_walks`` is set, the number of walks is bounded by
     ``max_walks`` (squared with reverse walks). Every element after the
     first one of a walk must start with ``b'``.
     """
     root_iri = f"{URL}#{root}"
     halk = HALKWalker(
         max_depth,
         max_walks,
         freq_thresholds=[0.01],
         with_reverse=with_reverse,
         random_state=42,
     )
     extracted = halk._extract(kg, Vertex(root_iri))[root_iri]

     if max_walks is not None:
         limit = max_walks * max_walks if with_reverse else max_walks
         assert len(extracted) <= limit

     for walk in extracted:
         if not with_reverse:
             assert walk[0] == root_iri
         for hop in walk[1:]:
             assert hop.startswith("b'")
コード例 #24
0
 def test_extract(
     self, setup, kg, root, max_depth, max_walks, with_reverse
 ):
     """Check the walks extracted by ``RandomWalker`` for a root entity.

     When ``max_walks`` is set, the number of walks is bounded by
     ``max_walks`` (squared with reverse walks). Every second element from
     index 2 on must start with ``b'``, and the walk length is bounded by
     ``max_depth * 2 + 1`` (doubled with reverse walks).
     """
     root_iri = f"{URL}#{root}"
     random_walker = RandomWalker(
         max_depth, max_walks, with_reverse=with_reverse, random_state=42
     )
     extracted = random_walker._extract(kg, Vertex(root_iri))[root_iri]

     if max_walks is not None:
         limit = max_walks * max_walks if with_reverse else max_walks
         assert len(extracted) <= limit

     for walk in extracted:
         for hop in walk[2::2]:
             assert hop.startswith("b'")
         if with_reverse:
             assert len(walk) <= ((max_depth * 2) + 1) * 2
         else:
             assert walk[0] == root_iri
             assert len(walk) <= (max_depth * 2) + 1
コード例 #25
0
ファイル: ngrams.py プロジェクト: sw478/pyRDF2Vec
    def _extract(
        self, graph: KG, instances: List[rdflib.URIRef]
    ) -> Set[Tuple[Dict[Tuple[Any, ...], str], ...]]:
        """Extracts random walks rooted at the provided instances and
        converts each of them with ``_take_n_grams``, optionally adding
        wildcard variants.

        Args:
            graph: The knowledge graph.

                The graph from which the neighborhoods are extracted for the
                provided instances.
            instances: The instances whose walks are extracted from the
                knowledge graph.

        Returns:
            The set of unique converted walks (tuples) gathered over all the
            provided instances.

        """
        canonical_walks = set()
        for instance in instances:
            walks = self.extract_random_walks(graph, str(instance))
            for walk in walks:
                canonical_walks.add(
                    tuple(self._take_n_grams(walk))  # type:ignore
                )

                # Introduce wild-cards and re-calculate n-grams
                if self.wildcards is None:
                    continue

                for wildcard in self.wildcards:
                    # Position 0 is never part of a combination, so the first
                    # element of the walk is never replaced by a wildcard.
                    for idx in itertools.combinations(
                        range(1, len(walk)), wildcard  # type: ignore
                    ):
                        # ``list(walk)`` already yields a fresh list; no
                        # extra ``.copy()`` is needed before mutating it.
                        new_walk = list(walk)  # type: ignore
                        for ix in idx:
                            new_walk[ix] = Vertex("*")
                        canonical_walks.add(
                            tuple(self._take_n_grams(new_walk))
                        )
        return canonical_walks
コード例 #26
0
ファイル: test_ngram.py プロジェクト: Qawasmeh-omar/pyRDF2Vec
    def test_extract(self, setup, kg, root, max_depth, max_walks,
                     with_reverse):
        """Check the walks extracted by ``NGramWalker`` (``grams=2``).

        When ``max_walks`` is set, the number of walks is bounded by
        ``max_walks`` (squared with reverse walks). Without reverse walks,
        every walk must start at the root. For the sampled walks, the third
        element must be a numeric string.
        """
        root = f"{URL}#{root}"
        walks = NGramWalker(
            max_depth,
            max_walks,
            with_reverse=with_reverse,
            random_state=42,
            grams=2,
        )._extract(kg, Vertex(root))[root]
        if max_walks is not None:
            if with_reverse:
                assert len(walks) <= max_walks * max_walks
            else:
                assert len(walks) <= max_walks
        for walk in walks:
            if not with_reverse:
                assert walk[0] == root

        # NOTE(review): this slice samples every other walk starting from the
        # third one, not every other hop of a walk -- confirm it is intended.
        for walk in walks[2::2]:
            # Index 2 only exists for walks of at least three elements; the
            # previous ``len(walk) > 1`` guard raised IndexError on a walk of
            # exactly two elements.
            if len(walk) > 2:
                assert walk[2].isnumeric()
コード例 #27
0
    def test_get_neighbors(self, setup):
        """Check the forward and reverse neighbors of Alice, Bob and Dean.

        Expected layout: Alice has two forward neighbors leading to Bob and
        Dean and no reverse neighbor; Bob has one forward neighbor leading to
        Casper and one reverse neighbor coming from Alice; Dean has one
        reverse neighbor coming from Alice and no forward neighbor.
        """
        alice_predicates = list(
            LOCAL_KG.get_neighbors(Vertex(f"{URL}#Alice")))
        assert len(alice_predicates) == 2
        assert Vertex(f"{URL}#Alice") == alice_predicates[0].vprev
        assert Vertex(f"{URL}#Alice") == alice_predicates[1].vprev

        # ``A and B in S`` parses as ``A and (B in S)``, which only checked
        # Dean; each expected target is now asserted on its own.
        alice_targets = {
            alice_predicates[0].vnext,
            alice_predicates[1].vnext,
        }
        assert Vertex(f"{URL}#Bob") in alice_targets
        assert Vertex(f"{URL}#Dean") in alice_targets

        assert not list(
            LOCAL_KG.get_neighbors(Vertex(f"{URL}#Alice"), is_reverse=True))

        bob_predicates = list(LOCAL_KG.get_neighbors(Vertex(f"{URL}#Bob")))
        assert len(bob_predicates) == 1
        assert Vertex(f"{URL}#Bob") == bob_predicates[0].vprev
        assert Vertex(f"{URL}#Casper") == bob_predicates[0].vnext

        bob_predicates = list(
            LOCAL_KG.get_neighbors(Vertex(f"{URL}#Bob"), is_reverse=True))
        assert len(bob_predicates) == 1
        assert Vertex(f"{URL}#Bob") == bob_predicates[0].vnext
        assert Vertex(f"{URL}#Alice") == bob_predicates[0].vprev

        dean_predicates = list(
            LOCAL_KG.get_neighbors(Vertex(f"{URL}#Dean"), is_reverse=True))
        assert len(dean_predicates) == 1
        assert Vertex(f"{URL}#Dean") == dean_predicates[0].vnext
        assert Vertex(f"{URL}#Alice") == dean_predicates[0].vprev
        assert not list(LOCAL_KG.get_neighbors(Vertex(f"{URL}#Dean")))
コード例 #28
0
 def test_fetch_hops(self):
     """Check that ``fetch_hops`` on ``LOCAL_KG`` yields no hop for Alice."""
     alice = Vertex(f"{URL}#Alice")
     fetched = LOCAL_KG.fetch_hops(alice)
     assert fetched == []
コード例 #29
0
ファイル: test_graph.py プロジェクト: sw478/pyRDF2Vec
import multiprocessing
import os
import sys
import time

import pytest
import rdflib

from pyrdf2vec.graphs import KG, Vertex
from tests.rdflib_web.lod import serve

# Module-level vertices shared by the tests below. Their creation order
# matters: TestVertex asserts ``a.id == 0`` and ``b.id == 1`` (presumably ids
# come from a counter incremented per created Vertex -- TODO confirm against
# the Vertex implementation).
a = Vertex("a")
b = Vertex("b")
# Predicate vertex created with ``a`` as ``vprev`` and ``b`` as ``vnext``.
c = Vertex("c", predicate=True, vprev=a, vnext=b)


class TestVertex:
    """Equality and id checks on the module-level vertices ``a`` and ``b``."""

    def test_eq(self):
        # Comparing a vertex with itself exercises ``Vertex.__eq__``.
        assert a == a

    def test_eq_with_none(self):
        # ``a is not None`` is an identity check that never calls
        # ``Vertex.__eq__``; compare by value so the None case is exercised.
        assert a != None  # noqa: E711

    def test_id_incremental(self):
        # ``b`` is the second vertex created at module level.
        assert b.id == 1

    def test_id_init(self):
        # ``a`` is the first vertex created at module level.
        assert a.id == 0

    def test_neq(self):
        assert a != b
コード例 #30
0
 def test_weight(self, setup, kg, root, is_reverse):
     """Check that ``UniformSampler`` assigns a weight of 1 to every hop."""
     uniform = UniformSampler()
     start = Vertex(f"{URL}#{root}")
     for hop in kg.get_hops(start, is_reverse=is_reverse):
         hop_weight = uniform.get_weight(hop)
         assert hop_weight == 1