Beispiel #1
0
 def _reshape(self, resource: Resource, keep: List[str], versioned: bool) -> Resource:
     """Return a new Resource holding only the properties of `resource` listed in `keep`.

     Each entry of `keep` is a dotted path. The part before the first dot is
     read from `resource`; the remainder (if any) is forwarded to
     `_reshape_many` (list values) or `_reshape_one` (Resource values).
     Properties that are missing or None on `resource` are skipped.

     :param resource: the resource to reshape
     :param keep: dotted property paths to retain
     :param versioned: forwarded to the nested reshape calls; when True, a
         scalar "id" value is replaced by `self.versioned_id_template`
         formatted with `x=resource`
     :return: a new Resource with the retained properties
     """
     # TODO Use as base an implementation of JSONPath for Python. DKE-147.
     # Group the sub-paths by root in a single pass. A dict (instead of a set
     # of roots) keeps first-seen order, so attributes are set on the result in
     # the order given by `keep` rather than in arbitrary set order.
     leaves_by_root = {}
     for path in keep:
         root, dot, rest = path.partition(".")
         leaves = leaves_by_root.setdefault(root, [])
         if dot:
             leaves.append(rest)
     new = Resource()
     for root, leaves in leaves_by_root.items():
         value = getattr(resource, root, None)
         if value is None:
             continue
         if isinstance(value, list):
             new_value = self._reshape_many(value, leaves, versioned)
             # When a reshaped entry is an empty Resource and the original
             # entry was a plain string, keep the original string.
             for i, nv in enumerate(new_value):
                 if nv == Resource() and isinstance(value[i], str):
                     new_value[i] = value[i]
         elif isinstance(value, Resource):
             if leaves:
                 new_value = self._reshape_one(value, leaves, versioned)
             else:
                 # No sub-path requested: copy every non-reserved attribute.
                 properties = {k: v for k, v in value.__dict__.items()
                               if k not in value._RESERVED}
                 new_value = Resource(**properties)
         elif root == "id" and versioned:
             new_value = self.versioned_id_template.format(x=resource)
         else:
             new_value = value
         setattr(new, root, new_value)
     return new
Beispiel #2
0
def nested_registered_resource(nested_resource):
    """Return a Recipe resource with three Ingredient resources and a Person author.

    `add_metadata` is applied to the recipe through `do_recursive` before it
    is returned. The `nested_resource` argument is not used in the body.
    """
    items = [Resource(id=number, type='Ingredient') for number in range(3)]
    writer = Resource(id="a_person", type="Person")
    recipe = Resource(id="a_recipe", type="Recipe", ingridients=items, author=writer)
    do_recursive(add_metadata, recipe)
    return recipe
Beispiel #3
0
    def _add_prov_property(self, resource, prov_type, reference_property,
                           reference_type, keep, versioned, **kwargs):
        """Build a provenance Resource of type `prov_type` that references `resource`.

        :param resource: either an identifier string or a Resource
        :param prov_type: value of the `type` property of the returned Resource
        :param reference_property: name of the attribute that holds the reference
        :param reference_type: `type` given to the reference when `resource` is a string
        :param keep: property paths passed to `self._forge.reshape` when
            `resource` is a Resource
        :param versioned: passed to `self._forge.reshape`; combined with a
            string `resource` it is reported through `not_supported`
        :param kwargs: extra properties set on the returned Resource
        :return: the provenance Resource
        :raises TypeError: if `resource` is neither a str nor a Resource
        """
        if isinstance(resource, str):
            if versioned:
                not_supported(("versioned with resource:str", True))
            reference = Resource(type=reference_type, id=resource)
        elif isinstance(resource, Resource):
            reference = self._forge.reshape(resource, keep, versioned)
        else:
            # Previously this fell through and failed later with an
            # UnboundLocalError on `reference`; fail early with a clear message.
            raise TypeError(
                f"resource must be a str or a Resource, not {type(resource).__name__}")
        result = Resource(type=prov_type, **kwargs)
        setattr(result, reference_property, reference)
        return result
Beispiel #4
0
def test_reshape(config):
    """Check that reshaping on 'type' keeps only the type, as a scalar or as a list."""
    forge = KnowledgeGraphForge(config)
    template = "{x.id}?_version={x._store_metadata.version}"
    reshaper = Reshaper(versioned_id_template=template)

    # Scalar type, not versioned.
    scalar_typed = Resource(type="Experiment", url="file.gz")
    reshaped = reshaper.reshape(scalar_typed, keep=['type'], versioned=False)
    assert {"type": "Experiment"} == forge.as_json(reshaped)

    # List type, versioned.
    list_typed = Resource(type=["Experiment"], url="file.gz")
    reshaped = reshaper.reshape(list_typed, keep=['type'], versioned=True)
    assert {"type": ["Experiment"]} == forge.as_json(reshaped)
def test_from_resource(config, person, organization, store_metadata_value):
    """Check Dataset.from_resource on a single resource, with store metadata, and on a list."""
    forge = KnowledgeGraphForge(config)
    expected_json = {
        'id': 'c51f4e4e-2b30-41f4-8f7c-aced85632b03',
        'type': ['Person', 'Agent'],
        'name': 'Jami Booth'
    }

    # Plain resource: same JSON as the source, no store metadata on the result.
    assert isinstance(person, Resource)
    converted = Dataset.from_resource(forge, person)
    assert isinstance(converted, Dataset)
    assert forge.as_json(converted) == forge.as_json(person)
    assert forge.as_json(converted) == expected_json
    assert converted._store_metadata is None

    # Resource carrying store metadata, converted with store_metadata=True.
    stored_person = Resource(**forge.as_json(person))
    stored_person._store_metadata = store_metadata_value
    converted = Dataset.from_resource(forge, stored_person, store_metadata=True)
    assert isinstance(converted, Dataset)
    stored_person_json = forge.as_json(stored_person)
    assert forge.as_json(converted) == stored_person_json
    assert forge.as_json(converted) == expected_json
    assert converted._store_metadata == stored_person._store_metadata
    json_without_metadata = forge.as_json(converted, store_metadata=False)
    assert json_without_metadata != forge.as_json(stored_person, store_metadata=True)
    json_with_metadata = forge.as_json(converted, store_metadata=True)
    assert json_with_metadata == forge.as_json(stored_person, store_metadata=True)

    # A list of resources yields a list of the same length.
    assert isinstance(organization, Resource)
    converted_many = Dataset.from_resource(forge, [person, organization])
    assert isinstance(converted_many, List)
    assert len(converted_many) == 2
Beispiel #6
0
def building():
    """Return a Resource describing the Empire State Building."""
    properties = {
        "type": "Building",
        "name": "The Empire State Building",
        "description": "The Empire State Building is a 102-story landmark in New York City.",
        "image": "http://www.civil.usherbrooke.ca/cours/gci215a/empire-state-building.jpg",
        "geo": {"latitude": "40.75"},
    }
    return Resource(**properties)
Beispiel #7
0
def test_collect_values():
    """Check collect_values on flat, nested and list inputs, and on a missing path."""
    # Direct property on a single resource.
    simple = Resource(type="Experiment", url="file.gz")
    found = collect_values(simple, "url")
    assert simple.url in found, "url should be in the list"

    # Property reached through two nested resources.
    nested = Resource(level2=Resource(url="file.gz"))
    deep = Resource(type="Experiment", level1=nested)
    found = collect_values(deep, "level1.level2.url")
    assert deep.level1.level2.url in found, "url should be in the list"

    # List of resources: three with `url`, one with `contentUrl`.
    with_url = [Resource(type="Experiment", url=f"file{i}") for i in range(3)]
    with_content_url = Resource(type="Experiment", contentUrl="file3")
    files = with_url + [with_content_url]
    first_three = ["file0", "file1", "file2"]
    found = collect_values(files, "url")
    assert first_three == found, "three elements should be in the list"
    found = collect_values(files, "contentUrl")
    assert ["file3"] == found, "one element should be in the list"

    # Same list reached through a parent resource.
    data_set = Resource(type="Dataset", hasPart=files)
    found = collect_values(data_set, "hasPart.contentUrl")
    assert ["file3"] == found, "one element should be in the list"
    found = collect_values(data_set, "hasPart.url")
    assert first_three == found, "three elements should be in the list"

    # Unknown path gives an empty result; a None input raises the given exception.
    found = collect_values(data_set, "fake.path")
    assert len(found) == 0
    with pytest.raises(ValueError):
        collect_values(None, "hasPart.url", ValueError)
Beispiel #8
0
 def add_derivation(self,
                    resource: Resource,
                    versioned: bool = True,
                    **kwargs) -> None:
     """Record that the dataset derives from `resource`.

     The resource is reshaped to its id, type and name, wrapped in a
     Derivation resource together with `kwargs`, and stored under "derivation".
     """
     reshaped = self._forge.reshape(resource, ["id", "type", "name"], versioned)
     _set(self, "derivation", Resource(type="Derivation", entity=reshaped, **kwargs))
Beispiel #9
0
def test_download(config):
    """Check that downloading from a path absent on the resource raises DownloadingError."""
    simple = Resource(type="Experiment", url="file.gz")
    # Build the forge outside the `raises` block so that only the download
    # call itself can satisfy the expected DownloadingError.
    forge = KnowledgeGraphForge(config)
    with pytest.raises(DownloadingError):
        forge._store.download(simple,
                              "fake.path",
                              "./",
                              overwrite=False,
                              cross_bucket=False)
Beispiel #10
0
 def _elastic(self, query: str, limit: int, offset: int = None) -> List[Resource]:
     """POST `query` to the configured Elasticsearch endpoint and wrap each hit in a Resource.

     `limit` and `offset` are accepted but not used by this implementation.
     Any exception raised while sending the request or checking its status
     is re-raised as QueryingError.
     """
     try:
         reply = requests.post(self.service.elastic_endpoint["endpoint"],
                               data=query,
                               headers=self.service.headers_elastic)
         reply.raise_for_status()
     except Exception as e:
         raise QueryingError(e)
     # Reached only when the request succeeded.
     hits = reply.json()["hits"]['hits']
     return [Resource(**dict(hit.items())) for hit in hits]
Beispiel #11
0
def _from_json(data: Union[Any, List[Any]], na: List[Any]) -> Any:
    if isinstance(data, List):
        return [_from_json(x, na) for x in data]
    elif isinstance(data, Dict):
        properties = {
            k: _from_json(v, na)
            for k, v in data.items() if v not in na
        }
        return Resource(**properties)
    else:
        return data
def test_execute_lazy_actions():
    """Check that lazy actions at every nesting level are replaced by their results."""
    identity = lambda x: x
    nested_with_action = Resource(pc1=LazyAction(identity, "pc1 executed"), pc2="pc2")
    mixed_list = [
        LazyAction(identity, "pa5[0] executed"),
        123,
        Resource(pd1=LazyAction(identity, "pd1 executed")),
        "string",
    ]
    root = Resource(pa1="pa1",
                    pa2=Resource(pb1="pb1"),
                    pa3=LazyAction(identity, "pa3 executed"),
                    pa4=nested_with_action,
                    pa5=mixed_list)

    actions = collect_lazy_actions(root)
    execute_lazy_actions(root, actions)

    # Plain values are untouched; every LazyAction now holds its result.
    assert root.pa1 == "pa1"
    assert root.pa2.pb1 == "pb1"
    assert root.pa3 == "pa3 executed"
    assert root.pa4.pc1 == "pc1 executed"
    assert root.pa4.pc2 == "pc2"
    assert root.pa5[0] == "pa5[0] executed"
    assert root.pa5[1] == 123
    assert root.pa5[2].pd1 == "pd1 executed"
    assert root.pa5[3] == "string"
Beispiel #13
0
    def _sparql(self, query: str, limit: int, offset: int = None) -> List[Resource]:
        """Run a SPARQL query against the configured endpoint and return Resources.

        LIMIT and OFFSET clauses are appended to `query` when `limit` / `offset`
        are not None. For a CONSTRUCT query the returned bindings are grouped
        per subject and each group is turned into one Resource; otherwise each
        binding row becomes one Resource.

        :param query: the SPARQL query text
        :param limit: value for the appended LIMIT clause, or None for no clause
        :param offset: value for the appended OFFSET clause, or None for no clause
        :return: the list of resulting Resources
        :raises QueryingError: if sending the request or its status check fails
        """

        s_offset = "" if offset is None else f"OFFSET {offset}"
        s_limit = "" if limit is None else f"LIMIT {limit}"
        query = f"{query} {s_limit} {s_offset}"
        try:
            response = requests.post(
                self.service.sparql_endpoint["endpoint"], data=query, headers=self.service.headers_sparql)
            response.raise_for_status()
        except Exception as e:
            raise QueryingError(e)
        else:
            data = response.json()
            # FIXME workaround to parse a CONSTRUCT query, this fix depends on
            #  https://github.com/BlueBrain/nexus/issues/1155
            # The query (with LIMIT/OFFSET appended) is parsed only to find out its kind.
            _, q_comp = Query.parseString(query)
            if q_comp.name == "ConstructQuery":
                # Maps each subject IRI to the text of all its triples, one per line.
                subject_triples = {}
                for r in data["results"]["bindings"]:
                    subject = r['subject']['value']
                    s = f"<{r['subject']['value']}>"
                    p = f"<{r['predicate']['value']}>"
                    if r["object"]["type"] == "uri":
                        o = f"<{r['object']['value']}>"
                    else:
                        # NOTE(review): the literal value is interpolated without escaping
                        # quotes or newlines — confirm the endpoint never returns such values.
                        if "datatype" in r["object"]:
                            o = f"\"{r['object']['value']}\"^^{r['object']['datatype']}"
                        else:
                            o = f"\"{r['object']['value']}\""
                    if subject in subject_triples:
                        subject_triples[subject] += f"\n{s} {p} {o} . "
                    else:
                        subject_triples[subject] = f"{s} {p} {o} . "

                def triples_to_resource(iri, triples):
                    # Parse the triples text as "nt", serialize to JSON-LD, frame on
                    # the subject IRI, compact with the context, then build a Resource.
                    graph = Graph().parse(data=triples, format="nt")
                    # NOTE(review): .decode() assumes serialize() returns bytes — confirm
                    # for the rdflib version in use.
                    data_expanded = json.loads(graph.serialize(format="json-ld").decode("utf-8"))
                    frame = {"@id": iri}
                    data_framed = jsonld.frame(data_expanded, frame)
                    # The model context takes precedence over self.context when it is set.
                    context = self.model_context or self.context
                    compacted = jsonld.compact(data_framed, context.document)
                    resource = from_jsonld(compacted)
                    resource.context = context.iri if context.is_http_iri() else context.document["@context"]
                    return resource

                return [triples_to_resource(s, t) for s, t in subject_triples.items()]

            else:
                # SELECT QUERY
                # Each binding row becomes a Resource mapping variable name to its "value".
                results = data["results"]["bindings"]
                return [Resource(**{k: v["value"] for k, v in x.items()}) for x in results]
Beispiel #14
0
def test_freeze(config, store_metadata_value):
    """Check provenance properties of a dataset before freezing and the versioned ids after."""
    forge = KnowledgeGraphForge(config, debug=True)

    derived_from = Dataset(forge, type="Dataset", name="A derivation dataset")
    derived_from.id = "http://derivation1"
    derived_from._store_metadata = wrap_dict(store_metadata_value)

    generated_by = Dataset(forge, type="Dataset", name="A generation dataset")
    generated_by.id = "http://generation1"
    generated_by._store_metadata = wrap_dict(store_metadata_value)

    invalidated_by = Dataset(forge, type="Activity", name="An invalidation activity")
    invalidated_by.id = "http://invalidation1"
    invalidated_by._store_metadata = wrap_dict(store_metadata_value)

    contributor = Resource(type="Person", name="A contributor")
    contributor.id = "http://contribution1"
    contributor._store_metadata = wrap_dict(store_metadata_value)

    dataset = Dataset(forge, type="Dataset", name="A dataset")
    dataset._store_metadata = wrap_dict(store_metadata_value)
    dataset.add_derivation(derived_from, versioned=False)
    dataset.add_generation(generated_by, versioned=False)
    dataset.add_invalidation(invalidated_by, versioned=False)
    dataset.add_contribution(contributor, versioned=False)

    # Before freezing, the references carry the plain (unversioned) ids.
    expected_derivation = json.loads(json.dumps({
        "type": "Derivation",
        "entity": {"id": "http://derivation1", "type": "Dataset", "name": "A derivation dataset"},
    }))
    assert forge.as_json(dataset.derivation) == expected_derivation

    expected_generation = json.loads(json.dumps({
        "type": "Generation",
        "activity": {"id": "http://generation1", "type": "Dataset"},
    }))
    assert forge.as_json(dataset.generation) == expected_generation

    expected_contribution = json.loads(json.dumps({
        "type": "Contribution",
        "agent": {"id": "http://contribution1", "type": "Person"},
    }))
    assert forge.as_json(dataset.contribution) == expected_contribution

    expected_invalidation = json.loads(json.dumps({
        "type": "Invalidation",
        "activity": {"id": "http://invalidation1", "type": "Activity"},
    }))
    assert forge.as_json(dataset.invalidation) == expected_invalidation

    # After freezing, the dataset id and every referenced id end with the version.
    dataset.id = "http://dataset"
    dataset._synchronized = True
    forge._store.freeze(dataset)
    assert dataset.id == "http://dataset?_version=1"
    assert dataset.derivation.entity.id == "http://derivation1?_version=1"
    assert dataset.generation.activity.id == "http://generation1?_version=1"
    assert dataset.contribution.agent.id == "http://contribution1?_version=1"
    assert dataset.invalidation.activity.id == "http://invalidation1?_version=1"
Beispiel #15
0
def resource(valid: bool, index: int = 0) -> Resource:
    """Create a Person resource with a random UUID id.

    The `name` property ("resource {index}") is set only when `valid` is true.
    """
    created = Resource(type="Person", id=str(uuid4()))
    if not valid:
        return created
    created.name = f"resource {index}"
    return created
Beispiel #16
0
def organization(registered_person_custom_context, store_metadata_value):
    """Return an Organization/Agent resource founded by the given registered person.

    The `store_metadata_value` argument is not used in the body.
    """
    return Resource(
        type=["Organization", "Agent"],
        name="Reichel Inc",
        founder=registered_person_custom_context,
    )
Beispiel #17
0
def person(custom_context):
    """Return a Person/Agent resource named "Jami Booth" using the given context."""
    properties = {
        "context": custom_context,
        "type": ["Person", "Agent"],
        "name": "Jami Booth",
    }
    return Resource(**properties)
Beispiel #18
0
 def add_files(self, path: str, content_type: str = None) -> None:
     """Attach the files found at `path` (a directory path) as parts of the dataset.

     The attach action returned by the forge is wrapped as the `distribution`
     of a new Resource, which is stored under "hasPart".
     """
     part = Resource(distribution=self._forge.attach(path, content_type))
     _set(self, "hasPart", part)
 def valid_activity_resource(self, activity_json):
     """Return a Resource built from `activity_json` with its id set to a fixed test IRI."""
     activity = Resource(**activity_json)
     setattr(activity, "id", "http://testing/123")
     return activity
 def test_as_dataframe_expanded(self, forge, df_expanded_from_one_resource):
     """Check that one resource converted with expanded=True equals the expected dataframe."""
     john = Resource(type="Person", name="John Doe")
     frame = forge.as_dataframe(john, expanded=True)
     assert frame.equals(df_expanded_from_one_resource)
 def entity_resource(self):
     """Return a Resource whose type is "Entity"."""
     entity = Resource(type="Entity")
     return entity
Beispiel #22
0
 def add_invalidation(self, **kwargs) -> None:
     """Record the invalidation of the dataset.

     The keyword arguments become the properties of an Invalidation resource
     stored under "invalidation". Raises TypeError when none are given.
     """
     if not kwargs:
         raise TypeError("at least one argument should be given")
     _set(self, "invalidation", Resource(type="Invalidation", **kwargs))
def r5():
    """Return a Resource with p5="v5e" and p6="v6e"."""
    values = {"p5": "v5e", "p6": "v6e"}
    return Resource(**values)
Beispiel #24
0
 def add_contribution(self, agent_id: str, **kwargs) -> None:
     """Record the contribution of an agent to the generation of the dataset.

     `agent_id` (an IRI) identifies the Agent; `kwargs` become extra properties
     of the Contribution resource stored under "contribution".
     """
     contribution = Resource(type="Contribution",
                             agent=Resource(type="Agent", id=agent_id),
                             **kwargs)
     _set(self, "contribution", contribution)
Beispiel #25
0
def nested_resource():
    """Return an Agent resource named "someone" holding three titled contribution resources."""
    titles = [f"contribution {number}" for number in range(3)]
    contributions = [Resource(title=title) for title in titles]
    return Resource(type="Agent", name="someone", contributions=contributions)
Beispiel #26
0
 def add_generation(self, **kwargs) -> None:
     """Record the activity that resulted in the generation of the dataset.

     The keyword arguments become the properties of a Generation resource
     stored under "generation". Raises TypeError when none are given.
     """
     if not kwargs:
         raise TypeError("at least one argument should be given")
     _set(self, "generation", Resource(type="Generation", **kwargs))
 def invalid_activity_resource(self, activity_json):
     """Return a Resource built directly from `activity_json`, without assigning an id."""
     activity = Resource(**activity_json)
     return activity