def test_round_trip(self, kg_client):
    """Build a Trace, serialise it, and rebuild it from the resulting Instance.

    Plain fields must compare equal after the round trip; linked objects must
    come back as KGProxy objects carrying the same id and type.
    """
    plain_fields = ("name", "data_location", "channel", "data_unit", "time_step")
    linked_fields = ("generated_by", "generation_metadata", "part_of")

    location = Distribution("http://example.com/example.csv",
                            content_type="text/tab-separated-values")
    experiment = MockKGObject(id="http://fake_uuid_abc123",
                              type=PatchClampExperiment.type)
    generation = MockKGObject(id="http://fake_uuid_def456",
                              type=QualifiedTraceGeneration.type)
    step = QuantitativeValue(0.1, "ms")
    dataset = MockKGObject(id="http://fake_uuid_ghi789", type=Dataset.type)
    trace1 = Trace("example001",
                   data_location=location,
                   generated_by=experiment,
                   generation_metadata=generation,
                   channel=42,
                   data_unit="mV",
                   time_step=step,
                   part_of=dataset)

    payload = trace1._build_data(kg_client)
    instance = Instance(Trace.path, payload, Instance.path)
    # The id and type are filled in by hand here rather than by _build_data.
    instance.data["@id"] = "http://fake_uuid_6a5d6ecf87"
    instance.data["@type"] = Trace.type
    trace2 = Trace.from_kg_instance(instance, kg_client)

    for field in plain_fields:
        assert getattr(trace1, field) == getattr(trace2, field)
    for field in linked_fields:
        before = getattr(trace1, field)
        after = getattr(trace2, field)
        assert isinstance(after, KGProxy)
        assert before.id == after.id
        assert before.type == after.type
def instance_from_full_uri(self, uri, cls=None, use_cache=True, deprecated=False,
                           api="query", scope="released", resolved=False):
    """Return the Instance identified by `uri`, or None if it is not available.

    Arguments:
        uri -- full URI of the instance
        cls -- class providing `path`, `query_id` and `query_id_resolved`;
               needed when api="query"
        use_cache -- if True, a cached instance for `uri` is returned directly
        deprecated -- only consulted when api="nexus"; 'deprecated=True' means
                      'returns an instance even if that instance is deprecated'
                      (should perhaps be called 'show_deprecated' or
                      'include_deprecated')
        api -- "nexus" or "query"
        scope -- key into SCOPE_MAP, used when api="query"
        resolved -- if True, `cls.query_id_resolved` is used instead of
                    `cls.query_id`

    Raises:
        NotImplementedError -- api="query" but `cls` has no usable query id
        ValueError -- `api` is neither "nexus" nor "query"
    """
    logger.debug(
        "Retrieving instance from {}, api='{}' use_cache={}".format(
            uri, api, use_cache))

    if use_cache and uri in self.cache:
        logger.debug("Retrieving instance {} from cache".format(uri))
        return self.cache[uri]

    if api == "nexus":
        instance_id = Instance.extract_id_from_url(uri, self._instance_repo.path)
        payload = self._instance_repo._http_client.get(uri)
        fetched = Instance(instance_id, data=payload, root_path=Instance.path)
        if not (fetched and fetched.data and "@id" in fetched.data):
            return None
        if deprecated is False and fetched.data["nxv:deprecated"]:
            logger.debug("Not returning deprecated instance")
            return None
        self.cache[fetched.data["@id"]] = fetched
        logger.debug("Retrieved instance from KG Nexus" + str(fetched.data))
        return fetched

    if api == "query":
        if not (cls and hasattr(cls, "query_id") and cls.query_id is not None):
            raise NotImplementedError(
                "No query id available: cls={}".format(str(cls)))
        query_id = cls.query_id_resolved if resolved else cls.query_id
        url = "{}/{}/instances?databaseScope={}&id={}".format(
            cls.path, query_id, SCOPE_MAP[scope], uri)
        response = self._kg_query_client.get(url)
        if response and len(response["results"]) > 0:
            # Only the first result is used.
            fetched = Instance(cls.path, response["results"][0], Instance.path)
            self.cache[fetched.data["@id"]] = fetched
            logger.debug("Retrieved instance from KG Query" + str(fetched.data))
            return fetched
        logger.warning(
            "Instance not found at {} using KG Query API".format(uri))
        return None

    raise ValueError("'api' must be either 'nexus' or 'query'")
def test_round_trip(self, kg_client):
    """Build a Person, serialise it, rebuild it and compare the listed fields."""
    compared_fields = ("family_name", "given_name", "email", "affiliation",
                       "full_name")

    affiliation = KGProxy(Organization, "http://fake_uuid_855fead8")
    p1 = Person("Hamilton", "Margaret", "*****@*****.**", affiliation)

    payload = p1._build_data(kg_client)
    instance = Instance(Person.path, payload, Instance.path)
    # The id and type are filled in by hand here rather than by _build_data.
    instance.data["@id"] = "http://fake_uuid_8ab3dc739b"
    instance.data["@type"] = Person.type

    p2 = Person.from_kg_instance(instance, kg_client)
    for field in compared_fields:
        assert getattr(p1, field) == getattr(p2, field)
def instance_from_full_uri(self, uri):
    """Return the Instance for `uri`, fetching and caching it on a cache miss.

    A cached entry for `uri` is returned as is. Otherwise the data is fetched
    through the instance repository's HTTP client, wrapped in an Instance and
    stored in the cache under the "@id" found in the fetched data.
    """
    if uri in self.cache:
        return self.cache[uri]

    instance_id = Instance.extract_id_from_url(uri, self._instance_repo.path)
    payload = self._instance_repo._http_client.get(uri)
    fetched = Instance(instance_id, data=payload, root_path=Instance.path)
    self.cache[fetched.data["@id"]] = fetched
    return fetched
def test_round_trip(self, kg_client):
    """Build an Organization, serialise it, rebuild it and compare fields."""
    compared_fields = ("name", "address", "parent")

    address = Address(locality="Saclay", country="France")
    parent = KGProxy(Organization, "http://fake_uuid_00481be7a1")
    obj1 = Organization(name="NeuroPSI", address=address, parent=parent)

    payload = obj1._build_data(kg_client)
    instance = Instance(Organization.path, payload, Instance.path)
    # The id and type are filled in by hand here rather than by _build_data.
    instance.data["@id"] = "http://fake_uuid_7bb3c1e78b"
    instance.data["@type"] = Organization.type

    obj2 = Organization.from_kg_instance(instance, kg_client)
    for field in compared_fields:
        assert getattr(obj1, field) == getattr(obj2, field)
def test_round_trip(self, kg_client):
    """Build a Subject, serialise it, rebuild it and compare fields."""
    compared_fields = ("name", "species", "strain", "sex", "age", "death_date")

    species = Species("Mus musculus")
    strain = Strain("129/Sv")
    sex = Sex("male")
    age = Age(QuantitativeValue(20, "days"), "Post-natal")
    obj1 = Subject(name="Mickey",
                   species=species,
                   strain=strain,
                   sex=sex,
                   age=age,
                   death_date=datetime(1960, 1, 1))

    payload = obj1._build_data(kg_client)
    instance = Instance(Subject.path, payload, Instance.path)
    # The id and type are filled in by hand here rather than by _build_data.
    instance.data["@id"] = "http://fake_uuid_9ab2227fe1"
    instance.data["@type"] = Subject.type

    obj2 = Subject.from_kg_instance(instance, kg_client)
    for field in compared_fields:
        assert getattr(obj1, field) == getattr(obj2, field)
def instance_from_full_uri(self, uri, use_cache=True):
    """Return the Instance for `uri`, optionally served from the cache.

    With `use_cache` true and `uri` present in the cache, the cached entry is
    returned. Otherwise the data is fetched through the instance repository's
    HTTP client, wrapped in an Instance, stored in the cache under the "@id"
    found in the fetched data, and returned.
    """
    if use_cache and uri in self.cache:
        logger.debug("Retrieving instance from cache")
        return self.cache[uri]

    instance_id = Instance.extract_id_from_url(uri, self._instance_repo.path)
    payload = self._instance_repo._http_client.get(uri)
    fetched = Instance(instance_id, data=payload, root_path=Instance.path)
    self.cache[fetched.data["@id"]] = fetched
    logger.debug("Retrieved instance from KG " + str(fetched.data))
    return fetched
def query_kgquery(self, path, query_id, filter, from_index=0, size=100, scope="released"):
    """Run a stored KG Query and return the matching instances.

    Arguments:
        path -- path of the queried type; also used as the id of each
                returned Instance
        query_id -- identifier of the stored query
        filter -- optional mapping of query-parameter name to value. Values
                  with an `iri` attribute are replaced by that attribute;
                  each value is UTF-8 encoded and URL-quoted. The mapping
                  passed in is not modified.
        from_index -- value of the `start` parameter of the first request
        size -- value of the `size` parameter; it also bounds the index up
                to which further pages are requested
        scope -- key into SCOPE_MAP

    Returns:
        A list of Instance objects (empty if the response has no "results"
        or the server answers 403). Each one is stored in the cache under
        its "@id" and tagged with "fg:api" = "query".

    Raises:
        ValueError -- `scope` is not a key of SCOPE_MAP
    """
    # Validate before the first SCOPE_MAP lookup so that an unknown scope
    # raises the intended ValueError instead of a bare KeyError.
    if scope not in SCOPE_MAP:
        raise ValueError("'scope' must be either '{}'".format(
            "' or '".join(list(SCOPE_MAP))))

    # The doubled braces leave a "{}" placeholder for the start index.
    template = "{}/{}/instances?start={{}}&size={}&databaseScope={}".format(
        path, query_id, size, SCOPE_MAP[scope])
    if filter:
        # Resolve IRIs into a fresh dict so the caller's filter is untouched.
        params = {key: getattr(value, "iri", value)
                  for key, value in filter.items()}
        template += "&" + "&".join(
            "{}={}".format(k, quote_plus(v.encode("utf-8")))
            for k, v in params.items())

    start = from_index
    try:
        response = self._kg_query_client.get(template.format(start))
    except HTTPError as err:
        # A 403 is treated as "no results"; anything else propagates.
        if err.response.status_code == 403:
            response = None
        else:
            raise

    if response and "results" in response:
        instances = [
            Instance(path, data, Instance.path)
            for data in response["results"]
        ]
        start += response["size"]
        while start < min(response["total"], size):
            response = self._kg_query_client.get(template.format(start))
            instances.extend([
                Instance(path, data, Instance.path)
                for data in response["results"]
            ])
            start += response["size"]
    else:
        instances = []

    for instance in instances:
        self.cache[instance.data["@id"]] = instance
        instance.data["fg:api"] = "query"
    return instances
def create_instance_by_file(self, file_path, fully_qualify=False):
    """Create a new instance for the provided data

    Arguments:
        file_path -- path to the location of the file to be uploaded as instance
        fully_qualify -- if True, prefixes are resolved and the JSON-LD to be uploaded will be interpretable as JSON (but with non-human-friendly, fully qualified keys)

    Returns:
        None if a checksum file for the current content already exists,
        the result of the update if an instance with the same
        schema.org identifier was found, otherwise the result of the create.
    """
    with open(os.path.abspath(file_path)) as metadata_file:
        file_content = metadata_file.read()

    raw_json = self.__resolve_entities(file_content)
    raw_json = self.__fill_placeholders(raw_json)
    if fully_qualify:
        final_json = Entity.fully_qualify(json.loads(raw_json))
    elif type(raw_json) is not dict:
        final_json = json.loads(raw_json)
    else:
        final_json = raw_json
    schema_data = SchemaOrContextData.by_filepath(file_path, final_json)
    schema_identifier = "http://schema.org/identifier"
    if self._upload_fully_qualified:
        raw_json = final_json

    def new_instance():
        # Fresh Instance built from the schema coordinates and the payload.
        return Instance.create_new(schema_data.organization,
                                   schema_data.domain,
                                   schema_data.name,
                                   schema_data.version,
                                   raw_json)

    instance = new_instance()
    if schema_identifier in final_json:
        checksum = instance.get_checksum()
        checksum_path = "{}.{}.chksum".format(file_path, checksum)
        if os.path.exists(checksum_path):
            LOGGER.debug("{} is unchanged - no upload required".format(
                file_path))
            return None

        identifier = final_json.get(schema_identifier)
        if type(identifier) is list:
            identifier = identifier[0]
        found_instances = self._client.instances.find_by_field(
            instance.id, schema_identifier, identifier)
        if found_instances and len(found_instances.results) > 0:
            # Point the new instance at the existing one and update in place.
            instance.path = found_instances.results[0].self_link
            instance.id = Instance.extract_id_from_url(
                instance.path, instance.root_path)
            result = self._client.instances.update(instance)
            # Opening in append mode creates the marker file if it is missing.
            with open(checksum_path, 'a'):
                pass
            return result

    return self._client.instances.create(new_instance())
def test_create_turtle_instance(self):
    """Create a turtle-format instance and read it back.

    The "test" organization, the "test/core" domain and the
    "test/core/turtle/v0.0.4" schema are created first when reading them
    returns None; the schema is published if it is not already.
    """
    organization = self.client.organizations.read("test")
    if organization is None:
        self.client.organizations.create(
            Organization.create_new("test", "An organization for tests"))

    domain = self.client.domains.read("test", "core")
    if domain is None:
        self.client.domains.create(
            Domain.create_new("test", "core", "A domain for tests"))

    schema = self.client.schemas.read("test", "core", "turtle", "v0.0.4")
    if schema is None:
        schema = self.client.schemas.create(
            Schema.create_new("test", "core", "turtle", "v0.0.4",
                              self.test_turtle_schema, is_turtle=True))
    if not schema.is_published():
        self.client.schemas.publish(schema, True)

    instance = self.client.instances.create(
        Instance.create_new("test", "core", "turtle", "v0.0.4",
                            self.test_turtle_instance, is_turtle=True))
    instance = self.client.instances.read(instance.get_organization(),
                                          instance.get_domain(),
                                          instance.get_schema(),
                                          instance.get_version(),
                                          instance.get_id())
    # Call form rather than the Python 2 print statement: with a single
    # argument it prints the same on Python 2 and is valid on Python 3.
    print(instance)
def by_name(self, cls, name, match="equals", all=False, api="query", scope="released", resolved=False):
    """Retrieve an object based on the value of schema:name

    Arguments:
        cls -- class to look up; must provide `path` and `from_kg_instance`,
               and `query_id` / `query_id_resolved` when api="query"
        name -- name to search for
        match -- "equals", or (api="query" only) "contains"
        all -- if True return a list of all matches, otherwise only the
               first match
        api -- "query" or "nexus"
        scope -- key into SCOPE_MAP, used when api="query"
        resolved -- passed on to `cls.from_kg_instance`; with api="query" it
                    also selects `cls.query_id_resolved`

    Returns:
        One object, a list of objects (all=True), or None if nothing matched.

    Raises:
        ValueError -- `api` or `match` is not one of the accepted values
        NotImplementedError -- api="query" but `cls` has no query id
    """
    # todo: allow non-exact searches
    if api not in ("query", "nexus"):
        raise ValueError("'api' must be either 'nexus' or 'query'")
    valid_match_methods = {
        #"query": ("starts_with", "ends_with", "contains", "equals", "regex"),
        "query": ("contains", "equals"),
        # One-element tuple: without the trailing comma this is the string
        # "equals" and `in` would perform a substring test.
        "nexus": ("equals",),
    }
    if match not in valid_match_methods[api]:
        raise ValueError("'match' must be one of {}".format(
            valid_match_methods[api]))

    if api == "nexus":
        op = {"equals": "eq", "contains": "in"}[match]
        context = {"schema": "http://schema.org/"}
        query_filter = {"path": "schema:name", "op": op, "value": name}
        instances = self.query_nexus(cls.path, query_filter, context)
    else:
        if not (hasattr(cls, "query_id") and cls.query_id is not None):
            raise NotImplementedError(
                "Coming soon. For now, please use api='nexus'")
        query_id = cls.query_id_resolved if resolved else cls.query_id
        # workaround: "contains" searches use a separately named stored query
        suffix = "_name_contains" if match == "contains" else ""
        response = self._kg_query_client.get(
            "{}/{}{}/instances?databaseScope={}&name={}".format(
                cls.path, query_id, suffix, SCOPE_MAP[scope], name))
        instances = [
            Instance(cls.path, result, Instance.path)
            for result in response["results"]
        ]

    if not instances:
        return None
    if all:
        return [
            cls.from_kg_instance(inst, self, resolved=resolved)
            for inst in instances
        ]
    # return only the first result
    return cls.from_kg_instance(instances[0], self, resolved=resolved)
def create(self, entity):
    """POST `entity` and update it in place from the response.

    On success the entity's data is replaced by the response, its id is
    derived from the response's "@id", and its path is rebuilt.

    Raises:
        ValueError -- the POST returned None
    """
    response = self._http_client.post(entity.path, entity.data)
    if response is None:
        raise ValueError("Entity was not created")

    entity.data = response
    created_url = response.get("@id")
    entity.id = Instance.extract_id_from_url(created_url, self.path)
    entity.build_path()
    return entity
def test_create_and_deprecate(self):
    """Create an instance, check the created entity, then deprecate it."""
    entity = Instance.create_new(self.default_prefix,
                                 "core",
                                 "schematest",
                                 "v0.0.1",
                                 self.test_instance)
    created = self.repository.create(entity)

    assert_that(created, equal_to(entity))
    self._assert_valid_default_entity(created, entity.id)
    assert_that(created.get_revision(), equal_to(1))

    self._test_deprecate(entity)
def create_instance(self, data, schema_data, fail_if_linked_instance_is_missing=True):
    """Create a new instance for the provided data, or update a known one.

    Arguments:
        data -- the instance payload; a dict is serialised with json.dumps,
                anything else is passed to `resolve_entities` unchanged
        schema_data -- object providing the `organization`, `domain`, `name`
                       and `version` of the target schema
        fail_if_linked_instance_is_missing -- forwarded to `resolve_entities`

    Returns:
        The result of `handle_known_schema_identifier` when the payload has a
        schema.org identifier and that call returns something other than
        None; otherwise the result of creating a new instance.
    """
    raw_json = self.resolve_entities(
        data if not isinstance(data, dict) else json.dumps(data),
        fail_if_linked_instance_is_missing)
    raw_json = self._fill_placeholders(raw_json)
    fully_qualified_json = Entity.fully_qualify(json.loads(raw_json))

    schema_identifier = "http://schema.org/identifier"
    hashcode_field = "http://hbp.eu/internal#hashcode"

    # The fully qualified form is always needed for the identifier and
    # hashcode checks below; it is uploaded only when so configured.
    if self._upload_fully_qualified:
        raw_json = fully_qualified_json
    instance = Instance.create_new(schema_data.organization,
                                   schema_data.domain, schema_data.name,
                                   schema_data.version, raw_json)

    if hashcode_field not in fully_qualified_json:
        current_hashcode = Entity.do_get_checksum(fully_qualified_json)
        fully_qualified_json[hashcode_field] = current_hashcode
        instance.data[hashcode_field] = current_hashcode
    else:
        current_hashcode = fully_qualified_json[hashcode_field]

    if schema_identifier in fully_qualified_json:
        identifier = fully_qualified_json.get(schema_identifier)
        result = self.handle_known_schema_identifier(
            schema_identifier, instance, hashcode_field, current_hashcode,
            identifier)
        if result is not None:
            return result

    return self._client.instances.create(
        Instance.create_new(schema_data.organization, schema_data.domain,
                            schema_data.name, schema_data.version, raw_json))
def test_round_trip_with_morphology_file(self, kg_client):
    """Round-trip an object built with a morphology_file and compare fields.

    Intrinsic fields whose first declared type is a KGObject subclass must be
    a MockKGObject before and a KGProxy of the same type after the round
    trip; other intrinsic fields must compare equal. The morphology_file
    attribute must survive as well.
    """
    cls = self.class_under_test
    obj1 = cls("test_morph", morphology_file="http://example.com/test.asc")

    payload = obj1._build_data(kg_client)
    instance = Instance(cls.path, payload, Instance.path)
    instance.data["@id"] = random_uuid()
    instance.data["@type"] = cls.type
    obj2 = cls.from_kg_instance(instance, kg_client)

    for field in cls.fields:
        if not field.intrinsic:
            continue
        val1 = getattr(obj1, field.name)
        val2 = getattr(obj2, field.name)
        if not issubclass(field.types[0], KGObject):
            assert val1 == val2
            continue
        assert isinstance(val1, MockKGObject)
        assert isinstance(val2, KGProxy)
        assert val1.type == val2.cls.type

    assert obj1.morphology_file == obj2.morphology_file
def test_round_trip_minimal_random(self, kg_client):
    """Round-trip a randomly generated object built with all_fields=False.

    Only fields that are both intrinsic and required are compared. Nothing
    is checked when the class under test declares no fields.
    """
    cls = self.class_under_test
    if not cls.fields:
        return

    obj1 = generate_random_object(cls, all_fields=False)
    payload = obj1._build_data(kg_client)
    instance = Instance(cls.path, payload, Instance.path)
    instance.data["@id"] = random_uuid()
    instance.data["@type"] = cls.type
    obj2 = cls.from_kg_instance(instance, kg_client)

    for field in cls.fields:
        if not (field.intrinsic and field.required):
            continue
        val1 = getattr(obj1, field.name)
        val2 = getattr(obj2, field.name)
        if issubclass(field.types[0], KGObject):
            # Linked objects come back as proxies of the same type.
            assert isinstance(val1, MockKGObject)
            assert isinstance(val2, KGProxy)
            assert val1.type == val2.cls.type
        elif date in field.types:
            assert dates_equal(val1, val2)
        else:
            assert val1 == val2
def create(self, entity):
    """POST `entity` and update it in place from the response.

    On success the entity's data is replaced by the response, its id is
    derived from the response's "@id", and its path is rebuilt.

    Raises:
        ValueError -- the POST returned None
    """
    # Diagnostics go through the logger instead of being printed to stdout.
    self.logger.debug("entitypath %s", entity.path)
    self.logger.debug("entitydata %s", entity.data)
    result = self._http_client.post(entity.path, entity.data)
    if result is None:
        raise ValueError("Entity was not created")
    self.logger.info("Instance created: %s", entity.path)
    entity.data = result
    entity.id = Instance.extract_id_from_url(result.get("@id"), self.path)
    entity.build_path()
    return entity
def test_round_trip(self, kg_client):
    """Build a PatchedCell, serialise it, rebuild it and compare fields."""
    compared_fields = ("name", "brain_location", "cell_type", "pipette_id",
                       "seal_resistance", "pipette_resistance",
                       "liquid_junction_potential", "labeling_compound",
                       "reversal_potential_cl")

    location = BrainRegion("primary auditory cortex")
    cell_type = CellType("pyramidal cell")
    seal = QuantitativeValue(1.2, "GΩ")
    pipette = QuantitativeValue(1.5, "MΩ")
    junction = QuantitativeValue(5.0, "mV")
    reversal = QuantitativeValue(-65, "mV")
    cell1 = PatchedCell("example001",
                        brain_location=location,
                        collection=None,
                        cell_type=cell_type,
                        experiments=None,
                        pipette_id=31,
                        seal_resistance=seal,
                        pipette_resistance=pipette,
                        liquid_junction_potential=junction,
                        labeling_compound="0.1% biocytin ",
                        reversal_potential_cl=reversal)

    payload = cell1._build_data(kg_client)
    instance = Instance(PatchedCell.path, payload, Instance.path)
    # The id and type are filled in by hand here rather than by _build_data.
    instance.data["@id"] = "http://fake_uuid_93f9cd9a9b"
    instance.data["@type"] = PatchedCell.type

    cell2 = PatchedCell.from_kg_instance(instance, kg_client)
    for field in compared_fields:
        assert getattr(cell1, field) == getattr(cell2, field)
def handle_known_schema_identifier(self, schema_identifier, instance, hashcode_field, current_hashcode, identifier):
    """Update or skip an instance whose identifier already exists remotely.

    Looks up instances whose `schema_identifier` field equals `identifier`
    (the first element if a list is given). When one is found, `instance` is
    re-pointed at it (path and id) and then updated, unless the stored
    hashcode equals `current_hashcode`, in which case the update is skipped.

    Returns:
        The update result, `instance` itself when the update is skipped, or
        None when no existing instance was found.
    """
    if isinstance(identifier, list):
        identifier = identifier[0]
    matches = self._client.instances.find_by_field(
        instance.id, schema_identifier, identifier, resolved=True)
    if not (matches and matches.results):
        return None

    existing = matches.results[0]
    if hashcode_field in existing.data:
        existing_hashcode = existing.data[hashcode_field]
    else:
        existing_hashcode = None

    instance.path = existing.get_self_link()
    instance.id = Instance.extract_id_from_url(instance.path, instance.root_path)

    unchanged = existing_hashcode is not None and existing_hashcode == current_hashcode
    if unchanged:
        LOGGER.info("Skipping instance %s because it already exists", instance.path)
        return instance
    return self._client.instances.update(instance)
def create_instance(self, schema_name: str, schema_version: str, data: JSON) -> Optional[Instance]:
    """Create an instance of the given schema, reporting failures by printing.

    Returns the created instance. Returns None, after printing a report, if
    the schema does not exist or the creation raises HTTPError.
    """
    if not self.is_schema_created(schema_name, schema_version):
        print("<error> Schema does not exist!")
        return None

    # TODO Check if the instance has already been created when Nexus v1.
    local = Instance.create_new(self.organization, self.domain, schema_name,
                                schema_version, data)

    # Note: Even if NexusClient.logger.hasHandlers() is False and
    # NexusClient.logger.propagate is set to False, the logging message is
    # displayed in the IPython notebook. A new logger is then needed.
    # Using NexusClient.instances.create() outside the current method will
    # display the logging message as without the following modification.
    captured = StringIO()
    capture_handler = logging.StreamHandler(stream=captured)
    root_logger = logging.getLogger()
    root_logger.addHandler(capture_handler)
    try:
        created = self.client.instances.create(local)
    except HTTPError:
        # Note: repr(HTTPError) is not informative on what really happened.
        # Note: The 'violations' key is two times in the string.
        message = captured.getvalue()
        violations = re.search('{"violations":[^}]+}$', message)
        print("<error>")
        if violations:
            prettify(json.loads(violations.group(0)))
        else:
            print("--- was not able to parse the error message ---")
            print(message)
        print("<data>")
        prettify(data)
        return None
    else:
        return created
    finally:
        # Note: StringIO.close() flushes and closes the stream.
        captured.close()
        root_logger.removeHandler(capture_handler)
def create_instance(self, schema_name: str, schema_version: str, data: JSON) -> Optional[Instance]:
    """Create an instance of the given schema, reporting failures by printing.

    Returns the created instance. Returns None, after printing a report, if
    the schema does not exist or the creation raises HTTPError.
    """
    if not self.is_schema_created(schema_name, schema_version):
        print("<error> Schema does not exist!")
        return None

    # TODO Check if the instance has already been created when Nexus v1.
    local = Instance.create_new(self.organization, self.domain, schema_name,
                                schema_version, data)

    # Note: Even if NexusClient.logger.hasHandlers() is False and
    # NexusClient.logger.propagate is set to False, the logging message is
    # displayed in the IPython notebook. A new logger is then needed.
    # Using NexusClient.instances.create() outside the current method will
    # display the logging message as without the following modification.
    captured = StringIO()
    capture_handler = logging.StreamHandler(stream=captured)
    root_logger = logging.getLogger()
    root_logger.addHandler(capture_handler)
    try:
        created = self.client.instances.create(local)
    except HTTPError:
        # Note: repr(HTTPError) is not informative on what really happened.
        # Note: The 'violations' key is two times in the string.
        message = captured.getvalue()
        violations = re.search('{"violations":[^}]+}$', message)
        print("<error>")
        if violations:
            prettify(json.loads(violations.group(0)))
        else:
            print("--- was not able to parse the error message ---")
            print(message)
        print("<data>")
        prettify(data)
        return None
    else:
        return created
    finally:
        # Note: StringIO.close() flushes and closes the stream.
        captured.close()
        root_logger.removeHandler(capture_handler)
def build_kg_object(cls, data, resolved=False, client=None):
    """
    Build a KGObject, a KGProxy, or a list of such, based on the data provided.

    This takes care of the JSON-LD quirk that you get a list if there are multiple
    objects, but you get the object directly if there is only one.

    If `cls` is None the class is determined per item, from "@type" or, for
    items with a "label", from "@id".

    Returns `None` if data is None.
    """
    if data is None:
        return None

    # Normalise the input to a list of items.
    if isinstance(data, list):
        items = data
    elif not isinstance(data, dict):
        raise ValueError("data must be a list or dict")
    elif "@list" in data:
        assert len(data) == 1
        items = data["@list"]
    else:
        items = [data]

    built = []
    for item in items:
        # note that if cls is None, then the class can be different for each list item
        # therefore we need to use a new variable kg_cls inside the loop
        if cls is not None:
            kg_cls = cls
        elif "@type" in item:
            try:
                kg_cls = lookup_type(item["@type"])
            except KeyError:
                kg_cls = lookup_type(compact_uri(item["@type"], standard_context))
        elif "label" in item:
            kg_cls = lookup_by_iri(item["@id"])
        # todo: add lookup by @id
        else:
            raise ValueError("Cannot determine type. Item was: {}".format(item))

        if issubclass(kg_cls, StructuredMetadata):
            obj = kg_cls.from_jsonld(item)
        elif not issubclass(kg_cls, KGObject):
            raise ValueError("cls must be a subclass of KGObject or StructuredMetadata")
        elif "@id" not in item or not item["@id"].startswith("http"):
            # todo: add a logger.warning that we have dud data
            obj = None
        elif not resolved:
            obj = KGProxy(kg_cls, item["@id"])
        else:
            # here is where we check the "resolved" keyword,
            # and return an actual object if we have the data
            # or resolve the proxy if we don't
            if kg_cls.namespace is None:
                kg_cls.namespace = namespace_from_id(item["@id"])
            try:
                instance = Instance(kg_cls.path, item, Instance.path)
                obj = kg_cls.from_kg_instance(instance, client, resolved=resolved)
            except (ValueError, KeyError) as err:
                # to add: emit a warning
                logger.warning("Error in building {}: {}".format(kg_cls.__name__, err))
                obj = KGProxy(kg_cls, item["@id"]).resolve(
                    client, api=item.get("fg:api", "query"))

        if obj is not None:
            built.append(obj)

    return built[0] if len(built) == 1 else built
def read(self, organization, domain, schema, version, uuid, revision=None):
    """Read one instance, addressed by its schema coordinates and uuid.

    Returns an Instance wrapping the data that was read, or None if the
    read returned None.
    """
    schema_id = Instance.create_id(organization, domain, schema, version)
    identifier = schema_id + "/" + uuid
    data = self._read(identifier, revision)
    if data is None:
        return None
    return Instance(identifier, data, self.path)
def unrelease(self, uri):
    """Unrelease the node with the given uri

    Raises Exception if the release client's DELETE answers with a status
    code other than 200 or 204.
    """
    node_path = Instance.extract_id_from_url(uri, self._instance_repo.path)
    response = self._release_client.delete(node_path)
    if response.status_code in (200, 204):
        return
    raise Exception("Can't unrelease node with id {}".format(uri))
def is_released(self, uri):
    """Release status of the node

    Returns True if the "status" reported by the release client for this
    node is "RELEASED".
    """
    node_path = Instance.extract_id_from_url(uri, self._instance_repo.path)
    payload = self._release_client.get(node_path).json()
    return payload["status"] == "RELEASED"
def resolve(self, search_result):
    """Read the instance that `search_result.self_link` points to.

    Returns an Instance wrapping the data that was read, or None if the
    read returned None.
    """
    identifier = Entity.extract_id_from_url(search_result.self_link, self.path)
    data = self._read(identifier)
    if data is None:
        return None
    return Instance(identifier, data, self.path)
def create_new_instance(self, path, data):
    """Create an instance at `path` through the Nexus client.

    The entity returned by the client has its data updated with the
    submitted `data` before it is returned.
    """
    pending = Instance(path, data, Instance.path)
    created = self._nexus_client.instances.create(pending)
    created.data.update(data)
    return created
# Create the schema object, register it through the schema repository and
# publish it.
# NOTE(review): the meaning of the second argument to publish() is not
# visible here - confirm against SchemaRepository.
test_schema_obj = Schema.create_new(organisation_name, domain_name, schema_name, version, test_schema)
schema_repo = SchemaRepository(nar_client)
schema_repo.create(test_schema_obj)
schema_repo.publish(test_schema_obj, True)

#
# Read a schema
#
# Read the schema back by its coordinates and print what was read.
testschema_read = schema_repo.read(organisation_name, domain_name, schema_name, version)
print(testschema_read)

#
# 5. Create a test instance. (test0)
#
# TODO
# The instance payload is an empty dict for now.
test_instance = {}

#
from pyxus.resources.repository import InstanceRepository
from pyxus.resources.entity import Instance

# Create an instance of naro/tests/testschema (v0.0.2)
instance_repo = InstanceRepository(nar_client)
test0 = Instance.create_new(organisation_name, domain_name, schema_name, version, test_instance)
instance_repo.create(test0)

# get uuid
# Request a single result under `fpath` (defined outside this fragment) and
# take its first entry. The extracted uuid is not stored.
single_result = instance_repo.list(subpath=fpath, size=1, deprecated=None)
instance = single_result.results[0]
InstanceRepository._extract_uuid(instance.result_id)
def read_by_full_id(self, full_id, revision=None):
    """Read one instance, addressed by its full id.

    Returns an Instance wrapping the data that was read, or None if the
    read returned None.
    """
    data = self._read(full_id, revision)
    if data is None:
        return None
    return Instance(full_id, data, self.path)
# JSON-LD payload for an example person; the @context maps the short keys
# used below to schema.org IRIs.
example_person_data = {
    "@context": {
        "Person": "http://schema.org/Person",
        "givenName": "http://schema.org/givenName",
        "familyName": "http://schema.org/familyName"
    },
    "@type": [
        "Person"
    ],
    "familyName": "Nexus",
    "givenName": "Brian"
}

# Create the instance through the client, then read its "@id" and revision
# from the local object.
# NOTE(review): this presumes client.instances.create() updates `instance`
# in place - confirm.
instance = Instance.create_new(organization=organization_name, domain=domain_name, schema=schema_name, version=schema_version, content=example_person_data)
client.instances.create(instance)
example_person_data_id = instance.data["@id"]
example_person_data_rev = instance.get_revision()

filepath = "https://docs.google.com/uc?id=1V8-hGYNMVqlCIrvlTKTxseMfZSGEqbHl"  # Provide the address of the file you want to attach here
# Download the file and keep its raw bytes.
r = requests.get(filepath)
file = r.content

# PUT the bytes as a multipart "file" field to the instance's attachment
# URL, passing the revision read above as the `rev` query parameter.
# The response status is not checked here.
url = "{}/attachment?rev={}".format(example_person_data_id, example_person_data_rev)
file_attachment = {'file': file}
response = requests.put(url, files=file_attachment)