def test_to_jsonld(self, kg_client):
    """A fully-populated Distribution serializes to the expected JSON-LD."""
    distribution = Distribution(
        location="http://example.com/data.dat",
        size=123,  # bytes
        digest="abcdef",
        digest_method="sha1",
        content_type="application/foo",
        original_file_name="data.dat")
    actual = distribution.to_jsonld(kg_client)
    assert actual == {
        '@context': 'https://nexus.humanbrainproject.org/v0/contexts/nexus/core/distribution/v0.1.0',
        'contentSize': {'unit': 'byte', 'value': 123},
        'digest': {'algorithm': 'sha1', 'value': 'abcdef'},
        'downloadURL': 'http://example.com/data.dat',
        'mediaType': 'application/foo',
        'originalFileName': 'data.dat'
    }
def test_round_trip(self, kg_client):
    """A Trace written out as KG instance data and read back keeps its fields."""
    original = Trace("example001",
                     data_location=Distribution(
                         "http://example.com/example.csv",
                         content_type="text/tab-separated-values"),
                     generated_by=MockKGObject(
                         id="http://fake_uuid_abc123",
                         type=PatchClampExperiment.type),
                     generation_metadata=MockKGObject(
                         id="http://fake_uuid_def456",
                         type=QualifiedTraceGeneration.type),
                     channel=42,
                     data_unit="mV",
                     time_step=QuantitativeValue(0.1, "ms"),
                     part_of=MockKGObject(id="http://fake_uuid_ghi789",
                                          type=Dataset.type))
    instance = Instance(Trace.path, original._build_data(kg_client),
                        Instance.path)
    instance.data["@id"] = "http://fake_uuid_6a5d6ecf87"
    instance.data["@type"] = Trace.type
    restored = Trace.from_kg_instance(instance, kg_client)
    # plain attributes must survive the round trip unchanged
    for attr in ("name", "data_location", "channel", "data_unit", "time_step"):
        assert getattr(original, attr) == getattr(restored, attr)
    # linked objects come back as proxies pointing at the same id/type
    for attr in ("generated_by", "generation_metadata", "part_of"):
        linked_original = getattr(original, attr)
        linked_restored = getattr(restored, attr)
        assert isinstance(linked_restored, KGProxy)
        assert linked_original.id == linked_restored.id
        assert linked_original.type == linked_restored.type
def generate_random_object(cls, all_fields=True):
    """Build an instance of `cls` populated with randomly generated values.

    Iterates over `cls.fields` and generates a value appropriate to each
    field's first declared type.

    Parameters
    ----------
    cls : a fairgraph class with a `fields` attribute
    all_fields : bool
        If True, populate every field; if False, only required fields.

    Raises
    ------
    NotImplementedError
        If a field declares a type with no generator defined here.
    """
    attrs = {}
    for field in cls.fields:
        if all_fields or field.required:
            obj_type = field.types[0]  # todo: pick randomly if len(field.types) > 1
            if not field.intrinsic:
                value = None
            elif obj_type == basestring:
                value = _random_text()
            elif obj_type == int:
                value = random.randint(1, 10)
            elif obj_type == float:
                value = random.uniform(0, 1000)
            elif issubclass(obj_type, KGObject):
                if obj_type == KGObject:
                    # specific type is not determined;
                    # arbitrarily, let's choose minds.Dataset
                    value = MockKGObject(id=random_uuid(),
                                         type=["minds:Dataset"])
                else:
                    value = MockKGObject(id=random_uuid(),
                                         type=getattr(obj_type, "type", None))
            elif obj_type == QuantitativeValue:
                # todo: subclass QV so we can specify the required dimensionality in `fields`
                value = QuantitativeValue(
                    random.uniform(-10, 10),
                    random.choice(list(QuantitativeValue.unit_codes)))
            elif obj_type == QuantitativeValueRange:
                # todo: subclass QVR so we can specify the required dimensionality in `fields`
                # (renamed from `min`, which shadowed the builtin)
                lower_bound = random.uniform(-10, 10)
                value = QuantitativeValueRange(
                    lower_bound,
                    lower_bound + random.uniform(1, 10),
                    random.choice(list(QuantitativeValue.unit_codes)))
            elif issubclass(obj_type, OntologyTerm):
                value = obj_type(random.choice(list(obj_type.iri_map)))
            elif obj_type == datetime:
                value = datetime.now()
            elif obj_type == date:
                value = date.today()
            elif obj_type == bool:
                value = random.choice([True, False])
            elif obj_type == Distribution:
                value = Distribution("http://example.com/myfile.txt")
            elif obj_type == Age:
                value = Age(QuantitativeValue(random.randint(7, 150), "days"),
                            "Post-natal")
            elif obj_type == IRI:
                # non-ASCII path exercises IRI (vs plain URI) handling
                value = "http://example.com/åêïøù"
            elif obj_type == Address:
                value = Address("Paris", "France")
            elif obj_type == dict:
                value = {"a": 1, "b": 2}
            else:
                raise NotImplementedError(str(obj_type))
            attrs[field.name] = value
    return cls(**attrs)
def save(self):
    """Create this validation result in the KG and return it.

    Saves one AnalysisResult per stored results file, the ValidationResult
    itself, a Collection of reference data, and the ValidationActivity
    linking them; finally re-saves the result with its provenance link.
    Updating an existing result is not implemented.
    """
    if self.obj is not None:
        # update
        raise NotImplementedError()
    # create
    logger.debug("Saving result with data {}".format(self.data))
    timestamp = datetime.now()
    additional_data = [
        AnalysisResult(name="{} @ {}".format(uri, timestamp.isoformat()),
                       result_file=Distribution(uri),
                       timestamp=timestamp)
        for uri in self.data["results_storage"]
    ]
    for analysis_result in additional_data:
        analysis_result.save(self.client)
    self.obj = ValidationResult(
        name="Validation results for model {} and test {} with timestamp {}"
        .format(self.data["model_version_id"], self.data["test_code_id"],
                timestamp.isoformat()),
        generated_by=None,
        description=None,
        score=self.data["score"],
        normalized_score=self.data["normalized_score"],
        passed=self.data["passed"],
        timestamp=timestamp,
        additional_data=additional_data,
        collab_id=self.data["project"])
    self.obj.save(self.client)
    test_definition = self.data["test_script"].test_definition.resolve(
        self.client, api="nexus")
    members = [
        item.resolve(self.client, api="nexus")
        for item in as_list(test_definition.reference_data)
    ]
    reference_data = Collection(
        "Reference data for {}".format(test_definition.name),
        members=members)
    reference_data.save(self.client)
    activity = ValidationActivity(
        model_instance=self.data["model_instance"],
        test_script=self.data["test_script"],
        reference_data=reference_data,
        timestamp=timestamp,
        result=self.obj)
    activity.save(self.client)
    # record provenance on the result now that the activity exists
    self.obj.generated_by = activity
    self.obj.save(self.client)
    return self.obj
def test_from_jsonld_minimal(self, kg_client):
    """Attributes absent from the JSON-LD document default to None."""
    minimal_doc = {
        '@context': 'https://nexus.humanbrainproject.org/v0/contexts/nexus/core/distribution/v0.1.0',
        'downloadURL': 'http://example.com/data2.dat',
    }
    distribution = Distribution.from_jsonld(minimal_doc)
    assert distribution.location == 'http://example.com/data2.dat'
    for absent_attr in ("size", "content_type", "digest", "digest_method"):
        assert getattr(distribution, absent_attr) is None
def migrate_validation_definitions(self):
    """Copy every Django ValidationTestDefinition into the Knowledge Graph.

    For each test: saves its authors, builds an AnalysisResult wrapping the
    reference data location, then saves a ValidationTestDefinitionKG with
    the old UUID recorded in `old_uuid`. Server-side "internal server error"
    failures are logged and skipped; any other error is re-raised.

    Returns an empty string (view-style return value).
    """
    tests = ValidationTestDefinition.objects.all()
    for test in tests:
        authors = self._get_people_from_Persons_table(test.author)
        for author in authors:
            author.save(NAR_client)
        brain_region = self.get_parameters("brain_region", test.brain_region)
        species = self.get_parameters("species", test.species)
        cell_type = self.get_parameters("cell_type", test.cell_type)
        #age = self.get_parameters("age", test.age)
        if test.data_location == "to do":
            # placeholder for records whose data location was never filled in
            test.data_location = "http://example.com/todo"
        reference_data = AnalysisResult(
            name="Reference data for validation test '{}'".format(test.name),
            result_file=Distribution(test.data_location))
        reference_data.save(NAR_client)
        test_definition = ValidationTestDefinitionKG(
            name=test.name,
            authors=authors,
            description=test.protocol,
            date_created=test.creation_date,
            alias=test.alias,
            brain_region=brain_region,
            species=species,
            celltype=cell_type,
            test_type=test.test_type,
            age=None,  #age,
            reference_data=reference_data,
            data_type=test.data_type,
            recording_modality=test.data_modality,
            #test.publication,
            status="in development",  #test.status,
            old_uuid=str(test.id))
        try:
            test_definition.save(NAR_client)
        except Exception as err:
            # Fix: the exception may not carry an HTTP response at all;
            # accessing err.response.text unconditionally raised
            # AttributeError and masked the original error. Only swallow
            # genuine server-side "internal server error" responses.
            response = getattr(err, "response", None)
            if response is not None and "internal server error" in response.text:
                logger.error(err)
            else:
                raise
        else:
            logger.info("ValidationTestDefinition saved: %s", test_definition)
            print(test_definition)
    return ''
def test_from_jsonld(self, kg_client):
    """A complete JSON-LD document maps onto every Distribution attribute."""
    document = {
        '@context': 'https://nexus.humanbrainproject.org/v0/contexts/nexus/core/distribution/v0.1.0',
        'contentSize': {'unit': 'byte', 'value': 456},
        'digest': {'algorithm': 'sha1', 'value': 'a1b2c3'},
        'downloadURL': 'http://example.com/data2.dat',
        'mediaType': 'application/bar',
        'originalFileName': 'data2.dat'
    }
    distribution = Distribution.from_jsonld(document)
    expected_attributes = {
        "size": 456,
        "content_type": "application/bar",
        "digest": 'a1b2c3',
        "digest_method": "sha1",
        "location": 'http://example.com/data2.dat',
    }
    for attr, expected in expected_attributes.items():
        assert getattr(distribution, attr) == expected
def migrate_validation_results(self):
    # Migrate Django ValidationTestResult records into the Knowledge Graph:
    # for each record, save one AnalysisResult per output file, then a
    # ValidationResult, a reference-data Collection and the ValidationActivity
    # linking model instance, test script and result together.
    result_objects = ValidationTestResult.objects.all()
    storage_token = os.environ["HBP_STORAGE_TOKEN"]
    storage_client = StorageClient.new(storage_token)
    # NOTE(review): hard-coded resume offset — the first 800 records are
    # skipped, presumably migrated by an earlier run; confirm before rerunning.
    for ro in result_objects[800:]:
        model_instance = lookup_model_instance(
            str(ro.model_version.id),
            NAR_client)  # use oldUUID (stored in nsg:providerId)
        test_script = lookup_test_script(str(ro.test_code.id), NAR_client)
        # skip (but log) records whose model/test never made it into the KG
        if not model_instance:
            logger.error("Model instance for {} not found in KG".format(
                ro.model_version))
            continue
        if not test_script:
            logger.error("Test script for {} not found in KG".format(
                ro.test_code))
            continue
        test_definition = test_script.test_definition.resolve(NAR_client)
        assert test_definition
        # one AnalysisResult per file produced by the original test run
        additional_data = [
            AnalysisResult(name="{} @ {}".format(uri,
                                                 ro.timestamp.isoformat()),
                           result_file=Distribution(uri),
                           timestamp=ro.timestamp)
            for uri in get_file_list(ro.results_storage, storage_client)
        ]
        for ad in additional_data:
            ad.save(NAR_client)
        result_kg = ValidationResult(
            name="Result of running '{}' on model '{}' at {}".format(
                test_script.name, model_instance.name, ro.timestamp),
            generated_by=None,
            description=ro.
            platform,  # temporary location pending integration in KG
            score=ro.score,
            normalized_score=ro.normalized_score,
            passed=ro.passed,
            timestamp=ro.timestamp,
            additional_data=additional_data,
            old_uuid=str(ro.id),
            collab_id=ro.project)
        result_kg.save(NAR_client)
        logger.info("ValidationResult saved: %s", result_kg)
        reference_data = Collection(
            "Reference data for {}".format(test_definition.name),
            members=test_definition.reference_data.resolve(NAR_client))
        reference_data.save(NAR_client)
        validation_activity = ValidationActivity(
            model_instance=model_instance,
            test_script=test_script,
            reference_data=reference_data,
            timestamp=ro.timestamp,
            result=result_kg,
            started_by=None)
        validation_activity.save(NAR_client)
        logger.info("ValidationActivity saved: %s", validation_activity)
        # link the result back to the activity that produced it, then re-save
        result_kg.generated_by = validation_activity
        result_kg.save(NAR_client)
def save(self):
    """Create or update a ValidationTestDefinition in the KG.

    On create: saves the reference-data AnalysisResults first, builds the
    test definition, then saves its authors. On update: only the keys
    present in ``self.data`` are applied to the existing object. In both
    cases, authors, reference data and the test itself are then saved
    (not atomic).

    Returns the saved ValidationTestDefinition.
    """
    if self.obj is None:
        # create
        reference_data = [
            AnalysisResult(
                name="Reference data #{} for validation test '{}'".format(
                    i, self.data["name"]),
                result_file=Distribution(url))
            for i, url in enumerate(as_list(self.data["data_location"]))
        ]
        for item in reference_data:
            try:
                item.save(self.client)
            except Exception as err:
                logger.error(
                    "error saving reference data. name = {}, urls={}".
                    format(self.data["name"], self.data["data_location"]))
                raise
        authors = self.data["author"]
        # if not isinstance(authors, list):
        #     authors = [authors]
        self.obj = ValidationTestDefinition(
            name=self.data["name"],
            alias=self.data.get("alias"),
            status=self.data.get("status", "proposal"),
            species=self._get_ontology_obj(Species, "species"),
            brain_region=self._get_ontology_obj(BrainRegion, "brain_region"),
            celltype=self._get_ontology_obj(CellType, "cell_type"),
            reference_data=reference_data,
            data_type=self.data.get("data_type"),
            recording_modality=self.data.get("data_modality"),
            test_type=self.data.get("test_type"),
            score_type=self.data.get("score_type"),
            description=self.data.get("protocol"),
            authors=[
                Person(p["family_name"], p["given_name"], p.get("email", None))
                for p in as_list(authors)
            ],
            date_created=datetime.now())
        for author in self.obj.authors:
            author.save(self.client)
    else:
        # update: apply only the keys that were supplied
        logger.debug("Updating test {} with data {}".format(
            self.obj.id, self.data))
        if "name" in self.data:
            self.obj.name = self.data["name"]
        if "alias" in self.data:
            self.obj.alias = self.data["alias"]
        if "status" in self.data:
            self.obj.status = self.data["status"]
        if "species" in self.data:
            self.obj.species = self._get_ontology_obj(Species, "species")
        if "brain_region" in self.data:
            self.obj.brain_region = self._get_ontology_obj(
                BrainRegion, "brain_region")
        if "cell_type" in self.data:
            self.obj.celltype = self._get_ontology_obj(
                CellType, "cell_type")
        if "data_type" in self.data:
            self.obj.data_type = self.data["data_type"]
        if "data_modality" in self.data:
            self.obj.recording_modality = self.data["data_modality"]
        if "test_type" in self.data:
            self.obj.test_type = self.data["test_type"]
        if "score_type" in self.data:
            self.obj.score_type = self.data["score_type"]
        if "protocol" in self.data:
            self.obj.description = self.data["protocol"]
        if "data_location" in self.data:
            self.obj.reference_data = [
                AnalysisResult(
                    name="Reference data #{} for validation test '{}'".
                    format(i, self.data["name"]),
                    result_file=Distribution(url))
                for i, url in enumerate(
                    as_list(self.data["data_location"]))
            ]
        if "author" in self.data:
            self.obj.authors = [
                Person(p["family_name"], p["given_name"],
                       p.get("email", None))
                for p in as_list(self.data["author"])
            ]
    # now save people, ref data, test. No easy way to make this atomic, I don't think.
    for person in as_list(self.obj.authors):
        if not isinstance(person, KGProxy):
            # no need to save if we have a proxy object, as
            # that means the person hasn't been updated
            # although in fact the authors are saved when the test is saved
            # need to make this consistent
            person.save(self.client)
    for ref_data in as_list(self.obj.reference_data):
        # BUG FIX: this previously tested `person` (the stale loop variable
        # from the authors loop above), so whether reference data was saved
        # depended on the wrong object; test ref_data itself.
        if not isinstance(ref_data, KGProxy):
            ref_data.save(self.client)
    self.obj.save(self.client)
    return self.obj
os.system(
    'wget https://object.cscs.ch/v1/AUTH_c0a333ecf7c045809321ce9d9ecdfdea/simulation_result_demo/model/model_script.py'
)
from fairgraph.base import Distribution

###############################################
### Documenting Model Metadata ################
###############################################

# BUG FIX: the original interpolated `str(datetime.now)` -- the *method
# object* (rendered as "<built-in method now ...>") -- instead of the
# current time. Call it once so the script, the model instance and its
# description all carry the same timestamp.
creation_stamp = str(datetime.now())

## --> starting with script metadata underlying the model
model_script = brainsimulation.ModelScript(
    name='Script for Toy model#%s of network dynamics for demo purpose' %
    creation_stamp,
    code_format='python',
    distribution=Distribution(container_url + '/model/model_script.py'),
    license='CC BY-SA')
model_script.save(client)  # SAVE IN KG
print('The KG ID is:', model_script.id)

## --> building a model instance (version) from those metadata
my_model = brainsimulation.ModelInstance(
    name='Toy model#%s of neural network dynamics for demo purpose' %
    creation_stamp,
    main_script=model_script,
    description="""
    This model#%s implements a very simple description of desynchronized activity in neural assemblies:
    - Single neuron spiking consists of independent Poisson processes
    - Vm fluctuations are sampled from a random process with Gaussian distribution
    """ % creation_stamp,
    version='v0')