def test_flatten__entities_has_input(self):
    """Flatten an entity that has an input, a process and protocols attached.

    All fixtures are read from JSON files under ``self.script_dir``. The
    entity loaded from the donor file is attached to the entity loaded from
    the specimen file through ``set_input`` before flattening, and the result
    is compared with ``entities-with-inputs-flattened.json``.
    """

    def read_fixture(relative_path):
        # Fixture names carry their own leading slash.
        with open(self.script_dir + relative_path) as fixture:
            return json.load(fixture)

    # given
    specimen = Entity.from_json(
        read_fixture('/entities-with-inputs-specimen.json'))
    donor = Entity.from_json(
        read_fixture('/entities-with-inputs-donor.json'))
    process = Entity.from_json(
        read_fixture('/entities-with-inputs-process.json'))
    protocols = Entity.from_json_list(
        read_fixture('/entities-with-inputs-protocols.json'))
    specimen.set_input([donor], [], process, protocols)

    # when
    actual = Flattener().flatten([specimen, process] + protocols)

    # then
    expected = read_fixture('/entities-with-inputs-flattened.json')
    self.assertEqual(actual, expected)
def test_flatten__has_ontology_property_with_single_element(self):
    """Flatten a project whose ``organ_parts`` list holds one ontology object.

    The expected 'Project' row gains one dotted column per attribute of that
    object (``ontology``, ``ontology_label``, ``text``).
    """
    # given
    organ_part = {
        "ontology": "UBERON:0000376",
        "ontology_label": "hindlimb stylopod",
        "text": "hindlimb stylopod"
    }
    self.content.update({"organ_parts": [organ_part]})
    entity_json_list = [{'content': self.content, 'uuid': self.uuid}]

    # when
    actual = Flattener().flatten(Entity.from_json_list(entity_json_list))

    # then
    extra_columns = {
        'project.organ_parts.ontology': 'UBERON:0000376',
        'project.organ_parts.ontology_label': 'hindlimb stylopod',
        'project.organ_parts.text': 'hindlimb stylopod',
    }
    project_sheet = self.flattened_metadata_entity['Project']
    project_sheet['values'][0].update(extra_columns)
    # Headers are expected in the same order the columns are listed above.
    project_sheet['headers'].extend(list(extra_columns))
    self.assertEqual(actual, self.flattened_metadata_entity)
def test_flatten__has_string_arrays(self):
    """Flatten a project whose content contains lists of plain strings.

    A single-element list is expected as that element alone; a list with
    several elements is expected as one string joined with ``||``.
    """
    # given
    accessions = {
        "insdc_project_accessions": ["SRP180337"],
        "geo_series_accessions": ["GSE124298", "GSE124299"]
    }
    self.content.update(accessions)
    # Stored on the test instance rather than in a local; whether anything
    # outside this method reads it is not visible here — TODO confirm.
    self.metadata_entity = {'content': self.content, 'uuid': self.uuid}
    entity = Entity.from_json(self.metadata_entity)

    # when
    actual = Flattener().flatten([entity])

    # then
    extra_columns = {
        'project.insdc_project_accessions': 'SRP180337',
        'project.geo_series_accessions': "GSE124298||GSE124299"
    }
    project_sheet = self.flattened_metadata_entity['Project']
    project_sheet['values'][0].update(extra_columns)
    # Headers are expected in the same order the columns are listed above.
    project_sheet['headers'].extend(list(extra_columns))
    self.assertEqual(actual, self.flattened_metadata_entity)
def test_flatten__raises_error__given_multiple_schema_versions_of_same_concrete_entity(
        self):
    """Flattening must fail when one concrete entity has two schema versions.

    Two payloads are built that differ only in the version segment of their
    ``describedBy`` URL (and in their uuid); ``flatten`` is expected to raise
    ``ValueError`` with a message matching
    "Multiple versions of same concrete entity schema".
    """

    def entity_json(described_by, uuid):
        # Both payloads share everything except the schema URL and the uuid.
        return {
            'content': {
                "describedBy": described_by,
                "schema_type": "biomaterial",
                "field": "value"
            },
            'uuid': {
                'uuid': uuid
            }
        }

    # given
    entity_list = Entity.from_json_list([
        entity_json(
            "https://schema.humancellatlas.org/type/project/14.2.0/donor_organism",
            'uuid1'),
        entity_json(
            "https://schema.humancellatlas.org/type/project/14.3.0/donor_organism",
            'uuid2'),
    ])

    # when/then
    flattener = Flattener()
    self.assertRaisesRegex(
        ValueError,
        "Multiple versions of same concrete entity schema",
        flattener.flatten,
        entity_list)
def test_flatten__has_project_modules(self):
    """Flatten a project whose content has a ``contributors`` list.

    The expected output gains a separate 'Project - Contributors' entry with
    its own headers and values. In it the nested ``project_role`` object is
    addressed with dotted keys and the boolean appears as the string 'True'.
    """
    # given
    contributor = {
        "name": "Alex A,,Pollen",
        "email": "*****@*****.**",
        "institution": "University of California, San Francisco (UCSF)",
        "laboratory": "Department of Neurology",
        "country": "USA",
        "corresponding_contributor": True,
        "project_role": {
            "text": "experimental scientist",
            "ontology": "EFO:0009741",
            "ontology_label": "experimental scientist"
        }
    }
    self.content.update({"contributors": [contributor]})
    entity_json_list = [{'content': self.content, 'uuid': self.uuid}]

    # when
    actual = Flattener().flatten(Entity.from_json_list(entity_json_list))

    # then
    contributor_headers = [
        'project.contributors.name',
        'project.contributors.email',
        'project.contributors.institution',
        'project.contributors.laboratory',
        'project.contributors.country',
        'project.contributors.corresponding_contributor',
        'project.contributors.project_role.text',
        'project.contributors.project_role.ontology',
        'project.contributors.project_role.ontology_label'
    ]
    contributor_row = {
        'project.contributors.corresponding_contributor': 'True',
        'project.contributors.country': 'USA',
        'project.contributors.email': '*****@*****.**',
        'project.contributors.institution':
            'University of California, San Francisco (UCSF)',
        'project.contributors.laboratory': 'Department of Neurology',
        'project.contributors.name': 'Alex A,,Pollen',
        'project.contributors.project_role.ontology': 'EFO:0009741',
        'project.contributors.project_role.ontology_label':
            'experimental scientist',
        'project.contributors.project_role.text': 'experimental scientist'
    }
    self.flattened_metadata_entity.update({
        'Project - Contributors': {
            'headers': contributor_headers,
            'values': [contributor_row]
        }
    })
    self.assertEqual(actual, self.flattened_metadata_entity)
def __build_entity_dict(self, submission):
    """Return a dict mapping entity id to ``Entity`` for ``submission``.

    Each item returned by ``__get_submission_data`` is converted with
    ``Entity.from_json`` and keyed by its ``id``; a later item with the same
    id replaces an earlier one. The dict and the submission's linking map are
    then handed to ``__set_inputs`` before the dict is returned.
    """
    parsed_entities = map(Entity.from_json,
                          self.__get_submission_data(submission))
    entity_dict = {entity.id: entity for entity in parsed_entities}
    self.__set_inputs(entity_dict, self.__get_linking_map(submission))
    return entity_dict
def test_flatten__has_no_modules(self):
    """Flatten the unmodified fixture content and expect the fixture output."""
    # given
    entity = Entity.from_json({'content': self.content, 'uuid': self.uuid})

    # when
    actual = Flattener().flatten([entity])

    # then
    self.assertEqual(actual, self.flattened_metadata_entity)
def test_flatten__project_metadata(self):
    """Flatten ``project-list.json`` and compare with its flattened fixture."""

    def read_fixture(relative_path):
        # Fixture names carry their own leading slash.
        with open(self.script_dir + relative_path) as fixture:
            return json.load(fixture)

    # given
    entity_json_list = read_fixture('/project-list.json')

    # when
    actual = Flattener().flatten(Entity.from_json_list(entity_json_list))

    # then
    expected = read_fixture('/project-list-flattened.json')
    self.assertEqual(actual, expected)
def test_flatten__has_different_entities(self):
    """Flatten ``entities.json`` and compare with ``entities-flattened.json``."""

    def read_fixture(relative_path):
        # Fixture names carry their own leading slash.
        with open(self.script_dir + relative_path) as fixture:
            return json.load(fixture)

    # given
    entity_json_list = read_fixture('/entities.json')

    # when
    actual = Flattener().flatten(Entity.from_json_list(entity_json_list))

    # then
    expected = read_fixture('/entities-flattened.json')
    self.assertEqual(actual, expected)
def test_flatten__has_list_property_with_elements(self):
    """Flatten a collection protocol whose ``organ_parts`` list has one object.

    The expected output has a 'Collection protocol' entry whose columns are
    prefixed with ``collection_protocol.`` and a 'Schemas' list holding the
    ``describedBy`` URL.
    """
    # given
    schema_url = 'https://schema.humancellatlas.org/type/project/14.2.0/collection_protocol'
    organ_part = {
        "field_1": "UBERON:0000376",
        "field_2": "hindlimb stylopod",
        "field_3": "hindlimb stylopod"
    }
    content = {
        "describedBy": schema_url,
        "schema_type": "protocol",
        "organ_parts": [organ_part]
    }
    entity = Entity.from_json({'content': content, 'uuid': self.uuid})

    # when
    actual = Flattener().flatten([entity])

    # then
    expected_row = {
        'collection_protocol.organ_parts.field_1': 'UBERON:0000376',
        'collection_protocol.organ_parts.field_2': 'hindlimb stylopod',
        'collection_protocol.organ_parts.field_3': 'hindlimb stylopod',
        'collection_protocol.uuid': 'uuid1'
    }
    expected_headers = [
        'collection_protocol.uuid',
        'collection_protocol.organ_parts.field_1',
        'collection_protocol.organ_parts.field_2',
        'collection_protocol.organ_parts.field_3'
    ]
    expected = {
        'Collection protocol': {
            'values': [expected_row],
            'headers': expected_headers
        },
        'Schemas': [schema_url]
    }
    self.assertEqual(actual, expected)
def test_flatten__has_boolean(self):
    """Flatten a project with a boolean field; ``True`` is expected as 'True'."""
    # given
    self.content.update({'boolean_field': True})
    # Stored on the test instance rather than in a local; whether anything
    # outside this method reads it is not visible here — TODO confirm.
    self.metadata_entity = {'content': self.content, 'uuid': self.uuid}
    entity = Entity.from_json(self.metadata_entity)

    # when
    actual = Flattener().flatten([entity])

    # then
    project_sheet = self.flattened_metadata_entity['Project']
    project_sheet['values'][0].update({'project.boolean_field': 'True'})
    project_sheet['headers'].append('project.boolean_field')
    self.assertEqual(actual, self.flattened_metadata_entity)
def test_flatten__has_ontology_property_with_single_element_but_only_with_text_attr(
        self):
    """Flatten a project whose single ``diseases`` element has only ``text``.

    Only a ``project.diseases.text`` column is expected to be added to the
    'Project' row and headers.
    """
    # given
    self.content.update({'diseases': [{'text': 'dummytext2'}]})
    entity_json_list = [{'content': self.content, 'uuid': self.uuid}]

    # when
    actual = Flattener().flatten(Entity.from_json_list(entity_json_list))

    # then
    extra_columns = {
        'project.diseases.text': 'dummytext2',
    }
    project_sheet = self.flattened_metadata_entity['Project']
    project_sheet['values'][0].update(extra_columns)
    project_sheet['headers'].extend(list(extra_columns))
    self.assertEqual(actual, self.flattened_metadata_entity)
def test_flatten__has_ontology_property_with_multiple_elements_but_with_empty_ontology_values(
        self):
    """Flatten a project with two ``diseases``, one having empty ontology values.

    The second element has empty strings for ``ontology`` and
    ``ontology_label``. The expected row therefore carries only the first
    element's value in those two columns, while ``text`` holds both elements'
    texts joined with ``||``.
    """
    # given
    self.content.update({
        'diseases': [{
            'ontology': 'UBERON:0000376',
            'ontology_label': 'dummylabel1',
            'text': 'dummytext1'
        }, {
            'ontology': '',
            'ontology_label': '',
            'text': 'dummytext2'
        }]
    })
    metadata_entity = {'content': self.content, 'uuid': self.uuid}
    entity_list = [metadata_entity]

    # when
    flattener = Flattener()
    actual = flattener.flatten(Entity.from_json_list(entity_list))

    # then
    project_sheet = self.flattened_metadata_entity['Project']
    project_sheet['values'][0].update({
        'project.diseases.ontology': 'UBERON:0000376',
        'project.diseases.ontology_label': 'dummylabel1',
        'project.diseases.text': 'dummytext1||dummytext2',
    })
    project_sheet['headers'].extend([
        'project.diseases.ontology',
        'project.diseases.ontology_label',
        'project.diseases.text'
    ])
    # Pass (actual, expected) like the other flatten tests in this file so
    # that a failure diff reads the same way for every test.
    self.assertEqual(actual, self.flattened_metadata_entity)
def test_flatten__rows_have_different_columns(self):
    """Flatten two projects of the same schema that populate different fields.

    The expected 'Project' headers contain every column present in either
    row, each row's values contain only the columns that row populated, and
    the schema URL is listed once under 'Schemas'.
    """
    # given
    sparse_project = {
        'content': {
            "describedBy": "https://schema.humancellatlas.org/type/project/14.2.0/project",
            "schema_type": "project",
            "project_core": {
                "project_short_name": "label1",
            }
        },
        'uuid': {
            'uuid': 'uuid1'
        }
    }
    full_project = {
        'content': {
            "describedBy": "https://schema.humancellatlas.org/type/project/14.2.0/project",
            "schema_type": "project",
            "project_core": {
                "project_short_name": "label2",
                "project_title": "title",
                "project_description": "desc"
            }
        },
        'uuid': {
            'uuid': 'uuid2'
        }
    }
    entity_list = list(map(Entity.from_json, [sparse_project, full_project]))

    # when
    actual = Flattener().flatten(entity_list)

    # then
    sparse_row = {
        'project.uuid': 'uuid1',
        'project.project_core.project_short_name': 'label1'
    }
    full_row = {
        'project.uuid': 'uuid2',
        'project.project_core.project_short_name': 'label2',
        'project.project_core.project_title': 'title',
        'project.project_core.project_description': 'desc'
    }
    expected = {
        'Project': {
            'headers': [
                'project.uuid',
                'project.project_core.project_short_name',
                'project.project_core.project_title',
                'project.project_core.project_description'
            ],
            'values': [sparse_row, full_row]
        },
        'Schemas': ['https://schema.humancellatlas.org/type/project/14.2.0/project']
    }
    self.assertEqual(actual, expected)