def test_get_all(self, mock_create_session):
    # Expects get_all to yield every entity spread over the two mocked
    # pages: three on the first page plus two on the second.

    # given
    ingest_api = IngestApi(token_manager=self.token_manager)

    first_page_url = 'url?page=0&size=3'
    second_page_url = 'url?page=1&size=3'

    first_page = {
        "page": {
            "size": 3,
            "totalElements": 5,
            "totalPages": 2,
            "number": 0,
        },
        "_embedded": {
            "bundleManifests": [{"attr": "value"} for _ in range(3)],
        },
        # Only the first page advertises a follow-up page.
        "_links": {"next": {'href': second_page_url}},
    }
    second_page = {
        "page": {
            "size": 3,
            "totalElements": 5,
            "totalPages": 2,
            "number": 1,
        },
        "_embedded": {
            "bundleManifests": [{"attr": "value"} for _ in range(2)],
        },
        "_links": {},
    }
    mocked_responses = {
        first_page_url: first_page,
        second_page_url: second_page,
    }

    def fake_get(url, headers):
        # Serve the canned page registered for the requested URL.
        return self._create_mock_response(url, mocked_responses)

    mock_create_session.return_value.get = fake_get

    # when
    entities = ingest_api.get_all(first_page_url, "bundleManifests")

    # then
    fetched = list(entities)
    self.assertEqual(len(fetched), 5)
class IngestHydrator(Hydrator):
    """
    DCP Ingest Service Submission hydrator class.

    Enables importing of HCA Ingest Service submissions by specifying a
    Submission ID.
    """

    # Entity collection name -> key in the flattened entity content whose
    # value becomes the node id. Iteration order (insertion order) is the
    # order in which the collections are fetched.
    _ID_FIELD_MAP = {
        'biomaterials': "biomaterial_core.biomaterial_id",
        'files': "file_core.file_name",
        'processes': "process_core.process_id",
        'projects': "project_core.project_short_name",
        'protocols': "protocol_core.protocol_id",
    }

    # Link name found in an entity's `_links` -> entity type handed to
    # `IngestApi.get_all` when following that link.
    _RELATIONSHIP_MAP = {
        'projects': "projects",
        'protocols': "protocols",
        'inputToProcesses': "processes",
        'derivedByProcesses': "processes",
        'inputBiomaterials': "biomaterials",
        'derivedBiomaterials': "biomaterials",
        'supplementaryFiles': "files",
        'inputFiles': "files",
        'derivedFiles': "files",
    }

    def __init__(self, graph, submission_uuid):
        """
        Load every entity of the project related to a submission.

        Resolves the first project related to the submission, collects the
        entities of all submissions listed for that project, then builds
        the nodes and edges.

        :param graph: passed through unchanged to the ``Hydrator`` base class.
        :param submission_uuid: UUID of the submission used as entry point.
        """
        super().__init__(graph)

        self._logger.info(
            f"Started ingest hydrator for submission [{submission_uuid}]")

        self._ingest_api = IngestApi(Config['INGEST_API'])

        submission = self._ingest_api.get_submission_by_uuid(submission_uuid)
        project_url = submission['_links']['relatedProjects']['href']
        # Only the first related project is considered.
        project = self._ingest_api.get(
            project_url).json()['_embedded']['projects'][0]

        self._logger.info(
            f"Found project for submission {project['uuid']['uuid']}")

        # Entities keyed by their uuid; a later submission overwrites an
        # entity with the same uuid seen in an earlier one.
        self._entities = {}
        for submission in self.fetch_submissions_in_project(project):
            self._logger.info(
                f"Found submission for project with uuid {submission['uuid']['uuid']}"
            )
            for entity in self.build_entities_from_submission(submission):
                self._entities[entity['uuid']] = entity

        self._nodes = self.get_nodes()
        self._edges = self.get_edges()

    def fetch_submissions_in_project(self, project: dict) -> list:
        """
        Return the submission envelopes embedded in the project's
        ``submissionEnvelopes`` link response.

        :param project: project resource containing ``uuid`` and ``_links``.
        :return: the ``_embedded.submissionEnvelopes`` value of the response.
        """
        self._logger.debug(
            f"Fetching submissions for project {project['uuid']['uuid']}")

        submissions_url = project['_links']['submissionEnvelopes']['href']
        response = self._ingest_api.get(submissions_url)
        return response.json()['_embedded']['submissionEnvelopes']

    def build_entities_from_submission(self, submission: dict):
        """
        Yield one entity description per entity found in the submission.

        Each yielded dict has the keys ``properties`` (flattened entity
        content), ``labels`` (lower-cased entity type followed by the last
        path segment of ``describedBy``), ``node_id``, ``links`` and
        ``uuid``.

        :param submission: submission resource containing ``_links.self``.
        """
        submission_url = submission['_links']['self']['href']

        for entity_type, id_field in self._ID_FIELD_MAP.items():
            for entity in self._ingest_api.get_entities(
                    submission_url, entity_type):
                properties = flatten(entity['content'])
                # The concrete type is the trailing segment of the schema
                # URL stored under `describedBy`.
                concrete_type = properties['describedBy'].rsplit('/', 1)[1]

                yield {
                    'properties': properties,
                    'labels': [entity['type'].lower(), concrete_type],
                    'node_id': properties[id_field],
                    'links': entity['_links'],
                    'uuid': entity['uuid']['uuid'],
                }

    @benchmark
    def get_nodes(self):
        """
        Build one ``Node`` per collected entity.

        :return: dict mapping entity uuid to its ``Node``.
        """
        self._logger.debug("importing nodes")

        nodes = {}
        for entity_uuid, entity in self._entities.items():
            node_id = entity['node_id']
            # NOTE(review): a content property named `uuid`, `self_link` or
            # `id` would collide with the explicit keyword arguments below.
            nodes[entity_uuid] = Node(
                *entity['labels'],
                **entity['properties'],
                uuid=entity['uuid'],
                self_link=entity['links']['self']['href'],
                id=node_id)
            self._logger.debug(f"({node_id})")

        self._logger.info(f"imported {len(nodes)} nodes")
        return nodes

    @benchmark
    def get_edges(self):
        """
        Build the relationships between the nodes created by ``get_nodes``.

        For every known link on every entity the linked entities are
        fetched; an edge is created for each linked entity that has a node.
        Linked entities without a node are logged and skipped.

        :return: list of ``Relationship`` objects.
        """
        self._logger.debug("importing edges")

        edges = []
        for entity_uuid, entity in self._entities.items():
            start_node = self._nodes[entity_uuid]
            links = entity['links']

            for relationship_type, target_type in self._RELATIONSHIP_MAP.items():
                if relationship_type not in links:
                    continue

                relationship_name = convert_to_macrocase(relationship_type)
                related_entities = self._ingest_api.get_all(
                    links[relationship_type]['href'], target_type)

                for end_entity in related_entities:
                    # Guard only the lookup: a KeyError raised anywhere else
                    # must not be misreported as a missing end node.
                    try:
                        end_node = self._nodes[end_entity['uuid']['uuid']]
                    except KeyError:
                        self._logger.debug(
                            f"Missing end node at a [{start_node['id']}] entity."
                        )
                        continue

                    edges.append(
                        Relationship(start_node, relationship_name, end_node))

                    # Adding additional relationships to the graphs.
                    if relationship_name == 'INPUT_TO_PROCESSES':
                        edges.append(
                            Relationship(start_node,
                                         'DUMMY_EXPERIMENTAL_DESIGN',
                                         end_node))
                    if relationship_name == 'DERIVED_BY_PROCESSES':
                        # Reversed direction relative to the primary edge.
                        edges.append(
                            Relationship(end_node,
                                         'DUMMY_EXPERIMENTAL_DESIGN',
                                         start_node))

                    self._logger.debug(
                        f"({start_node['id']})-[:{relationship_name}]->({end_node['id']})"
                    )

        self._logger.info(f"imported {len(edges)} edges")
        return edges