class IngestHydrator(Hydrator):
    """
    DCP Ingest Service Submission hydrator class.

    Enables importing of HCA Ingest Service submissions by specifying a Submission ID.
    """

    def __init__(self, graph, submission_uuid):
        """Load every entity of the project related to *submission_uuid*.

        The submission is resolved to the first of its related projects, all
        submission envelopes of that project are fetched, and the entities of
        each envelope are collected (keyed by entity UUID) before the nodes and
        edges are built.

        Args:
            graph: Passed through unchanged to the ``Hydrator`` base class.
            submission_uuid: UUID of a submission known to the Ingest API.
        """
        super().__init__(graph)

        # NOTE(review): self._logger is presumably set up by Hydrator.__init__.
        self._logger.info(f"Started ingest hydrator for submission [{submission_uuid}]")

        self._ingest_api = IngestApi(Config['INGEST_API'])

        submission = self._ingest_api.get_submission_by_uuid(submission_uuid)
        project_url = submission['_links']['relatedProjects']['href']
        # Only the first related project is used.
        project = self._ingest_api.get(project_url).json()['_embedded']['projects'][0]

        self._logger.info(f"Found project for submission {project['uuid']['uuid']}")

        self._entities = {}
        for submission in self.fetch_submissions_in_project(project):
            self._logger.info(
                f"Found submission for project with uuid {submission['uuid']['uuid']}")
            for entity in self.build_entities_from_submission(submission):
                self._entities[entity['uuid']] = entity

        self._nodes = self.get_nodes()
        self._edges = self.get_edges()

    def fetch_submissions_in_project(self, project: dict) -> list:
        """Return the submission envelopes (a list of dicts) linked from *project*."""
        self._logger.debug(f"Fetching submissions for project {project['uuid']['uuid']}")

        envelopes_url = project['_links']['submissionEnvelopes']['href']
        return self._ingest_api.get(envelopes_url).json()['_embedded']['submissionEnvelopes']

    def build_entities_from_submission(self, submission: dict):
        """Yield one entity dict per metadata document in *submission*.

        Each yielded dict has the keys ``properties`` (the flattened document
        content), ``labels`` (the lower-cased entity type followed by the last
        path segment of ``describedBy``), ``node_id``, ``links`` and ``uuid``.
        """
        # Entity type -> flattened content key holding the human-readable id.
        # The mapping's iteration order is also the order types are fetched in.
        id_field_map = {
            'biomaterials': "biomaterial_core.biomaterial_id",
            'files': "file_core.file_name",
            'processes': "process_core.process_id",
            'projects': "project_core.project_short_name",
            'protocols': "protocol_core.protocol_id",
        }
        submission_url = submission['_links']['self']['href']

        for entity_type, id_field in id_field_map.items():
            for entity in self._ingest_api.get_entities(submission_url, entity_type):
                properties = flatten(entity['content'])
                # Last path segment of the describedBy value names the concrete type.
                concrete_type = properties['describedBy'].rsplit('/', 1)[1]

                yield {
                    'properties': properties,
                    'labels': [entity['type'].lower(), concrete_type],
                    'node_id': properties[id_field],
                    'links': entity['_links'],
                    'uuid': entity['uuid']['uuid'],
                }

    @benchmark
    def get_nodes(self):
        """Build one ``Node`` per collected entity and return them keyed by entity UUID."""
        self._logger.debug("importing nodes")

        nodes = {}
        for entity_uuid, entity in self._entities.items():
            node_id = entity['node_id']
            nodes[entity_uuid] = Node(*entity['labels'],
                                      **entity['properties'],
                                      uuid=entity['uuid'],
                                      self_link=entity['links']['self']['href'],
                                      id=node_id)
            self._logger.debug(f"({node_id})")

        self._logger.info(f"imported {len(nodes)} nodes")
        return nodes

    @benchmark
    def get_edges(self):
        """Build the ``Relationship`` objects between the nodes from ``get_nodes``.

        For every entity, each known link type is followed through the Ingest
        API and an edge named after the macro-cased link type is created to
        every related entity that has a node. ``INPUT_TO_PROCESSES`` and
        ``DERIVED_BY_PROCESSES`` links additionally produce a
        ``DUMMY_EXPERIMENTAL_DESIGN`` edge (reversed for the latter). Related
        entities without a node are logged at debug level and skipped.

        Returns:
            list: The created relationships.
        """
        self._logger.debug("importing edges")

        edges = []
        # Link name on the entity -> second argument passed to IngestApi.get_all.
        relationship_map = {
            'projects': "projects",
            'protocols': "protocols",
            'inputToProcesses': "processes",
            'derivedByProcesses': "processes",
            'inputBiomaterials': "biomaterials",
            'derivedBiomaterials': "biomaterials",
            'supplementaryFiles': "files",
            'inputFiles': "files",
            'derivedFiles': "files",
        }

        for entity_uuid, entity in self._entities.items():
            start_node = self._nodes[entity_uuid]
            links = entity['links']

            for relationship_type, target_type in relationship_map.items():
                if relationship_type not in links:
                    continue

                relationship_name = convert_to_macrocase(relationship_type)
                relationships = self._ingest_api.get_all(
                    links[relationship_type]['href'], target_type)

                for end_entity in relationships:
                    # Only the lookup is guarded, so unrelated KeyErrors are
                    # not misreported as a missing end node.
                    try:
                        end_node = self._nodes[end_entity['uuid']['uuid']]
                    except KeyError:
                        self._logger.debug(
                            f"Missing end node at a [{start_node['id']}] entity.")
                        continue

                    edges.append(Relationship(start_node, relationship_name, end_node))

                    # Adding additional relationships to the graphs.
                    if relationship_name == 'INPUT_TO_PROCESSES':
                        edges.append(
                            Relationship(start_node, 'DUMMY_EXPERIMENTAL_DESIGN', end_node))
                    if relationship_name == 'DERIVED_BY_PROCESSES':
                        edges.append(
                            Relationship(end_node, 'DUMMY_EXPERIMENTAL_DESIGN', start_node))

                    self._logger.debug(
                        f"({start_node['id']})-[:{relationship_name}]->({end_node['id']})")

        self._logger.info(f"imported {len(edges)} edges")
        return edges
class ValidationListener(ConsumerMixin):
    """Queue consumer that runs graph validation for submission envelopes.

    Each message names a submission by UUID. The listener fetches it from the
    Ingest API, runs a ``ValidationHandler`` against the graph, reports the
    outcome back to the Ingest API and always acknowledges the message.
    """

    def __init__(self, connection, validation_queue, graph, test_path):
        """Store collaborators and create the Ingest API client.

        Args:
            connection: Stored as ``self.connection``.
                NOTE(review): presumably the broker connection used by
                ``ConsumerMixin`` - confirm.
            validation_queue: Queue(s) handed to the consumer in ``get_consumers``.
            graph: Graph object passed to ``ValidationHandler`` and cleared
                after every validation attempt.
            test_path: Passed through to ``ValidationHandler``.
        """
        self.connection = connection
        self.validation_queue = validation_queue
        self._graph = graph
        self._test_path = test_path

        # An unauthenticated client is used for the local API or when either
        # credential setting is empty; otherwise a token manager is attached.
        if Config["INGEST_API"] == "http://localhost:8080" or not (
                Config["GOOGLE_APPLICATION_CREDENTIALS"] and Config["INGEST_JWT_AUDIENCE"]):
            self._ingest_api = IngestApi(Config['INGEST_API'])
        else:
            s2s_token_client = S2STokenClient(
                credential=ServiceCredential.from_file(Config['GOOGLE_APPLICATION_CREDENTIALS']),
                audience=Config['INGEST_JWT_AUDIENCE'])
            token_manager = TokenManager(s2s_token_client)
            self._ingest_api = IngestApi(Config['INGEST_API'], token_manager=token_manager)

        self._logger = logging.getLogger(__name__)

    def get_consumers(self, consumer, channel):
        """Return a single consumer on the validation queue that calls ``handle_message``."""
        return [
            consumer(queues=self.validation_queue,
                     accept=["application/json;charset=UTF-8", "json"],
                     on_message=self.handle_message,
                     prefetch_count=10)
        ]

    def __patch_entity(self, message, entity_link):
        """Append *message* to the ``graphValidationErrors`` of the entity at *entity_link*."""
        entity = self._ingest_api.get(entity_link).json()
        # Treat an absent field the same as a null one instead of raising KeyError.
        errors = entity.get("graphValidationErrors") or []
        errors.append(message)
        self._ingest_api.patch(entity_link, {"graphValidationErrors": errors})

    def __attempt_validation(self, submission, sub_uuid):
        """Validate *submission* and report the result to the Ingest API.

        A submission already in the ``Graph validating`` state is rejected.
        Otherwise the validating event is sent, the validation runs, every
        failure message is patched onto its affected entities, and the valid or
        invalid event is sent. Any exception is logged and not re-raised. The
        graph is cleared afterwards in every case.
        """
        try:
            submission_url = submission["_links"]["self"]["href"]
            if submission["submissionState"] == "Graph validating":
                raise RuntimeError(
                    f"Cannot perform validation on submission {sub_uuid} as it is already validating."
                )

            self._ingest_api.put(f'{submission_url}/graphValidatingEvent', data=None)

            validation_result = ValidationHandler(sub_uuid, self._graph, self._test_path).run()

            if validation_result is not None:
                self._logger.info(f"validation finished for {sub_uuid}")
                if validation_result["valid"]:
                    self._ingest_api.put(f'{submission_url}/graphValidEvent', data=None)
                else:
                    for failure in validation_result["failures"]:
                        for entity in failure['affectedEntities']:
                            self.__patch_entity(failure['message'], entity['link'])
                    self._ingest_api.put(f'{submission_url}/graphInvalidEvent', data=None)

            self._logger.info(f'Finished validating {sub_uuid}.')
        except Exception as e:
            # logger.exception keeps the traceback alongside the message.
            self._logger.exception(f"Failed validation with error {e}.")
            # TODO add endpoint to restore submission to metadata valid and log error

        self._graph.delete_all()

    def handle_message(self, message):
        """Process one queue message and acknowledge it.

        The payload must be JSON with ``documentUuid`` and a ``documentType``
        of ``submissionenvelope``. Failures are logged, never raised, and the
        message is acknowledged regardless of the outcome.
        """
        try:
            payload = json.loads(message.payload)
            sub_uuid = payload['documentUuid']

            if payload["documentType"] != "submissionenvelope":
                raise RuntimeError(
                    f"Cannot process document since is not a submission envelope. UUID: {sub_uuid}"
                )

            self._logger.info(f"received validation request for {sub_uuid}")
            submission = self._ingest_api.get_submission_by_uuid(sub_uuid)
            self.__attempt_validation(submission, sub_uuid)
        except Exception as e:
            self._logger.exception(f"Failed handling with error {e}.")

        message.ack()