def archive():
    """Flask endpoint: trigger archiving of an Ingest submission envelope.

    Reads a JSON payload with:
        submission_uuid: required — uuid of the Ingest submission envelope.
        exclude_types:   optional — entity types to skip during archiving.
        alias_prefix:    optional — prefix applied to DSP aliases.

    Returns 400 when submission_uuid is missing. In direct-submission mode
    the archive runs synchronously and its result is returned; otherwise a
    background thread is started and a simple acknowledgement is returned.
    """
    data = request.get_json()
    submission_uuid = data.get('submission_uuid')
    exclude_types = data.get('exclude_types')
    alias_prefix = data.get('alias_prefix')

    if not submission_uuid:
        # Fixed: this message carried an f-string prefix with no placeholders.
        error = {
            'message': 'You must supply the parameter submission_uuid referring to an Ingest submission envelope uuid.'
        }
        return response_json(HTTPStatus.BAD_REQUEST, error)

    if config.DIRECT_SUBMISSION:
        # Synchronous path: archive immediately and report the outcome.
        direct_archiver = direct_archiver_from_config()
        submission = direct_archiver.archive_submission(submission_uuid)
        response = submission.as_dict(string_lists=True)
    else:
        # Asynchronous path: kick off archiving in a background thread so the
        # HTTP request returns promptly.
        ingest_api = IngestAPI(config.INGEST_API_URL)
        archiver = IngestArchiver(ingest_api=ingest_api,
                                  dsp_api=DataSubmissionPortal(config.DSP_API_URL),
                                  exclude_types=exclude_types,
                                  alias_prefix=alias_prefix)
        thread = threading.Thread(target=async_archive,
                                  args=(ingest_api, archiver, submission_uuid))
        thread.start()
        response = {'message': 'successfully triggered!'}

    return jsonify(response)
def test_notify_file_archiver(self, expand_curie):
    """File-archiver notification for a 10x manifest produces one message
    with the expected conversion payload.

    Builds a manifest with two sequence files (the second cloned from the
    first and renamed R2.fastq.gz), converts it, and checks the message
    emitted by notify_file_archiver.
    """
    archive_submission = MagicMock(ArchiveSubmission)
    archive_submission.get_url = MagicMock(return_value='url')

    # Manifest mocked as a 10x library prep so conversion uses the 10xV2 schema.
    mock_manifest = self._mock_manifest(self.base_manifest)
    mock_manifest.get_library_preparation_protocol = MagicMock(
        return_value=self.base_manifest.get(
            'library_preparation_protocol_10x'))

    # Second read file: deep-copy of the first, renamed to R2.
    seq_files = self.base_manifest.get('files')
    seq_file = copy.deepcopy(seq_files[0])
    seq_file['content']['file_core']['file_name'] = "R2.fastq.gz"
    seq_files.append(seq_file)
    mock_manifest.get_files = MagicMock(return_value=seq_files)

    # Deep-copy the shared ingest_api fixture before stubbing so other tests
    # are not affected.
    ingest_api = copy.deepcopy(self.ingest_api)
    ingest_api.get_manifest_by_id = MagicMock(
        return_value={'bundleUuid': 'dcp_uuid'})

    archiver = IngestArchiver(ingest_api=ingest_api,
                              dsp_api=self.dsp_api,
                              ontology_api=self.ontology_api)
    archiver.get_manifest = MagicMock(return_value=mock_manifest)
    entity_map = archiver.convert(['bundle_uuid'])
    archive_submission.converted_entities = list(
        entity_map.get_converted_entities())
    archive_submission.entity_map = entity_map

    messages = archiver.notify_file_archiver(archive_submission)

    # NOTE(review): the second input is named R2.fastq.gz but keeps
    # read_index 'read1' and R1's cloud_url — only the file_name was changed
    # on the cloned file above, so this matches the fixture; confirm the
    # duplication is the intended expectation and not a copy-paste slip.
    expected = {
        "dsp_api_url": 'dsp_url',
        "dcp_bundle_uuid": 'dcp_uuid',
        'submission_url': 'url',
        'files': [{
            'name': 'dummy_manifest_id.bam'
        }],
        'conversion': {
            'output_name': 'dummy_manifest_id.bam',
            'schema': '10xV2',
            'inputs': [{
                'name': 'R1.fastq.gz',
                'read_index': 'read1',
                'cloud_url': 's3://org-humancellatlas-upload-dev/8cd91cfd-0374-454f-ac83-8db6581d2706/R1.fastq.gz'
            }, {
                'name': 'R2.fastq.gz',
                'read_index': 'read1',
                'cloud_url': 's3://org-humancellatlas-upload-dev/8cd91cfd-0374-454f-ac83-8db6581d2706/R1.fastq.gz'
            }]
        },
        'manifest_id': 'dummy_manifest_id'
    }
    self.assertTrue(messages)
    self.assertEqual(expected, messages[0])
def __init__(self, alias_prefix, output_dir, exclude_types, no_validation):
    """Wire up the APIs, output directory and IngestArchiver for a CLI run.

    When no output_dir is given, a UTC-timestamped folder is used so that
    successive runs do not overwrite each other.
    """
    self.manifests = []
    self.ingest_api = IngestAPI(config.INGEST_API_URL)
    self.dsp_api = DataSubmissionPortal(config.DSP_API_URL)

    if output_dir:
        self.output_dir = output_dir
    else:
        timestamp = datetime.datetime.utcnow().strftime("%Y-%m-%dT%H%M%S")
        self.output_dir = f"output/ARCHIVER_{timestamp}"

    self.archiver = IngestArchiver(
        ingest_api=self.ingest_api,
        dsp_api=self.dsp_api,
        exclude_types=self.split_exclude_types(exclude_types),
        alias_prefix=alias_prefix,
        dsp_validation=not no_validation)
def test_get_archivable_entities(self, expand_curie):
    """Converting a manifest groups entities under every expected archive type."""
    manifest = self._mock_manifest(self.base_manifest)
    archiver = IngestArchiver(ontology_api=self.ontology_api,
                              ingest_api=self.ingest_api,
                              dsp_api=self.dsp_api,
                              exclude_types=['sequencingRun'])
    archiver.get_manifest = MagicMock(return_value=manifest)

    entity_map = archiver.convert(['manifest_id'])
    by_type = entity_map.entities_dict_type

    # Each archivable type must have at least one converted entity.
    for entity_type in ('project', 'study', 'sample', 'sequencingExperiment'):
        self.assertTrue(by_type.get(entity_type))
def test_archive(self):
    """A full archive run completes and assigns an accession to every entity."""
    manifest = self._mock_manifest(self.base_manifest)
    archiver = IngestArchiver(ontology_api=self.ontology_api,
                              ingest_api=self.ingest_api,
                              dsp_api=self.dsp_api,
                              exclude_types=['sequencingRun'])
    archiver.get_manifest = MagicMock(return_value=manifest)

    submission = archiver.archive(archiver.convert(['bundle_uuid']))

    self.assertTrue(submission.is_completed)
    # Every converted entity should have received an accession.
    for entity in submission.entity_map.get_entities():
        self.assertTrue(submission.accession_map.get(entity.id),
                        f"{entity.id} has no accession.")
def test_validate_and_complete_submission(self):
    """Metadata archiving followed by complete_submission finishes the
    submission and yields accessions."""
    manifest = self._mock_manifest(self.base_manifest)
    archiver = IngestArchiver(ontology_api=self.ontology_api,
                              ingest_api=self.ingest_api,
                              dsp_api=self.dsp_api,
                              exclude_types=['sequencingRun'])
    archiver.get_manifest = MagicMock(return_value=manifest)

    entity_map = archiver.convert(['bundle_uuid'])
    submission, _ = archiver.archive_metadata(entity_map)

    # Complete the DSP submission via its URL and verify the outcome.
    completed = archiver.complete_submission(
        dsp_submission_url=submission.get_url())
    self.assertTrue(completed.is_completed)
    self.assertTrue(completed.accession_map)
def test_archive_skip_metadata_with_accessions(self):
    """Biomaterials that already carry accessions are skipped: the run
    completes with errors recorded and no processing result."""
    fixture_path = config.JSON_DIR + 'hca/biomaterial_with_accessions.json'
    with open(fixture_path, encoding=config.ENCODING) as data_file:
        biomaterials = json.load(data_file)

    manifest = self._mock_manifest({'biomaterials': biomaterials})
    archiver = IngestArchiver(ontology_api=self.ontology_api,
                              ingest_api=self.ingest_api,
                              dsp_api=self.dsp_api,
                              exclude_types=['sequencingRun'])
    archiver.get_manifest = MagicMock(return_value=manifest)

    submission = archiver.archive(archiver.convert(''))

    self.assertTrue(submission.is_completed)
    self.assertTrue(submission.errors)
    self.assertFalse(submission.processing_result)
def async_complete(dsp_api, dsp_submission_uuid, ingest_api):
    """Finish a DSP submission identified by its uuid.

    Looks up the Ingest-side archive submission record, rebuilds the entity
    map from its related archive entities, and asks the archiver to complete
    the DSP submission. Logs the elapsed wall-clock time.

    Returns the completed ArchiveSubmission.
    """
    logger.info('Starting...')
    start = time.time()

    ingest_record = ingest_api.get_archive_submission_by_dsp_uuid(
        dsp_submission_uuid)
    related_entities = ingest_api.get_related_entity(
        ingest_record, 'entities', 'archiveEntities')
    entity_map = ArchiveEntityMap.map_from_ingest_entities(related_entities)

    submission_url = dsp_api.get_submission_url(dsp_submission_uuid)
    archiver = IngestArchiver(ingest_api=ingest_api, dsp_api=dsp_api)
    archive_submission = archiver.complete_submission(submission_url,
                                                      entity_map)

    end = time.time()
    logger.info(
        f'Completed DSP submission for {dsp_submission_uuid} in {end - start}s'
    )
    return archive_submission