Beispiel #1
0
    def test_notify_file_archiver(self, expand_curie):
        """Check the first file-archiver message built for a two-file manifest.

        A second sequence file named "R2.fastq.gz" is cloned from the first
        fixture file, the manifest is converted through an ``IngestArchiver``
        wired to mocked collaborators, and the first message returned by
        ``notify_file_archiver`` is compared with the expected payload.

        ``expand_curie`` is not used in the body; presumably it is injected by
        a patch decorator outside this view.
        """
        submission = MagicMock(ArchiveSubmission)
        submission.get_url = MagicMock(return_value='url')

        manifest = self._mock_manifest(self.base_manifest)
        protocol_10x = self.base_manifest.get('library_preparation_protocol_10x')
        manifest.get_library_preparation_protocol = MagicMock(
            return_value=protocol_10x)

        # Clone the first file and rename it; the clone is appended to the
        # fixture's own 'files' list (in place).
        files = self.base_manifest.get('files')
        second_file = copy.deepcopy(files[0])
        second_file['content']['file_core']['file_name'] = "R2.fastq.gz"
        files.append(second_file)
        manifest.get_files = MagicMock(return_value=files)

        api = copy.deepcopy(self.ingest_api)
        api.get_manifest_by_id = MagicMock(
            return_value={'bundleUuid': 'dcp_uuid'})

        archiver = IngestArchiver(ingest_api=api,
                                  dsp_api=self.dsp_api,
                                  ontology_api=self.ontology_api)
        archiver.get_manifest = MagicMock(return_value=manifest)

        converted = archiver.convert(['bundle_uuid'])
        submission.converted_entities = list(converted.get_converted_entities())
        submission.entity_map = converted

        messages = archiver.notify_file_archiver(submission)

        # Both expected inputs carry the same read index and cloud URL --
        # presumably because only the file name was changed on the clone.
        bam_name = 'dummy_manifest_id.bam'
        r1_cloud_url = 's3://org-humancellatlas-upload-dev/8cd91cfd-0374-454f-ac83-8db6581d2706/R1.fastq.gz'
        expected_inputs = [
            {'name': 'R1.fastq.gz', 'read_index': 'read1', 'cloud_url': r1_cloud_url},
            {'name': 'R2.fastq.gz', 'read_index': 'read1', 'cloud_url': r1_cloud_url},
        ]
        expected = {
            "dsp_api_url": 'dsp_url',
            "dcp_bundle_uuid": 'dcp_uuid',
            'submission_url': 'url',
            'files': [{'name': bam_name}],
            'conversion': {
                'output_name': bam_name,
                'schema': '10xV2',
                'inputs': expected_inputs,
            },
            'manifest_id': 'dummy_manifest_id',
        }

        self.assertTrue(messages)
        self.assertEqual(expected, messages[0])
Beispiel #2
0
 def __init__(self, alias_prefix, output_dir, exclude_types, no_validation):
     """Create the API clients and the archiver used by this object.

     :param alias_prefix: forwarded to ``IngestArchiver`` as ``alias_prefix``.
     :param output_dir: directory stored on ``self.output_dir``; when falsy a
         timestamped ``output/ARCHIVER_<UTC time>`` path is used instead.
     :param exclude_types: passed through ``self.split_exclude_types`` before
         being handed to ``IngestArchiver`` as ``exclude_types``.
     :param no_validation: when truthy, ``dsp_validation`` is set to False.
     """
     self.manifests = []
     self.ingest_api = IngestAPI(config.INGEST_API_URL)
     self.dsp_api = DataSubmissionPortal(config.DSP_API_URL)
     # datetime.utcnow() is deprecated (Python 3.12+); an aware UTC "now"
     # formats to exactly the same string with this pattern.
     now = datetime.datetime.now(datetime.timezone.utc).strftime("%Y-%m-%dT%H%M%S")
     self.output_dir = output_dir if output_dir else f"output/ARCHIVER_{now}"
     self.archiver = IngestArchiver(ingest_api=self.ingest_api,
                                    dsp_api=self.dsp_api,
                                    exclude_types=self.split_exclude_types(exclude_types),
                                    alias_prefix=alias_prefix,
                                    dsp_validation=not no_validation)
Beispiel #3
0
    def test_get_archivable_entities(self, expand_curie):
        """Converting with 'sequencingRun' excluded still yields the other types.

        ``expand_curie`` is unused here; presumably supplied by a patch
        decorator outside this view.
        """
        manifest = self._mock_manifest(self.base_manifest)

        archiver = IngestArchiver(exclude_types=['sequencingRun'],
                                  dsp_api=self.dsp_api,
                                  ingest_api=self.ingest_api,
                                  ontology_api=self.ontology_api)
        archiver.get_manifest = MagicMock(return_value=manifest)

        by_type = archiver.convert(['manifest_id']).entities_dict_type

        # Each of these entity types must have a truthy entry in the map.
        for entity_type in ('project', 'study', 'sample', 'sequencingExperiment'):
            self.assertTrue(by_type.get(entity_type))
Beispiel #4
0
    def test_archive(self):
        """Archiving a converted manifest completes and accessions every entity."""
        manifest = self._mock_manifest(self.base_manifest)

        archiver = IngestArchiver(exclude_types=['sequencingRun'],
                                  dsp_api=self.dsp_api,
                                  ingest_api=self.ingest_api,
                                  ontology_api=self.ontology_api)
        archiver.get_manifest = MagicMock(return_value=manifest)

        converted = archiver.convert(['bundle_uuid'])
        submission = archiver.archive(converted)

        self.assertTrue(submission.is_completed)

        # Every entity in the submission's map must have an accession recorded
        # under its id.
        for archived in submission.entity_map.get_entities():
            accession = submission.accession_map.get(archived.id)
            self.assertTrue(accession, f"{archived.id} has no accession.")
Beispiel #5
0
def archive():
    """Handle a request to archive an Ingest submission envelope.

    Reads ``submission_uuid``, ``exclude_types`` and ``alias_prefix`` from the
    JSON request body.  When ``submission_uuid`` is missing, the result of
    ``response_json(HTTPStatus.BAD_REQUEST, ...)`` is returned.

    With ``config.DIRECT_SUBMISSION`` set, the submission is archived in this
    call through the direct archiver and returned as a dict.  Otherwise an
    ``IngestArchiver`` is built and ``async_archive`` is started on a
    background thread, and a confirmation message is returned right away.
    """
    data = request.get_json()
    if not isinstance(data, dict):
        # A missing body, JSON ``null`` or a non-object payload has no
        # parameters to read; treat it as empty so the caller receives the
        # regular "missing submission_uuid" error instead of an
        # AttributeError on ``.get``.
        data = {}

    submission_uuid = data.get('submission_uuid')
    exclude_types = data.get('exclude_types')
    alias_prefix = data.get('alias_prefix')

    if not submission_uuid:
        error = {
            'message':
            'You must supply the parameter submission_uuid referring to an Ingest submission envelope uuid.'
        }
        return response_json(HTTPStatus.BAD_REQUEST, error)

    if config.DIRECT_SUBMISSION:
        direct_archiver = direct_archiver_from_config()
        submission = direct_archiver.archive_submission(submission_uuid)
        response = submission.as_dict(string_lists=True)
    else:
        ingest_api = IngestAPI(config.INGEST_API_URL)
        archiver = IngestArchiver(ingest_api=ingest_api,
                                  dsp_api=DataSubmissionPortal(
                                      config.DSP_API_URL),
                                  exclude_types=exclude_types,
                                  alias_prefix=alias_prefix)

        # Archiving runs in the background; the response below only confirms
        # that the thread was started.
        thread = threading.Thread(target=async_archive,
                                  args=(ingest_api, archiver, submission_uuid))
        thread.start()

        response = {'message': 'successfully triggered!'}

    return jsonify(response)
Beispiel #6
0
    def test_archive_skip_metadata_with_accessions(self):
        """Archive a manifest built only from the 'biomaterial_with_accessions' fixture.

        The resulting submission is expected to be completed, to carry errors
        and to have no processing result.
        """
        fixture_path = config.JSON_DIR + 'hca/biomaterial_with_accessions.json'
        with open(fixture_path, encoding=config.ENCODING) as fixture:
            biomaterials = json.load(fixture)

        manifest = self._mock_manifest({'biomaterials': biomaterials})

        archiver = IngestArchiver(exclude_types=['sequencingRun'],
                                  dsp_api=self.dsp_api,
                                  ingest_api=self.ingest_api,
                                  ontology_api=self.ontology_api)
        archiver.get_manifest = MagicMock(return_value=manifest)

        converted = archiver.convert('')
        submission = archiver.archive(converted)

        self.assertTrue(submission.is_completed)
        self.assertTrue(submission.errors)
        self.assertFalse(submission.processing_result)
Beispiel #7
0
def async_complete(dsp_api, dsp_submission_uuid, ingest_api):
    """Complete the DSP submission identified by ``dsp_submission_uuid``.

    Looks up the matching Ingest archive submission and its archive entities,
    rebuilds an ``ArchiveEntityMap`` from them, asks an ``IngestArchiver`` to
    complete the submission at the DSP submission URL, and logs the elapsed
    time.

    :return: whatever ``IngestArchiver.complete_submission`` returns.
    """
    logger.info('Starting...')
    started = time.time()

    tracked_submission = ingest_api.get_archive_submission_by_dsp_uuid(
        dsp_submission_uuid)
    tracked_entities = ingest_api.get_related_entity(
        tracked_submission, 'entities', 'archiveEntities')
    entity_map = ArchiveEntityMap.map_from_ingest_entities(tracked_entities)

    submission_url = dsp_api.get_submission_url(dsp_submission_uuid)
    completed = IngestArchiver(ingest_api=ingest_api, dsp_api=dsp_api).complete_submission(
        submission_url, entity_map)

    finished = time.time()
    logger.info(
        f'Completed DSP submission for {dsp_submission_uuid} in {finished - started}s'
    )
    return completed
Beispiel #8
0
def async_archive(ingest_api: IngestAPI, archiver: IngestArchiver,
                  submission_uuid: str):
    """Convert and archive the metadata of one Ingest submission.

    Fetches the submission's manifest ids, converts them into an entity map,
    archives the metadata, notifies the file archiver and finally patches the
    tracked archive submission with the submission uuid and the file upload
    plan.  Any exception raised after the manifest lookup is logged and then
    re-raised.
    """
    logger.info('Starting...')
    started = time.time()
    manifest_ids = ingest_api.get_manifest_ids_from_submission(submission_uuid)

    try:
        converted = archiver.convert(manifest_ids)
        dsp_submission, tracker = archiver.archive_metadata(converted)
        archiver.notify_file_archiver(dsp_submission)

        # The upload plan is read only after the file archiver was notified.
        patch = {
            'submissionUuid': submission_uuid,
            'fileUploadPlan': dsp_submission.file_upload_info,
        }
        tracker.patch_archive_submission(patch)

        finished = time.time()
        logger.info(
            f'Creating DSP submission for {submission_uuid} finished in {finished - started}s'
        )
    except Exception as error:
        logger.exception(error)
        raise
Beispiel #9
0
    def test_validate_and_complete_submission(self):
        """Completing a submission created by ``archive_metadata`` yields accessions."""
        manifest = self._mock_manifest(self.base_manifest)

        archiver = IngestArchiver(exclude_types=['sequencingRun'],
                                  dsp_api=self.dsp_api,
                                  ingest_api=self.ingest_api,
                                  ontology_api=self.ontology_api)
        archiver.get_manifest = MagicMock(return_value=manifest)

        converted = archiver.convert(['bundle_uuid'])
        # Only the submission is needed; the second returned item is ignored.
        created, _ = archiver.archive_metadata(converted)
        submission_url = created.get_url()

        completed = archiver.complete_submission(
            dsp_submission_url=submission_url)

        self.assertTrue(completed.is_completed)
        self.assertTrue(completed.accession_map)
Beispiel #10
0
class ArchiveCLI:
    """Command-line helper that drives an ``IngestArchiver`` and saves JSON reports.

    Manifest ids are collected into ``self.manifests`` by one of the
    ``get_manifests_from_*`` methods; the remaining methods convert them,
    archive/validate/complete submissions and write report files into
    ``self.output_dir``.
    """

    def __init__(self, alias_prefix, output_dir, exclude_types, no_validation):
        """Create the API clients and the archiver.

        :param alias_prefix: forwarded to ``IngestArchiver`` as ``alias_prefix``.
        :param output_dir: report directory; when falsy a timestamped
            ``output/ARCHIVER_<UTC time>`` path is used.
        :param exclude_types: comma-separated type names (or a falsy value),
            parsed by :meth:`split_exclude_types`.
        :param no_validation: when truthy, ``dsp_validation`` is set to False.
        """
        self.manifests = []
        self.ingest_api = IngestAPI(config.INGEST_API_URL)
        self.dsp_api = DataSubmissionPortal(config.DSP_API_URL)
        # datetime.utcnow() is deprecated (Python 3.12+); an aware UTC "now"
        # formats to exactly the same string with this pattern.
        now = datetime.datetime.now(datetime.timezone.utc).strftime("%Y-%m-%dT%H%M%S")
        self.output_dir = output_dir if output_dir else f"output/ARCHIVER_{now}"
        self.archiver = IngestArchiver(ingest_api=self.ingest_api,
                                       dsp_api=self.dsp_api,
                                       exclude_types=self.split_exclude_types(exclude_types),
                                       alias_prefix=alias_prefix,
                                       dsp_validation=not no_validation)

    def get_manifests_from_project(self, project_uuid):
        """Store the manifest ids that the Ingest API returns for a project."""
        logging.info(f'GETTING MANIFESTS FOR PROJECT: {project_uuid}')
        self.manifests = self.ingest_api.get_manifest_ids_from_project(project_uuid=project_uuid)

    def get_manifests_from_submission(self, submission_uuid):
        """Store the manifest ids that the Ingest API returns for a submission."""
        logging.info(f'GETTING MANIFESTS FOR SUBMISSION: {submission_uuid}')
        self.manifests = self.ingest_api.get_manifest_ids_from_submission(submission_uuid)

    def get_manifests_from_list(self, manifest_list_file):
        """Store the manifest ids listed one per line in ``manifest_list_file``.

        Surrounding whitespace is stripped and blank lines are ignored, so a
        trailing or stray empty line does not become an empty manifest id.
        """
        logging.info(f'GETTING MANIFESTS FROM FILE: {manifest_list_file}')
        with open(manifest_list_file) as f:
            stripped_lines = [line.strip() for line in f]
        self.manifests = [manifest_id for manifest_id in stripped_lines if manifest_id]

    def complete_submission(self, dsp_submission_url):
        """Complete the DSP submission at ``dsp_submission_url`` and save its report.

        The entity map is rebuilt from the archive entities that Ingest holds
        for the submission's DSP uuid; the report is written to
        ``COMPLETE_SUBMISSION_<dsp uuid>.json``.
        """
        logging.info(f'##################### COMPLETING DSP SUBMISSION {dsp_submission_url}')
        # This first ArchiveSubmission is only used to obtain the DSP uuid.
        archive_submission = ArchiveSubmission(dsp_api=self.archiver.dsp_api, dsp_submission_url=dsp_submission_url)
        ingest_archive_submission = self.ingest_api.get_archive_submission_by_dsp_uuid(archive_submission.dsp_uuid)
        ingest_entities = self.ingest_api.get_related_entity(ingest_archive_submission, 'entities', 'archiveEntities')
        entity_map = ArchiveEntityMap.map_from_ingest_entities(ingest_entities)
        archive_submission = self.archiver.complete_submission(dsp_submission_url, entity_map)
        report = archive_submission.generate_report()
        self.save_dict_to_file(f'COMPLETE_SUBMISSION_{archive_submission.dsp_uuid}', report)

    def build_map(self):
        """Convert ``self.manifests`` into an entity map and save its report.

        :return: the entity map returned by ``self.archiver.convert``.
        """
        logging.info(f'Processing {len(self.manifests)} manifests:\n' + "\n".join(map(str, self.manifests)))

        entity_map: ArchiveEntityMap = self.archiver.convert(self.manifests)
        summary = entity_map.get_conversion_summary()
        logging.info(f'Entities to be converted: {json.dumps(summary)}')

        report = entity_map.generate_report()
        logging.info("Saving Report file...")
        self.save_dict_to_file("REPORT", report)
        return entity_map

    def load_map(self, load_path):
        """Rebuild an entity map from the 'entities' object of a saved report.

        :raises SystemExit: with code 2 when the file cannot be loaded or has
            no truthy ``entities`` value.
        """
        # Local import keeps this block self-contained; the bare ``exit``
        # helper comes from the ``site`` module and is not meant for programs.
        import sys

        logging.info(f'Loading Entity Map: {load_path}')
        file_content: dict = self.load_dict_from_file(load_path)
        if file_content.get('entities'):
            return ArchiveEntityMap.map_from_report(file_content['entities'])
        logging.error(f"--load_path files does not have an entities object: {file_content}")
        sys.exit(2)

    def validate_submission(self, entity_map: 'ArchiveEntityMap', submit, ingest_submission_uuid=None):
        """Archive metadata for ``entity_map`` and validate (optionally submit) it.

        Archives the metadata, notifies the file archiver, patches the Ingest
        archive submission with the submission uuid and the file upload plan,
        saves the ``REPORT`` and ``FILE_UPLOAD_INFO`` files, and finally calls
        ``validate_and_submit()`` when ``submit`` is truthy or ``validate()``
        otherwise.
        """
        archive_submission, ingest_archive_submission = self.archiver.archive_metadata(entity_map)
        all_messages = self.archiver.notify_file_archiver(archive_submission)
        ingest_archive_submission.patch_archive_submission({
            'submissionUuid': ingest_submission_uuid,
            'fileUploadPlan': archive_submission.file_upload_info
        })
        report = archive_submission.generate_report()
        logging.info("Updating Report file...")
        self.save_dict_to_file("REPORT", report)

        logging.info("##################### FILE ARCHIVER NOTIFICATION")
        self.save_dict_to_file("FILE_UPLOAD_INFO", {"jobs": all_messages})
        if submit:
            archive_submission.validate_and_submit()
        else:
            archive_submission.validate()

    def generate_validation_error_report(self, dsp_submission_url):
        """Save the validation error report of the DSP submission at the given URL."""
        submission = ArchiveSubmission(dsp_api=self.archiver.dsp_api, dsp_submission_url=dsp_submission_url)
        self.save_dict_to_file("VALIDATION_ERROR_REPORT", submission.get_validation_error_report())

    def save_dict_to_file(self, file_name, json_content):
        """Write ``json_content`` as indented JSON to ``<output_dir>/<file_name>.json``.

        Does nothing when ``self.output_dir`` is falsy.  The output directory
        is created when missing and an existing file of the same name is
        replaced.
        """
        if not self.output_dir:
            return

        directory = os.path.abspath(self.output_dir)
        # exist_ok avoids the check-then-create race of a separate exists() test.
        os.makedirs(directory, exist_ok=True)

        target = f"{directory}/{file_name}.json"
        if os.path.exists(target):
            os.remove(target)

        # The ``with`` block closes the file; no explicit close() is needed.
        with open(target, "w") as open_file:
            json.dump(json_content, open_file, indent=4)

        logging.info(f"Saved to {target}!")

    @staticmethod
    def load_dict_from_file(file_path):
        """Load and return the JSON content of ``file_path``.

        :raises SystemExit: with code 2 when the path does not exist or is not
            a regular file.
        """
        # Local import keeps this block self-contained; the bare ``exit``
        # helper comes from the ``site`` module and is not meant for programs.
        import sys

        path = os.path.abspath(file_path)
        if not os.path.isfile(path):
            logging.error(f"--load_path does not exist or is not a file: {file_path}")
            sys.exit(2)

        with open(path, 'r', encoding='utf-8') as open_file:
            return json.load(open_file)

    @staticmethod
    def split_exclude_types(exclude_types):
        """Split a comma-separated string of type names into a stripped list.

        A falsy ``exclude_types`` (``None`` or ``''``) is returned unchanged.
        """
        if exclude_types:
            exclude_types = [x.strip() for x in exclude_types.split(',')]
            logging.warning(f"Excluding {', '.join(exclude_types)}")
        return exclude_types