def get_latest_archive_submission(ingest_submission_uuid):
    """Return the latest archive submission for an Ingest submission uuid.

    Responds with 404 JSON when none exists; otherwise strips the HAL
    '_links' section and returns the record as JSON.
    """
    api = IngestAPI(config.INGEST_API_URL)
    submission = api.get_latest_archive_submission_by_submission_uuid(
        ingest_submission_uuid)
    if submission:
        # pop without a default raises KeyError like 'del' would
        submission.pop('_links')
        return jsonify(submission)
    return response_json(HTTPStatus.NOT_FOUND, None)
def __init__(self, alias_prefix, output_dir, exclude_types, no_validation):
    """Set up the Ingest/DSP API clients and the IngestArchiver.

    :param alias_prefix: forwarded to IngestArchiver as the DSP alias prefix
    :param output_dir: directory for report output; defaults to a
        timestamped 'output/ARCHIVER_<timestamp>' directory when falsy
    :param exclude_types: comma-separated entity types to skip (split here)
    :param no_validation: when truthy, DSP validation is disabled
    """
    self.manifests = []
    self.ingest_api = IngestAPI(config.INGEST_API_URL)
    self.dsp_api = DataSubmissionPortal(config.DSP_API_URL)
    # datetime.utcnow() is deprecated (Python 3.12+); an aware UTC datetime
    # yields the same strftime output here since the format has no offset field.
    now = datetime.datetime.now(datetime.timezone.utc).strftime("%Y-%m-%dT%H%M%S")
    self.output_dir = output_dir if output_dir else f"output/ARCHIVER_{now}"
    self.archiver = IngestArchiver(ingest_api=self.ingest_api,
                                   dsp_api=self.dsp_api,
                                   exclude_types=self.split_exclude_types(exclude_types),
                                   alias_prefix=alias_prefix,
                                   dsp_validation=not no_validation)
def sendFile(dsp_submission_uuid: str):
    """Download the file-upload plan for a DSP submission as a JSON attachment.

    Fetches the archive submission by its DSP uuid, wraps its
    'fileUploadPlan' under a 'jobs' key, and returns it with a
    Content-Disposition header so browsers save it as a named file.
    """
    ingest_api = IngestAPI(config.INGEST_API_URL)
    ingest_archive_submission = ingest_api.get_archive_submission_by_dsp_uuid(
        dsp_submission_uuid)
    jobs = ingest_archive_submission.get('fileUploadPlan')
    content = json.dumps({'jobs': jobs}, indent=4)
    filename = f'FILE_UPLOAD_PLAN_{dsp_submission_uuid}.json'
    return Response(
        content,
        mimetype='application/json',
        # Bug fix: the computed filename was never interpolated into the
        # header, so downloads got a placeholder name instead of the plan's.
        headers={'Content-Disposition': f'attachment;filename={filename}'})
def delete_archive_submission(dsp_submission_uuid: str):
    """Delete a submission from the DSP and from Ingest's archive records.

    Looks the record up by DSP uuid, deletes the DSP-side submission first,
    then removes the Ingest archive-submission resource, returning the
    deletion response as 200 JSON.
    """
    ingest_api = IngestAPI(config.INGEST_API_URL)
    dsp_api = DataSubmissionPortal(config.DSP_API_URL)
    submission = ingest_api.get_archive_submission_by_dsp_uuid(dsp_submission_uuid)

    dsp_url = submission['dspUrl']
    dsp_api.delete_submission(dsp_url)
    logger.info(f'Deleting DSP submission {dsp_url}')

    ingest_url = submission['_links']['self']['href']
    logger.info(f'Deleting Ingest Archive Submission {ingest_url}')
    deletion_response = ingest_api.delete(ingest_url)
    return response_json(HTTPStatus.OK, data=deletion_response)
def archive():
    """Trigger archiving of an Ingest submission envelope.

    Expects a JSON body with 'submission_uuid' (required) plus optional
    'exclude_types' and 'alias_prefix'. Archives directly when
    config.DIRECT_SUBMISSION is set; otherwise spawns a background DSP
    archiving thread and returns immediately.
    """
    payload = request.get_json()
    submission_uuid = payload.get('submission_uuid')
    exclude_types = payload.get('exclude_types')
    alias_prefix = payload.get('alias_prefix')

    # Guard clause: reject requests without the mandatory uuid.
    if not submission_uuid:
        error = {
            'message': 'You must supply the parameter submission_uuid referring to an Ingest submission envelope uuid.'
        }
        return response_json(HTTPStatus.BAD_REQUEST, error)

    if config.DIRECT_SUBMISSION:
        direct_archiver = direct_archiver_from_config()
        submission = direct_archiver.archive_submission(submission_uuid)
        return jsonify(submission.as_dict(string_lists=True))

    ingest_api = IngestAPI(config.INGEST_API_URL)
    archiver = IngestArchiver(ingest_api=ingest_api,
                              dsp_api=DataSubmissionPortal(config.DSP_API_URL),
                              exclude_types=exclude_types,
                              alias_prefix=alias_prefix)
    threading.Thread(target=async_archive,
                     args=(ingest_api, archiver, submission_uuid)).start()
    return jsonify({'message': 'successfully triggered!'})
def get_submission_entities(dsp_submission_uuid: str):
    """List the archive entities belonging to a DSP submission.

    Forwards the request's query parameters to Ingest (for paging) and
    strips HAL '_links' from every entity before returning them with the
    page metadata.
    """
    api = IngestAPI(config.INGEST_API_URL)
    submission = api.get_archive_submission_by_dsp_uuid(dsp_submission_uuid)
    entities_url = submission['_links']['entities']['href']
    body = api.get(entities_url, params=request.args)
    entities = body['_embedded']['archiveEntities']
    for entity in entities:
        del entity['_links']
    return jsonify({'entities': entities, 'page': body.get('page')})
def complete(dsp_submission_uuid: str):
    """Kick off asynchronous completion of a DSP submission.

    Starts a background thread running async_complete and immediately
    returns 202 Accepted.
    """
    dsp_api = DataSubmissionPortal(config.DSP_API_URL)
    ingest_api = IngestAPI(config.INGEST_API_URL)
    worker = threading.Thread(target=async_complete,
                              args=(dsp_api, dsp_submission_uuid, ingest_api))
    worker.start()
    return response_json(HTTPStatus.ACCEPTED,
                         data={'message': 'successfully triggered!'})
def async_archive(ingest_api: IngestAPI, archiver: IngestArchiver, submission_uuid: str):
    """Background worker: convert a submission's manifests and archive them to the DSP.

    Patches the Ingest archive-submission tracker with the submission uuid
    and the DSP file-upload plan. Exceptions are logged with a traceback and
    re-raised.
    """
    logger.info('Starting...')
    start = time.time()
    manifests = ingest_api.get_manifest_ids_from_submission(submission_uuid)
    try:
        entity_map: ArchiveEntityMap = archiver.convert(manifests)
        dsp_submission, ingest_tracker = archiver.archive_metadata(entity_map)
        archiver.notify_file_archiver(dsp_submission)
        patch = {
            'submissionUuid': submission_uuid,
            'fileUploadPlan': dsp_submission.file_upload_info
        }
        ingest_tracker.patch_archive_submission(patch)
        elapsed = time.time() - start
        logger.info(
            f'Creating DSP submission for {submission_uuid} finished in {elapsed}s'
        )
    except Exception as e:
        logger.exception(e)
        raise
def get_submission(dsp_submission_uuid: str):
    """Fetch a single archive submission by its DSP uuid, minus HAL links."""
    api = IngestAPI(config.INGEST_API_URL)
    submission = api.get_archive_submission_by_dsp_uuid(dsp_submission_uuid)
    # pop without a default raises KeyError like 'del' would
    submission.pop('_links')
    return jsonify(submission)
class ArchiveCLI: def __init__(self, alias_prefix, output_dir, exclude_types, no_validation): self.manifests = [] self.ingest_api = IngestAPI(config.INGEST_API_URL) self.dsp_api = DataSubmissionPortal(config.DSP_API_URL) now = datetime.datetime.utcnow().strftime("%Y-%m-%dT%H%M%S") self.output_dir = output_dir if output_dir else f"output/ARCHIVER_{now}" self.archiver = IngestArchiver(ingest_api=self.ingest_api, dsp_api=self.dsp_api, exclude_types=self.split_exclude_types(exclude_types), alias_prefix=alias_prefix, dsp_validation=not no_validation) def get_manifests_from_project(self, project_uuid): logging.info(f'GETTING MANIFESTS FOR PROJECT: {project_uuid}') self.manifests = self.ingest_api.get_manifest_ids_from_project(project_uuid=project_uuid) def get_manifests_from_submission(self, submission_uuid): logging.info(f'GETTING MANIFESTS FOR SUBMISSION: {submission_uuid}') self.manifests = self.ingest_api.get_manifest_ids_from_submission(submission_uuid) def get_manifests_from_list(self, manifest_list_file): logging.info(f'GETTING MANIFESTS FROM FILE: {manifest_list_file}') with open(manifest_list_file) as f: content = f.readlines() parsed_manifest_list = [x.strip() for x in content] self.manifests = parsed_manifest_list def complete_submission(self, dsp_submission_url): logging.info(f'##################### COMPLETING DSP SUBMISSION {dsp_submission_url}') archive_submission = ArchiveSubmission(dsp_api=self.archiver.dsp_api, dsp_submission_url=dsp_submission_url) ingest_archive_submission = self.ingest_api.get_archive_submission_by_dsp_uuid(archive_submission.dsp_uuid) ingest_entities = self.ingest_api.get_related_entity(ingest_archive_submission, 'entities', 'archiveEntities') entity_map = ArchiveEntityMap.map_from_ingest_entities(ingest_entities) archive_submission = self.archiver.complete_submission(dsp_submission_url, entity_map) report = archive_submission.generate_report() self.save_dict_to_file(f'COMPLETE_SUBMISSION_{archive_submission.dsp_uuid}', report) def 
build_map(self): logging.info(f'Processing {len(self.manifests)} manifests:\n' + "\n".join(map(str, self.manifests))) entity_map: ArchiveEntityMap = self.archiver.convert(self.manifests) summary = entity_map.get_conversion_summary() logging.info(f'Entities to be converted: {json.dumps(summary)}') report = entity_map.generate_report() logging.info("Saving Report file...") self.save_dict_to_file("REPORT", report) return entity_map def load_map(self, load_path): logging.info(f'Loading Entity Map: {load_path}') file_content: dict = self.load_dict_from_file(load_path) if file_content.get('entities'): return ArchiveEntityMap.map_from_report(file_content['entities']) logging.error(f"--load_path files does not have an entities object: {file_content}") exit(2) def validate_submission(self, entity_map: ArchiveEntityMap, submit, ingest_submission_uuid=None): archive_submission, ingest_archive_submission = self.archiver.archive_metadata(entity_map) all_messages = self.archiver.notify_file_archiver(archive_submission) ingest_archive_submission.patch_archive_submission({ 'submissionUuid': ingest_submission_uuid, 'fileUploadPlan': archive_submission.file_upload_info }) report = archive_submission.generate_report() logging.info("Updating Report file...") self.save_dict_to_file("REPORT", report) logging.info("##################### FILE ARCHIVER NOTIFICATION") self.save_dict_to_file("FILE_UPLOAD_INFO", {"jobs": all_messages}) if submit: archive_submission.validate_and_submit() else: archive_submission.validate() def generate_validation_error_report(self, dsp_submission_url): submission = ArchiveSubmission(dsp_api=self.archiver.dsp_api, dsp_submission_url=dsp_submission_url) self.save_dict_to_file("VALIDATION_ERROR_REPORT", submission.get_validation_error_report()) def save_dict_to_file(self, file_name, json_content): if not self.output_dir: return directory = os.path.abspath(self.output_dir) if not os.path.exists(directory): os.makedirs(directory) file = directory + "/" + file_name 
+ ".json" if os.path.exists(file): os.remove(file) with open(file, "w") as open_file: json.dump(json_content, open_file, indent=4) open_file.close() logging.info(f"Saved to {directory}/{file_name}.json!") @staticmethod def load_dict_from_file(file_path): path = os.path.abspath(file_path) if os.path.exists(path) and os.path.isfile(path): with open(path, 'r') as open_file: content = open_file.read() return json.loads(content) else: logging.error(f"--load_path does not exist or is not a file: {file_path}") exit(2) @staticmethod def split_exclude_types(exclude_types): if exclude_types: exclude_types = [x.strip() for x in exclude_types.split(',')] logging.warning(f"Excluding {', '.join(exclude_types)}") return exclude_types