def delete_submission(recid): """ Submissions can only be removed if they are not finalised, meaning they should never be in the index. Only delete the latest version of a submission. Delete indexed information only if version = 1. :param recid: :return: """ if has_role(current_user, 'admin') or has_role(current_user, 'coordinator') \ or check_is_sandbox_record(recid): submission = get_latest_hepsubmission(publication_recid=recid) unload_submission(recid, submission.version) if submission.version == 1: admin_idx = AdminIndexer() admin_idx.find_and_delete('recid', recid) return json.dumps({"success": True, "recid": recid, "errors": [ "Record successfully removed!"]}) else: return json.dumps( {"success": False, "recid": recid, "errors": [ "You do not have permission to delete this submission. " "Only coordinators can do that."]})
def process_submission_payload(*args, **kwargs):
    """
    Processes the submission payload.

    :param inspire_id:
    :param title:
    :param reviewer:
    :param uploader:
    :param send_upload_email:
    :return:
    """
    if kwargs.get('inspire_id'):
        content, status = get_inspire_record_information(kwargs.get('inspire_id'))
        content["inspire_id"] = kwargs.get('inspire_id')
    elif kwargs.get('title'):
        content = {'title': kwargs.get('title')}
    else:
        raise ValueError("A title or inspire_id must be provided.")

    record_information = create_record(content)

    submitter_id = kwargs.get('submitter_id')
    if submitter_id is None:
        submitter_id = kwargs.get('user_id') if 'user_id' in kwargs \
            else int(current_user.get_id())

    hepsubmission = get_or_create_hepsubmission(record_information["recid"],
                                                submitter_id)

    if kwargs.get('inspire_id'):
        hepsubmission.inspire_id = kwargs.get('inspire_id')
        db.session.add(hepsubmission)

    reviewer_details = kwargs.get('reviewer')
    reviewer = create_participant_record(
        reviewer_details.get('name'),
        reviewer_details.get('email'),
        'reviewer', 'primary',
        record_information['recid'])
    hepsubmission.participants.append(reviewer)

    uploader_details = kwargs.get('uploader')
    uploader = create_participant_record(uploader_details.get('name'),
                                         uploader_details.get('email'),
                                         'uploader', 'primary',
                                         record_information['recid'])
    hepsubmission.participants.append(uploader)

    db.session.commit()

    if kwargs.get('send_upload_email', True):
        # Now send email only to the uploader first. The reviewer will be asked to
        # review only when an upload has been performed.
        message = kwargs.get('message', None)
        send_cookie_email(uploader, record_information, message)

    admin_idx = AdminIndexer()
    admin_idx.index_submission(hepsubmission)

    return hepsubmission
def reindex():
    if has_role(current_user, 'admin'):
        reindex_all(recreate=True)
        push_data_keywords()

        admin_idx = AdminIndexer()
        admin_idx.reindex(recreate=True)

        return jsonify({"success": True})
    else:
        return jsonify({"success": False,
                        'message': "You don't have sufficient privileges to "
                                   "perform this action."})
def find_duplicates_and_remove(): """Will go through the application to find any duplicates then remove them.""" inspire_ids = get_all_ids_in_current_system(prepend_id_with="") duplicates = [] for inspire_id in inspire_ids: matches = get_records_matching_field('inspire_id', inspire_id, doc_type=CFG_PUB_TYPE) if len(matches['hits']['hits']) > 1: duplicates.append(matches['hits']['hits'][0]['_source']['recid']) print('There are {} duplicates. Going to remove.'.format(len(duplicates))) do_unload(duplicates) # reindex submissions for dashboard view admin_indexer = AdminIndexer() admin_indexer.reindex(recreate=True)
def find_duplicates_and_remove(base_url):
    """Will go through the application to find any duplicates, then remove them."""
    inspire_ids = importer_api.get_inspire_ids(base_url=base_url)
    if inspire_ids is not False:
        duplicates = []

        for inspire_id in inspire_ids:
            matches = get_records_matching_field('inspire_id', inspire_id,
                                                 doc_type=CFG_PUB_TYPE)
            if len(matches['hits']['hits']) > 1:
                duplicates.append(
                    matches['hits']['hits'][0]['_source']['recid'])

        print('There are {} duplicates. Going to remove.'.format(
            len(duplicates)))

        do_unload(duplicates)

        # reindex submissions for dashboard view
        admin_indexer = AdminIndexer()
        admin_indexer.reindex(recreate=True)
def find_duplicates_and_remove(): """ Will go through the application to find any duplicates then remove them. :return: """ inspire_ids = get_all_ids_in_current_system(prepend_id_with="") duplicates = [] for inspire_id in inspire_ids: matches = get_records_matching_field('inspire_id', inspire_id, doc_type=CFG_PUB_TYPE) if len(matches['hits']['hits']) > 1: duplicates.append(matches['hits']['hits'][0]['_source']['recid']) print('There are {} duplicates. Going to remove.'.format(len(duplicates))) do_unload(duplicates) # reindex submissions for dashboard view admin_indexer = AdminIndexer() admin_indexer.reindex(recreate=True)
def delete_submission(recid): """ Submissions can only be removed if they are not finalised, meaning they should never be in the index. :param recid: :return: """ if has_role(current_user, 'admin') or has_role(current_user, 'coordinator') \ or check_is_sandbox_record(recid): unload_submission(recid) admin_idx = AdminIndexer() admin_idx.reindex(recreate=True) return json.dumps({"success": True, "recid": recid, "errors": [ "Record successfully removed!"]}) else: return json.dumps( {"success": False, "recid": recid, "errors": [ "You do not have permission to delete this submission. " "Only coordinators can do that."]})
def admin_idx(app):
    with app.app_context():
        admin_idx = AdminIndexer()
        return admin_idx
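# Usage sketch (not from the source): assuming the admin_idx helper above is
# meant to back a pytest fixture, with an `app` application fixture available,
# a test could exercise the AdminIndexer methods used elsewhere in this
# section. The fixture and test names below are hypothetical.
import pytest


@pytest.fixture
def admin_idx(app):
    with app.app_context():
        yield AdminIndexer()


def test_admin_index_summary(app, admin_idx):
    with app.app_context():
        # rebuild the submission index from scratch, as reindex() does above
        admin_idx.reindex(recreate=True)
        # the dashboard summary should then be retrievable (shape not asserted)
        summary = admin_idx.get_summary()
        assert summary is not None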
def reindex(): """Reindexes HEPSubmissions and adds to the submission index""" admin_idx = AdminIndexer() admin_idx.reindex(recreate=True)
def do_finalise(recid, publication_record=None, force_finalise=False,
                commit_message=None, send_tweet=False, update=False):
    """
    Creates record SIP for each data record with a link to the associated
    publication.

    :param synchronous: if true then workflow execution and creation is
        waited on, then everything is indexed in one go.
        If False, object creation is asynchronous, however reindexing is
        not performed. This is only really useful for the full migration
        of content.
    """
    hep_submission = HEPSubmission.query.filter_by(
        publication_recid=recid, overall_status="todo").first()

    print('Finalising record {}'.format(recid))

    generated_record_ids = []
    if hep_submission \
            and (force_finalise or hep_submission.coordinator == int(current_user.get_id())):

        submissions = DataSubmission.query.filter_by(
            publication_recid=recid,
            version=hep_submission.version).all()

        version = hep_submission.version

        existing_submissions = {}
        if hep_submission.version > 1 or update:
            # we need to determine which are the existing record ids.
            existing_data_records = get_records_matching_field(
                'related_publication', recid, doc_type=CFG_DATA_TYPE)

            for record in existing_data_records["hits"]["hits"]:
                if "recid" in record["_source"]:
                    existing_submissions[record["_source"]["title"]] = \
                        record["_source"]["recid"]
                    delete_item_from_index(record["_id"],
                                           doc_type=CFG_DATA_TYPE,
                                           parent=record["_source"]["related_publication"])

        current_time = "{:%Y-%m-%d %H:%M:%S}".format(datetime.now())

        for submission in submissions:
            finalise_datasubmission(current_time, existing_submissions,
                                    generated_record_ids,
                                    publication_record, recid, submission,
                                    version)

        try:
            record = get_record_by_id(recid)

            # If we have a commit message, then we have a record update.
            # We will store the commit message and also update the
            # last_updated flag for the record.
            record['hepdata_doi'] = hep_submission.doi

            if commit_message:
                # On a revision, the last updated date will
                # be the current date.
                hep_submission.last_updated = datetime.now()

                commit_record = RecordVersionCommitMessage(
                    recid=recid,
                    version=version,
                    message=str(commit_message))

                db.session.add(commit_record)

            record['last_updated'] = datetime.strftime(
                hep_submission.last_updated, '%Y-%m-%d %H:%M:%S')
            record['version'] = version

            record.commit()

            hep_submission.inspire_id = record['inspire_id']
            hep_submission.overall_status = "finished"
            db.session.add(hep_submission)
            db.session.commit()

            create_celery_app(current_app)

            # only mint DOIs if not testing.
            if not current_app.config.get('TESTING', False) \
                    and not current_app.config.get('NO_DOI_MINTING', False):
                for submission in submissions:
                    generate_doi_for_data_submission.delay(submission.id,
                                                           submission.version)
                generate_doi_for_submission.delay(recid, version)

            # Reindex everything.
            index_record_ids([recid] + generated_record_ids)
            push_data_keywords(pub_ids=[recid])

            admin_indexer = AdminIndexer()
            admin_indexer.index_submission(hep_submission)

            send_finalised_email(hep_submission)

            for file_format in ['csv', 'yoda', 'root']:
                convert_and_store.delay(hep_submission.inspire_id,
                                        file_format, force=True)

            if send_tweet:
                tweet(record.get('title'), record.get('collaborations'),
                      "http://www.hepdata.net/record/ins{0}".format(
                          record.get('inspire_id')))

            return json.dumps({"success": True,
                               "recid": recid,
                               "data_count": len(submissions),
                               "generated_records": generated_record_ids})

        except NoResultFound:
            print('No record found to update. Which is super strange.')

    else:
        return json.dumps(
            {"success": False,
             "recid": recid,
             "errors": ["You do not have permission to finalise this "
                        "submission. Only coordinators can do that."]})
def process_submission_directory(basepath, submission_file_path, recid,
                                 update=False, *args, **kwargs):
    """
    Goes through an entire submission directory and processes the
    files within to create DataSubmissions
    with the files and related material attached as DataResources.

    :param basepath:
    :param submission_file_path:
    :param recid:
    :return:
    """
    added_file_names = []
    errors = {}

    if submission_file_path is not None:
        submission_file = open(submission_file_path, 'r')
        submission_file_validator = SubmissionFileValidator()
        is_valid_submission_file = submission_file_validator.validate(
            file_path=submission_file_path)

        data_file_validator = DataFileValidator()

        if is_valid_submission_file:
            try:
                submission_processed = yaml.load_all(submission_file,
                                                     Loader=yaml.CSafeLoader)
            except Exception:
                submission_processed = yaml.safe_load_all(submission_file)

            # process file, extracting contents, and linking
            # the data record with the parent publication
            hepsubmission = get_latest_hepsubmission(publication_recid=recid)

            if hepsubmission is None:
                HEPSubmission(publication_recid=recid,
                              overall_status='todo',
                              inspire_id=hepsubmission.inspire_id,
                              coordinator=kwargs.get('user_id') if 'user_id' in kwargs
                              else int(current_user.get_id()),
                              version=hepsubmission.version + 1)

            # On a new upload, we reset the flag to notify reviewers
            hepsubmission.reviewers_notified = False

            # if it is finished and we receive an update,
            # then we need to reopen the submission to allow for revisions.
            if hepsubmission.overall_status == 'finished' and not update:
                # we create a new HEPSubmission object
                _rev_hepsubmission = HEPSubmission(
                    publication_recid=recid,
                    overall_status='todo',
                    inspire_id=hepsubmission.inspire_id,
                    coordinator=hepsubmission.coordinator,
                    version=hepsubmission.version + 1)
                db.session.add(_rev_hepsubmission)
                hepsubmission = _rev_hepsubmission

            reserve_doi_for_hepsubmission(hepsubmission, update)

            for yaml_document in submission_processed:
                if 'record_ids' in yaml_document or 'comment' in yaml_document \
                        or 'modifications' in yaml_document:
                    # comments are only present in the general submission
                    # information document.
                    process_general_submission_info(basepath, yaml_document, recid)
                else:
                    existing_datasubmission_query = DataSubmission.query \
                        .filter_by(name=encode_string(yaml_document["name"]),
                                   publication_recid=recid,
                                   version=hepsubmission.version)
                    added_file_names.append(yaml_document["name"])

                    if existing_datasubmission_query.count() == 0:
                        datasubmission = DataSubmission(
                            publication_recid=recid,
                            name=encode_string(yaml_document["name"]),
                            description=encode_string(
                                yaml_document["description"]),
                            version=hepsubmission.version)
                    else:
                        datasubmission = existing_datasubmission_query.one()
                        datasubmission.description = encode_string(
                            yaml_document["description"])

                    db.session.add(datasubmission)

                    main_file_path = os.path.join(basepath,
                                                  yaml_document["data_file"])

                    if data_file_validator.validate(file_path=main_file_path):
                        process_data_file(recid, hepsubmission.version, basepath,
                                          yaml_document, datasubmission,
                                          main_file_path)
                    else:
                        errors = process_validation_errors_for_display(
                            data_file_validator.get_messages())
                        data_file_validator.clear_messages()

            cleanup_submission(recid, hepsubmission.version, added_file_names)

            db.session.commit()

            if len(errors) == 0:
                package_submission(basepath, recid, hepsubmission)
                reserve_dois_for_data_submissions(recid, hepsubmission.version)

                admin_indexer = AdminIndexer()
                admin_indexer.index_submission(hepsubmission)
        else:
            errors = process_validation_errors_for_display(
                submission_file_validator.get_messages())
            submission_file_validator.clear_messages()
            data_file_validator.clear_messages()
    else:
        # return an error
        errors = {
            "submission.yaml": [{
                "level": "error",
                "message": "No submission.yaml file found in submission."
            }]
        }
        return errors

    # we return all the errors collectively.
    # This makes more sense than returning errors as
    # soon as problems are found on one file.
    return errors
def process_submission_directory(basepath, submission_file_path, recid,
                                 update=False, old_data_schema=False,
                                 old_submission_schema=False):
    """
    Goes through an entire submission directory and processes the
    files within to create DataSubmissions
    with the files and related material attached as DataResources.

    :param basepath:
    :param submission_file_path:
    :param recid:
    :param update:
    :param old_data_schema: whether to use old (v0) data schema
    :param old_submission_schema: whether to use old (v0) submission schema
        (should only be used when importing old records)
    :return:
    """
    added_file_names = []
    errors = {}

    if submission_file_path is None:
        # return an error
        errors = {"submission.yaml": [
            {"level": "error",
             "message": "No submission.yaml file found in submission."}
        ]}
        return errors

    try:
        with open(submission_file_path, 'r') as submission_file:
            submission_processed = list(yaml.load_all(submission_file,
                                                      Loader=Loader))
    except Exception as ex:
        errors = {"submission.yaml": [
            {"level": "error",
             "message": "There was a problem parsing the file.\n" + str(ex)}
        ]}
        return errors

    submission_file_validator = get_submission_validator(old_submission_schema)
    is_valid_submission_file = submission_file_validator.validate(
        file_path=submission_file_path,
        data=submission_processed,
    )

    if is_valid_submission_file:
        # process file, extracting contents, and linking
        # the data record with the parent publication
        hepsubmission = get_latest_hepsubmission(publication_recid=recid)

        # On a new upload, we reset the flag to notify reviewers
        hepsubmission.reviewers_notified = False

        reserve_doi_for_hepsubmission(hepsubmission, update)

        no_general_submission_info = True

        data_file_validator = get_data_validator(old_data_schema)

        # Delete all data records associated with this submission.
        # Fixes problems with ordering where the table names are changed between uploads.
        # See https://github.com/HEPData/hepdata/issues/112
        # Side effect that reviews will be deleted between uploads.
        cleanup_submission(recid, hepsubmission.version, added_file_names)

        for yaml_document_index, yaml_document in enumerate(submission_processed):
            if not yaml_document:
                continue

            # Check for presence of local files given as additional_resources.
            if 'additional_resources' in yaml_document:
                for resource in yaml_document['additional_resources']:
                    location = os.path.join(basepath, resource['location'])
                    if not resource['location'].startswith(('http', '/resource/')):
                        if not os.path.isfile(location):
                            errors[resource['location']] = [
                                {"level": "error",
                                 "message": "Missing 'additional_resources' file from uploaded archive."}]
                        elif '/' in resource['location']:
                            errors[resource['location']] = [
                                {"level": "error",
                                 "message": "Location of 'additional_resources' file should not contain '/'."}]

            if not yaml_document_index and 'name' not in yaml_document:
                no_general_submission_info = False
                process_general_submission_info(basepath, yaml_document, recid)

            elif not all(k in yaml_document for k in ('name', 'description', 'keywords', 'data_file')):
                errors["submission.yaml"] = [
                    {"level": "error",
                     "message": "YAML document with index {} ".format(yaml_document_index) +
                                "missing one or more required keys (name, description, keywords, data_file)."}]

            else:
                existing_datasubmission_query = DataSubmission.query \
                    .filter_by(name=yaml_document["name"],
                               publication_recid=recid,
                               version=hepsubmission.version)

                added_file_names.append(yaml_document["name"])

                try:
                    if existing_datasubmission_query.count() == 0:
                        datasubmission = DataSubmission(
                            publication_recid=recid,
                            name=yaml_document["name"],
                            description=yaml_document["description"],
                            version=hepsubmission.version)
                        db.session.add(datasubmission)
                    else:
                        error = {"level": "error",
                                 "message": "Duplicate table with name '{}'.".format(
                                     yaml_document["name"])}
                        errors.setdefault('submission.yaml', []).append(error)
                        continue
                except SQLAlchemyError as sqlex:
                    errors[yaml_document["data_file"]] = [
                        {"level": "error", "message": str(sqlex)}]
                    db.session.rollback()
                    continue

                main_file_path = os.path.join(basepath, yaml_document["data_file"])

                data, ex = _read_data_file(main_file_path)

                if not data or data is None or ex is not None:
                    errors[yaml_document["data_file"]] = \
                        [{"level": "error",
                          "message": "There was a problem parsing the file.\n" + str(ex)}]
                elif '/' in yaml_document["data_file"]:
                    errors[yaml_document["data_file"]] = \
                        [{"level": "error",
                          "message": "Name of data_file should not contain '/'.\n"}]
                else:
                    schema_type = yaml_document.get('data_schema')  # Optional

                    if data_file_validator.validate(file_path=main_file_path,
                                                    file_type=schema_type,
                                                    data=data):
                        try:
                            process_data_file(recid, hepsubmission.version, basepath,
                                              yaml_document, datasubmission,
                                              main_file_path)
                        except SQLAlchemyError as sqlex:
                            errors[yaml_document["data_file"]] = [
                                {"level": "error",
                                 "message": "There was a problem processing the file.\n" + str(sqlex)}]
                            db.session.rollback()
                    else:
                        errors.update(process_validation_errors_for_display(
                            data_file_validator.get_messages()))
                        data_file_validator.clear_messages()

        if no_general_submission_info:
            hepsubmission.last_updated = datetime.utcnow()
            db.session.add(hepsubmission)
            db.session.commit()

        # The line below is commented out since it does not preserve the order of tables.
        # Delete all tables above instead: side effect of deleting reviews between uploads.
        # cleanup_submission(recid, hepsubmission.version, added_file_names)

        db.session.commit()

        if len(errors) == 0:
            errors = package_submission(basepath, recid, hepsubmission)

            # Check the size of the upload to ensure it can be converted
            data_filepath = find_submission_data_file_path(hepsubmission)
            with prepare_data_folder(data_filepath, 'yaml') as filepaths:
                input_directory, input_file = filepaths
                # Create options that look like a worst-case (biggest)
                # conversion (using yoda-like options as they include a
                # Rivet analysis)
                dummy_inspire_id = hepsubmission.inspire_id or '0000000'
                options = {
                    'input_format': 'yaml',
                    'output_format': 'yoda',
                    'filename': f'HEPData-ins{dummy_inspire_id}-v{hepsubmission.version}-yoda',
                    'validator_schema_version': '0.1.0',
                    'hepdata_doi': f'10.17182/hepdata.{recid}.v{hepsubmission.version}',
                    'rivet_analysis_name': f'ATLAS_2020_I{dummy_inspire_id}'
                }
                data_size = get_data_size(input_directory, options)
                if data_size > current_app.config['CONVERT_MAX_SIZE']:
                    errors["Archive"] = [{
                        "level": "error",
                        "message": "Archive is too big for conversion to other formats. "
                                   "(%s bytes would be sent to converter; maximum size is %s.)"
                                   % (data_size, current_app.config['CONVERT_MAX_SIZE'])
                    }]

        if len(errors) == 0:
            reserve_dois_for_data_submissions(publication_recid=recid,
                                              version=hepsubmission.version)

            admin_indexer = AdminIndexer()
            admin_indexer.index_submission(hepsubmission)
    else:
        errors = process_validation_errors_for_display(
            submission_file_validator.get_messages())
        submission_file_validator.clear_messages()

    # we return all the errors collectively.
    # This makes more sense than returning errors as
    # soon as problems are found on one file.
    return errors
def process_submission_directory(basepath, submission_file_path, recid,
                                 update=False, *args, **kwargs):
    """
    Goes through an entire submission directory and processes the
    files within to create DataSubmissions
    with the files and related material attached as DataResources.

    :param basepath:
    :param submission_file_path:
    :param recid:
    :param update:
    :return:
    """
    added_file_names = []
    errors = {}

    if submission_file_path is not None:
        submission_file_validator = SubmissionFileValidator()
        is_valid_submission_file = submission_file_validator.validate(
            file_path=submission_file_path)

        if is_valid_submission_file:
            submission_file = open(submission_file_path, 'r')
            submission_processed = yaml.load_all(submission_file, Loader=Loader)

            # process file, extracting contents, and linking
            # the data record with the parent publication
            hepsubmission = get_latest_hepsubmission(publication_recid=recid)

            if hepsubmission is None:
                HEPSubmission(publication_recid=recid,
                              overall_status='todo',
                              inspire_id=hepsubmission.inspire_id,
                              coordinator=kwargs.get('user_id') if 'user_id' in kwargs
                              else int(current_user.get_id()),
                              version=hepsubmission.version + 1)

            # On a new upload, we reset the flag to notify reviewers
            hepsubmission.reviewers_notified = False

            # if it is finished and we receive an update,
            # then we need to reopen the submission to allow for revisions.
            if hepsubmission.overall_status == 'finished' and not update:
                # we create a new HEPSubmission object
                _rev_hepsubmission = HEPSubmission(
                    publication_recid=recid,
                    overall_status='todo',
                    inspire_id=hepsubmission.inspire_id,
                    coordinator=hepsubmission.coordinator,
                    version=hepsubmission.version + 1)
                db.session.add(_rev_hepsubmission)
                hepsubmission = _rev_hepsubmission

            reserve_doi_for_hepsubmission(hepsubmission, update)

            no_general_submission_info = True

            data_file_validator = DataFileValidator()

            # Delete all data records associated with this submission.
            # Fixes problems with ordering where the table names are changed between uploads.
            # See https://github.com/HEPData/hepdata/issues/112
            # Side effect that reviews will be deleted between uploads.
            cleanup_submission(recid, hepsubmission.version, added_file_names)

            for yaml_document_index, yaml_document in enumerate(
                    submission_processed):
                if not yaml_document:
                    continue

                # Check for presence of local files given as additional_resources.
                if 'additional_resources' in yaml_document:
                    for resource in yaml_document['additional_resources']:
                        location = os.path.join(basepath, resource['location'])
                        if not resource['location'].startswith(
                                ('http', '/resource/')):
                            if not os.path.isfile(location):
                                errors[resource['location']] = [{
                                    "level": "error",
                                    "message": "Missing 'additional_resources' file from uploaded archive."
                                }]
                            elif '/' in resource['location']:
                                errors[resource['location']] = [{
                                    "level": "error",
                                    "message": "Location of 'additional_resources' file should not contain '/'."
                                }]

                if not yaml_document_index and 'name' not in yaml_document:
                    no_general_submission_info = False
                    process_general_submission_info(basepath, yaml_document, recid)

                elif not all(k in yaml_document for k in
                             ('name', 'description', 'keywords', 'data_file')):
                    errors["submission.yaml"] = [{
                        "level": "error",
                        "message": "YAML document with index {} ".format(
                            yaml_document_index) +
                            "missing one or more required keys (name, description, keywords, data_file)."
                    }]

                else:
                    existing_datasubmission_query = DataSubmission.query \
                        .filter_by(name=encode_string(yaml_document["name"]),
                                   publication_recid=recid,
                                   version=hepsubmission.version)

                    added_file_names.append(yaml_document["name"])

                    try:
                        if existing_datasubmission_query.count() == 0:
                            datasubmission = DataSubmission(
                                publication_recid=recid,
                                name=encode_string(yaml_document["name"]),
                                description=encode_string(
                                    yaml_document["description"]),
                                version=hepsubmission.version)
                        else:
                            datasubmission = existing_datasubmission_query.one()
                            datasubmission.description = encode_string(
                                yaml_document["description"])
                        db.session.add(datasubmission)
                    except SQLAlchemyError as sqlex:
                        errors[yaml_document["data_file"]] = [{
                            "level": "error",
                            "message": str(sqlex)
                        }]
                        db.session.rollback()
                        continue

                    main_file_path = os.path.join(basepath,
                                                  yaml_document["data_file"])

                    data, ex = _eos_fix_read_data(main_file_path)

                    if not data or data is None or ex is not None:
                        errors[yaml_document["data_file"]] = \
                            [{"level": "error",
                              "message": "There was a problem parsing the file.\n" + str(ex)}]
                    elif '/' in yaml_document["data_file"]:
                        errors[yaml_document["data_file"]] = \
                            [{"level": "error",
                              "message": "Name of data_file should not contain '/'.\n"}]
                    else:
                        if data_file_validator.validate(
                                file_path=main_file_path, data=data):
                            try:
                                process_data_file(recid, hepsubmission.version,
                                                  basepath, yaml_document,
                                                  datasubmission, main_file_path)
                            except SQLAlchemyError as sqlex:
                                errors[yaml_document["data_file"]] = [{
                                    "level": "error",
                                    "message": "There was a problem processing the file.\n" + str(sqlex)
                                }]
                                db.session.rollback()
                        else:
                            errors = process_validation_errors_for_display(
                                data_file_validator.get_messages())
                            data_file_validator.clear_messages()

                        if yaml_document["data_file"] not in errors:
                            # Check that the length of the 'values' list is consistent
                            # for each of the independent_variables and dependent_variables.
                            indep_count = [
                                len(indep['values']) for indep in
                                data['independent_variables']
                            ]
                            dep_count = [
                                len(dep['values']) for dep in
                                data['dependent_variables']
                            ]
                            if len(set(indep_count + dep_count)) > 1:  # if more than one unique count
                                errors.setdefault(
                                    yaml_document["data_file"], []
                                ).append({
                                    "level": "error",
                                    "message": "Inconsistent length of 'values' list:\n" +
                                               "independent_variables{}, dependent_variables{}"
                                               .format(str(indep_count), str(dep_count))
                                })

            submission_file.close()

            if no_general_submission_info:
                hepsubmission.last_updated = datetime.now()
                db.session.add(hepsubmission)
                db.session.commit()

            # The line below is commented out since it does not preserve the order of tables.
            # Delete all tables above instead: side effect of deleting reviews between uploads.
            # cleanup_submission(recid, hepsubmission.version, added_file_names)

            db.session.commit()

            if len(errors) == 0:
                errors = package_submission(basepath, recid, hepsubmission)
                reserve_dois_for_data_submissions(publication_recid=recid,
                                                  version=hepsubmission.version)

                admin_indexer = AdminIndexer()
                admin_indexer.index_submission(hepsubmission)
            else:
                # delete all tables if errors
                cleanup_submission(recid, hepsubmission.version, {})
        else:
            errors = process_validation_errors_for_display(
                submission_file_validator.get_messages())
            submission_file_validator.clear_messages()
    else:
        # return an error
        errors = {
            "submission.yaml": [{
                "level": "error",
                "message": "No submission.yaml file found in submission."
            }]
        }
        return errors

    # we return all the errors collectively.
    # This makes more sense than returning errors as
    # soon as problems are found on one file.
    return errors
def submissions_list():
    admin_idx = AdminIndexer()
    summary = admin_idx.get_summary()
    return jsonify(summary)
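# Hypothetical usage sketch (not from the source): if submissions_list() is
# registered as a Flask view, the dashboard summary can be fetched through the
# test client. The blueprint name and URL rule below are placeholders, not the
# real routing used by the application.
from flask import Blueprint

dashboard_sketch = Blueprint('dashboard_sketch', __name__)


@dashboard_sketch.route('/submissions/list')
def submissions_list_sketch():
    admin_idx = AdminIndexer()
    return jsonify(admin_idx.get_summary())


def fetch_submissions_summary(app):
    # app: a configured Flask application with dashboard_sketch registered
    with app.test_client() as client:
        return client.get('/submissions/list').get_json()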
def do_finalise(recid, publication_record=None, force_finalise=False,
                commit_message=None, send_tweet=False, update=False,
                convert=True):
    """
    Creates record SIP for each data record with a link to the associated
    publication.

    :param synchronous: if true then workflow execution and creation is
        waited on, then everything is indexed in one go.
        If False, object creation is asynchronous, however reindexing is
        not performed. This is only really useful for the full migration
        of content.
    """
    print('Finalising record {}'.format(recid))

    hep_submission = HEPSubmission.query.filter_by(
        publication_recid=recid, overall_status="todo").first()

    generated_record_ids = []
    if hep_submission \
            and (force_finalise or hep_submission.coordinator == int(current_user.get_id())):

        submissions = DataSubmission.query.filter_by(
            publication_recid=recid,
            version=hep_submission.version).all()

        version = hep_submission.version

        existing_submissions = {}
        if hep_submission.version > 1 or update:
            # we need to determine which are the existing record ids.
            existing_data_records = get_records_matching_field(
                'related_publication', recid, doc_type=CFG_DATA_TYPE)

            for record in existing_data_records["hits"]["hits"]:
                if "recid" in record["_source"]:
                    existing_submissions[record["_source"]["title"]] = \
                        record["_source"]["recid"]
                    delete_item_from_index(
                        record["_id"],
                        doc_type=CFG_DATA_TYPE,
                        parent=record["_source"]["related_publication"])

        current_time = "{:%Y-%m-%d %H:%M:%S}".format(datetime.now())

        for submission in submissions:
            finalise_datasubmission(current_time, existing_submissions,
                                    generated_record_ids,
                                    publication_record, recid, submission,
                                    version)

        try:
            record = get_record_by_id(recid)

            # If we have a commit message, then we have a record update.
            # We will store the commit message and also update the
            # last_updated flag for the record.
            record['hepdata_doi'] = hep_submission.doi

            if commit_message:
                # On a revision, the last updated date will
                # be the current date.
                hep_submission.last_updated = datetime.now()

                commit_record = RecordVersionCommitMessage(
                    recid=recid,
                    version=version,
                    message=str(commit_message))

                db.session.add(commit_record)

            record['last_updated'] = datetime.strftime(
                hep_submission.last_updated, '%Y-%m-%d %H:%M:%S')
            record['version'] = version

            record.commit()

            hep_submission.inspire_id = record['inspire_id']
            hep_submission.overall_status = "finished"
            db.session.add(hep_submission)
            db.session.commit()

            create_celery_app(current_app)

            # only mint DOIs if not testing.
            if not current_app.config.get('TESTING', False) \
                    and not current_app.config.get('NO_DOI_MINTING', False):
                for submission in submissions:
                    generate_doi_for_data_submission.delay(
                        submission.id, submission.version)

                log.info("Generating DOIs for ins{0}".format(
                    hep_submission.inspire_id))
                generate_doi_for_submission.delay(recid, version)

            # Reindex everything.
            index_record_ids([recid] + generated_record_ids)
            push_data_keywords(pub_ids=[recid])

            try:
                admin_indexer = AdminIndexer()
                admin_indexer.index_submission(hep_submission)
            except ConnectionTimeout as ct:
                log.error('Unable to add ins{0} to admin index.\n{1}'.format(
                    hep_submission.inspire_id, ct))

            send_finalised_email(hep_submission)

            if convert:
                for file_format in ['yaml', 'csv', 'yoda', 'root']:
                    convert_and_store.delay(hep_submission.inspire_id,
                                            file_format, force=True)

            if send_tweet:
                tweet(
                    record.get('title'),
                    record.get('collaborations'),
                    "http://www.hepdata.net/record/ins{0}".format(
                        record.get('inspire_id')),
                    version)

            return json.dumps({
                "success": True,
                "recid": recid,
                "data_count": len(submissions),
                "generated_records": generated_record_ids
            })

        except NoResultFound:
            print('No record found to update. Which is super strange.')

    else:
        return json.dumps({
            "success": False,
            "recid": recid,
            "errors": [
                "You do not have permission to finalise this "
                "submission. Only coordinators can do that."
            ]
        })
def remove_submission(record_id):
    """
    Removes the database entries related to a record.

    :param record_id:
    :return: True if Successful, False if the record does not exist.
    """
    hepdata_submissions = HEPSubmission.query.filter_by(
        publication_recid=record_id).all()

    try:
        try:
            for hepdata_submission in hepdata_submissions:
                db.session.delete(hepdata_submission)
        except NoResultFound as nrf:
            print(nrf.args)

        admin_idx = AdminIndexer()
        admin_idx.find_and_delete(term=record_id, fields=['recid'])

        submissions = DataSubmission.query.filter_by(
            publication_recid=record_id).all()

        reviews = DataReview.query.filter_by(
            publication_recid=record_id).all()

        for review in reviews:
            db.session.delete(review)

        for submission in submissions:
            resource = DataResource.query.filter_by(
                id=submission.data_file).first()

            db.session.delete(submission)

            if resource:
                db.session.delete(resource)

        try:
            SubmissionParticipant.query.filter_by(
                publication_recid=record_id).delete()
        except Exception:
            print("Unable to find a submission participant for {0}".format(
                record_id))

        try:
            record = get_record_by_id(record_id)

            data_records = get_records_matching_field(
                'related_publication', record_id, doc_type=CFG_DATA_TYPE)

            if 'hits' in data_records:
                for data_record in data_records['hits']['hits']:
                    data_record_obj = get_record_by_id(
                        data_record['_source']['recid'])
                    if data_record_obj:
                        data_record_obj.delete()

            if record:
                record.delete()

        except PIDDoesNotExistError as e:
            print('No record entry exists for {0}. '
                  'Proceeding to delete other files.'.format(record_id))

        db.session.commit()
        db.session.flush()

        return True

    except Exception as e:
        db.session.rollback()
        raise e
def reindex(): """Reindexes HEPSubmissions and adds to the submission index.""" admin_idx = AdminIndexer() admin_idx.reindex(recreate=True)
def process_submission_directory(basepath, submission_file_path, recid,
                                 update=False, *args, **kwargs):
    """
    Goes through an entire submission directory and processes the
    files within to create DataSubmissions
    with the files and related material attached as DataResources.

    :param basepath:
    :param submission_file_path:
    :param recid:
    :return:
    """
    added_file_names = []
    errors = {}

    if submission_file_path is not None:
        submission_file = open(submission_file_path, 'r')
        submission_file_validator = SubmissionFileValidator()
        is_valid_submission_file = submission_file_validator.validate(
            file_path=submission_file_path)

        data_file_validator = DataFileValidator()

        if is_valid_submission_file:
            try:
                submission_processed = yaml.load_all(submission_file,
                                                     Loader=yaml.CSafeLoader)
            except Exception:
                submission_processed = yaml.safe_load_all(submission_file)

            # process file, extracting contents, and linking
            # the data record with the parent publication
            hepsubmission = get_latest_hepsubmission(publication_recid=recid)

            if hepsubmission is None:
                HEPSubmission(publication_recid=recid,
                              overall_status='todo',
                              inspire_id=hepsubmission.inspire_id,
                              coordinator=kwargs.get('user_id') if 'user_id' in kwargs
                              else int(current_user.get_id()),
                              version=hepsubmission.version + 1)

            # On a new upload, we reset the flag to notify reviewers
            hepsubmission.reviewers_notified = False

            # if it is finished and we receive an update,
            # then we need to reopen the submission to allow for revisions.
            if hepsubmission.overall_status == 'finished' and not update:
                # we create a new HEPSubmission object
                _rev_hepsubmission = HEPSubmission(
                    publication_recid=recid,
                    overall_status='todo',
                    inspire_id=hepsubmission.inspire_id,
                    coordinator=hepsubmission.coordinator,
                    version=hepsubmission.version + 1)
                db.session.add(_rev_hepsubmission)
                hepsubmission = _rev_hepsubmission

            reserve_doi_for_hepsubmission(hepsubmission)

            for yaml_document in submission_processed:
                if 'record_ids' in yaml_document or 'comment' in yaml_document \
                        or 'modifications' in yaml_document:
                    # comments are only present in the general submission
                    # information document.
                    process_general_submission_info(basepath, yaml_document, recid)
                else:
                    existing_datasubmission_query = DataSubmission.query \
                        .filter_by(name=encode_string(yaml_document["name"]),
                                   publication_recid=recid,
                                   version=hepsubmission.version)
                    added_file_names.append(yaml_document["name"])

                    if existing_datasubmission_query.count() == 0:
                        datasubmission = DataSubmission(
                            publication_recid=recid,
                            name=encode_string(yaml_document["name"]),
                            description=encode_string(
                                yaml_document["description"]),
                            version=hepsubmission.version)
                    else:
                        datasubmission = existing_datasubmission_query.one()
                        datasubmission.description = encode_string(
                            yaml_document["description"])

                    db.session.add(datasubmission)

                    main_file_path = os.path.join(basepath,
                                                  yaml_document["data_file"])

                    if data_file_validator.validate(file_path=main_file_path):
                        process_data_file(recid, hepsubmission.version, basepath,
                                          yaml_document, datasubmission,
                                          main_file_path)
                    else:
                        errors = process_validation_errors_for_display(
                            data_file_validator.get_messages())
                        data_file_validator.clear_messages()

            cleanup_submission(recid, hepsubmission.version, added_file_names)

            db.session.commit()

            if len(errors) == 0:
                package_submission(basepath, recid, hepsubmission)
                reserve_dois_for_data_submissions(recid, hepsubmission.version)

                admin_indexer = AdminIndexer()
                admin_indexer.index_submission(hepsubmission)
        else:
            errors = process_validation_errors_for_display(
                submission_file_validator.get_messages())
            submission_file_validator.clear_messages()
            data_file_validator.clear_messages()
    else:
        # return an error
        errors = {"submission.yaml": [
            {"level": "error",
             "message": "No submission.yaml file found in submission."}
        ]}
        return errors

    # we return all the errors collectively.
    # This makes more sense than returning errors as
    # soon as problems are found on one file.
    return errors
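# Recap sketch: the AdminIndexer calls exercised by the functions in this
# section, gathered in one place. Method names and keyword arguments are taken
# from the snippets above; the wrapper function itself is illustrative only.
def admin_indexer_usage_examples(hepsubmission, recid):
    admin_idx = AdminIndexer()

    # add or refresh a single submission in the dashboard index
    # (do_finalise, process_submission_directory, process_submission_payload)
    admin_idx.index_submission(hepsubmission)

    # rebuild the whole submission index from scratch
    # (reindex, find_duplicates_and_remove, delete_submission)
    admin_idx.reindex(recreate=True)

    # remove index entries for a record id; both calling styles appear above
    # (delete_submission, remove_submission)
    admin_idx.find_and_delete('recid', recid)
    admin_idx.find_and_delete(term=recid, fields=['recid'])

    # summary consumed by the dashboard view (submissions_list)
    return admin_idx.get_summary()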