def reindex():
    """Rebuild the search index from scratch; restricted to admin users.

    :return: JSON response with ``success: True`` on completion, or
        ``success: False`` plus a message when the user lacks the role.
    """
    # Guard clause: bail out early for non-admin users.
    if not has_role(current_user, 'admin'):
        return jsonify({
            "success": False,
            'message': "You don't have sufficient privileges to "
                       "perform this action."
        })

    reindex_all(recreate=True)
    push_data_keywords()
    return jsonify({"success": True})
def reindex():
    """Recreate the main search index and the admin submission index.

    Restricted to admin users.

    :return: JSON response with ``success: True`` on completion, or
        ``success: False`` plus a message when the user lacks the role.
    """
    # Guard clause: bail out early for non-admin users.
    if not has_role(current_user, 'admin'):
        return jsonify({
            "success": False,
            'message': "You don't have sufficient privileges to "
                       "perform this action."
        })

    reindex_all(recreate=True)
    push_data_keywords()
    # Rebuild the admin-side submission index as well.
    AdminIndexer().reindex(recreate=True)
    return jsonify({"success": True})
def create_missing_datasubmission_records():
    """Finalise finished data submissions that lack record/inspire ids.

    Finds ``DataSubmission`` rows belonging to a finished ``HEPSubmission``
    whose ``associated_recid`` and ``publication_inspire_id`` are both unset,
    finalises them (creating the missing records), registers their DOIs and
    reindexes the affected publications.
    """
    # Query for submissions with missing IDs on finished publications.
    query = DataSubmission.query \
        .join(HEPSubmission,
              HEPSubmission.publication_recid == DataSubmission.publication_recid) \
        .filter(
            DataSubmission.associated_recid == None,
            DataSubmission.publication_inspire_id == None,
            DataSubmission.version == HEPSubmission.version,
            HEPSubmission.overall_status == 'finished')
    missing = query.all()

    if not missing:
        print("No datasubmissions found with missing record or inspire ids.")
        return

    # Group the orphaned submissions by their publication record id.
    by_publication = {}
    for datasub in missing:
        by_publication.setdefault(datasub.publication_recid, []).append(datasub)

    # Process each publication's submissions as one batch.
    for pub_recid, datasubs in by_publication.items():
        publication_record = get_record_by_id(pub_recid)
        timestamp = "{:%Y-%m-%d %H:%M:%S}".format(datetime.utcnow())
        generated_record_ids = []

        for datasub in datasubs:
            # Finalise each data submission that does not have a record.
            finalise_datasubmission(timestamp, {}, generated_record_ids,
                                    publication_record, pub_recid,
                                    datasub, datasub.version)

            # Register the datasubmission's DOI (skipped under test config).
            if not current_app.config.get('TESTING', False):
                generate_doi_for_table.delay(datasub.doi)
                print(f"Generated DOI {datasub.doi}")
            else:
                print(f"Would generate DOI {datasub.doi}")

        # finalise_datasubmission does not commit, so commit once per publication.
        db.session.commit()

        # Reindex the publication together with its updated datasubmissions.
        index_record_ids([pub_recid] + generated_record_ids)
        push_data_keywords(pub_ids=[pub_recid])
def update_record_info(inspire_id, send_email=False):
    """Update publication information from INSPIRE for a specific record.

    Fetches the latest metadata for *inspire_id* from INSPIRE and, when it
    differs from what is stored, updates the publication record and its data
    records (only a whitelist of keys is propagated to data records). For
    finished submissions the records are reindexed, keywords are pushed and
    DataCite metadata is refreshed.

    :param inspire_id: INSPIRE id, optionally prefixed with ``ins``.
    :param send_email: if True, notify all participants of the update.
    :return: short status string: ``'Inspire ID is None'``,
        ``'No HEPData submission'``, ``'Invalid Inspire ID'``,
        ``'No update needed'`` or ``'Success'``.
    """
    if inspire_id is None:
        log.error("Inspire ID is None")
        return 'Inspire ID is None'

    inspire_id = inspire_id.replace("ins", "")

    hep_submission = get_latest_hepsubmission(inspire_id=inspire_id)
    if hep_submission is None:
        log.warning("Failed to retrieve HEPData submission for Inspire ID {0}".format(inspire_id))
        return 'No HEPData submission'

    publication_recid = hep_submission.publication_recid
    log.info("Updating recid {} with information from Inspire record {}".format(
        publication_recid, inspire_id))

    updated_inspire_record_information, status = get_inspire_record_information(inspire_id)

    if status == 'success':
        # Also need to update publication information for data records.
        data_submissions = DataSubmission.query.filter_by(
            publication_recid=publication_recid, version=hep_submission.version
        ).order_by(DataSubmission.id.asc())
        record_ids = [publication_recid]  # list of record IDs
        for data_submission in data_submissions:
            record_ids.append(data_submission.associated_recid)

        same_information = {}
        for index, recid in enumerate(record_ids):
            if index == 0:
                # Publication record: take the full INSPIRE payload.
                updated_record_information = updated_inspire_record_information
            else:
                # Only update selected keys for data records.
                updated_record_information = {
                    key: updated_inspire_record_information[key] for key in (
                        'authors', 'creation_date', 'journal_info', 'collaborations'
                    )
                }

            record_information = get_record_by_id(recid)
            same_information[recid] = True
            for key, value in updated_record_information.items():
                # First differing key triggers a full update of this record;
                # no need to keep comparing after that.
                if key not in record_information or record_information[key] != value:
                    log.debug('For recid {}, key {} has new value {}'.format(recid, key, value))
                    same_information[recid] = False
                    update_record(recid, updated_record_information)
                    break
            log.info('For recid {}, information needs to be updated: {}'.format(
                recid, str(not same_information[recid])))

        if all(same_information.values()):
            return 'No update needed'
    else:
        log.warning("Failed to retrieve publication information for Inspire record {0}".format(inspire_id))
        return 'Invalid Inspire ID'

    if hep_submission.overall_status == 'finished':
        index_record_ids(record_ids)  # index for Elasticsearch
        # Bug fix: push keywords for the publication, not for whatever data
        # record id the loop variable `recid` last held. Sibling functions
        # (do_finalise, create_missing_datasubmission_records) consistently
        # pass the publication recid here.
        push_data_keywords(pub_ids=[publication_recid])

    if not TESTING:
        generate_dois_for_submission.delay(inspire_id=inspire_id)  # update metadata stored in DataCite

    if send_email:
        record_information = get_record_by_id(publication_recid)
        notify_publication_update(hep_submission, record_information)   # send email to all participants

    return 'Success'
def do_finalise(recid, publication_record=None, force_finalise=False,
                commit_message=None, send_tweet=False, update=False,
                convert=True):
    """
    Finalise an in-progress ('todo') submission for *recid*: create/refresh
    its data records, update the publication record, mint DOIs, reindex, email
    participants and (optionally) tweet and pre-convert to other formats.

    Only the submission coordinator may finalise, unless ``force_finalise``.

    :param recid: publication record id of the submission to finalise.
    :param publication_record: optional pre-fetched publication record,
        passed through to ``finalise_datasubmission``.
    :param force_finalise: if True, skip the coordinator permission check.
    :param commit_message: revision message; when given, a
        ``RecordVersionCommitMessage`` is stored and ``last_updated`` is
        set to now.
    :param send_tweet: if True, announce the finalised record via ``tweet``.
    :param update: treat as an update of existing records even at version 1.
    :param convert: if True, queue conversion of the record to each supported
        file format.
    :return: JSON string describing success or the permission error.
        NOTE(review): when ``get_record_by_id`` raises ``NoResultFound`` the
        function prints and falls through, implicitly returning None.
    """
    print('Finalising record {}'.format(recid))

    # Only a submission still in the 'todo' state can be finalised.
    hep_submission = HEPSubmission.query.filter_by(
        publication_recid=recid, overall_status="todo").first()

    generated_record_ids = []
    if hep_submission \
            and (force_finalise or hep_submission.coordinator == int(current_user.get_id())):

        submissions = DataSubmission.query.filter_by(
            publication_recid=recid,
            version=hep_submission.version).all()

        version = hep_submission.version

        existing_submissions = {}
        if hep_submission.version > 1 or update:
            # we need to determine which are the existing record ids.
            existing_data_records = get_records_matching_field(
                'related_publication', recid, doc_type=CFG_DATA_TYPE)

            for record in existing_data_records["hits"]["hits"]:
                if "recid" in record["_source"]:
                    # Map table title -> existing recid so the new version
                    # reuses the same record ids.
                    existing_submissions[record["_source"]["title"]] = \
                        record["_source"]["recid"]
                delete_item_from_index(
                    record["_id"],
                    doc_type=CFG_DATA_TYPE,
                    parent=record["_source"]["related_publication"])

        current_time = "{:%Y-%m-%d %H:%M:%S}".format(datetime.now())

        for submission in submissions:
            finalise_datasubmission(current_time, existing_submissions,
                                    generated_record_ids,
                                    publication_record, recid, submission,
                                    version)

        try:
            record = get_record_by_id(recid)
            # If we have a commit message, then we have a record update.
            # We will store the commit message and also update the
            # last_updated flag for the record.
            record['hepdata_doi'] = hep_submission.doi

            if commit_message:
                # On a revision, the last updated date will
                # be the current date.
                hep_submission.last_updated = datetime.now()

                commit_record = RecordVersionCommitMessage(
                    recid=recid,
                    version=version,
                    message=str(commit_message))

                db.session.add(commit_record)

            record['last_updated'] = datetime.strftime(
                hep_submission.last_updated, '%Y-%m-%d %H:%M:%S')
            record['version'] = version

            record.commit()

            hep_submission.inspire_id = record['inspire_id']
            hep_submission.overall_status = "finished"
            db.session.add(hep_submission)

            # Persist the status flip before queuing any async work.
            db.session.commit()

            create_celery_app(current_app)

            # only mint DOIs if not testing.
            if not current_app.config.get(
                    'TESTING', False) and not current_app.config.get(
                    'NO_DOI_MINTING', False):
                for submission in submissions:
                    generate_doi_for_data_submission.delay(
                        submission.id, submission.version)
                log.info("Generating DOIs for ins{0}".format(
                    hep_submission.inspire_id))
                generate_doi_for_submission.delay(recid, version)

            # Reindex everything.
            index_record_ids([recid] + generated_record_ids)
            push_data_keywords(pub_ids=[recid])

            # Admin indexing is best-effort: a search-backend timeout must
            # not fail the finalisation, so it is only logged.
            try:
                admin_indexer = AdminIndexer()
                admin_indexer.index_submission(hep_submission)
            except ConnectionTimeout as ct:
                log.error('Unable to add ins{0} to admin index.\n{1}'.format(
                    hep_submission.inspire_id, ct))

            send_finalised_email(hep_submission)

            if convert:
                for file_format in ['yaml', 'csv', 'yoda', 'root']:
                    convert_and_store.delay(hep_submission.inspire_id,
                                            file_format,
                                            force=True)

            if send_tweet:
                tweet(
                    record.get('title'), record.get('collaborations'),
                    "http://www.hepdata.net/record/ins{0}".format(
                        record.get('inspire_id')), version)

            return json.dumps({
                "success": True,
                "recid": recid,
                "data_count": len(submissions),
                "generated_records": generated_record_ids
            })

        except NoResultFound:
            print('No record found to update. Which is super strange.')

    else:
        return json.dumps({
            "success": False,
            "recid": recid,
            "errors": [
                "You do not have permission to finalise this "
                "submission. Only coordinators can do that."
            ]
        })
def do_finalise(recid, publication_record=None, force_finalise=False,
                commit_message=None, send_tweet=False, update=False):
    """
    Finalise an in-progress ('todo') submission for *recid*: create/refresh
    its data records, update the publication record, mint DOIs, reindex, email
    participants and (optionally) tweet and queue format conversions.

    Only the submission coordinator may finalise, unless ``force_finalise``.

    :param recid: publication record id of the submission to finalise.
    :param publication_record: optional pre-fetched publication record,
        passed through to ``finalise_datasubmission``.
    :param force_finalise: if True, skip the coordinator permission check.
    :param commit_message: revision message; when given, a
        ``RecordVersionCommitMessage`` is stored and ``last_updated`` is
        set to now.
    :param send_tweet: if True, announce the finalised record via ``tweet``.
    :param update: treat as an update of existing records even at version 1.
    :return: JSON string describing success or the permission error.
        NOTE(review): when ``get_record_by_id`` raises ``NoResultFound`` the
        function prints and falls through, implicitly returning None.
    """
    # Only a submission still in the 'todo' state can be finalised.
    hep_submission = HEPSubmission.query.filter_by(
        publication_recid=recid, overall_status="todo").first()

    print('Finalising record {}'.format(recid))

    generated_record_ids = []
    if hep_submission \
            and (force_finalise or hep_submission.coordinator == int(current_user.get_id())):

        submissions = DataSubmission.query.filter_by(
            publication_recid=recid,
            version=hep_submission.version).all()

        version = hep_submission.version

        existing_submissions = {}
        if hep_submission.version > 1 or update:
            # we need to determine which are the existing record ids.
            existing_data_records = get_records_matching_field(
                'related_publication', recid, doc_type=CFG_DATA_TYPE)

            for record in existing_data_records["hits"]["hits"]:
                if "recid" in record["_source"]:
                    # Map table title -> existing recid so the new version
                    # reuses the same record ids.
                    existing_submissions[record["_source"]["title"]] = \
                        record["_source"]["recid"]
                delete_item_from_index(record["_id"],
                                       doc_type=CFG_DATA_TYPE,
                                       parent=record["_source"]["related_publication"])

        current_time = "{:%Y-%m-%d %H:%M:%S}".format(datetime.now())

        for submission in submissions:
            finalise_datasubmission(current_time, existing_submissions,
                                    generated_record_ids,
                                    publication_record, recid, submission,
                                    version)

        try:
            record = get_record_by_id(recid)
            # If we have a commit message, then we have a record update.
            # We will store the commit message and also update the
            # last_updated flag for the record.
            record['hepdata_doi'] = hep_submission.doi

            if commit_message:
                # On a revision, the last updated date will
                # be the current date.
                hep_submission.last_updated = datetime.now()

                commit_record = RecordVersionCommitMessage(
                    recid=recid,
                    version=version,
                    message=str(commit_message))

                db.session.add(commit_record)

            record['last_updated'] = datetime.strftime(
                hep_submission.last_updated, '%Y-%m-%d %H:%M:%S')
            record['version'] = version

            record.commit()

            hep_submission.inspire_id = record['inspire_id']
            hep_submission.overall_status = "finished"
            db.session.add(hep_submission)

            # Persist the status flip before queuing any async work.
            db.session.commit()

            create_celery_app(current_app)

            # only mint DOIs if not testing.
            if not current_app.config.get('TESTING', False) and not current_app.config.get('NO_DOI_MINTING', False):
                for submission in submissions:
                    generate_doi_for_data_submission.delay(submission.id, submission.version)
                generate_doi_for_submission.delay(recid, version)

            # Reindex everything.
            index_record_ids([recid] + generated_record_ids)
            push_data_keywords(pub_ids=[recid])

            admin_indexer = AdminIndexer()
            admin_indexer.index_submission(hep_submission)

            send_finalised_email(hep_submission)

            # Queue asynchronous conversion to the other supported formats.
            for file_format in ['csv', 'yoda', 'root']:
                convert_and_store.delay(hep_submission.inspire_id, file_format, force=True)

            if send_tweet:
                tweet(record.get('title'), record.get('collaborations'),
                      "http://www.hepdata.net/record/ins{0}".format(record.get('inspire_id')))

            return json.dumps({"success": True, "recid": recid,
                               "data_count": len(submissions),
                               "generated_records": generated_record_ids})

        except NoResultFound:
            print('No record found to update. Which is super strange.')

    else:
        return json.dumps(
            {"success": False, "recid": recid,
             "errors": ["You do not have permission to finalise this "
                        "submission. Only coordinators can do that."]})