def update_file(inspire_id, recid, only_record_information=False, send_tweet=False):
    """
    Re-import a record's files from the old HEPData site and reload its submission.

    :param inspire_id: INSPIRE identifier of the publication to refresh.
    :param recid: HEPData publication record id to update.
    :param only_record_information: if True, only refresh the publication
        metadata and reindex the record; do not reload the data submission.
    :param send_tweet: passed through to ``do_finalise`` to announce the record.
    """
    self = Migrator()
    # Force a fresh download of the submission files from the old site.
    output_location = self.prepare_files_for_submission(inspire_id, force_retrieval=True)
    if output_location:
        # Refresh the publication metadata from INSPIRE and persist it.
        updated_record_information = self.retrieve_publication_information(inspire_id)
        record_information = update_record(recid, updated_record_information)

        if not only_record_information:
            try:
                # Reload submission.yaml in update mode; returns the recid,
                # or None if the load did not yield a record.
                recid = self.load_submission(
                    record_information, output_location,
                    os.path.join(output_location, "submission.yaml"),
                    update=True)
                if recid is not None:
                    do_finalise(recid, publication_record=record_information,
                                force_finalise=True, send_tweet=send_tweet,
                                update=True)
            except FailedSubmission as fe:
                # Log the failure details and roll back the partial load so
                # the record is not left in a broken state.
                log.error(fe.message)
                fe.print_errors()
                remove_submission(fe.record_id)
        else:
            # Metadata-only update: just reindex the publication record.
            index_record_ids([record_information['recid']])
    else:
        log.error('Failed to load {0}'.format(inspire_id))
def update_analyses():
    """
    Attach analysis resources (e.g. Rivet-style endpoints) to finished submissions.

    For each configured endpoint in ``ANALYSES_ENDPOINTS`` that has an
    ``endpoint_url``, fetch the JSON mapping of INSPIRE ids to analyses and
    append any resource URLs not already attached to the matching finished
    HEPData submission, committing and reindexing per submission.
    """
    endpoints = current_app.config["ANALYSES_ENDPOINTS"]
    for analysis_endpoint in endpoints:
        if "endpoint_url" in endpoints[analysis_endpoint]:
            log.info("Updating analyses from {0}...".format(analysis_endpoint))
            response = requests.get(
                endpoints[analysis_endpoint]["endpoint_url"])
            # requests.Response truthiness: falsy for 4xx/5xx status codes.
            if response:
                # Assumes the payload maps INSPIRE id -> list of analysis
                # identifiers — TODO confirm against the endpoint's schema.
                analyses = response.json()
                for record in analyses:
                    submission = get_latest_hepsubmission(
                        inspire_id=record, overall_status='finished')
                    if submission:
                        num_new_resources = 0
                        for analysis in analyses[record]:
                            # Build the full resource URL from the endpoint's template.
                            _resource_url = endpoints[analysis_endpoint][
                                "url_template"].format(analysis)
                            # Only attach resources not already present on this version.
                            if not is_resource_added_to_submission(
                                    submission.publication_recid,
                                    submission.version,
                                    _resource_url):
                                print(
                                    'Adding {} analysis to ins{} with URL {}'.format(
                                        analysis_endpoint, record, _resource_url))
                                new_resource = DataResource(
                                    file_location=_resource_url,
                                    file_type=analysis_endpoint)
                                submission.resources.append(new_resource)
                                num_new_resources += 1
                        if num_new_resources:
                            # Commit once per submission; roll back on any DB error.
                            try:
                                db.session.add(submission)
                                db.session.commit()
                                index_record_ids([submission.publication_recid])
                            except Exception as e:
                                db.session.rollback()
                                log.error(e)
                    else:
                        log.debug(
                            "An analysis is available in {0} but with no equivalent in HEPData (ins{1})."
                            .format(analysis_endpoint, record))
        else:
            log.debug(
                "No endpoint url configured for {0}".format(analysis_endpoint))
def update_file(inspire_id, recid, force=False, only_record_information=False,
                send_tweet=False, convert=False):
    """
    Re-import a record from the old HEPData site, guarded by staleness checks.

    The submission is only reloaded when the old site's copy is newer than
    the current one AND the submission has the default coordinator (id 1)
    AND only a single version exists — unless ``force`` is True.

    :param inspire_id: INSPIRE identifier of the publication to refresh.
    :param recid: HEPData publication record id to update.
    :param force: reload even when the staleness/coordinator checks fail.
    :param only_record_information: only refresh metadata and reindex.
    :param send_tweet: passed through to ``do_finalise``.
    :param convert: passed through to ``do_finalise`` (file-format conversion).
    """
    self = Migrator()
    # Force a fresh download; also reports when the old site last changed.
    output_location, oldsite_last_updated = self.prepare_files_for_submission(
        inspire_id, force_retrieval=True)
    if output_location:
        updated_record_information = self.retrieve_publication_information(
            inspire_id)
        record_information = update_record(recid, updated_record_information)

        hep_submission = HEPSubmission.query.filter_by(
            publication_recid=recid).first()
        # One HEPSubmission row per version, so count == number of versions.
        version_count = HEPSubmission.query.filter_by(
            publication_recid=recid).count()

        print('Old site last updated {}'.format(str(oldsite_last_updated)))
        print('New site last updated {}'.format(
            str(hep_submission.last_updated)))
        print('Coordinator ID is {}, version count is {}'.format(
            hep_submission.coordinator, version_count))

        # Only auto-update untouched records: stale here, default
        # coordinator, and never revised on the new site.
        allow_update = hep_submission.last_updated < oldsite_last_updated and \
            hep_submission.coordinator == 1 and version_count == 1

        if not only_record_information and (allow_update or force):
            try:
                recid = self.load_submission(
                    record_information, output_location,
                    os.path.join(output_location, "submission.yaml"),
                    update=True)
                print('Loaded record {}'.format(recid))
                if recid is not None:
                    do_finalise(recid, publication_record=record_information,
                                force_finalise=True, send_tweet=send_tweet,
                                update=True, convert=convert)
            except FailedSubmission as fe:
                # Roll back the partial load so the record is not left broken.
                log.error(fe.message)
                fe.print_errors()
                remove_submission(fe.record_id)
        elif not only_record_information:
            print('Not updating record {}'.format(recid))
        else:
            # Metadata-only update: just reindex the publication record.
            index_record_ids([record_information["recid"]])
    else:
        log.error("Failed to load {0}".format(inspire_id))
def create_missing_datasubmission_records():
    """
    Finalise DataSubmission rows that never received record/INSPIRE ids.

    Finds data tables of finished submissions lacking both an associated
    recid and a publication INSPIRE id, finalises each one, queues DOI
    registration (outside tests), then commits and reindexes once per
    parent publication.
    """
    # Data tables of finished submissions that were never assigned ids.
    query = DataSubmission.query \
        .join(HEPSubmission,
              HEPSubmission.publication_recid == DataSubmission.publication_recid) \
        .filter(
            DataSubmission.associated_recid == None,
            DataSubmission.publication_inspire_id == None,
            DataSubmission.version == HEPSubmission.version,
            HEPSubmission.overall_status == 'finished')
    orphans = query.all()

    if not orphans:
        print("No datasubmissions found with missing record or inspire ids.")
        return

    # Group the orphaned data submissions by their parent publication.
    by_publication = {}
    for datasub in orphans:
        by_publication.setdefault(datasub.publication_recid, []).append(datasub)

    # Handle one publication at a time so each gets a single commit/reindex.
    for pub_recid, datasubs in by_publication.items():
        pub_record = get_record_by_id(pub_recid)
        timestamp = "{:%Y-%m-%d %H:%M:%S}".format(datetime.utcnow())
        new_record_ids = []

        for datasub in datasubs:
            # Finalise each data submission that does not have a record.
            finalise_datasubmission(timestamp, {}, new_record_ids,
                                    pub_record, pub_recid,
                                    datasub, datasub.version)

            # Register the datasubmission's DOI (skipped under test config).
            if not current_app.config.get('TESTING', False):
                generate_doi_for_table.delay(datasub.doi)
                print(f"Generated DOI {datasub.doi}")
            else:
                print(f"Would generate DOI {datasub.doi}")

        # finalise_datasubmission does not commit, so commit once per publication.
        db.session.commit()

        # Reindex the publication together with its new data records.
        index_record_ids([pub_recid] + new_record_ids)
        push_data_keywords(pub_ids=[pub_recid])
def add_resource(type, identifier, version):
    """
    Adds a data resource to either the submission or individual data files.

    Reads the analysis details from the request form and, if a matching
    submission is found and the user is permitted, attaches a new
    ``DataResource`` and reindexes the record.

    :param type: "submission" to attach to a HEPSubmission (by recid +
        version) or "data" to attach to a DataSubmission (by id).
        (Parameter name shadows the builtin but is kept for API stability.)
    :param identifier: publication recid or DataSubmission id, per ``type``.
    :param version: submission version (used only for ``type == "submission"``).
    :return: a redirect to the record page on success, otherwise an error page.
    """
    submission = None
    inspire_id = None
    recid = None
    if type == "submission":
        # NOTE: .one() raises if no row matches, so the truthiness checks
        # below only guard the (unreached) None case — kept as-is.
        submission = HEPSubmission.query.filter_by(publication_recid=identifier,
                                                   version=version).one()
        if submission:
            inspire_id = submission.inspire_id
            recid = submission.publication_recid
    elif type == "data":
        submission = DataSubmission.query.filter_by(id=identifier).one()
        if submission:
            inspire_id = submission.publication_inspire_id
            recid = submission.publication_recid

    if not user_allowed_to_perform_action(recid):
        abort(403)

    analysis_type = request.form.get("analysisType", None)
    analysis_other = request.form.get("analysisOther", None)
    analysis_url = request.form.get("analysisURL", None)
    analysis_description = request.form.get("analysisDescription", None)

    # "other" is a free-text type entered by the user.
    if analysis_type == "other":
        analysis_type = analysis_other

    if analysis_type and analysis_url:
        if submission:
            new_resource = DataResource(
                file_location=analysis_url,
                file_type=analysis_type,
                file_description=str(analysis_description)
            )
            submission.resources.append(new_resource)
            try:
                db.session.add(submission)
                db.session.commit()
                # Best-effort reindex: a search-index failure should not
                # roll back the already-committed resource. Previously a
                # bare `except:` which also swallowed SystemExit etc.
                try:
                    index_record_ids([recid])
                except Exception:
                    log.error("Failed to reindex {0}".format(recid))
                if inspire_id:
                    return redirect("/record/ins{0}".format(inspire_id))
                else:
                    return redirect("/record/{0}".format(recid))
            except Exception as e:
                db.session.rollback()
                raise e

    return render_template(
        "hepdata_records/error_page.html",
        recid=None,
        header_message="Error adding resource.",
        message="Unable to add resource. Please try again.",
        errors={},
    )
def update_file(inspire_id, recid, force=False, only_record_information=False,
                send_email=False, send_tweet=False, convert=False):
    """
    Re-import a record from the old HEPData site, with DataCite/email follow-up.

    The submission is only reloaded when (the old site's copy is newer than
    the current one OR ``force`` is set) AND the submission has the default
    coordinator (id 1) AND only a single version exists.

    :param inspire_id: INSPIRE identifier (may carry an "ins" prefix).
    :param recid: HEPData publication record id to update.
    :param force: override the staleness check (coordinator/version checks still apply).
    :param only_record_information: only refresh metadata and reindex.
    :param send_email: notify all participants of the publication update.
    :param send_tweet: passed through to ``do_finalise``.
    :param convert: passed through to ``do_finalise`` (file-format conversion).
    """
    self = Migrator()
    # Force a fresh download; also reports when the old site last changed.
    output_location, oldsite_last_updated = self.prepare_files_for_submission(
        inspire_id, force_retrieval=True)
    if output_location:
        updated_record_information, status = self.retrieve_publication_information(
            inspire_id)
        if status == 'success':
            record_information = update_record(recid, updated_record_information)
        else:
            log.error("Failed to retrieve publication information for {0}".format(inspire_id))
            return

        hep_submission = HEPSubmission.query.filter_by(
            publication_recid=recid).first()
        # One HEPSubmission row per version, so count == number of versions.
        version_count = HEPSubmission.query.filter_by(
            publication_recid=recid).count()

        print('Old site last updated {}'.format(str(oldsite_last_updated)))
        print('New site last updated {}'.format(
            str(hep_submission.last_updated)))
        print('Coordinator ID is {}, version count is {}'.format(
            hep_submission.coordinator, version_count))

        # Note: unlike the sibling variant, ``force`` here only bypasses the
        # staleness comparison, not the coordinator/version-count conditions.
        allow_update = (hep_submission.last_updated < oldsite_last_updated or force) and \
            hep_submission.coordinator == 1 and version_count == 1

        if not only_record_information and allow_update:
            try:
                recid = self.load_submission(
                    record_information, output_location,
                    os.path.join(output_location, "submission.yaml"),
                    update=True)
                print('Loaded record {}'.format(recid))
                if recid is not None:
                    do_finalise(recid, publication_record=record_information,
                                force_finalise=True, send_tweet=send_tweet,
                                update=True, convert=convert)
            except FailedSubmission as fe:
                # Roll back the partial load so the record is not left broken.
                log.error(fe.message)
                fe.print_errors()
                remove_submission(fe.record_id)
        elif not only_record_information:
            print('Not updating record {}'.format(recid))
        else:
            # Metadata-only update: just reindex the publication record.
            index_record_ids([record_information["recid"]])

        # NOTE(review): placement of the DOI/email follow-up relative to the
        # branches above was ambiguous in the mangled source; kept at the
        # "files retrieved" level so both update paths trigger it — confirm.
        _cleaned_id = inspire_id.replace("ins", "")
        generate_dois_for_submission.delay(
            inspire_id=_cleaned_id
        )  # update metadata stored in DataCite
        if send_email:
            notify_publication_update(
                hep_submission, record_information)  # send email to all participants
    else:
        log.error("Failed to load {0}".format(inspire_id))
def add_resource(type, identifier, version):
    """
    Adds a data resource to either the submission or individual data files.

    Reads the analysis details from the request form and, if a matching
    submission is found and the user is permitted, attaches a new
    ``DataResource`` and reindexes the record.

    :param type: 'submission' to attach to a HEPSubmission (by recid +
        version) or 'data' to attach to a DataSubmission (by id).
        (Parameter name shadows the builtin but is kept for API stability.)
    :param identifier: publication recid or DataSubmission id, per ``type``.
    :param version: submission version (used only for ``type == 'submission'``).
    :return: a redirect to the record page on success, otherwise an error page.
    """
    submission = None
    inspire_id = None
    recid = None
    if type == 'submission':
        # NOTE: .one() raises if no row matches, so the truthiness checks
        # below only guard the (unreached) None case — kept as-is.
        submission = HEPSubmission.query.filter_by(publication_recid=identifier,
                                                   version=version).one()
        if submission:
            inspire_id = submission.inspire_id
            recid = submission.publication_recid
    elif type == 'data':
        submission = DataSubmission.query.filter_by(id=identifier).one()
        if submission:
            inspire_id = submission.publication_inspire_id
            recid = submission.publication_recid

    if not user_allowed_to_perform_action(recid):
        abort(403)

    analysis_type = request.form.get('analysisType', None)
    analysis_other = request.form.get('analysisOther', None)
    analysis_url = request.form.get('analysisURL', None)
    analysis_description = request.form.get('analysisDescription', None)

    # 'other' is a free-text type entered by the user.
    if analysis_type == 'other':
        analysis_type = analysis_other

    if analysis_type and analysis_url:
        if submission:
            new_resource = DataResource(file_location=analysis_url,
                                        file_type=analysis_type,
                                        file_description=str(analysis_description))
            submission.resources.append(new_resource)
            try:
                db.session.add(submission)
                db.session.commit()
                # Best-effort reindex: a search-index failure should not
                # roll back the already-committed resource. Previously a
                # bare `except:` which also swallowed SystemExit etc.
                try:
                    index_record_ids([recid])
                except Exception:
                    log.error('Failed to reindex {0}'.format(recid))
                if inspire_id:
                    return redirect('/record/ins{0}'.format(inspire_id))
                else:
                    return redirect('/record/{0}'.format(recid))
            except Exception as e:
                db.session.rollback()
                raise e

    return render_template('hepdata_records/error_page.html',
                           recid=None,
                           header_message='Error adding resource.',
                           message='Unable to add resource. Please try again.',
                           errors={})
def update_record_info(inspire_id, send_email=False):
    """
    Update publication information from INSPIRE for a specific record.

    Refreshes the publication record and (a subset of fields for) its data
    records from INSPIRE, then reindexes, pushes keywords, refreshes the
    DataCite metadata and optionally emails participants.

    :param inspire_id: INSPIRE identifier (may carry an "ins" prefix).
    :param send_email: if True, notify all participants of the update.
    :return: a short status string: 'Inspire ID is None', 'No HEPData
        submission', 'Invalid Inspire ID', 'No update needed' or 'Success'.
    """
    if inspire_id is None:
        log.error("Inspire ID is None")
        return 'Inspire ID is None'

    inspire_id = inspire_id.replace("ins", "")

    hep_submission = get_latest_hepsubmission(inspire_id=inspire_id)
    if hep_submission is None:
        log.warning("Failed to retrieve HEPData submission for Inspire ID {0}".format(inspire_id))
        return 'No HEPData submission'

    publication_recid = hep_submission.publication_recid
    log.info("Updating recid {} with information from Inspire record {}".format(publication_recid, inspire_id))

    updated_inspire_record_information, status = get_inspire_record_information(inspire_id)
    if status == 'success':
        # Also need to update publication information for data records.
        data_submissions = DataSubmission.query.filter_by(
            publication_recid=publication_recid, version=hep_submission.version
        ).order_by(DataSubmission.id.asc())
        record_ids = [publication_recid]  # publication recid first, then data recids
        for data_submission in data_submissions:
            record_ids.append(data_submission.associated_recid)

        same_information = {}
        for index, recid in enumerate(record_ids):
            if index == 0:
                updated_record_information = updated_inspire_record_information
            else:
                # Only update selected keys for data records.
                updated_record_information = {
                    key: updated_inspire_record_information[key] for key in (
                        'authors', 'creation_date', 'journal_info', 'collaborations'
                    )
                }
            record_information = get_record_by_id(recid)
            same_information[recid] = True
            for key, value in updated_record_information.items():
                if key not in record_information or record_information[key] != value:
                    log.debug('For recid {}, key {} has new value {}'.format(recid, key, value))
                    same_information[recid] = False
                    # One differing key is enough: write the whole update once.
                    update_record(recid, updated_record_information)
                    break
            log.info('For recid {}, information needs to be updated: {}'.format(recid, str(not same_information[recid])))

        if all(same for same in same_information.values()):
            return 'No update needed'
    else:
        log.warning("Failed to retrieve publication information for Inspire record {0}".format(inspire_id))
        return 'Invalid Inspire ID'

    if hep_submission.overall_status == 'finished':
        index_record_ids(record_ids)  # index for Elasticsearch
        # BUG FIX: previously pushed keywords for the leaked loop variable
        # `recid` (the last data record) instead of the publication itself.
        push_data_keywords(pub_ids=[publication_recid])

    if not TESTING:
        generate_dois_for_submission.delay(inspire_id=inspire_id)  # update metadata stored in DataCite

    if send_email:
        record_information = get_record_by_id(publication_recid)
        notify_publication_update(hep_submission, record_information)  # send email to all participants

    return 'Success'
def do_finalise(recid, publication_record=None, force_finalise=False,
                commit_message=None, send_tweet=False, update=False,
                convert=True):
    """
    Creates record SIP for each data record with a link to the associated
    publication.

    Finalises a 'todo' submission: finalises each data submission, commits
    the record/version changes, mints DOIs (outside testing), reindexes,
    emails participants and optionally converts files and tweets.

    :param recid: publication record id to finalise.
    :param publication_record: publication metadata passed to each
        ``finalise_datasubmission`` call.
    :param force_finalise: bypass the coordinator == current user check.
    :param commit_message: if given, stored as a RecordVersionCommitMessage
        and ``last_updated`` is bumped to now (i.e. this is a revision).
    :param send_tweet: tweet the finalised record.
    :param update: treat as an update of existing records (clears the old
        data-record index entries even for version 1).
    :param convert: queue yaml/csv/yoda/root conversions after finalising.
    :return: JSON string with success flag, recid and generated record ids,
        or an error payload when the user lacks permission.
    """
    print('Finalising record {}'.format(recid))

    hep_submission = HEPSubmission.query.filter_by(
        publication_recid=recid, overall_status="todo").first()

    generated_record_ids = []
    # Only the submission's coordinator may finalise, unless forced.
    if hep_submission \
        and (force_finalise or hep_submission.coordinator == int(current_user.get_id())):

        submissions = DataSubmission.query.filter_by(
            publication_recid=recid,
            version=hep_submission.version).all()

        version = hep_submission.version

        existing_submissions = {}
        if hep_submission.version > 1 or update:
            # we need to determine which are the existing record ids.
            existing_data_records = get_records_matching_field(
                'related_publication', recid, doc_type=CFG_DATA_TYPE)

            for record in existing_data_records["hits"]["hits"]:
                if "recid" in record["_source"]:
                    # Remember each table's recid (keyed by title) so it can
                    # be reused, and drop the stale index entry.
                    existing_submissions[record["_source"]["title"]] = \
                        record["_source"]["recid"]
                    delete_item_from_index(
                        record["_id"],
                        doc_type=CFG_DATA_TYPE,
                        parent=record["_source"]["related_publication"])

        current_time = "{:%Y-%m-%d %H:%M:%S}".format(datetime.now())

        for submission in submissions:
            finalise_datasubmission(current_time, existing_submissions,
                                    generated_record_ids,
                                    publication_record, recid, submission,
                                    version)

        try:
            record = get_record_by_id(recid)
            # If we have a commit message, then we have a record update.
            # We will store the commit message and also update the
            # last_updated flag for the record.
            record['hepdata_doi'] = hep_submission.doi

            if commit_message:
                # On a revision, the last updated date will
                # be the current date.
                hep_submission.last_updated = datetime.now()

                commit_record = RecordVersionCommitMessage(
                    recid=recid, version=version, message=str(commit_message))
                db.session.add(commit_record)

            record['last_updated'] = datetime.strftime(
                hep_submission.last_updated, '%Y-%m-%d %H:%M:%S')
            record['version'] = version

            record.commit()

            # Mark the submission finished and persist everything so far.
            hep_submission.inspire_id = record['inspire_id']
            hep_submission.overall_status = "finished"
            db.session.add(hep_submission)
            db.session.commit()

            create_celery_app(current_app)

            # only mint DOIs if not testing.
            if not current_app.config.get(
                    'TESTING', False) and not current_app.config.get(
                    'NO_DOI_MINTING', False):
                for submission in submissions:
                    generate_doi_for_data_submission.delay(
                        submission.id, submission.version)
                log.info("Generating DOIs for ins{0}".format(
                    hep_submission.inspire_id))
                generate_doi_for_submission.delay(recid, version)

            # Reindex everything.
            index_record_ids([recid] + generated_record_ids)
            push_data_keywords(pub_ids=[recid])

            # Admin-index failures are non-fatal; the record is already live.
            try:
                admin_indexer = AdminIndexer()
                admin_indexer.index_submission(hep_submission)
            except ConnectionTimeout as ct:
                log.error('Unable to add ins{0} to admin index.\n{1}'.format(
                    hep_submission.inspire_id, ct))

            send_finalised_email(hep_submission)

            if convert:
                for file_format in ['yaml', 'csv', 'yoda', 'root']:
                    convert_and_store.delay(hep_submission.inspire_id,
                                            file_format, force=True)

            if send_tweet:
                tweet(
                    record.get('title'), record.get('collaborations'),
                    "http://www.hepdata.net/record/ins{0}".format(
                        record.get('inspire_id')), version)

            return json.dumps({
                "success": True,
                "recid": recid,
                "data_count": len(submissions),
                "generated_records": generated_record_ids
            })

        except NoResultFound:
            print('No record found to update. Which is super strange.')

    else:
        return json.dumps({
            "success": False,
            "recid": recid,
            "errors": [
                "You do not have permission to finalise this "
                "submission. Only coordinators can do that."
            ]
        })
def do_finalise(recid, publication_record=None, force_finalise=False,
                commit_message=None, send_tweet=False, update=False):
    """
    Creates record SIP for each data record with a link to the associated
    publication.

    Finalises a 'todo' submission: finalises each data submission, commits
    the record/version changes, mints DOIs (outside testing), reindexes,
    emails participants, queues file conversions and optionally tweets.

    :param recid: publication record id to finalise.
    :param publication_record: publication metadata passed to each
        ``finalise_datasubmission`` call.
    :param force_finalise: bypass the coordinator == current user check.
    :param commit_message: if given, stored as a RecordVersionCommitMessage
        and ``last_updated`` is bumped to now (i.e. this is a revision).
    :param send_tweet: tweet the finalised record.
    :param update: treat as an update of existing records (clears the old
        data-record index entries even for version 1).
    :return: JSON string with success flag, recid and generated record ids,
        or an error payload when the user lacks permission.
    """
    hep_submission = HEPSubmission.query.filter_by(
        publication_recid=recid, overall_status="todo").first()

    print('Finalising record {}'.format(recid))

    generated_record_ids = []
    # Only the submission's coordinator may finalise, unless forced.
    if hep_submission \
        and (force_finalise or hep_submission.coordinator == int(current_user.get_id())):

        submissions = DataSubmission.query.filter_by(
            publication_recid=recid,
            version=hep_submission.version).all()

        version = hep_submission.version

        existing_submissions = {}
        if hep_submission.version > 1 or update:
            # we need to determine which are the existing record ids.
            existing_data_records = get_records_matching_field(
                'related_publication', recid, doc_type=CFG_DATA_TYPE)

            for record in existing_data_records["hits"]["hits"]:
                if "recid" in record["_source"]:
                    # Remember each table's recid (keyed by title) so it can
                    # be reused, and drop the stale index entry.
                    existing_submissions[record["_source"]["title"]] = \
                        record["_source"]["recid"]
                    delete_item_from_index(
                        record["_id"],
                        doc_type=CFG_DATA_TYPE,
                        parent=record["_source"]["related_publication"])

        current_time = "{:%Y-%m-%d %H:%M:%S}".format(datetime.now())

        for submission in submissions:
            finalise_datasubmission(current_time, existing_submissions,
                                    generated_record_ids,
                                    publication_record, recid, submission,
                                    version)

        try:
            record = get_record_by_id(recid)
            # If we have a commit message, then we have a record update.
            # We will store the commit message and also update the
            # last_updated flag for the record.
            record['hepdata_doi'] = hep_submission.doi

            if commit_message:
                # On a revision, the last updated date will
                # be the current date.
                hep_submission.last_updated = datetime.now()

                commit_record = RecordVersionCommitMessage(
                    recid=recid, version=version, message=str(commit_message))
                db.session.add(commit_record)

            record['last_updated'] = datetime.strftime(
                hep_submission.last_updated, '%Y-%m-%d %H:%M:%S')
            record['version'] = version

            record.commit()

            # Mark the submission finished and persist everything so far.
            hep_submission.inspire_id = record['inspire_id']
            hep_submission.overall_status = "finished"
            db.session.add(hep_submission)
            db.session.commit()

            create_celery_app(current_app)

            # only mint DOIs if not testing.
            if not current_app.config.get('TESTING', False) and \
                    not current_app.config.get('NO_DOI_MINTING', False):
                for submission in submissions:
                    generate_doi_for_data_submission.delay(submission.id,
                                                           submission.version)
                generate_doi_for_submission.delay(recid, version)

            # Reindex everything.
            index_record_ids([recid] + generated_record_ids)
            push_data_keywords(pub_ids=[recid])

            admin_indexer = AdminIndexer()
            admin_indexer.index_submission(hep_submission)

            send_finalised_email(hep_submission)

            # Queue conversions of the finalised submission to other formats.
            for file_format in ['csv', 'yoda', 'root']:
                convert_and_store.delay(hep_submission.inspire_id,
                                        file_format, force=True)

            if send_tweet:
                tweet(record.get('title'), record.get('collaborations'),
                      "http://www.hepdata.net/record/ins{0}".format(
                          record.get('inspire_id')))

            return json.dumps({"success": True, "recid": recid,
                               "data_count": len(submissions),
                               "generated_records": generated_record_ids})

        except NoResultFound:
            print('No record found to update. Which is super strange.')

    else:
        return json.dumps(
            {"success": False, "recid": recid,
             "errors": ["You do not have permission to finalise this "
                        "submission. Only coordinators can do that."]})