def attach_information_to_record(recid):
    """
    Given an INSPIRE data representation, process the data and update the
    information for a given record id with the contents.

    :param recid: record id to update.
    :return: JSON response with 'status' and, on failure, a 'message'.
    """
    inspire_id = request.form['inspire_id']
    content, status = get_inspire_record_information(inspire_id)
    content["inspire_id"] = inspire_id
    record = get_record_by_id(recid)
    if record is not None and status == 'success':
        content['recid'] = recid
        record.update(content)
        record.commit()
        hep_submission = HEPSubmission.query.filter_by(
            publication_recid=recid, overall_status="todo").first()
        # Bug fix: .first() may return None when there is no 'todo'
        # submission; guard to avoid an AttributeError on .inspire_id.
        if hep_submission is not None:
            hep_submission.inspire_id = inspire_id
            db.session.add(hep_submission)
        db.session.commit()
        return jsonify({'status': 'success'})
    elif status != 'success':
        return jsonify({'status': status,
                        'message': 'Request for INSPIRE record {} failed.'.format(inspire_id)})
    else:
        return jsonify({'status': 'failed',
                        'message': 'No record with recid {} was found.'.format(str(recid))})
def attach_information_to_record(recid):
    """
    Given an INSPIRE data representation, process the data and update the
    information for a given record id with the contents.

    :param recid: record id to update.
    :return: JSON response with 'status' and, on failure, a 'message'.
    """
    inspire_id = request.form['inspire_id']
    content, status = get_inspire_record_information(inspire_id)
    content["inspire_id"] = inspire_id
    record = get_record_by_id(recid)
    # Bug fix: the original ignored 'status' and would patch an error
    # payload into the record when the INSPIRE request failed.
    if record is not None and status == 'success':
        content['recid'] = recid
        patch = jsonpatch.JsonPatch.from_diff(record, content)
        record = record.patch(patch=patch)
        record.commit()
        db.session.commit()
        return jsonify({'status': 'success'})
    elif status != 'success':
        return jsonify({'status': status,
                        'message': 'Request for INSPIRE record {} failed.'.format(inspire_id)})
    else:
        return jsonify({'status': 'failed',
                        'message': 'No record with that recid was found.'})
def test_parser(inspire_id, title, creation_date, year, subject_area):
    """Check INSPIRE parsing of one record against the expected metadata."""
    content, status = get_inspire_record_information(inspire_id)
    assert decode_string(content["title"]) == decode_string(title)
    assert content["creation_date"] == creation_date
    assert int(content["year"]) == year
    # subject_area is optional for some records.
    if subject_area is not None:
        assert content["subject_area"] == subject_area
def test_update_record_info(app):
    """Test update of publication information from INSPIRE."""
    # A missing Inspire ID is rejected outright.
    assert update_record_info(None) == 'Inspire ID is None'

    # Exercise both a valid and an invalid Inspire ID.
    for inspire_id in ('1311487', '19999999'):
        # No HEPSubmission object has been created yet.
        assert update_record_info(inspire_id) == 'No HEPData submission'

        submission = process_submission_payload(
            inspire_id=inspire_id,
            submitter_id=1,
            reviewer={'name': 'Reviewer', 'email': '*****@*****.**'},
            uploader={'name': 'Uploader', 'email': '*****@*****.**'},
            send_upload_email=False,
        )

        # Process the files to create DataSubmission tables in the DB.
        base_dir = os.path.dirname(os.path.realpath(__file__))
        directory = os.path.join(base_dir, 'test_data/test_submission')
        tmp_path = os.path.join(tempfile.mkdtemp(dir=CFG_TMPDIR),
                                'test_submission')
        shutil.copytree(directory, tmp_path)
        process_submission_directory(tmp_path,
                                     os.path.join(tmp_path, 'submission.yaml'),
                                     submission.publication_recid)
        do_finalise(submission.publication_recid,
                    force_finalise=True, convert=False)

        if inspire_id == '19999999':
            assert update_record_info(inspire_id) == 'Invalid Inspire ID'
        else:
            # First change the publication information to that of a
            # different record.
            different_inspire_record_information, status = \
                get_inspire_record_information('1650066')
            assert status == 'success'
            hep_submission = get_latest_hepsubmission(inspire_id=inspire_id)
            assert hep_submission is not None
            update_record(hep_submission.publication_recid,
                          different_inspire_record_information)
            # Then check that the update works and that a further update
            # is not required.
            assert update_record_info(inspire_id, send_email=True) == 'Success'
            assert update_record_info(inspire_id) == 'No update needed'

        unload_submission(submission.publication_recid)
def process_submission_payload(*args, **kwargs):
    """
    Processes the submission payload.

    :param inspire_id: INSPIRE record id (required if ``title`` missing).
    :param title: publication title (required if ``inspire_id`` missing).
    :param reviewer: dict with 'name' and 'email' keys.
    :param uploader: dict with 'name' and 'email' keys.
    :param send_upload_email: whether to email the uploader (default True).
    :raises ValueError: if neither ``title`` nor ``inspire_id`` is given.
    :return: the created/updated HEPSubmission object.
    """
    if kwargs.get('inspire_id'):
        content, status = get_inspire_record_information(kwargs.get('inspire_id'))
        content["inspire_id"] = kwargs.get('inspire_id')
    elif kwargs.get('title'):
        content = {'title': kwargs.get('title')}
    else:
        # Bug fix: ValueError has no 'message' keyword argument, so the
        # original call raised TypeError instead of the intended ValueError.
        raise ValueError("A title or inspire_id must be provided.")

    record_information = create_record(content)

    submitter_id = kwargs.get('submitter_id')
    if submitter_id is None:
        submitter_id = kwargs.get('user_id') if 'user_id' in kwargs else int(current_user.get_id())

    hepsubmission = get_or_create_hepsubmission(record_information["recid"], submitter_id)
    if kwargs.get('inspire_id'):
        hepsubmission.inspire_id = kwargs.get('inspire_id')
        db.session.add(hepsubmission)

    reviewer_details = kwargs.get('reviewer')
    reviewer = create_participant_record(reviewer_details.get('name'),
                                         reviewer_details.get('email'),
                                         'reviewer', 'primary',
                                         record_information['recid'])
    hepsubmission.participants.append(reviewer)

    uploader_details = kwargs.get('uploader')
    uploader = create_participant_record(uploader_details.get('name'),
                                         uploader_details.get('email'),
                                         'uploader', 'primary',
                                         record_information['recid'])
    hepsubmission.participants.append(uploader)

    db.session.commit()

    if kwargs.get('send_upload_email', True):
        # Now Send Email only to the uploader first. The reviewer will be
        # asked to review only when an upload has been performed.
        message = kwargs.get('message', None)
        send_cookie_email(uploader, record_information, message)

    admin_idx = AdminIndexer()
    admin_idx.index_submission(hepsubmission)

    return hepsubmission
def finalise(recid, publication_record=None, force_finalise=False):
    """Finalise a submission, refreshing publication info from INSPIRE first."""
    commit_message = request.form.get('message')

    # Update publication information from INSPIRE record before finalising.
    if not publication_record:
        record = get_record_by_id(recid)
        content, status = get_inspire_record_information(record['inspire_id'])
        if status == 'success':
            publication_record = update_record(recid, content)

    return do_finalise(recid,
                       publication_record=publication_record,
                       force_finalise=force_finalise,
                       commit_message=commit_message,
                       send_tweet=True)
def test_parser():
    """Check INSPIRE parsing of several records against expected metadata."""
    test_data = [
        {
            "inspire_id": "1245023",
            "title": "High-statistics study of $K^0_S$ pair "
                     "production in two-photon collisions",
            "creation_date": "2013-07-29",
            "year": 2013,
        },
        {
            "inspire_id": "1183818",
            # Raw string fixes the invalid '\s' escape sequence
            # (DeprecationWarning); the string value is unchanged.
            "title": "Measurements of the pseudorapidity dependence "
                     "of the total transverse energy "
                     "in proton-proton "
                     r"collisions at $\sqrt{s}=7$ TeV with ATLAS",
            "creation_date": "2012-08-01",
            "year": 2012,
        },
        {
            "inspire_id": "1407276",
            "title": "Elastic scattering of negative pions by protons at 2 BeV/c",
            "creation_date": "1963-01-01",
            "year": 1963,
        },
        {
            "inspire_id": "44234",
            "title": "DIFFERENTIAL ELASTIC PION-PROTON SCATTERING AT 600-MEV, 650-MEV and 750-MEV",
            "creation_date": "2006-04-11",
            "year": 2006,
        },
        {
            "inspire_id": "1187688",
            "title": "Mesure de la polarisation du proton de recul dans la diffusion élastique "
                     "pi+- p entre 550 et 1025 MeV",
            "creation_date": "1970-01-01",
            "year": 1970,
        },
        {
            "inspire_id": "67677",
            "title": "INELASTIC ELECTRON - DEUTERON SCATTERING AT HIGH-ENERGIES",
            "creation_date": "1971-01-01",
            "year": 1971,
        },
    ]
    for test in test_data:
        content, status = get_inspire_record_information(test["inspire_id"])
        assert decode_string(content["title"]) == decode_string(test["title"])
        assert content["creation_date"] == test["creation_date"]
        assert int(content["year"]) == test["year"]
def process_submission_payload(*args, **kwargs):
    """
    Processes the submission payload.

    :param inspire_id: INSPIRE record id (required if ``title`` missing).
    :param title: publication title (required if ``inspire_id`` missing).
    :param reviewer: dict with 'name' and 'email' keys.
    :param uploader: dict with 'name' and 'email' keys.
    :param send_upload_email: whether to email the uploader (default True).
    :raises ValueError: if neither ``title`` nor ``inspire_id`` is given.
    :return: the created/updated HEPSubmission object.
    """
    if kwargs.get('inspire_id'):
        content, status = get_inspire_record_information(kwargs.get('inspire_id'))
        content["inspire_id"] = kwargs.get('inspire_id')
    elif kwargs.get('title'):
        content = {'title': kwargs.get('title')}
    else:
        # Bug fix: ValueError has no 'message' keyword argument, so the
        # original call raised TypeError instead of the intended ValueError.
        raise ValueError("A title or inspire_id must be provided.")

    record_information = create_record(content)

    submitter_id = kwargs.get('submitter_id')
    if submitter_id is None:
        submitter_id = kwargs.get('user_id') if 'user_id' in kwargs else int(current_user.get_id())

    hepsubmission = get_or_create_hepsubmission(record_information["recid"], submitter_id)

    reviewer_details = kwargs.get('reviewer')
    reviewer = create_participant_record(reviewer_details.get('name'),
                                         reviewer_details.get('email'),
                                         'reviewer', 'primary',
                                         record_information['recid'])
    hepsubmission.participants.append(reviewer)

    uploader_details = kwargs.get('uploader')
    uploader = create_participant_record(uploader_details.get('name'),
                                         uploader_details.get('email'),
                                         'uploader', 'primary',
                                         record_information['recid'])
    hepsubmission.participants.append(uploader)

    db.session.commit()

    if kwargs.get('send_upload_email', True):
        # Now Send Email only to the uploader first. The reviewer will be
        # asked to review only when an upload has been performed.
        message = kwargs.get('message', None)
        send_cookie_email(uploader, record_information, message)

    return hepsubmission
def retrieve_publication_information(self, inspire_id):
    """
    Retrieve publication metadata from INSPIRE.

    :param inspire_id: id for record to get. If this contains "ins",
        the "ins" is removed.
    :return: tuple of (content dict containing keys for title, doi,
        authors, abstract, arxiv_id, collaboration; status string).
    """
    # Strip any "ins" marker and use the bare numeric id.
    if "ins" in inspire_id:
        inspire_id = int(inspire_id.replace("ins", ""))
    content, status = get_inspire_record_information(inspire_id)
    content["inspire_id"] = inspire_id
    return content, status
def test_parser():
    """Check INSPIRE parsing of several records against expected metadata."""
    test_data = [
        {
            "inspire_id": "1245023",
            "title": "High-statistics study of $K^0_S$ pair "
                     "production in two-photon collisions",
            "creation_date": "2013-07-29",
            "year": 2013,
            "subject_area": ['HEP Experiment'],
        },
        {
            "inspire_id": "1183818",
            # Raw string fixes the invalid '\s' escape sequence
            # (DeprecationWarning); the string value is unchanged.
            "title": "Measurements of the pseudorapidity dependence "
                     "of the total transverse energy "
                     "in proton-proton "
                     r"collisions at $\sqrt{s}=7$ TeV with ATLAS",
            "creation_date": "2012-08-01",
            "year": 2012,
            "subject_area": ["HEP Experiment"],
        },
        {
            "inspire_id": "1407276",
            "title": "Elastic scattering of negative pions by protons at 2 BeV/c",
            "creation_date": "1963-01-01",
            "year": 1963,
        },
        {
            "inspire_id": "44234",
            "title": "DIFFERENTIAL ELASTIC PION-PROTON SCATTERING AT 600-MEV, 650-MEV and 750-MEV",
            "creation_date": "2006-04-11",
            "year": 2006,
        },
        {
            "inspire_id": "1187688",
            "title": "Mesure de la polarisation du proton de recul dans la diffusion élastique "
                     "pi+- p entre 550 et 1025 MeV",
            "creation_date": "1970-01-01",
            "year": 1970,
        },
        {
            "inspire_id": "67677",
            "title": "INELASTIC ELECTRON - DEUTERON SCATTERING AT HIGH-ENERGIES",
            "creation_date": "1971-01-01",
            "year": 1971,
        },
    ]
    for test in test_data:
        content, status = get_inspire_record_information(test["inspire_id"])
        assert decode_string(content["title"]) == decode_string(test["title"])
        assert content["creation_date"] == test["creation_date"]
        assert int(content["year"]) == test["year"]
        # Only some records declare a subject area.
        if 'subject_area' in test:
            assert content["subject_area"] == test["subject_area"]
def retrieve_publication_information(self, inspire_id):
    """
    Retrieve publication metadata from INSPIRE and create a record from it.

    :param inspire_id: id for record to get. If this contains 'ins',
        the 'ins' is removed.
    :return: result of ``create_record`` on the fetched content
        (dict containing keys for title, doi, authors, abstract,
        arxiv_id, collaboration).
    """
    # Strip any 'ins' marker and use the bare numeric id.
    if "ins" in inspire_id:
        inspire_id = int(inspire_id.replace("ins", ""))
    content, status = get_inspire_record_information(inspire_id)
    content["inspire_id"] = inspire_id
    return create_record(content)
def attach_information_to_record(recid):
    """
    Given an INSPIRE data representation, process the data and update the
    information for a given record id with the contents.

    :param recid: record id to update.
    :return: JSON response with 'status' and, on failure, a 'message'.
    """
    inspire_id = request.form["inspire_id"]
    content, status = get_inspire_record_information(inspire_id)
    content["inspire_id"] = inspire_id
    record = get_record_by_id(recid)
    # Bug fix: the original ignored 'status' and would patch an error
    # payload into the record when the INSPIRE request failed.
    if record is not None and status == "success":
        content["recid"] = recid
        patch = jsonpatch.JsonPatch.from_diff(record, content)
        record = record.patch(patch=patch)
        record.commit()
        db.session.commit()
        return jsonify({"status": "success"})
    elif status != "success":
        return jsonify({"status": status,
                        "message": "Request for INSPIRE record {} failed.".format(inspire_id)})
    else:
        return jsonify({"status": "failed",
                        "message": "No record with that recid was found."})
def mock_import_old_record(inspire_id=mock_inspire_ids[1], send_email=False):
    """Creates a submission but mimics the old migrated paths.

    (See hepdata master branch at ccd691b for old migrator module.)

    :return: False on any failure; otherwise the result of ``do_finalise``.
    """
    if inspire_id not in mock_inspire_ids:
        raise ValueError('Invalid inspire id %s. Accepted values are: %s'
                         % (inspire_id, ', '.join(mock_inspire_ids)))

    # Use zipped test data for specific record(s).
    publication_information, status = get_inspire_record_information(inspire_id)
    publication_information["inspire_id"] = inspire_id

    # Create the publication record.
    if status != "success":
        log.error("Failed to retrieve publication information for " + inspire_id)
        return False
    record_information = create_record(publication_information)

    # Unzip into the correct data directory.
    data_path = get_data_path_for_record(record_information['recid'])
    base_dir = os.path.dirname(os.path.realpath(__file__))
    zip_path = os.path.join(base_dir, 'old_hepdata_zips',
                            'ins%s.zip' % inspire_id)
    if not os.path.isfile(zip_path):
        log.error('Invalid path %s' % zip_path)
        return False
    log.info('Unzipping %s to %s' % (zip_path, data_path))
    shutil.unpack_archive(zip_path, data_path)
    time_stamp = str(int(round(time.time())))
    yaml_path = os.path.join(data_path, time_stamp)
    sub_zip_path = os.path.join(data_path, 'ins%s.zip' % inspire_id)
    shutil.unpack_archive(sub_zip_path, yaml_path)

    # Create the submission and consume the data payload into the db.
    admin_user_id = 1
    get_or_create_hepsubmission(record_information["recid"], admin_user_id)
    errors = process_submission_directory(
        yaml_path,
        os.path.join(yaml_path, "submission.yaml"),
        record_information["recid"],
        old_submission_schema=True,
        old_data_schema=True)
    if errors:
        log.error("Submission failed for {0}.".format(record_information["recid"]),
                  errors, record_information["recid"])
        return False

    do_finalise(record_information['recid'],
                publication_record=record_information,
                force_finalise=True,
                convert=False,
                send_email=send_email)
def update_record_info(inspire_id, send_email=False):
    """Update publication information from INSPIRE for a specific record."""
    if inspire_id is None:
        log.error("Inspire ID is None")
        return 'Inspire ID is None'

    inspire_id = inspire_id.replace("ins", "")
    hep_submission = get_latest_hepsubmission(inspire_id=inspire_id)
    if hep_submission is None:
        log.warning("Failed to retrieve HEPData submission for Inspire ID {0}".format(inspire_id))
        return 'No HEPData submission'

    publication_recid = hep_submission.publication_recid
    log.info("Updating recid {} with information from Inspire record {}".format(publication_recid, inspire_id))

    updated_inspire_record_information, status = get_inspire_record_information(inspire_id)
    if status != 'success':
        log.warning("Failed to retrieve publication information for Inspire record {0}".format(inspire_id))
        return 'Invalid Inspire ID'

    # Also need to update publication information for data records.
    data_submissions = DataSubmission.query.filter_by(
        publication_recid=publication_recid,
        version=hep_submission.version
    ).order_by(DataSubmission.id.asc())
    record_ids = [publication_recid]  # list of record IDs
    for data_submission in data_submissions:
        record_ids.append(data_submission.associated_recid)

    same_information = {}
    for index, recid in enumerate(record_ids):
        if index == 0:
            updated_record_information = updated_inspire_record_information
        else:
            # Only update selected keys for data records.
            updated_record_information = {
                key: updated_inspire_record_information[key]
                for key in ('authors', 'creation_date', 'journal_info', 'collaborations')
            }
        record_information = get_record_by_id(recid)
        same_information[recid] = True
        for key, value in updated_record_information.items():
            if key not in record_information or record_information[key] != value:
                log.debug('For recid {}, key {} has new value {}'.format(recid, key, value))
                same_information[recid] = False
                update_record(recid, updated_record_information)
                break
        log.info('For recid {}, information needs to be updated: {}'.format(recid, str(not(same_information[recid]))))

    if all(same_information.values()):
        return 'No update needed'

    if hep_submission.overall_status == 'finished':
        index_record_ids(record_ids)  # index for Elasticsearch
        # NOTE(review): 'recid' here is the last id from the loop above,
        # not publication_recid — presumably intentional; verify.
        push_data_keywords(pub_ids=[recid])

    if not TESTING:
        generate_dois_for_submission.delay(inspire_id=inspire_id)  # update metadata stored in DataCite

    if send_email:
        record_information = get_record_by_id(publication_recid)
        notify_publication_update(hep_submission, record_information)  # send email to all participants

    return 'Success'
def _import_record(inspire_id, update_existing=False,
                   base_url='https://hepdata.net', send_email=False):
    """Download and import an existing record into this instance.

    :param inspire_id: INSPIRE id of the record to import.
    :param update_existing: whether to update a record already present.
    :param base_url: instance to download the submission archive from.
    :param send_email: forwarded to ``do_finalise``.
    :return: True on success, False otherwise.
    """
    publication_information, status = get_inspire_record_information(inspire_id)
    if status != "success":
        log.error("Failed to retrieve publication information for " + inspire_id)
        return False

    current_submission = get_latest_hepsubmission(inspire_id=inspire_id)
    if not current_submission:
        log.info("The record with id {0} does not exist in the database, so we're loading it.".format(inspire_id))
        publication_information["inspire_id"] = inspire_id
        record_information = create_record(publication_information)
        recid = record_information['recid']
    else:
        log.info("The record with inspire id {0} already exists.".format(inspire_id))
        if not update_existing:
            log.info("Not updating as update_existing is False")
            return False
        log.info("Updating instead")
        recid = current_submission.publication_recid

    try:
        download_path = _download_file(base_url, inspire_id)
        filename = os.path.basename(download_path)
        time_stamp = str(int(round(time.time())))
        file_save_directory = get_data_path_for_record(str(recid), time_stamp)
        if not os.path.exists(file_save_directory):
            os.makedirs(file_save_directory)
        file_path = os.path.join(file_save_directory, filename)
        log.info("Moving file to %s" % file_path)
        shutil.copy(download_path, file_path)

        # Create submission.
        admin_user_id = 1
        hepsubmission = get_or_create_hepsubmission(recid, admin_user_id)
        db.session.add(hepsubmission)
        db.session.commit()

        # Then process the payload as for any other record.
        errors = process_zip_archive(file_path, recid)
        if errors:
            log.info("Errors processing archive. Re-trying with old schema.")
            # Try again with the old schema; clean up first to avoid errors.
            # First delete tables.
            cleanup_submission(recid, 1, [])
            # Next remove remaining files.
            file_save_directory = os.path.dirname(file_path)
            submission_path = os.path.join(file_save_directory,
                                           remove_file_extension(filename))
            shutil.rmtree(submission_path)
            errors = process_zip_archive(file_path, recid,
                                         old_submission_schema=True,
                                         old_data_schema=True)
            if errors:
                log.error("Could not process zip archive: ")
                for file, file_errors in errors.items():
                    log.error(" %s:" % file)
                    for error in file_errors:
                        log.error(" %s" % error['message'])
                raise ValueError("Could not validate record.")

        # Delete any previous upload folders.
        cleanup_old_files(hepsubmission)

        log.info("Finalising record %s" % recid)
        result_json = do_finalise(recid,
                                  force_finalise=True,
                                  update=(current_submission is not None),
                                  convert=False,
                                  send_email=send_email)
        result = json.loads(result_json)
        if result and result['success']:
            log.info("Imported record %s with %s submissions"
                     % (recid, result['data_count']))
            return True
        raise ValueError("Failed to finalise record.")
    except Exception as e:
        # Unload the partially-imported record.
        unload_submission(recid)
        log.error(e)
        return False