def load_file(inspire_id, send_tweet=False, convert=False,
              base_url='http://hepdata.cedar.ac.uk/view/{0}/yaml'):
    """
    Load one record through a ``Migrator`` and finalise it.

    :param inspire_id: identifier of the record to load.
    :param send_tweet: forwarded to ``do_finalise``.
    :param convert: forwarded to ``do_finalise``.
    :param base_url: URL template handed to the ``Migrator`` constructor.
    :return: ``True`` once the submission has been loaded and finalised;
        ``False`` if no output location could be prepared or the load
        raised ``FailedSubmission``.
        NOTE(review): if ``load_submission`` returns ``None`` nothing is
        returned explicitly, so the caller receives ``None``.
    """
    migrator = Migrator(base_url)
    output_location, _oldsite_last_updated = \
        migrator.prepare_files_for_submission(inspire_id)

    # Nothing to submit if the files could not be prepared.
    if not output_location:
        log.error("Failed to load " + inspire_id)
        return False

    publication = migrator.retrieve_publication_information(inspire_id)
    record_information = create_record(publication)

    try:
        submission_yaml = os.path.join(output_location, "submission.yaml")
        recid = migrator.load_submission(
            record_information, output_location, submission_yaml)
        if recid is not None:
            do_finalise(
                recid,
                publication_record=record_information,
                force_finalise=True,
                send_tweet=send_tweet,
                convert=convert,
            )
            return True
    except FailedSubmission as failure:
        # Report the failure and remove the partially loaded submission.
        log.error(failure.message)
        failure.print_errors()
        remove_submission(failure.record_id)
        return False
def finalise_datasubmission(current_time, existing_submissions,
                            generated_record_ids, publication_record, recid,
                            submission, version):
    """
    Build the record payload for one data submission and stage the
    submission (and its data review, if any) in the database session.

    :param current_time: value stored as the payload's ``last_updated``.
    :param existing_submissions: mapping of submission title to an existing
        record id; a matching title means that record id is reused.
    :param generated_record_ids: list that receives the record id used for
        this submission (mutated in place).
    :param publication_record: record of the associated publication; looked
        up with ``get_record_by_id(recid)`` when falsy.
    :param recid: record id of the publication.
    :param submission: data submission object; its ``associated_recid``,
        ``publication_inspire_id`` and ``version`` attributes are set.
    :param version: version number applied to the submission and review.
    :return: None. Objects are added to ``db.session`` but not committed here.
    """
    keyword_entries = [
        {"name": kw.name, "value": kw.value, "synonyms": ""}
        for kw in submission.keywords
    ]

    # The publication's authors and related metadata are copied onto the
    # data record, so the publication record must be available.
    if not publication_record:
        publication_record = get_record_by_id(recid)

    payload = {
        "title": submission.name,
        "abstract": submission.description,
        "inspire_id": publication_record['inspire_id'],
        "doi": submission.doi,
        "authors": publication_record['authors'],
        "first_author": publication_record.get('first_author', None),
        "related_publication": submission.publication_recid,
        "creation_date": publication_record["creation_date"],
        "last_updated": current_time,
        "journal_info": publication_record.get("journal_info", ""),
        "keywords": keyword_entries,
        "version": version,
        "collaborations": publication_record.get("collaborations", []),
    }

    title = payload["title"]
    if title in existing_submissions:
        # Update operation: reuse the record id already known for this
        # title so the record is updated rather than created afresh.
        known_recid = existing_submissions[title]
        payload["control_number"] = known_recid
        payload["recid"] = known_recid
    else:
        payload = create_record(payload)
        payload["control_number"] = payload["recid"]

    submission.associated_recid = payload['recid']
    submission.publication_inspire_id = publication_record['inspire_id']
    generated_record_ids.append(payload["recid"])
    submission.version = version

    review = DataReview.query.filter_by(data_recid=submission.id).first()
    if review:
        review.version = version
        db.session.add(review)

    db.session.add(submission)
def test_record_creation(app):
    """Check that a created record has a recid, a uuid and the given title."""
    metadata = {'journal_info': 'Phys. Letts', 'title': 'My Journal Paper'}

    with app.app_context():
        created = create_record(metadata)

        # Identifiers must be present and truthy.
        assert created['recid']
        assert created['uuid']
        assert created['title'] == 'My Journal Paper'
def test_record_creation(app):
    """Check that a created record has a recid, a uuid and the given title."""
    metadata = {'journal_info': 'Phys. Letts', 'title': 'My Journal Paper'}

    with app.app_context():
        created = create_record(metadata)

        # Identifiers must be present and truthy.
        assert created['recid']
        assert created['uuid']
        assert created['title'] == 'My Journal Paper'
def finalise_datasubmission(current_time, existing_submissions,
                            generated_record_ids, publication_record, recid,
                            submission, version):
    """
    Build the record payload for one data submission and stage the
    submission (and its data review, if any) in the database session.

    :param current_time: value stored as the payload's ``last_updated``.
    :param existing_submissions: mapping of submission title to an existing
        record id; a matching title means that record id is reused.
    :param generated_record_ids: list that receives the record id used for
        this submission (mutated in place).
    :param publication_record: record of the associated publication; looked
        up with ``get_record_by_id(recid)`` when falsy.
    :param recid: record id of the publication.
    :param submission: data submission object; its ``associated_recid``,
        ``publication_inspire_id`` and ``version`` attributes are set.
    :param version: version number applied to the submission and review.
    :return: None. Objects are added to ``db.session`` but not committed here.
    """
    keyword_entries = [
        {"name": kw.name, "value": kw.value, "synonyms": ""}
        for kw in submission.keywords
    ]

    # The publication's authors and related metadata are copied onto the
    # data record, so the publication record must be available.
    if not publication_record:
        publication_record = get_record_by_id(recid)

    payload = {
        "title": submission.name,
        "abstract": submission.description,
        "inspire_id": publication_record['inspire_id'],
        "doi": submission.doi,
        "authors": publication_record['authors'],
        "first_author": publication_record.get('first_author', None),
        "related_publication": submission.publication_recid,
        "creation_date": publication_record["creation_date"],
        "last_updated": current_time,
        "journal_info": publication_record.get("journal_info", ""),
        "keywords": keyword_entries,
        "version": version,
        "collaborations": publication_record.get("collaborations", []),
    }

    title = payload["title"]
    if title in existing_submissions:
        # Update operation: reuse the record id already known for this
        # title so the record is updated rather than created afresh.
        known_recid = existing_submissions[title]
        payload["control_number"] = known_recid
        payload["recid"] = known_recid
    else:
        payload = create_record(payload)
        payload["control_number"] = payload["recid"]

    submission.associated_recid = payload['recid']
    submission.publication_inspire_id = publication_record['inspire_id']
    generated_record_ids.append(payload["recid"])
    submission.version = version

    review = DataReview.query.filter_by(data_recid=submission.id).first()
    if review:
        review.version = version
        db.session.add(review)

    db.session.add(submission)
def process_submission_payload(*args, **kwargs):
    """
    Processes the submission payload.

    Creates a record, the matching HEPSubmission and its primary reviewer
    and uploader participants, optionally emails the uploader, and indexes
    the submission. The payload is read from keyword arguments only;
    positional arguments are accepted but not used.

    :param inspire_id: Inspire ID used to fetch the publication information.
        Takes precedence over ``title`` and is stored on the submission.
    :param title: title of the new record when no ``inspire_id`` is given.
    :param reviewer: mapping with ``name`` and ``email`` of the reviewer.
    :param uploader: mapping with ``name`` and ``email`` of the uploader.
    :param submitter_id: id of the submitting user. When missing or None,
        ``user_id`` is used if that key was passed, otherwise the id of
        ``current_user``.
    :param user_id: fallback for ``submitter_id``.
    :param send_upload_email: whether to email the uploader (default True).
    :param message: optional message passed on to ``send_cookie_email``.
    :return: the HEPSubmission that was created or retrieved.
    :raises ValueError: if neither ``inspire_id`` nor ``title`` is provided.
    """
    inspire_id = kwargs.get('inspire_id')
    title = kwargs.get('title')

    if inspire_id:
        content, _status = get_inspire_record_information(inspire_id)
        content["inspire_id"] = inspire_id
    elif title:
        content = {'title': title}
    else:
        # Built-in exceptions accept no keyword arguments, so the message
        # must be positional; ``ValueError(message=...)`` raises TypeError.
        raise ValueError("A title or inspire_id must be provided.")

    record_information = create_record(content)
    recid = record_information["recid"]

    submitter_id = kwargs.get('submitter_id')
    if submitter_id is None:
        if 'user_id' in kwargs:
            submitter_id = kwargs.get('user_id')
        else:
            submitter_id = int(current_user.get_id())

    hepsubmission = get_or_create_hepsubmission(recid, submitter_id)

    if inspire_id:
        hepsubmission.inspire_id = inspire_id
        db.session.add(hepsubmission)

    reviewer_details = kwargs.get('reviewer')
    reviewer = create_participant_record(
        reviewer_details.get('name'), reviewer_details.get('email'),
        'reviewer', 'primary', recid)
    hepsubmission.participants.append(reviewer)

    uploader_details = kwargs.get('uploader')
    uploader = create_participant_record(
        uploader_details.get('name'), uploader_details.get('email'),
        'uploader', 'primary', recid)
    hepsubmission.participants.append(uploader)

    db.session.commit()

    if kwargs.get('send_upload_email', True):
        # Now Send Email only to the uploader first. The reviewer will be
        # asked to review only when an upload has been performed.
        message = kwargs.get('message', None)
        send_cookie_email(uploader, record_information, message)

    admin_idx = AdminIndexer()
    admin_idx.index_submission(hepsubmission)

    return hepsubmission
def test_create_record_for_dashboard(app):
    """Check the dashboard entry built by ``create_record_for_dashboard``."""
    with app.app_context():
        record_information = create_record({
            'journal_info': 'Phys. Letts',
            'title': 'My Journal Paper',
            'inspire_id': '1487726'
        })
        recid = record_information['recid']

        hepsubmission = get_or_create_hepsubmission(recid)
        record = get_record_by_id(recid)
        user = User(email='*****@*****.**', password='******', active=True,
                    id=101)

        # Starting from an empty dict, a full entry is created for the record.
        test_submissions = {}
        create_record_for_dashboard(record['recid'], test_submissions, user)

        expected_entry = {
            'metadata': {
                'coordinator': {'name': 'No coordinator'},
                'recid': recid,
                'role': [],
                'start_date': hepsubmission.created,
                'last_updated': hepsubmission.last_updated,
                'title': u'My Journal Paper',
                'versions': 1
            },
            'stats': {'attention': 0, 'passed': 0, 'todo': 0},
            'status': 'todo'
        }
        assert test_submissions == {recid: expected_entry}

        # With an entry already present, only its role list is extended.
        test_submissions = {recid: {"metadata": {"role": []}}}
        create_record_for_dashboard(record['recid'], test_submissions, user)

        assert test_submissions == {recid: {"metadata": {"role": [[]]}}}
def test_record_update(app):
    """Check that ``update_record`` changes the stored journal info."""
    with app.app_context():
        created = create_record(
            {'journal_info': 'Phys. Letts', 'title': 'My Journal Paper'})
        recid = created['recid']

        # The freshly created record holds the original values.
        before = get_record_by_id(recid)
        assert before['title'] == 'My Journal Paper'
        assert before['journal_info'] == 'Phys. Letts'

        update_record(recid, {'journal_info': 'test'})

        after = get_record_by_id(recid)
        assert after['journal_info'] == 'test'
def test_record_update(app):
    """Check that ``update_record`` changes the stored journal info."""
    with app.app_context():
        created = create_record(
            {'journal_info': 'Phys. Letts', 'title': 'My Journal Paper'})
        recid = created['recid']

        # The freshly created record holds the original values.
        before = get_record_by_id(recid)
        assert before['title'] == 'My Journal Paper'
        assert before['journal_info'] == 'Phys. Letts'

        update_record(recid, {'journal_info': 'test'})

        after = get_record_by_id(recid)
        assert after['journal_info'] == 'test'
def test_submissions_admin(app, load_submission):
    """Check the submission count, titles and details returned for an admin user."""
    with app.app_context():
        record_information = create_record({
            'journal_info': 'Phys. Letts',
            'title': 'My Journal Paper',
            'inspire_id': '1487726'
        })
        recid = record_information['recid']
        hepsubmission = get_or_create_hepsubmission(recid)

        admin_role = Role(name='admin')
        user = User(email='*****@*****.**', password='******', active=True,
                    id=101, roles=[admin_role])

        assert get_submission_count(user) == 1
        assert list_submission_titles(user) == [
            {'id': recid, 'title': 'My Journal Paper'}
        ]

        submissions = prepare_submissions(user)
        assert len(submissions) == 1

        expected_entry = {
            'metadata': {
                'coordinator': {
                    'email': u'*****@*****.**',
                    'name': u'*****@*****.**',
                    'id': 1
                },
                'recid': str(recid),
                'role': [],
                'show_coord_view': False,
                'start_date': hepsubmission.created,
                'last_updated': hepsubmission.last_updated,
                'title': u'My Journal Paper',
                'versions': 1
            },
            'stats': {'attention': 0, 'passed': 0, 'todo': 0},
            'status': u'todo'
        }
        assert submissions[str(recid)] == expected_entry
def process_submission_payload(*args, **kwargs):
    """
    Processes the submission payload

    Creates a record, the matching HEPSubmission and its primary reviewer
    and uploader participants, and optionally emails the uploader. The
    payload is read from keyword arguments only; positional arguments are
    accepted but not used.

    :param inspire_id: Inspire ID used to fetch the publication information.
        Takes precedence over ``title``.
    :param title: title of the new record when no ``inspire_id`` is given.
    :param reviewer: mapping with ``name`` and ``email`` of the reviewer.
    :param uploader: mapping with ``name`` and ``email`` of the uploader.
    :param submitter_id: id of the submitting user. When missing or None,
        ``user_id`` is used if that key was passed, otherwise the id of
        ``current_user``.
    :param user_id: fallback for ``submitter_id``.
    :param send_upload_email: whether to email the uploader (default True).
    :param message: optional message passed on to ``send_cookie_email``.
    :return: the HEPSubmission that was created or retrieved.
    :raises ValueError: if neither ``inspire_id`` nor ``title`` is provided.
    """
    inspire_id = kwargs.get('inspire_id')
    title = kwargs.get('title')

    if inspire_id:
        content, _status = get_inspire_record_information(inspire_id)
        content["inspire_id"] = inspire_id
    elif title:
        content = {'title': title}
    else:
        # Built-in exceptions accept no keyword arguments, so the message
        # must be positional; ``ValueError(message=...)`` raises TypeError.
        raise ValueError("A title or inspire_id must be provided.")

    record_information = create_record(content)
    recid = record_information["recid"]

    submitter_id = kwargs.get('submitter_id')
    if submitter_id is None:
        if 'user_id' in kwargs:
            submitter_id = kwargs.get('user_id')
        else:
            submitter_id = int(current_user.get_id())

    hepsubmission = get_or_create_hepsubmission(recid, submitter_id)

    reviewer_details = kwargs.get('reviewer')
    reviewer = create_participant_record(
        reviewer_details.get('name'), reviewer_details.get('email'),
        'reviewer', 'primary', recid)
    hepsubmission.participants.append(reviewer)

    uploader_details = kwargs.get('uploader')
    uploader = create_participant_record(
        uploader_details.get('name'), uploader_details.get('email'),
        'uploader', 'primary', recid)
    hepsubmission.participants.append(uploader)

    db.session.commit()

    if kwargs.get('send_upload_email', True):
        # Now Send Email only to the uploader first. The reviewer will be
        # asked to review only when an upload has been performed.
        message = kwargs.get('message', None)
        send_cookie_email(uploader, record_information, message)

    return hepsubmission
def retrieve_publication_information(self, inspire_id):
    """
    Fetch the Inspire information for a record and create a record from it.

    :param inspire_id: id for record to get. If this contains 'ins',
        the 'ins' is removed and the remainder converted to an int.
    :return: the result of ``create_record`` for the retrieved content,
        with ``inspire_id`` added to that content.
    """
    has_prefix = "ins" in inspire_id
    if has_prefix:
        stripped = inspire_id.replace("ins", "")
        inspire_id = int(stripped)

    # The status part of the response is not inspected here.
    content, _status = get_inspire_record_information(inspire_id)
    content["inspire_id"] = inspire_id

    return create_record(content)
def test_submissions_participant(app, load_submission):
    """Check what a user sees as participant and coordinator, plus paging and filtering."""
    with app.app_context():
        record_information = create_record({
            'journal_info': 'Phys. Letts',
            'title': 'My Journal Paper',
            'inspire_id': '1487726'
        })
        first_recid = record_information['recid']

        hepsubmission = get_or_create_hepsubmission(first_recid)
        db.session.add(hepsubmission)

        user = User(email='*****@*****.**', password='******', active=True)
        db.session.add(user)
        db.session.commit()

        # Check the user doesn't see the record before they are a participant
        assert get_submission_count(user) == 0
        assert list_submission_titles(user) == []

        # Add the user as a participant
        participant = SubmissionParticipant(
            publication_recid=first_recid,
            role="uploader",
            email='*****@*****.**',
            status='primary',
            user_account=user.id)
        db.session.add(participant)
        hepsubmission.participants.append(participant)
        db.session.add(hepsubmission)
        db.session.commit()

        assert get_submission_count(user) == 1
        assert list_submission_titles(user) == [
            {'id': first_recid, 'title': 'My Journal Paper'}
        ]

        participant_submissions = prepare_submissions(user)
        assert len(participant_submissions) == 1

        expected_participant_entry = {
            'metadata': {
                'coordinator': {
                    'email': u'*****@*****.**',
                    'name': u'*****@*****.**',
                    'id': 1
                },
                'recid': str(first_recid),
                'role': ['uploader'],
                'show_coord_view': False,
                'start_date': hepsubmission.created,
                'last_updated': hepsubmission.last_updated,
                'title': u'My Journal Paper',
                'versions': 1
            },
            'stats': {'attention': 0, 'passed': 0, 'todo': 0},
            'status': u'todo'
        }
        assert participant_submissions[str(first_recid)] == \
            expected_participant_entry

        # Add a new submission as coordinator
        record_information2 = create_record({
            'journal_info': 'Another Journal',
            'title': 'My New Journal Paper',
            'inspire_id': '123456'
        })
        second_recid = record_information2['recid']
        hepsubmission = get_or_create_hepsubmission(
            second_recid, coordinator=user.id)

        assert get_submission_count(user) == 2
        assert list_submission_titles(user) == [
            {'id': second_recid, 'title': 'My New Journal Paper'},
            {'id': first_recid, 'title': 'My Journal Paper'},
        ]

        all_submissions = prepare_submissions(user)
        assert len(all_submissions) == 2

        expected_coordinator_entry = {
            'metadata': {
                'coordinator': {
                    'email': u'*****@*****.**',
                    'name': u'*****@*****.**',
                    'id': user.id
                },
                'recid': str(second_recid),
                'role': ['coordinator'],
                'show_coord_view': True,
                'start_date': hepsubmission.created,
                'last_updated': hepsubmission.last_updated,
                'title': u'My New Journal Paper',
                'versions': 1
            },
            'stats': {'attention': 0, 'passed': 0, 'todo': 0},
            'status': u'todo'
        }
        assert all_submissions[str(second_recid)] == \
            expected_coordinator_entry

        # Check pagination
        page1_submissions = prepare_submissions(user, 1)
        assert len(page1_submissions) == 1
        assert page1_submissions[str(second_recid)] == \
            all_submissions[str(second_recid)]

        page2_submissions = prepare_submissions(user, 1, 2)
        assert len(page2_submissions) == 1
        assert page2_submissions[str(first_recid)] == \
            all_submissions[str(first_recid)]

        # Check filtering by record id
        record_submissions = prepare_submissions(user, record_id=second_recid)
        assert len(record_submissions) == 1
        assert record_submissions[str(second_recid)] == \
            all_submissions[str(second_recid)]

        # change status to 'finished' and check new submission no longer
        # appears
        hepsubmission.overall_status = 'finished'
        db.session.add(hepsubmission)
        db.session.commit()

        assert get_submission_count(user) == 1
        all_submissions = prepare_submissions(user)
        assert len(all_submissions) == 1
        assert list(all_submissions.keys()) == [str(first_recid)]
def mock_import_old_record(inspire_id=mock_inspire_ids[1], send_email=False):
    """Creates a submission but mimics the old migrated paths.

    (See hepdata master branch at ccd691b for old migrator module.)

    The publication information is fetched, a record is created, the
    zipped test data for the record is unpacked into the record's data
    directory, and the submission is processed with the old schemas and
    finalised.

    :param inspire_id: Inspire ID of the record to import; must be one of
        ``mock_inspire_ids``.
    :param send_email: forwarded to ``do_finalise``.
    :return: ``False`` if the publication information cannot be retrieved,
        the zip file is missing, or processing the submission reports
        errors. Nothing is returned explicitly on success.
    :raises ValueError: if ``inspire_id`` is not in ``mock_inspire_ids``.
    """
    if inspire_id not in mock_inspire_ids:
        raise ValueError('Invalid inspire id %s. Accepted values are: %s'
                         % (inspire_id, ', '.join(mock_inspire_ids)))

    # Use zipped test data for specific record(s)
    publication_information, status = get_inspire_record_information(
        inspire_id)

    # Check the status before touching the payload, as _import_record does.
    if status != "success":
        log.error("Failed to retrieve publication information for "
                  + inspire_id)
        return False

    # Create record
    publication_information["inspire_id"] = inspire_id
    record_information = create_record(publication_information)
    recid = record_information['recid']

    # Unzip into correct data dir
    data_path = get_data_path_for_record(recid)
    base_dir = os.path.dirname(os.path.realpath(__file__))
    zip_path = os.path.join(base_dir, 'old_hepdata_zips',
                            'ins%s.zip' % inspire_id)

    if not os.path.isfile(zip_path):
        log.error('Invalid path %s' % zip_path)
        return False

    log.info('Unzipping %s to %s' % (zip_path, data_path))
    shutil.unpack_archive(zip_path, data_path)

    # The outer archive contains the submission zip, which is unpacked
    # into a timestamped directory.
    time_stamp = str(int(round(time.time())))
    yaml_path = os.path.join(data_path, time_stamp)
    sub_zip_path = os.path.join(data_path, 'ins%s.zip' % inspire_id)
    shutil.unpack_archive(sub_zip_path, yaml_path)

    # Create submission
    admin_user_id = 1

    # Consume data payload and store in db.
    get_or_create_hepsubmission(recid, admin_user_id)

    errors = process_submission_directory(
        yaml_path,
        os.path.join(yaml_path, "submission.yaml"),
        recid,
        old_submission_schema=True,
        old_data_schema=True)

    if errors:
        # Extra positional arguments to a logging call are %-interpolated
        # into the message, so the message needs matching placeholders;
        # without them the log call fails to format and the error is lost.
        log.error("Submission failed for %s. Errors: %s", recid, errors)
        return False

    do_finalise(recid,
                publication_record=record_information,
                force_finalise=True,
                convert=False,
                send_email=send_email)
def _import_record(inspire_id, update_existing=False,
                   base_url='https://hepdata.net', send_email=False):
    """
    Import a single record, downloading its archive from ``base_url``.

    :param inspire_id: Inspire ID of the record to import.
    :param update_existing: if a submission for this Inspire ID already
        exists, update it when True; otherwise give up and return False.
    :param base_url: site passed to ``_download_file``.
    :param send_email: forwarded to ``do_finalise``.
    :return: ``True`` if the record was imported and finalised, ``False``
        otherwise. Any exception raised while downloading, processing or
        finalising is logged, the submission is unloaded, and ``False`` is
        returned.
    """
    publication_information, status = get_inspire_record_information(
        inspire_id)
    if status != "success":
        log.error("Failed to retrieve publication information for "
                  + inspire_id)
        return False

    current_submission = get_latest_hepsubmission(inspire_id=inspire_id)

    if current_submission:
        log.info("The record with inspire id {0} already exists.".format(
            inspire_id))
        if not update_existing:
            log.info("Not updating as update_existing is False")
            return False
        log.info("Updating instead")
        recid = current_submission.publication_recid
    else:
        log.info(
            "The record with id {0} does not exist in the database, so we're loading it."
            .format(inspire_id))
        publication_information["inspire_id"] = inspire_id
        new_record = create_record(publication_information)
        recid = new_record['recid']

    try:
        downloaded = _download_file(base_url, inspire_id)
        archive_name = os.path.basename(downloaded)

        # Store the archive in a timestamped directory for this record.
        time_stamp = str(int(round(time.time())))
        target_dir = get_data_path_for_record(str(recid), time_stamp)
        if not os.path.exists(target_dir):
            os.makedirs(target_dir)

        archive_path = os.path.join(target_dir, archive_name)
        log.info("Moving file to %s" % archive_path)
        shutil.copy(downloaded, archive_path)

        # Create submission
        admin_user_id = 1
        hepsubmission = get_or_create_hepsubmission(recid, admin_user_id)
        db.session.add(hepsubmission)
        db.session.commit()

        # Then process the payload as for any other record
        errors = process_zip_archive(archive_path, recid)
        if errors:
            log.info("Errors processing archive. Re-trying with old schema.")
            # Try again with old schema
            # Need to clean up first to avoid errors
            # First delete tables
            cleanup_submission(recid, 1, [])

            # Next remove remaining files
            target_dir = os.path.dirname(archive_path)
            extracted_dir = os.path.join(
                target_dir, remove_file_extension(archive_name))
            shutil.rmtree(extracted_dir)

            errors = process_zip_archive(archive_path, recid,
                                         old_submission_schema=True,
                                         old_data_schema=True)
            if errors:
                log.error("Could not process zip archive: ")
                for bad_file, bad_file_errors in errors.items():
                    log.error("    %s:" % bad_file)
                    for item in bad_file_errors:
                        log.error("        %s" % item['message'])

                raise ValueError("Could not validate record.")

        # Delete any previous upload folders
        cleanup_old_files(hepsubmission)

        log.info("Finalising record %s" % recid)
        is_update = current_submission is not None
        result_json = do_finalise(recid, force_finalise=True,
                                  update=is_update, convert=False,
                                  send_email=send_email)
        outcome = json.loads(result_json)

        if not (outcome and outcome['success']):
            raise ValueError("Failed to finalise record.")

        log.info("Imported record %s with %s submissions"
                 % (recid, outcome['data_count']))
        return True
    except Exception as exc:
        # Unload record
        unload_submission(recid)
        log.error(exc)
        return False
def test_dashboard(live_server, logged_in_browser):
    """
    End-to-end check of the dashboard: links, submission list pagination,
    the manage and delete modals, and the permissions widget.
    """
    browser = logged_in_browser

    def wait_for(condition):
        """Wait up to 10 seconds for ``condition`` and return its result."""
        return WebDriverWait(browser, 10).until(condition)

    # Create some submissions so that there'll be something on the dashboard
    # and on 2 pages. Current user will be coordinator and uploader.
    for i in range(26):
        record_information = create_record({'title': f'Dashboard Test {i}'})
        recid = record_information["recid"]
        hepsubmission = get_or_create_hepsubmission(recid, 1)
        participant_record = SubmissionParticipant(
            email='*****@*****.**',
            status='primary',
            role='uploader',
            user_account=1,
            publication_recid=recid)
        db.session.add(hepsubmission)
        db.session.add(participant_record)

    db.session.commit()

    # Confirm there are 26 'todo' submissions
    todo_submissions = HEPSubmission.query.filter_by(
        overall_status='todo').all()
    assert len(todo_submissions) == 26

    # Click on dashboard link
    browser.find_element_by_link_text('Dashboard').click()
    e2e_assert_url(browser, 'hep_dashboard.dashboard')

    # Check links in top section work
    # Submissions Overview link
    browser.find_element_by_link_text('Submissions Overview').click()
    e2e_assert_url(browser, 'hep_dashboard.submissions')
    # Wait for graph to load
    wait_for(EC.presence_of_element_located(
        (By.CSS_SELECTOR, "#submission_vis svg")))
    # Go back
    browser.back()
    e2e_assert_url(browser, 'hep_dashboard.dashboard')

    # Edit Profile link
    browser.find_element_by_link_text('Edit Profile').click()
    e2e_assert_url(browser, 'invenio_userprofiles.profile')
    # Go back
    browser.back()
    e2e_assert_url(browser, 'hep_dashboard.dashboard')

    # Wait for submissions to load
    submissions_list = wait_for(
        EC.presence_of_element_located((By.ID, "hep-submissions")))
    submission_items = submissions_list.find_elements_by_class_name(
        'submission-item')
    assert len(submission_items) == 25

    # Check pagination works
    page_two_link = browser.find_element_by_css_selector(
        ".pagination-bar a[href='/dashboard/?page=2']")
    page_two_link.click()

    # Wait for loader, then new items appear
    wait_for(EC.text_to_be_present_in_element(
        (By.CSS_SELECTOR, '.submission-item h4 a'), 'Dashboard Test 0'))

    # Should just be 1 submission on page 2
    submission_items = browser.find_elements_by_class_name('submission-item')
    assert len(submission_items) == 1
    last_item = submission_items[0]

    # Check settings modal appears
    last_item.find_element_by_class_name('manage-submission-trigger').click()
    manage_widget = wait_for(
        EC.visibility_of_element_located((By.ID, 'manageWidget')))
    manage_title = manage_widget.find_element_by_class_name('modal-title')
    assert manage_title.text == 'Manage Submission'

    # Close modal
    manage_widget.find_element_by_css_selector(
        '.modal-footer .btn-default').click()
    wait_for(EC.invisibility_of_element(manage_widget))

    # Click delete button
    # Check delete modal appears
    last_item.find_element_by_class_name('delete-submission-trigger').click()
    delete_widget = wait_for(
        EC.visibility_of_element_located((By.ID, 'deleteWidget')))
    delete_title = delete_widget.find_element_by_class_name('modal-title')
    assert delete_title.text == 'Delete Submission'

    # Confirm deletion
    delete_widget.find_element_by_class_name('confirm-delete').click()
    # Wait for confirmation of deletion
    wait_for(EC.presence_of_element_located((By.ID, 'delete-success')))
    confirmation = delete_widget.find_element_by_css_selector(
        '#delete-success p')
    assert 'Submission deleted' in confirmation.text

    # Should now be 25 submissions not 26
    todo_submissions = HEPSubmission.query.filter_by(
        overall_status='todo').all()
    assert len(todo_submissions) == 25

    # Reload the dashboard (rather than waiting)
    browser.refresh()

    # Check permissions widget
    # Coordinator tab should have 5 items (restricted as we are user id 1)
    coordinator_pane = wait_for(
        EC.presence_of_element_located((By.ID, 'coordinator')))
    coordinator_rows = coordinator_pane.find_elements_by_class_name('row')
    assert len(coordinator_rows) == 5

    # Click on uploader pane - should be all 25 items
    browser.find_element_by_link_text('uploader').click()
    uploader_pane = browser.find_element_by_id('uploader')
    uploader_rows = uploader_pane.find_elements_by_class_name('row')
    assert len(uploader_rows) == 25

    # Only first 5 should be visible
    assert all(row.is_displayed() for row in uploader_rows[:5])
    assert all(not row.is_displayed() for row in uploader_rows[5:])

    # Scroll down to find paginator
    ActionChains(browser).move_to_element(uploader_rows[4]).perform()

    # Click on last page
    uploader_pane.find_element_by_css_selector(
        ".pagination-bar li a[title=last]").click()

    # Now last 5 items should be visible
    assert all(not row.is_displayed() for row in uploader_rows[:20])
    assert all(row.is_displayed() for row in uploader_rows[20:])