def test_search_export():
    """The search export tool should produce the expected rows for a mocked ES response."""
    expected = read_response_as_json('tools', 'search_expected.json')
    index_name = 'scoap3-records-record'
    es_response = read_response_as_json('elasticsearch', 'tool_search.json')
    mock_es = MockES(es_response, with_assert=False)

    # Minimal app config the tool reads: target index plus the exported columns,
    # each as (header, source field, json path).
    app_config = {
        'SEARCH_UI_SEARCH_INDEX': index_name,
        'SEARCH_EXPORT_FIELDS': (
            ('Publication year', 'year', 'year'),
            ('Control number', 'control_number', 'control_number'),
            ('DOI', 'dois', 'dois[0].value'),
            ('Title', 'titles', 'titles[0].title'),
            ('arXiv id', 'arxiv_eprints', 'arxiv_eprints[0].value'),
            ('arXiv primary category', 'arxiv_eprints', 'arxiv_eprints[0].categories[0]'),
            ('Publication date', 'imprints', 'imprints[0].date'),
            ('Record creation date', 'record_creation_date', 'record_creation_date'),
            ('Journal', 'publication_info', 'publication_info[0].journal_title'),
        )
    }

    with patch('scoap3.modules.tools.tools.current_search_client', mock_es), \
            patch('scoap3.modules.tools.tools.current_app', MockApp(app_config)):
        exported = search_export(None)

    assert exported['data'] == expected
def base_test_tool(export_function, country):
    """Run *export_function* against a mocked ES client and return its result.

    When *country* is given, the tool is expected to query with a
    ``country:<value>`` filter; otherwise no query string is used.
    """
    index_name = 'scoap3-records-record'
    es_response = read_response_as_json('elasticsearch', 'tool_affiliations.json')

    query = 'country:%s' % country if country else None
    # These are the exact search parameters the tool is expected to send;
    # MockES asserts on them.
    expected_search_kwargs = {
        'q': query,
        'index': index_name,
        '_source': [
            'publication_info.year',
            'publication_info.journal_title',
            'arxiv_eprints',
            'dois',
            'authors',
            'control_number',
        ],
        'size': 100,
        'from_': 0,
    }
    mock_es = MockES(es_response, **expected_search_kwargs)

    app_config = {'SEARCH_UI_SEARCH_INDEX': index_name}
    with patch('scoap3.modules.tools.tools.current_search_client', mock_es), \
            patch('scoap3.modules.tools.tools.current_app', MockApp(app_config)):
        return export_function(country=country)
def test_halt_invalid_record():
    """A record missing a required field must halt the workflow at validation."""
    article_json = read_response_as_json('hepcrawl', 'aps2.json')
    # Drop a schema-required field so validation fails.
    del article_json['abstracts']

    with requests_mock.Mocker() as m:
        m.get('http://export.arxiv.org/api/query?search_query=id:1808.08188',
              content=read_response('article_upload',
                                    'export.arxiv.org_api_query_search_query_id_1808.08188'))
        m.register_uri(
            'GET',
            'http://harvest.aps.org/v2/journals/articles/10.1103/PhysRevD.99.075025',
            request_headers={'Accept': 'application/pdf'},
            content=read_response('article_upload', 'harvest.aps.org_PhysRevD.99.075025.pdf'))
        m.register_uri(
            'GET',
            'http://harvest.aps.org/v2/journals/articles/10.1103/PhysRevD.99.075025',
            request_headers={'Accept': 'text/xml'},
            content=read_response('article_upload', 'harvest.aps.org_PhysRevD.99.075025.xml'))
        m.get('https://api.crossref.org/works/10.1103/PhysRevD.99.075025',
              content=read_response('article_upload',
                                    'crossref.org_works_10.1103_PhysRevD.99.075025'))

        workflow = run_article_upload_with_data(article_json, m)

    assert workflow.status == WorkflowStatus.HALTED
    assert workflow.objects[0].extra_data['_message'].startswith(
        "Validation error: u'abstracts' is a required property")
def test_invalid_record_update_with_whole_workflow():
    """
    Runs the article_upload workflow on the same article twice.
    For the second run a slightly modified json is passed, which has to
    fail the validation, so the existing record must remain untouched.
    """
    import copy

    with requests_mock.Mocker() as m:
        m.get('http://export.arxiv.org/api/query?search_query=id:1808.08188',
              content=read_response('article_upload',
                                    'export.arxiv.org_api_query_search_query_id_1808.08188'))
        m.register_uri(
            'GET',
            'http://harvest.aps.org/v2/journals/articles/10.1103/PhysRevD.99.075025',
            request_headers={'Accept': 'application/pdf'},
            content=read_response('article_upload', 'harvest.aps.org_PhysRevD.99.075025.pdf'))
        m.register_uri(
            'GET',
            'http://harvest.aps.org/v2/journals/articles/10.1103/PhysRevD.99.075025',
            request_headers={'Accept': 'text/xml'},
            content=read_response('article_upload', 'harvest.aps.org_PhysRevD.99.075025.xml'))
        m.get('https://api.crossref.org/works/10.1103/PhysRevD.99.075025',
              content=read_response('article_upload',
                                    'crossref.org_works_10.1103_PhysRevD.99.075025'))

        # read article data from json
        json_data = read_response_as_json('hepcrawl', 'aps2.json')

        # run first workflow
        workflow1 = run_article_upload_with_data(json_data, m)
        record1 = get_record_from_workflow(workflow1)
        assert workflow1.status == WorkflowStatus.COMPLETED

        # update article data
        # NOTE: deepcopy is required — a shallow dict.copy() would share the
        # nested 'titles' list, so mutating the title below would also
        # corrupt json_data (and anything still referencing it).
        updated_json_data = copy.deepcopy(json_data)
        updated_json_data['titles'][0]['title'] = 'Manually updated title'
        # delete a required field to fail the validation
        updated_json_data.pop('abstracts')

        # run second workflow
        workflow2 = run_article_upload_with_data(updated_json_data, m)
        record2 = get_record_from_workflow(workflow2)
        assert workflow2.status == WorkflowStatus.HALTED
        assert workflow2.objects[0].extra_data['_message'].startswith(
            "Validation error: u'abstracts' is a required property")

        # control number and title should be the same, since the validation failed
        assert record1["control_number"] == record2["control_number"]
        assert record1['titles'][0]['title'] == ('Alternative perspective on gauged lepton '
                                                 'number and implications for collider physics')
        assert record1['titles'][0]['title'] == record2['titles'][0]['title']
def test_record_update():
    """
    Runs the article_upload workflow on the same article twice.
    For the second run a slightly modified json is passed, to test if the
    record will be updated and a new record won't be created.
    """
    import copy

    with requests_mock.Mocker() as m:
        m.get('http://export.arxiv.org/api/query?search_query=id:1808.08188',
              content=read_response('article_upload',
                                    'export.arxiv.org_api_query_search_query_id_1808.08188'))
        m.register_uri(
            'GET',
            'http://harvest.aps.org/v2/journals/articles/10.1103/PhysRevD.99.075025',
            request_headers={'Accept': 'application/pdf'},
            content=read_response('article_upload', 'harvest.aps.org_PhysRevD.99.075025.pdf'))
        m.register_uri(
            'GET',
            'http://harvest.aps.org/v2/journals/articles/10.1103/PhysRevD.99.075025',
            request_headers={'Accept': 'text/xml'},
            content=read_response('article_upload', 'harvest.aps.org_PhysRevD.99.075025.xml'))
        m.get('https://api.crossref.org/works/10.1103/PhysRevD.99.075025',
              content=read_response('article_upload',
                                    'crossref.org_works_10.1103_PhysRevD.99.075025'))

        # read article data from json
        json_data = read_response_as_json('hepcrawl', 'aps2.json')

        # run first workflow
        workflow1 = run_article_upload_with_data(json_data, m)
        record1 = get_record_from_workflow(workflow1)
        assert workflow1.status == WorkflowStatus.COMPLETED

        # update article data
        # NOTE: deepcopy is required — a shallow dict.copy() would share the
        # nested 'titles' list, so the title assignment below would also
        # mutate json_data in place.
        updated_json_data = copy.deepcopy(json_data)
        updated_json_data['titles'][0]['title'] = 'Manually updated title'

        # run second workflow
        workflow2 = run_article_upload_with_data(updated_json_data, m)
        record2 = get_record_from_workflow(workflow2)
        assert workflow2.status == WorkflowStatus.COMPLETED

        # control number should be the same, but article data has to be updated
        assert record1["control_number"] == record2["control_number"]
        assert record1['titles'][0]['title'] == 'Alternative perspective on gauged lepton number and implications ' \
                                                'for collider physics'
        assert record2['titles'][0]['title'] == 'Manually updated title'
def test_halt_record_without_authors():
    """An article with no authors must halt the workflow with an explanatory message."""
    article_json = read_response_as_json('hepcrawl', 'aps2.json')
    # Remove the author list entirely to trigger the halt.
    del article_json['authors']

    with requests_mock.Mocker() as m:
        m.get('http://export.arxiv.org/api/query?search_query=id:1808.08188',
              content=read_response('article_upload',
                                    'export.arxiv.org_api_query_search_query_id_1808.08188'))
        workflow = run_article_upload_with_data(article_json, m)

    assert workflow.status == WorkflowStatus.HALTED
    assert workflow.objects[0].extra_data['_message'] == 'No authors for article.'
def test_halt_record_without_affiliations():
    """An article whose authors all lack affiliations must halt the workflow."""
    article_json = read_response_as_json('hepcrawl', 'aps2.json')
    authors = article_json.get('authors', ())
    assert len(authors) > 0

    # Strip the affiliation data from every author.
    for author in article_json['authors']:
        author.pop('affiliations')

    with requests_mock.Mocker() as m:
        m.get('http://export.arxiv.org/api/query?search_query=id:1808.08188',
              content=read_response('article_upload',
                                    'export.arxiv.org_api_query_search_query_id_1808.08188'))
        workflow = run_article_upload_with_data(article_json, m)

    assert workflow.status == WorkflowStatus.HALTED
    assert workflow.objects[0].extra_data['_message'] == (
        "No affiliations for author: {u'raw_name': u'We-Fu Chang', u'surname': u'Chang', u'given_names': u'We-Fu', "
        "u'full_name': u'Chang, We-Fu'}.")
def run_article_upload_with_file(input_json_filename, mock_address):
    """Load a hepcrawl response from *input_json_filename* and run the
    article_upload workflow on it.

    Returns the Workflow object produced by run_article_upload_with_data.
    """
    payload = read_response_as_json('hepcrawl', input_json_filename)
    return run_article_upload_with_data(payload, mock_address)
def test_author_export():
    """Author export without a country filter should match the recorded fixture."""
    expected = read_response_as_json('tools', 'authors_expected.json')
    exported = base_test_tool(export_function=authors_export, country=None)
    assert exported['data'] == expected
def test_affiliation_export_country():
    """Affiliation export filtered to the USA should match the recorded fixture."""
    expected = read_response_as_json('tools', 'affiliation_expected_us.json')
    exported = base_test_tool(export_function=affiliations_export, country='USA')
    assert exported['data'] == expected