def test_populate_arxiv_document_retries_on_connection_error(
        mock_requests_get):
    """Expect ``DownloadError`` and ten calls to the mocked getter.

    ``mock_requests_get`` is driven by ``side_effect_requests_get``; the
    task is expected to give up with ``DownloadError`` once the mock has
    been invoked 10 times.
    """
    mock_requests_get.side_effect = side_effect_requests_get

    eprints_schema = load_schema('hep')['properties']['arxiv_eprints']

    eprint = {
        'categories': [
            'hep-ex',
        ],
        'value': '1605.03814',
    }
    record = {'arxiv_eprints': [eprint]}  # literature/1458270
    assert validate(record['arxiv_eprints'], eprints_schema) is None

    workflow_obj = MockObj(record, {}, files=MockFiles({}))
    engine = MockEng()

    with pytest.raises(DownloadError):
        populate_arxiv_document(workflow_obj, engine)

    # Checked after the ``raises`` block so the assertion is actually reached.
    assert mock_requests_get.call_count == 10
def test_populate_arxiv_document_logs_on_pdf_not_existing():
    """Check the info log written when the PDF URL serves the HTML fixture.

    The export.arxiv.org PDF URL is mocked to return the contents of
    ``fixtures/1707.02785.html``; the task must return ``None`` and the
    info log must hold the "No PDF is available" message.
    """
    # NOTE(review): a test with this exact name is defined again further
    # down this file; the later ``def`` rebinds the name, so this one is
    # presumably never collected -- confirm and rename one of them.
    pdf_url = 'http://export.arxiv.org/pdf/1707.02785'

    with requests_mock.Mocker() as mocker:
        html_body = pkg_resources.resource_string(
            __name__, os.path.join('fixtures', '1707.02785.html'))
        mocker.register_uri('GET', pdf_url, content=html_body)

        eprints_schema = load_schema('hep')['properties']['arxiv_eprints']

        eprint = {
            'categories': [
                'cs.CV',
            ],
            'value': '1707.02785',
        }
        record = {'arxiv_eprints': [eprint]}  # literature/1458302
        assert validate(record['arxiv_eprints'], eprints_schema) is None

        workflow_obj = MockObj(record, {}, files=MockFiles({}))
        engine = MockEng()

        outcome = populate_arxiv_document(workflow_obj, engine)
        assert outcome is None

        logged = workflow_obj.log._info.getvalue()
        assert 'No PDF is available for 1707.02785' == logged
def test_populate_arxiv_document_does_not_duplicate_files_if_called_multiple_times():
    """Run the task twice and expect a single entry in ``documents``.

    The export.arxiv.org PDF URL is mocked to return the contents of
    ``fixtures/1605.03844.pdf``.
    """
    pdf_url = 'http://export.arxiv.org/pdf/1605.03844'

    with requests_mock.Mocker() as mocker:
        pdf_body = pkg_resources.resource_string(
            __name__, os.path.join('fixtures', '1605.03844.pdf'))
        mocker.register_uri('GET', pdf_url, content=pdf_body)

        eprints_schema = load_schema('hep')['properties']['arxiv_eprints']

        eprint = {
            'categories': [
                'physics.ins-det',
            ],
            'value': '1605.03844',
        }
        record = {'arxiv_eprints': [eprint]}  # literature/1458302
        assert validate(record['arxiv_eprints'], eprints_schema) is None

        workflow_obj = MockObj(record, {}, files=MockFiles({}))
        engine = MockEng()

        # Two consecutive runs against the same workflow object.
        first_run = populate_arxiv_document(workflow_obj, engine)
        assert first_run is None
        second_run = populate_arxiv_document(workflow_obj, engine)
        assert second_run is None

        only_document = {
            'key': '1605.03844.pdf',
            'fulltext': True,
            'hidden': True,
            'material': 'preprint',
            'original_url': 'http://export.arxiv.org/pdf/1605.03844',
            'url': 'http://export.arxiv.org/pdf/1605.03844',
            'source': 'arxiv',
        }
        assert [only_document] == workflow_obj.data['documents']
def test_populate_arxiv_document_retries_on_error():
    """Serve a 500 and then the PDF fixture; expect the document attached.

    The export.arxiv.org PDF URL is registered with a two-item response
    list: an empty 500 response followed by a 200 response carrying
    ``fixtures/1605.03814.pdf``.
    """
    # NOTE(review): a test with this exact name is defined again further
    # down this file; the later ``def`` rebinds the name, so this one is
    # presumably never collected -- confirm and rename one of them.
    with requests_mock.Mocker() as mocker:
        server_error = {
            'content': '',
            'status_code': 500,
        }
        pdf_served = {
            'content': pkg_resources.resource_string(
                __name__, os.path.join('fixtures', '1605.03814.pdf')),
            'status_code': 200,
        }
        mocker.register_uri(
            'GET', 'http://export.arxiv.org/pdf/1605.03814',
            [server_error, pdf_served],
        )

        eprints_schema = load_schema('hep')['properties']['arxiv_eprints']

        eprint = {
            'categories': [
                'hep-ex',
            ],
            'value': '1605.03814',
        }
        record = {'arxiv_eprints': [eprint]}  # literature/1458270
        assert validate(record['arxiv_eprints'], eprints_schema) is None

        workflow_obj = MockObj(record, {}, files=MockFiles({}))
        engine = MockEng()

        outcome = populate_arxiv_document(workflow_obj, engine)
        assert outcome is None

        pdf_url = 'http://export.arxiv.org/pdf/1605.03814'
        wanted_documents = [
            {
                'key': '1605.03814.pdf',
                'fulltext': True,
                'hidden': True,
                'material': 'preprint',
                'original_url': pdf_url,
                'url': pdf_url,
                'source': 'arxiv',
            }
        ]
        assert wanted_documents == workflow_obj.data['documents']
def test_populate_arxiv_document_retries_on_error():
    """Serve a 500 and then the PDF fixture; expect the document attached.

    Two URLs are mocked: export.arxiv.org answers 500 and then 200 with
    ``fixtures/1605.03814.pdf``, while arxiv.org only answers 500. The
    resulting document must point at the export.arxiv.org URL.
    """
    # NOTE(review): an earlier test in this file uses this exact name;
    # this later definition rebinds it -- confirm whether both are wanted.
    server_error = {'content': '', 'status_code': 500}
    pdf_served = {
        'content': pkg_resources.resource_string(
            __name__, os.path.join('fixtures', '1605.03814.pdf')),
        'status_code': 200,
    }

    with requests_mock.Mocker() as mocker:
        mocker.get(
            'http://export.arxiv.org/pdf/1605.03814',
            (server_error, pdf_served),
        )
        mocker.get(
            'http://arxiv.org/pdf/1605.03814',
            (server_error,),
        )

        eprints_schema = load_schema('hep')['properties']['arxiv_eprints']

        eprint = {
            'categories': [
                'hep-ex',
            ],
            'value': '1605.03814',
        }
        record = {'arxiv_eprints': [eprint]}  # literature/1458270
        assert validate(record['arxiv_eprints'], eprints_schema) is None

        workflow_obj = MockObj(record, {}, files=MockFiles({}))
        engine = MockEng()

        outcome = populate_arxiv_document(workflow_obj, engine)
        assert outcome is None

        pdf_url = 'http://export.arxiv.org/pdf/1605.03814'
        wanted_documents = [
            {
                'key': '1605.03814.pdf',
                'fulltext': True,
                'hidden': True,
                'material': 'preprint',
                'original_url': pdf_url,
                'url': pdf_url,
                'source': 'arxiv',
            }
        ]
        assert wanted_documents == workflow_obj.data['documents']
def test_populate_arxiv_document_does_not_duplicate():
    """Run the task twice and expect a single entry in ``documents``."""
    eprints_schema = load_schema('hep')['properties']['arxiv_eprints']

    eprint = {
        'categories': [
            'physics.ins-det',
        ],
        'value': '1605.03844',
    }
    record = {'arxiv_eprints': [eprint]}  # literature/1458302
    assert validate(record['arxiv_eprints'], eprints_schema) is None

    workflow_obj = MockObj(record, {}, files=MockFiles({}))
    engine = MockEng()

    # Two consecutive runs against the same workflow object.
    first_run = populate_arxiv_document(workflow_obj, engine)
    assert first_run is None
    second_run = populate_arxiv_document(workflow_obj, engine)
    assert second_run is None

    pdf_url = 'http://export.arxiv.org/pdf/1605.03844'
    wanted_documents = [
        {
            'key': '1605.03844.pdf',
            'fulltext': True,
            'hidden': True,
            'material': 'preprint',
            'original_url': pdf_url,
            'url': pdf_url,
            'source': 'arxiv',
        }
    ]
    assert wanted_documents == workflow_obj.data['documents']
def test_populate_arxiv_document_logs_on_pdf_not_existing():
    """Check the info log written when the PDF URL serves the HTML fixture.

    export.arxiv.org answers 200 with the contents of
    ``fixtures/1707.02785.html`` and arxiv.org answers 500; the task must
    return ``None`` and the info log must hold the "No PDF is available"
    message.
    """
    # NOTE(review): an earlier test in this file uses this exact name;
    # this later definition rebinds it -- confirm whether both are wanted.
    server_error = {'content': '', 'status_code': 500}
    html_served = {
        'content': pkg_resources.resource_string(
            __name__, os.path.join('fixtures', '1707.02785.html')),
        'status_code': 200,
    }

    with requests_mock.Mocker() as mocker:
        mocker.get(
            'http://export.arxiv.org/pdf/1707.02785',
            (html_served,),
        )
        mocker.get(
            'http://arxiv.org/pdf/1707.02785',
            (server_error,),
        )

        eprints_schema = load_schema('hep')['properties']['arxiv_eprints']

        eprint = {
            'categories': [
                'cs.CV',
            ],
            'value': '1707.02785',
        }
        record = {'arxiv_eprints': [eprint]}  # literature/1458302
        assert validate(record['arxiv_eprints'], eprints_schema) is None

        workflow_obj = MockObj(record, {}, files=MockFiles({}))
        engine = MockEng()

        outcome = populate_arxiv_document(workflow_obj, engine)
        assert outcome is None

        logged = workflow_obj.log._info.getvalue()
        assert 'No PDF is available for 1707.02785' == logged