def test_arxiv_fulltext_download_logs_on_pdf_not_existing():
    """Check the info log written when no PDF can be retrieved.

    The mocked arXiv endpoint answers with the contents of the
    ``1707.02785.html`` fixture, and the task is expected to return
    ``None`` and log that no PDF is available for the eprint.
    """
    record = {
        'arxiv_eprints': [
            {'categories': ['cs.CV'], 'value': '1707.02785'},
        ],
    }  # literature/1458302
    fixture_path = os.path.join('fixtures', '1707.02785.html')

    with requests_mock.Mocker() as mocker:
        mocker.register_uri(
            'GET',
            'http://export.arxiv.org/pdf/1707.02785',
            content=pkg_resources.resource_string(__name__, fixture_path),
        )

        # Make sure the input itself is valid before exercising the task.
        eprints_schema = load_schema('hep')['properties']['arxiv_eprints']
        assert validate(record['arxiv_eprints'], eprints_schema) is None

        workflow_obj = MockObj(record, {}, files=MockFiles({}))
        engine = MockEng()

        assert arxiv_fulltext_download(workflow_obj, engine) is None

        logged = workflow_obj.log._info.getvalue()
        assert logged == 'No PDF is available for 1707.02785'
def test_arxiv_fulltext_download_logs_on_success():
    """Check the info log written when the PDF is retrieved.

    The mocked arXiv endpoint answers with the contents of the
    ``1605.03844.pdf`` fixture, and the task is expected to return
    ``None`` and log that the PDF was retrieved for the eprint.
    """
    record = {
        'arxiv_eprints': [
            {'categories': ['physics.ins-det'], 'value': '1605.03844'},
        ],
    }  # literature/1458302
    fixture_path = os.path.join('fixtures', '1605.03844.pdf')

    with requests_mock.Mocker() as mocker:
        mocker.register_uri(
            'GET',
            'http://export.arxiv.org/pdf/1605.03844',
            content=pkg_resources.resource_string(__name__, fixture_path),
        )

        # Make sure the input itself is valid before exercising the task.
        eprints_schema = load_schema('hep')['properties']['arxiv_eprints']
        assert validate(record['arxiv_eprints'], eprints_schema) is None

        workflow_obj = MockObj(record, {}, files=MockFiles({}))
        engine = MockEng()

        assert arxiv_fulltext_download(workflow_obj, engine) is None

        logged = workflow_obj.log._info.getvalue()
        assert logged == 'PDF retrieved from arXiv for 1605.03844'
def test_arxiv_fulltext_download_does_not_duplicate_documents():
    """Check that running the task twice yields a single document entry.

    The mocked arXiv endpoint answers with the contents of the
    ``1605.03844.pdf`` fixture. After two consecutive runs on the same
    object, ``documents`` must contain exactly one entry for that PDF.
    """
    record = {
        'arxiv_eprints': [
            {'categories': ['physics.ins-det'], 'value': '1605.03844'},
        ],
    }  # literature/1458302
    fixture_path = os.path.join('fixtures', '1605.03844.pdf')

    with requests_mock.Mocker() as mocker:
        mocker.register_uri(
            'GET',
            'http://export.arxiv.org/pdf/1605.03844',
            content=pkg_resources.resource_string(__name__, fixture_path),
        )

        # Make sure the input itself is valid before exercising the task.
        eprints_schema = load_schema('hep')['properties']['arxiv_eprints']
        assert validate(record['arxiv_eprints'], eprints_schema) is None

        workflow_obj = MockObj(record, {}, files=MockFiles({}))
        engine = MockEng()

        # Two runs in a row: the second one must not add another document.
        for _ in range(2):
            assert arxiv_fulltext_download(workflow_obj, engine) is None

        only_document = {
            'fulltext': True,
            'original_url': 'http://export.arxiv.org/pdf/1605.03844',
            'url': '/api/files/0b9dd5d1-feae-4ba5-809d-3a029b0bc110/1605.03844.pdf',
            'material': 'preprint',
            'source': 'arxiv',
            'key': '1605.03844.pdf',
            'hidden': True,
        }

        assert workflow_obj.data['documents'] == [only_document]
def test_arxiv_fulltext_download_retries_on_error():
    """Check that the task succeeds when the first download attempt fails.

    The mocked arXiv endpoint answers the first request with a 500 and the
    next one with the contents of the ``1605.03814.pdf`` fixture. The task
    is expected to return ``None`` and log that the PDF was retrieved.
    """
    # Registered URIs are only served while httpretty is enabled, so turn it
    # on here instead of depending on activation from outside this test.
    httpretty.enable()
    try:
        httpretty.register_uri(
            httpretty.GET, 'http://export.arxiv.org/pdf/1605.03814',
            responses=[
                httpretty.Response(body='', status=500),
                httpretty.Response(
                    body=pkg_resources.resource_string(
                        __name__, os.path.join('fixtures', '1605.03814.pdf')),
                    # Integer status, consistent with the 500 response above.
                    status=200),
            ])

        schema = load_schema('hep')
        subschema = schema['properties']['arxiv_eprints']

        data = {
            'arxiv_eprints': [
                {
                    'categories': [
                        'hep-ex',
                    ],
                    'value': '1605.03814',
                },
            ],
        }  # literature/1458270
        extra_data = {}
        files = MockFiles({})
        # Make sure the input itself is valid before exercising the task.
        assert validate(data['arxiv_eprints'], subschema) is None

        obj = MockObj(data, extra_data, files=files)
        eng = MockEng()

        assert arxiv_fulltext_download(obj, eng) is None

        expected = 'PDF retrieved from arXiv for 1605.03814'
        result = obj.log._info.getvalue()

        assert expected == result
    finally:
        # Restore real sockets and drop the registered URIs so that nothing
        # leaks into other tests.
        httpretty.disable()
        httpretty.reset()