Ejemplo n.º 1
0
def test_arxiv_fulltext_download_logs_on_pdf_not_existing():
    """Check the info log when the arXiv PDF URL answers with an HTML fixture.

    The GET request for ``http://export.arxiv.org/pdf/1707.02785`` is mocked
    to return the content of the ``1707.02785.html`` fixture. The task must
    return ``None`` and the info log must hold exactly the "No PDF is
    available" message.
    """
    with requests_mock.Mocker() as mocker:
        fixture_path = os.path.join('fixtures', '1707.02785.html')
        mocker.register_uri(
            'GET',
            'http://export.arxiv.org/pdf/1707.02785',
            content=pkg_resources.resource_string(__name__, fixture_path),
        )

        eprints_schema = load_schema('hep')['properties']['arxiv_eprints']

        eprint = {
            'categories': ['cs.CV'],
            'value': '1707.02785',
        }
        record = {'arxiv_eprints': [eprint]}  # literature/1458302

        # Make sure the input itself is valid before exercising the task.
        assert validate(record['arxiv_eprints'], eprints_schema) is None

        workflow_obj = MockObj(record, {}, files=MockFiles({}))

        assert arxiv_fulltext_download(workflow_obj, MockEng()) is None

        logged = workflow_obj.log._info.getvalue()
        assert 'No PDF is available for 1707.02785' == logged
Ejemplo n.º 2
0
def test_arxiv_fulltext_download_logs_on_success():
    """Check the info log when the mocked arXiv PDF URL serves a PDF fixture.

    The GET request for ``http://export.arxiv.org/pdf/1605.03844`` is mocked
    to return the content of the ``1605.03844.pdf`` fixture. The task must
    return ``None`` and the info log must hold exactly the "PDF retrieved"
    message.
    """
    with requests_mock.Mocker() as mocker:
        pdf_bytes = pkg_resources.resource_string(
            __name__,
            os.path.join('fixtures', '1605.03844.pdf'),
        )
        pdf_url = 'http://export.arxiv.org/pdf/1605.03844'
        mocker.register_uri('GET', pdf_url, content=pdf_bytes)

        hep_schema = load_schema('hep')
        eprints_schema = hep_schema['properties']['arxiv_eprints']

        record = {
            'arxiv_eprints': [
                {
                    'categories': ['physics.ins-det'],
                    'value': '1605.03844',
                },
            ],
        }  # literature/1458302

        # Make sure the input itself is valid before exercising the task.
        assert validate(record['arxiv_eprints'], eprints_schema) is None

        attached_files = MockFiles({})
        workflow_obj = MockObj(record, {}, files=attached_files)
        engine = MockEng()

        outcome = arxiv_fulltext_download(workflow_obj, engine)
        assert outcome is None

        logged = workflow_obj.log._info.getvalue()
        assert 'PDF retrieved from arXiv for 1605.03844' == logged
def test_arxiv_fulltext_download_does_not_duplicate_documents():
    """Check that running the task twice leaves a single document entry.

    The GET request for ``http://export.arxiv.org/pdf/1605.03844`` is mocked
    to return the content of the ``1605.03844.pdf`` fixture. The task is
    invoked twice on the same object, and ``obj.data['documents']`` must
    equal a list containing exactly one expected document.
    """
    with requests_mock.Mocker() as mocker:
        fixture_path = os.path.join('fixtures', '1605.03844.pdf')
        mocker.register_uri(
            'GET',
            'http://export.arxiv.org/pdf/1605.03844',
            content=pkg_resources.resource_string(__name__, fixture_path),
        )

        eprints_schema = load_schema('hep')['properties']['arxiv_eprints']

        eprint = {
            'categories': ['physics.ins-det'],
            'value': '1605.03844',
        }
        record = {'arxiv_eprints': [eprint]}  # literature/1458302

        # Make sure the input itself is valid before exercising the task.
        assert validate(record['arxiv_eprints'], eprints_schema) is None

        workflow_obj = MockObj(record, {}, files=MockFiles({}))
        engine = MockEng()

        # Two consecutive runs against the same object and engine.
        for _ in range(2):
            assert arxiv_fulltext_download(workflow_obj, engine) is None

        expected_document = {
            'fulltext': True,
            'original_url': 'http://export.arxiv.org/pdf/1605.03844',
            'url':
            '/api/files/0b9dd5d1-feae-4ba5-809d-3a029b0bc110/1605.03844.pdf',
            'material': 'preprint',
            'source': 'arxiv',
            'key': '1605.03844.pdf',
            'hidden': True,
        }

        assert [expected_document] == workflow_obj.data['documents']
Ejemplo n.º 4
0
def test_arxiv_fulltext_download_retries_on_error():
    """Check the info log when the first response from arXiv is an error.

    Two responses are registered for
    ``http://export.arxiv.org/pdf/1605.03814``: first an empty body with
    status 500, then the content of the ``1605.03814.pdf`` fixture with
    status 200. The task must return ``None`` and the info log must hold
    exactly the "PDF retrieved" message.

    NOTE(review): nothing in this function enables httpretty; presumably a
    decorator or fixture outside this view does so -- confirm.
    """
    pdf_body = pkg_resources.resource_string(
        __name__, os.path.join('fixtures', '1605.03814.pdf'))

    httpretty.register_uri(
        httpretty.GET,
        'http://export.arxiv.org/pdf/1605.03814',
        responses=[
            httpretty.Response(body='', status=500),
            # Integer status, consistent with the error response above
            # (the original passed the string '200').
            httpretty.Response(body=pdf_body, status=200),
        ])

    schema = load_schema('hep')
    subschema = schema['properties']['arxiv_eprints']

    data = {
        'arxiv_eprints': [
            {
                'categories': [
                    'hep-ex',
                ],
                'value': '1605.03814',
            },
        ],
    }  # literature/1458270
    extra_data = {}
    files = MockFiles({})

    # Make sure the input itself is valid before exercising the task.
    assert validate(data['arxiv_eprints'], subschema) is None

    obj = MockObj(data, extra_data, files=files)
    eng = MockEng()

    assert arxiv_fulltext_download(obj, eng) is None

    expected = 'PDF retrieved from arXiv for 1605.03814'
    result = obj.log._info.getvalue()

    assert expected == result