Ejemplo n.º 1
0
def test_populate_arxiv_document_retries_on_connection_error(
        mock_requests_get):
    """Expect ``DownloadError`` after the mocked GET was attempted 10 times.

    ``mock_requests_get`` gets ``side_effect_requests_get`` as its side
    effect (presumably simulating a connection error, judging by the test
    name -- the helper is defined outside this snippet).  The task must
    raise ``DownloadError`` and the mock must have been called 10 times.
    """
    mock_requests_get.side_effect = side_effect_requests_get

    eprints = [
        {
            'categories': ['hep-ex'],
            'value': '1605.03814',
        },
    ]  # literature/1458270

    # Sanity-check the input against the ``arxiv_eprints`` part of the schema.
    eprints_schema = load_schema('hep')['properties']['arxiv_eprints']
    assert validate(eprints, eprints_schema) is None

    obj = MockObj({'arxiv_eprints': eprints}, {}, files=MockFiles({}))
    eng = MockEng()

    with pytest.raises(DownloadError):
        populate_arxiv_document(obj, eng)

    assert mock_requests_get.call_count == 10
def test_populate_arxiv_document_logs_on_pdf_not_existing():
    """Expect an info log entry when the mocked PDF URL serves the HTML fixture.

    The ``export.arxiv.org`` PDF URL is mocked to return the
    ``1707.02785.html`` fixture.  The task must return ``None`` and the
    object's info log must equal ``'No PDF is available for 1707.02785'``.
    """
    html_payload = pkg_resources.resource_string(
        __name__, os.path.join('fixtures', '1707.02785.html'))

    with requests_mock.Mocker() as requests_mocker:
        requests_mocker.register_uri(
            'GET',
            'http://export.arxiv.org/pdf/1707.02785',
            content=html_payload,
        )

        eprints = [
            {
                'categories': ['cs.CV'],
                'value': '1707.02785',
            },
        ]  # literature/1458302

        # Sanity-check the input against the ``arxiv_eprints`` sub-schema.
        eprints_schema = load_schema('hep')['properties']['arxiv_eprints']
        assert validate(eprints, eprints_schema) is None

        obj = MockObj({'arxiv_eprints': eprints}, {}, files=MockFiles({}))
        eng = MockEng()

        assert populate_arxiv_document(obj, eng) is None

        logged = obj.log._info.getvalue()
        assert 'No PDF is available for 1707.02785' == logged
Ejemplo n.º 3
0
def test_populate_arxiv_document_does_not_duplicate_files_if_called_multiple_times(
):
    """Expect a single document entry after running the task twice.

    The ``export.arxiv.org`` PDF URL is mocked to return the
    ``1605.03844.pdf`` fixture.  The task is called twice on the same
    object; ``obj.data['documents']`` must then contain exactly one entry.
    """
    pdf_url = 'http://export.arxiv.org/pdf/1605.03844'
    pdf_payload = pkg_resources.resource_string(
        __name__, os.path.join('fixtures', '1605.03844.pdf'))

    with requests_mock.Mocker() as requests_mocker:
        requests_mocker.register_uri('GET', pdf_url, content=pdf_payload)

        eprints = [
            {
                'categories': ['physics.ins-det'],
                'value': '1605.03844',
            },
        ]  # literature/1458302

        # Sanity-check the input against the ``arxiv_eprints`` sub-schema.
        eprints_schema = load_schema('hep')['properties']['arxiv_eprints']
        assert validate(eprints, eprints_schema) is None

        obj = MockObj({'arxiv_eprints': eprints}, {}, files=MockFiles({}))
        eng = MockEng()

        # Two consecutive runs on the same workflow object.
        for _ in range(2):
            assert populate_arxiv_document(obj, eng) is None

        expected_documents = [
            {
                'key': '1605.03844.pdf',
                'fulltext': True,
                'hidden': True,
                'material': 'preprint',
                'original_url': pdf_url,
                'url': pdf_url,
                'source': 'arxiv',
            },
        ]

        assert expected_documents == obj.data['documents']
def test_populate_arxiv_document_retries_on_error():
    """Expect the PDF document to be attached after a 500 followed by a 200.

    The mocked ``export.arxiv.org`` URL answers first with a 500 and then
    with a 200 carrying the ``1605.03814.pdf`` fixture.  After the task
    returns ``None``, ``obj.data['documents']`` must hold exactly one entry
    describing that PDF.
    """
    with requests_mock.Mocker() as requests_mocker:
        requests_mocker.register_uri(
            'GET', 'http://export.arxiv.org/pdf/1605.03814',
            [
                {
                    # Bytes literal: requests-mock only accepts binary data
                    # for ``content``; a plain '' is text on Python 3 (or
                    # with unicode_literals) and would be rejected.
                    'content': b'',
                    'status_code': 500,
                },
                {
                    'content': pkg_resources.resource_string(
                        __name__, os.path.join('fixtures', '1605.03814.pdf')),
                    'status_code': 200,
                },
            ],
        )

        schema = load_schema('hep')
        subschema = schema['properties']['arxiv_eprints']

        data = {
            'arxiv_eprints': [
                {
                    'categories': [
                        'hep-ex',
                    ],
                    'value': '1605.03814',
                },
            ],
        }  # literature/1458270
        extra_data = {}
        files = MockFiles({})
        # Sanity-check the input against the ``arxiv_eprints`` sub-schema.
        assert validate(data['arxiv_eprints'], subschema) is None

        obj = MockObj(data, extra_data, files=files)
        eng = MockEng()

        assert populate_arxiv_document(obj, eng) is None

        expected_url = 'http://export.arxiv.org/pdf/1605.03814'
        expected_documents = [
            {
                'key': '1605.03814.pdf',
                'fulltext': True,
                'hidden': True,
                'material': 'preprint',
                'original_url': expected_url,
                'url': expected_url,
                'source': 'arxiv',
            }
        ]
        documents = obj.data['documents']
        assert expected_documents == documents
Ejemplo n.º 5
0
def test_populate_arxiv_document_retries_on_error():
    """Expect the PDF document to be attached after a 500 followed by a 200.

    ``export.arxiv.org`` is mocked to answer with a 500 and then a 200
    carrying the ``1605.03814.pdf`` fixture, while ``arxiv.org`` is mocked
    to answer with a 500.  After the task returns ``None``,
    ``obj.data['documents']`` must hold exactly one entry whose URLs point
    at ``export.arxiv.org``.
    """
    # Bytes literal: requests-mock only accepts binary data for ``content``;
    # a plain '' is text on Python 3 (or with unicode_literals) and would be
    # rejected.
    response500 = {'content': b'', 'status_code': 500}
    response200 = {
        'content': pkg_resources.resource_string(
            __name__, os.path.join('fixtures', '1605.03814.pdf')),
        'status_code': 200,
    }
    with requests_mock.Mocker() as requests_mocker:
        requests_mocker.get(
            'http://export.arxiv.org/pdf/1605.03814',
            (response500, response200),
        )
        requests_mocker.get(
            'http://arxiv.org/pdf/1605.03814',
            (response500,)
        )
        schema = load_schema('hep')
        subschema = schema['properties']['arxiv_eprints']

        data = {
            'arxiv_eprints': [
                {
                    'categories': [
                        'hep-ex',
                    ],
                    'value': '1605.03814',
                },
            ],
        }  # literature/1458270
        extra_data = {}
        files = MockFiles({})
        # Sanity-check the input against the ``arxiv_eprints`` sub-schema.
        assert validate(data['arxiv_eprints'], subschema) is None

        obj = MockObj(data, extra_data, files=files)
        eng = MockEng()

        assert populate_arxiv_document(obj, eng) is None

        expected_url = 'http://export.arxiv.org/pdf/1605.03814'
        expected_documents = [
            {
                'key': '1605.03814.pdf',
                'fulltext': True,
                'hidden': True,
                'material': 'preprint',
                'original_url': expected_url,
                'url': expected_url,
                'source': 'arxiv',
            }
        ]
        documents = obj.data['documents']
        assert expected_documents == documents
def test_populate_arxiv_document_does_not_duplicate():
    """Expect a single document entry after running the task twice.

    The task is called twice on the same object; ``obj.data['documents']``
    must then contain exactly one entry for ``1605.03844.pdf``.
    """
    eprints = [
        {
            'categories': ['physics.ins-det'],
            'value': '1605.03844',
        },
    ]  # literature/1458302

    # Sanity-check the input against the ``arxiv_eprints`` sub-schema.
    eprints_schema = load_schema('hep')['properties']['arxiv_eprints']
    assert validate(eprints, eprints_schema) is None

    obj = MockObj({'arxiv_eprints': eprints}, {}, files=MockFiles({}))
    eng = MockEng()

    # Two consecutive runs on the same workflow object.
    for _ in range(2):
        assert populate_arxiv_document(obj, eng) is None

    pdf_url = 'http://export.arxiv.org/pdf/1605.03844'
    expected_documents = [
        {
            'key': '1605.03844.pdf',
            'fulltext': True,
            'hidden': True,
            'material': 'preprint',
            'original_url': pdf_url,
            'url': pdf_url,
            'source': 'arxiv',
        }
    ]

    assert expected_documents == obj.data['documents']
Ejemplo n.º 7
0
def test_populate_arxiv_document_logs_on_pdf_not_existing():
    """Expect an info log entry when the mocked PDF URL serves the HTML fixture.

    ``export.arxiv.org`` is mocked to answer with a 200 carrying the
    ``1707.02785.html`` fixture, while ``arxiv.org`` is mocked to answer
    with a 500.  The task must return ``None`` and the object's info log
    must equal ``'No PDF is available for 1707.02785'``.
    """
    # Bytes literal: requests-mock only accepts binary data for ``content``;
    # a plain '' is text on Python 3 (or with unicode_literals) and would be
    # rejected.
    response500 = {'content': b'', 'status_code': 500}
    response200 = {
        'content': pkg_resources.resource_string(
            __name__, os.path.join('fixtures', '1707.02785.html')),
        'status_code': 200,
    }
    with requests_mock.Mocker() as requests_mocker:
        requests_mocker.get(
            'http://export.arxiv.org/pdf/1707.02785',
            (response200, ),
        )
        requests_mocker.get(
            'http://arxiv.org/pdf/1707.02785',
            (response500, ),
        )
        schema = load_schema('hep')
        subschema = schema['properties']['arxiv_eprints']

        data = {
            'arxiv_eprints': [
                {
                    'categories': [
                        'cs.CV',
                    ],
                    'value': '1707.02785',
                },
            ],
        }  # literature/1458302
        extra_data = {}
        files = MockFiles({})
        # Sanity-check the input against the ``arxiv_eprints`` sub-schema.
        assert validate(data['arxiv_eprints'], subschema) is None

        obj = MockObj(data, extra_data, files=files)
        eng = MockEng()

        assert populate_arxiv_document(obj, eng) is None

        expected = 'No PDF is available for 1707.02785'
        result = obj.log._info.getvalue()

        assert expected == result