def test_arxiv_plot_extract_logs_when_tarball_is_invalid(mock_process_tarball):
    """Check the info log when ``mock_process_tarball`` raises ``InvalidTarball``.

    The task must return ``None`` and the info log of the workflow object
    must equal the expected message.
    """
    mock_process_tarball.side_effect = InvalidTarball

    eprints_schema = load_schema('hep')['properties']['arxiv_eprints']

    # synthetic data
    eprint = {
        'categories': ['physics.ins-det'],
        'value': '1612.00626',
    }
    record = {'arxiv_eprints': [eprint]}

    tarball = AttrDict({
        'file': AttrDict({
            'uri': 'http://export.arxiv.org/e-print/1612.00626',
        }),
    })
    attached = MockFiles({'1612.00626.tar.gz': tarball})
    assert validate(record['arxiv_eprints'], eprints_schema) is None

    workflow_obj = MockObj(record, {}, files=attached)
    engine = MockEng()

    outcome = arxiv_plot_extract(workflow_obj, engine)
    assert outcome is None

    message = 'Invalid tarball http://export.arxiv.org/e-print/1612.00626 for arxiv_id 1612.00626'
    assert message == workflow_obj.log._info.getvalue()
def test_arxiv_plot_extract_logs_when_images_are_invalid(mock_process_tarball):
    """Check the error log when ``mock_process_tarball`` raises ``DelegateError``.

    The task must return ``None`` and the error log of the workflow object
    must equal the expected message.
    """
    mock_process_tarball.side_effect = DelegateError

    eprints_schema = load_schema('hep')['properties']['arxiv_eprints']

    # synthetic data
    eprint = {
        'categories': ['physics.ins-det'],
        'value': '1612.00624',
    }
    record = {'arxiv_eprints': [eprint]}

    tarball = AttrDict({
        'file': AttrDict({
            'uri': 'http://export.arxiv.org/e-print/1612.00624',
        }),
    })
    attached = MockFiles({'1612.00624.tar.gz': tarball})
    assert validate(record['arxiv_eprints'], eprints_schema) is None

    workflow_obj = MockObj(record, {}, files=attached)
    engine = MockEng()

    outcome = arxiv_plot_extract(workflow_obj, engine)
    assert outcome is None

    message = 'Error extracting plots for 1612.00624. Report and skip.'
    assert message == workflow_obj.log._error.getvalue()
# Example no. 3
# 0
def test_arxiv_plot_extract_logs_when_images_are_invalid(mock_process_tarball):
    """Check that the arXiv id is logged as an error on ``DelegateError``.

    ``mock_process_tarball`` is configured to raise ``DelegateError``; the
    task must return ``None`` and the error log must mention ``1612.00624``.
    """
    mock_process_tarball.side_effect = DelegateError

    eprints_schema = load_schema('hep')['properties']['arxiv_eprints']

    fixture_path = pkg_resources.resource_filename(
        __name__,
        os.path.join('fixtures', '1612.00624'),
    )

    # synthetic data
    eprint = {
        'categories': ['physics.ins-det'],
        'value': '1612.00624',
    }
    record = {'arxiv_eprints': [eprint]}

    tarball = AttrDict({'file': AttrDict({'uri': fixture_path})})
    attached = MockFiles({'1612.00624.tar.gz': tarball})
    assert validate(record['arxiv_eprints'], eprints_schema) is None

    workflow_obj = MockObj(record, {}, files=attached)
    engine = MockEng()

    outcome = arxiv_plot_extract(workflow_obj, engine)
    assert outcome is None

    error_log = workflow_obj.log._error.getvalue()
    assert '1612.00624' in error_log
def test_arxiv_plot_extract_is_safe_to_rerun(mock_os):
    """Run ``arxiv_plot_extract`` twice on the same object.

    After each of the two runs the task must return ``None``, and the
    object's ``figures`` and the keys of its files must equal the same
    expected values.
    """
    schema = load_schema('hep')
    subschema = schema['properties']['arxiv_eprints']

    filename = pkg_resources.resource_filename(
        __name__, os.path.join('fixtures', '0804.1873.tar.gz'))

    data = {
        'arxiv_eprints': [
            {
                'categories': [
                    'nucl-ex',
                ],
                'value': '0804.1873',
            },
        ],
    }  # literature/783246
    extra_data = {}
    files = MockFiles({
        '0804.1873.tar.gz':
        AttrDict({
            'file': AttrDict({
                'uri': filename,
            }),
        }),
    })
    assert validate(data['arxiv_eprints'], subschema) is None

    obj = MockObj(data, extra_data, files=files)
    eng = MockEng()

    # The expectations do not change between runs, so build them once.
    expected_figures = [{
        'url': '/api/files/0b9dd5d1-feae-4ba5-809d-3a029b0bc110/figure1.png',
        'source': 'arxiv',
        'material': 'preprint',
        'key': 'figure1.png',
        'caption': 'Difference (in MeV) between the theoretical and experimental masses for the 2027 selected nuclei as a function of the mass number.'
    }]
    expected_files = ['0804.1873.tar.gz', 'figure1.png']

    # Create the directory before entering ``try``: if ``mkdtemp`` itself
    # fails there is nothing to remove, and ``finally`` must not reference
    # an unbound ``temporary_dir``.
    temporary_dir = mkdtemp()
    try:
        mock_os.path.abspath.return_value = temporary_dir

        for _ in range(2):
            assert arxiv_plot_extract(obj, eng) is None

            result = obj.data['figures']
            assert expected_figures == result

            assert expected_files == obj.files.keys
    finally:
        rmtree(temporary_dir)
# Example no. 5
# 0
def test_arxiv_plot_extract_populates_files_with_plots(mock_os, tmpdir):
    """Extract plots from the ``0804.1873`` fixture tarball.

    ``mock_os.path.abspath`` is pointed at a ``plots`` directory created
    under ``tmpdir``.  The task must return ``None``, set the expected
    single entry in ``figures`` and write ``'Added 1 plots.'`` to the info
    log.
    """
    eprints_schema = load_schema('hep')['properties']['arxiv_eprints']

    tarball_path = pkg_resources.resource_filename(
        __name__,
        os.path.join('fixtures', '0804.1873.tar.gz'),
    )

    # literature/783246
    eprint = {
        'categories': ['nucl-ex'],
        'value': '0804.1873',
    }
    record = {'arxiv_eprints': [eprint]}

    tarball = AttrDict({'file': AttrDict({'uri': tarball_path})})
    attached = MockFiles({'0804.1873.tar.gz': tarball})
    assert validate(record['arxiv_eprints'], eprints_schema) is None

    workflow_obj = MockObj(record, {}, files=attached)
    engine = MockEng()

    plots_dir = tmpdir.mkdir('plots')
    mock_os.path.abspath.return_value = str(plots_dir)

    outcome = arxiv_plot_extract(workflow_obj, engine)
    assert outcome is None

    figure = {
        'url': '/api/files/0b9dd5d1-feae-4ba5-809d-3a029b0bc110/figure1.png',
        'source': 'arxiv',
        'material': 'preprint',
        'key': 'figure1.png',
        'caption': 'Difference (in MeV) between the theoretical and experimental masses for the 2027 selected nuclei as a function of the mass number.'
    }
    assert [figure] == workflow_obj.data['figures']

    assert 'Added 1 plots.' == workflow_obj.log._info.getvalue()
def test_arxiv_plot_extract_is_safe_to_rerun(mock_os):
    """Call ``arxiv_plot_extract`` two times in a row on one object.

    Each call must return ``None`` and leave the object's ``figures`` and
    the keys of its files equal to the same expected values.
    """
    schema = load_schema('hep')
    subschema = schema['properties']['arxiv_eprints']

    filename = pkg_resources.resource_filename(
        __name__, os.path.join('fixtures', '0804.1873.tar.gz'))

    data = {
        'arxiv_eprints': [
            {
                'categories': [
                    'nucl-ex',
                ],
                'value': '0804.1873',
            },
        ],
    }  # literature/783246
    extra_data = {}
    files = MockFiles({
        '0804.1873.tar.gz': AttrDict({
            'file': AttrDict({
                'uri': filename,
            }),
        }),
    })
    assert validate(data['arxiv_eprints'], subschema) is None

    obj = MockObj(data, extra_data, files=files)
    eng = MockEng()

    # Loop-invariant expectations, built once instead of on every pass.
    expected_figures = [{
        'url': '/api/files/0b9dd5d1-feae-4ba5-809d-3a029b0bc110/figure1.png',
        'source': 'arxiv',
        'material': 'preprint',
        'key': 'figure1.png',
        'caption': 'Difference (in MeV) between the theoretical and experimental masses for the 2027 selected nuclei as a function of the mass number.'
    }]
    expected_files = ['0804.1873.tar.gz', 'figure1.png']

    # ``mkdtemp`` is called outside ``try`` so that a failure to create the
    # directory does not reach ``finally`` with ``temporary_dir`` unbound.
    temporary_dir = mkdtemp()
    try:
        mock_os.path.abspath.return_value = temporary_dir

        for _ in range(2):
            assert arxiv_plot_extract(obj, eng) is None

            result = obj.data['figures']
            assert expected_figures == result

            assert expected_files == obj.files.keys
    finally:
        rmtree(temporary_dir)
# Example no. 7
# 0
def test_arxiv_plot_extract_retries_on_io_error(mock_os, tmpdir):
    """Check that ``IOError`` propagates and how often ``open`` was called.

    ``open`` in the arxiv tasks module is patched with ``side_effect_open``
    as its side effect; the task is expected to raise ``IOError`` and the
    patched ``open`` to have been called five times.

    NOTE(review): the call-count assertion used to sit after the raising
    call inside ``pytest.raises`` and therefore never ran.  Confirm that 5
    matches the retry policy of the task.
    """
    schema = load_schema('hep')
    subschema = schema['properties']['arxiv_eprints']

    filename = pkg_resources.resource_filename(
        __name__, os.path.join('fixtures', '1711.10662.tar.gz'))

    data = {
        'arxiv_eprints': [
            {
                'categories': [
                    'cs.CV',
                ],
                'value': '1711.10662',
            },
        ],
    }  # holdingpen/807096
    extra_data = {}
    files = MockFiles({
        '1711.10662.tar.gz':
        AttrDict({
            'file': AttrDict({
                'uri': filename,
            }),
        }),
    })
    assert validate(data['arxiv_eprints'], subschema) is None

    obj = MockObj(data, extra_data, files=files)
    eng = MockEng()

    temporary_dir = tmpdir.mkdir('plots')
    mock_os.path.abspath.return_value = str(temporary_dir)

    with patch(
            'inspirehep.modules.workflows.tasks.arxiv.open') as mock_open:
        mock_open.side_effect = side_effect_open

        # Only the raising call goes inside ``pytest.raises``; anything
        # placed after it in that block would be skipped.
        with pytest.raises(IOError):
            arxiv_plot_extract(obj, eng)

        assert mock_open.call_count == 5
# Example no. 8
# 0
def test_arxiv_plot_extract_populates_files_with_plots(mock_os):
    """Extract plots from the ``0804.1873`` fixture tarball.

    The task must return ``None``, the ``figure1`` file entry must carry
    the expected description, and the info log must equal
    ``'Added 1 plots.'``.
    """
    schema = load_schema('hep')
    subschema = schema['properties']['arxiv_eprints']

    filename = pkg_resources.resource_filename(
        __name__, os.path.join('fixtures', '0804.1873.tar.gz'))

    data = {
        'arxiv_eprints': [
            {
                'categories': [
                    'nucl-ex',
                ],
                'value': '0804.1873',
            },
        ],
    }  # literature/783246
    extra_data = {}
    files = MockFiles({
        '0804.1873.tar.gz':
        AttrDict({
            'file': AttrDict({
                'uri': filename,
            }),
        }),
    })
    assert validate(data['arxiv_eprints'], subschema) is None

    obj = MockObj(data, extra_data, files=files)
    eng = MockEng()

    # Created outside ``try`` so ``finally`` never sees an unbound name if
    # ``mkdtemp`` itself fails.
    temporary_dir = mkdtemp()
    try:
        mock_os.path.abspath.return_value = temporary_dir

        assert arxiv_plot_extract(obj, eng) is None

        # ``expected`` is the literal and ``result`` the observed value,
        # matching the naming used for the log check below.
        expected = ('00000 Difference (in MeV) between the theoretical and '
                    'experimental masses for the 2027 selected nuclei as a '
                    'function of the mass number.')
        result = obj.files['figure1']['description']

        assert expected == result

        expected = 'Added 1 plots.'
        result = obj.log._info.getvalue()

        assert expected == result
    finally:
        rmtree(temporary_dir)
def test_arxiv_plot_extract_handles_duplicate_plot_names(mock_os):
    """Extract plots from the ``1711.10662`` fixture and check the counts.

    The task must return ``None`` and leave 66 entries in ``figures`` and
    67 keys in the object's files.
    """
    schema = load_schema('hep')
    subschema = schema['properties']['arxiv_eprints']

    filename = pkg_resources.resource_filename(
        __name__, os.path.join('fixtures', '1711.10662.tar.gz'))

    data = {
        'arxiv_eprints': [
            {
                'categories': [
                    'cs.CV',
                ],
                'value': '1711.10662',
            },
        ],
    }  # holdingpen/807096
    extra_data = {}
    files = MockFiles({
        '1711.10662.tar.gz':
        AttrDict({
            'file': AttrDict({
                'uri': filename,
            }),
        }),
    })
    assert validate(data['arxiv_eprints'], subschema) is None

    obj = MockObj(data, extra_data, files=files)
    eng = MockEng()

    # Create the directory before ``try``: if ``mkdtemp`` fails there is
    # nothing to clean up and ``finally`` must not use an unbound name.
    temporary_dir = mkdtemp()
    try:
        mock_os.path.abspath.return_value = temporary_dir

        assert arxiv_plot_extract(obj, eng) is None

        assert len(obj.data['figures']) == 66
        assert len(obj.files.keys) == 67
    finally:
        rmtree(temporary_dir)
def test_arxiv_plot_extract_handles_duplicate_plot_names(mock_os):
    """Run the task on the ``1711.10662`` fixture and verify the totals.

    Expects a ``None`` return value, 66 entries in ``figures`` and 67 keys
    in the object's files.
    """
    schema = load_schema('hep')
    subschema = schema['properties']['arxiv_eprints']

    filename = pkg_resources.resource_filename(
        __name__, os.path.join('fixtures', '1711.10662.tar.gz'))

    data = {
        'arxiv_eprints': [
            {
                'categories': [
                    'cs.CV',
                ],
                'value': '1711.10662',
            },
        ],
    }  # holdingpen/807096
    extra_data = {}
    files = MockFiles({
        '1711.10662.tar.gz': AttrDict({
            'file': AttrDict({
                'uri': filename,
            }),
        }),
    })
    assert validate(data['arxiv_eprints'], subschema) is None

    obj = MockObj(data, extra_data, files=files)
    eng = MockEng()

    # ``mkdtemp`` runs outside ``try`` so that its own failure cannot lead
    # to ``rmtree`` being called on an unbound ``temporary_dir``.
    temporary_dir = mkdtemp()
    try:
        mock_os.path.abspath.return_value = temporary_dir

        assert arxiv_plot_extract(obj, eng) is None

        assert len(obj.data['figures']) == 66
        assert len(obj.files.keys) == 67
    finally:
        rmtree(temporary_dir)
# Example no. 11
# 0
def test_arxiv_plot_extract_logs_when_images_are_invalid(mock_process_tarball):
    """Verify the error message logged after a ``DelegateError``.

    With ``mock_process_tarball`` raising ``DelegateError`` the task must
    return ``None`` and the error log must equal the expected message.
    """
    mock_process_tarball.side_effect = DelegateError

    full_schema = load_schema('hep')
    eprints_schema = full_schema['properties']['arxiv_eprints']

    # synthetic data
    record = {
        'arxiv_eprints': [
            {'categories': ['physics.ins-det'], 'value': '1612.00624'},
        ],
    }

    file_entry = AttrDict({
        'uri': 'http://export.arxiv.org/e-print/1612.00624',
    })
    attached = MockFiles({
        '1612.00624.tar.gz': AttrDict({'file': file_entry}),
    })
    assert validate(record['arxiv_eprints'], eprints_schema) is None

    workflow_obj = MockObj(record, {}, files=attached)
    engine = MockEng()

    assert arxiv_plot_extract(workflow_obj, engine) is None

    wanted = 'Error extracting plots for 1612.00624. Report and skip.'
    logged = workflow_obj.log._error.getvalue()
    assert wanted == logged
# Example no. 12
# 0
def test_arxiv_plot_extract_logs_when_tarball_is_invalid(mock_process_tarball):
    """Verify the info message logged after an ``InvalidTarball``.

    With ``mock_process_tarball`` raising ``InvalidTarball`` the task must
    return ``None`` and the info log must equal the expected message.
    """
    mock_process_tarball.side_effect = InvalidTarball

    full_schema = load_schema('hep')
    eprints_schema = full_schema['properties']['arxiv_eprints']

    # synthetic data
    record = {
        'arxiv_eprints': [
            {'categories': ['physics.ins-det'], 'value': '1612.00626'},
        ],
    }

    file_entry = AttrDict({
        'uri': 'http://export.arxiv.org/e-print/1612.00626',
    })
    attached = MockFiles({
        '1612.00626.tar.gz': AttrDict({'file': file_entry}),
    })
    assert validate(record['arxiv_eprints'], eprints_schema) is None

    workflow_obj = MockObj(record, {}, files=attached)
    engine = MockEng()

    assert arxiv_plot_extract(workflow_obj, engine) is None

    wanted = 'Invalid tarball http://export.arxiv.org/e-print/1612.00626 for arxiv_id 1612.00626'
    logged = workflow_obj.log._info.getvalue()
    assert wanted == logged
# Example no. 13
# 0
def test_arxiv_plot_extract_no_file(mock_process_tarball):
    """Run the task on an object that has no files attached.

    The task must return ``None``, the info log must contain
    ``'No file named='`` and ``mock_process_tarball`` must not be called.
    """
    eprints_schema = load_schema('hep')['properties']['arxiv_eprints']

    # synthetic data
    eprint = {
        'categories': ['physics.ins-det'],
        'value': '1612.00626',
    }
    record = {'arxiv_eprints': [eprint]}

    no_files = MockFiles({})
    assert validate(record['arxiv_eprints'], eprints_schema) is None

    workflow_obj = MockObj(record, {}, files=no_files)
    engine = MockEng()

    outcome = arxiv_plot_extract(workflow_obj, engine)
    assert outcome is None

    info_log = workflow_obj.log._info.getvalue()
    assert 'No file named=' in info_log

    mock_process_tarball.assert_not_called()