コード例 #1
0
def test_arxiv_plot_extract_logs_when_images_are_invalid(mock_process_tarball):
    """Check that a ``DelegateError`` is reported in the error log.

    The ``mock_process_tarball`` argument is configured with
    ``DelegateError`` as its side effect. The task must still return
    ``None`` and the arXiv identifier must show up in the error log.
    """
    mock_process_tarball.side_effect = DelegateError

    eprints_schema = load_schema('hep')['properties']['arxiv_eprints']

    fixture_path = pkg_resources.resource_filename(
        __name__, os.path.join('fixtures', '1612.00624'))

    eprint = {'categories': ['physics.ins-det'], 'value': '1612.00624'}
    record = {'arxiv_eprints': [eprint]}  # synthetic data

    tarball = AttrDict({'file': AttrDict({'uri': fixture_path})})
    attached = MockFiles({'1612.00624.tar.gz': tarball})
    assert validate(record['arxiv_eprints'], eprints_schema) is None

    obj = MockObj(record, {}, files=attached)
    eng = MockEng()

    assert arxiv_plot_extract(obj, eng) is None

    logged_errors = obj.log._error.getvalue()
    assert '1612.00624' in logged_errors
コード例 #2
0
def test_arxiv_author_list_handles_auto_ignore_comment():
    """Run the default author-list task on the ``1703.09986.tar.gz`` fixture.

    The only expectation is that the task returns ``None`` for this tarball.
    """
    eprints_schema = load_schema('hep')['properties']['arxiv_eprints']

    tarball_path = pkg_resources.resource_filename(
        __name__, os.path.join('fixtures', '1703.09986.tar.gz'))

    record = {
        'arxiv_eprints': [
            {'categories': ['hep-ex'], 'value': '1703.09986'},
        ],
    }  # record/1519995

    tarball = AttrDict({'file': AttrDict({'uri': tarball_path})})
    attached = MockFiles({'1703.09986.tar.gz': tarball})
    assert validate(record['arxiv_eprints'], eprints_schema) is None

    obj = MockObj(record, {}, files=attached)
    eng = MockEng()

    extract_authors = arxiv_author_list()

    assert extract_authors(obj, eng) is None
コード例 #3
0
def test_arxiv_author_list_with_missing_tarball():
    """Check the info message logged when no matching tarball is attached.

    The workflow object only carries a file named ``jessica.jones.tar.gz``,
    so the task is expected to return ``None`` and to log that the author
    list extraction was skipped.
    """
    eprints_schema = load_schema('hep')['properties']['arxiv_eprints']

    record = {
        'arxiv_eprints': [
            {'categories': ['hep-ex'], 'value': '1703.09986'},
        ],
    }  # record/1519995
    validate(record['arxiv_eprints'], eprints_schema)

    unrelated_file = AttrDict({
        'file': AttrDict({'uri': 'alias.investigations'}),
    })
    attached = MockFiles({'jessica.jones.tar.gz': unrelated_file})

    obj = MockObj(record, {}, files=attached)
    eng = MockEng()

    extract_authors = arxiv_author_list()
    skip_message = \
        'Skipping author list extraction, no tarball with name "1703.09986.tar.gz" found'

    assert extract_authors(obj, eng) is None

    logged_info = obj.log._info.getvalue()
    assert skip_message in logged_info
コード例 #4
0
def test_arxiv_plot_extract_is_safe_to_rerun(mock_os):
    """Check that running ``arxiv_plot_extract`` twice gives the same outcome.

    The task is executed two times on the same workflow object. After each
    run the figures stored in ``obj.data`` and the keys of ``obj.files``
    must match the expected single-plot result.

    Args:
        mock_os: mock whose ``path.abspath`` return value is pointed at a
            temporary directory created for this test.
    """
    schema = load_schema('hep')
    subschema = schema['properties']['arxiv_eprints']

    filename = pkg_resources.resource_filename(
        __name__, os.path.join('fixtures', '0804.1873.tar.gz'))

    data = {
        'arxiv_eprints': [
            {
                'categories': [
                    'nucl-ex',
                ],
                'value': '0804.1873',
            },
        ],
    }  # literature/783246
    extra_data = {}
    files = MockFiles({
        '0804.1873.tar.gz':
        AttrDict({
            'file': AttrDict({
                'uri': filename,
            }),
        }),
    })
    assert validate(data['arxiv_eprints'], subschema) is None

    obj = MockObj(data, extra_data, files=files)
    eng = MockEng()

    # The expectations do not change between runs, so build them once.
    expected_figures = [{
        'url':
        '/api/files/0b9dd5d1-feae-4ba5-809d-3a029b0bc110/figure1.png',
        'source':
        'arxiv',
        'material':
        'preprint',
        'key':
        'figure1.png',
        'caption':
        'Difference (in MeV) between the theoretical and experimental masses for the 2027 selected nuclei as a function of the mass number.'
    }]
    expected_files = ['0804.1873.tar.gz', 'figure1.png']

    # Create the directory *before* entering ``try``: if ``mkdtemp`` itself
    # failed inside the block, ``finally`` would hit an unbound name and
    # mask the original error with a ``NameError``.
    temporary_dir = mkdtemp()
    try:
        mock_os.path.abspath.return_value = temporary_dir

        for _ in range(2):
            assert arxiv_plot_extract(obj, eng) is None

            result = obj.data['figures']

            assert expected_figures == result

            assert expected_files == obj.files.keys

    finally:
        rmtree(temporary_dir)
コード例 #5
0
def test_prepare_files_ignores_keys_not_ending_with_pdf():
    """Check that a file stored under the key ``foo.bar`` is left alone.

    After ``prepare_files`` runs, the record data must still be empty and
    nothing may have been written to the info log.
    """
    stored_file = AttrDict({
        'obj': AttrDict({
            'file': AttrDict({'uri': '/data/foo.pdf'}),
        }),
    })
    attached = MockFiles({'foo.bar': stored_file})

    obj = MockObj({}, {}, files=attached)
    eng = MockEng()

    assert prepare_files(obj, eng) is None

    # No data may be added to the record.
    assert {} == obj.data

    # No info message may be emitted.
    assert '' == obj.log._info.getvalue()
コード例 #6
0
def test_prepare_files_annotates_files_from_arxiv():
    """Check the ``_fft`` entry produced for a record that has an arXiv eprint.

    The resulting ``_fft`` list must validate against the schema, carry the
    ``arXiv`` type and ``arxiv:``-prefixed filename, and the
    ``arxiv_eprints`` field must be unchanged.
    """
    properties = load_schema('hep')['properties']
    _fft_schema = properties['_fft']
    eprints_schema = properties['arxiv_eprints']

    record = {
        'arxiv_eprints': [
            {'categories': ['hep-th'], 'value': 'hep-th/9711200'},
        ],
    }
    pdf_file = AttrDict({
        'obj': AttrDict({
            'file': AttrDict({'uri': '/data/foo.pdf'}),
        }),
    })
    attached = MockFiles({'foo.pdf': pdf_file})
    assert validate(record['arxiv_eprints'], eprints_schema) is None

    obj = MockObj(record, {}, files=attached)
    eng = MockEng()

    assert prepare_files(obj, eng) is None

    wanted_fft = [
        {
            'path': '/data/foo.pdf',
            'type': 'arXiv',
            'filename': 'arxiv:foo',
            'format': '.pdf',
        },
    ]
    wanted_eprints = [
        {'categories': ['hep-th'], 'value': 'hep-th/9711200'},
    ]
    produced = obj.data

    assert validate(produced['_fft'], _fft_schema) is None
    assert wanted_fft == produced['_fft']

    assert validate(produced['arxiv_eprints'], eprints_schema) is None
    assert wanted_eprints == produced['arxiv_eprints']

    logged_info = obj.log._info.getvalue()

    assert 'Non-user PDF files added to FFT.' == logged_info
コード例 #7
0
def test_arxiv_author_list_logs_on_error(mock_untar):
    """Check that an ``InvalidTarball`` error is reported in the info log.

    The ``mock_untar`` argument is configured with ``InvalidTarball`` as its
    side effect. The task must return ``None`` and the arXiv identifier
    must appear in the info log.
    """
    mock_untar.side_effect = InvalidTarball

    eprints_schema = load_schema('hep')['properties']['arxiv_eprints']

    fixture_path = pkg_resources.resource_filename(
        __name__, os.path.join('fixtures', '1605.07707'))

    record = {
        'arxiv_eprints': [
            {'categories': ['hep-th'], 'value': '1605.07707'},
        ],
    }  # synthetic data

    tarball = AttrDict({'file': AttrDict({'uri': fixture_path})})
    attached = MockFiles({'1605.07707.tar.gz': tarball})
    assert validate(record['arxiv_eprints'], eprints_schema) is None

    obj = MockObj(record, {}, files=attached)
    eng = MockEng()

    extract_authors = arxiv_author_list()

    assert extract_authors(obj, eng) is None

    logged_info = obj.log._info.getvalue()
    assert '1605.07707' in logged_info
コード例 #8
0
def test_arxiv_author_list_handles_multiple_author_xml_files():
    """Check the authors extracted from the ``multiple_author_lists`` fixture.

    After the task runs, ``obj.data['authors']`` must equal the two
    expected author entries, which themselves validate against the
    ``authors`` subschema.
    """
    properties = load_schema('hep')['properties']

    tarball_path = pkg_resources.resource_filename(
        __name__,
        os.path.join('fixtures', '1703.09986.multiple_author_lists.tar.gz'))

    record = {
        '$schema': 'http://localhost:5000/hep.json',
        'arxiv_eprints': [
            {'categories': ['hep-ex'], 'value': '1703.09986'},
        ],
    }  # record/1519995
    validate(record['arxiv_eprints'], properties['arxiv_eprints'])

    tarball = AttrDict({'file': AttrDict({'uri': tarball_path})})
    attached = MockFiles({'1703.09986.tar.gz': tarball})

    obj = MockObj(record, {}, files=attached)
    eng = MockEng()

    extract_authors = arxiv_author_list()
    extract_authors(obj, eng)

    yerevan = [{'value': 'Yerevan Phys. Inst.'}]
    wanted_authors = [
        {
            'affiliations': list(yerevan),
            'ids': [
                {'value': 'INSPIRE-00312131', 'schema': 'INSPIRE ID'},
                {'value': 'CERN-432142', 'schema': 'CERN'},
            ],
            'full_name': 'Sirunyan, Albert M',
        },
        {
            'affiliations': list(yerevan),
            'ids': [
                {'value': 'INSPIRE-00312132', 'schema': 'INSPIRE ID'},
                {'value': 'CERN-432143', 'schema': 'CERN'},
            ],
            'full_name': 'Weary, Jake',
        },
    ]
    validate(wanted_authors, properties['authors'])

    assert obj.data.get('authors') == wanted_authors
コード例 #9
0
def test_arxiv_plot_extract_populates_files_with_plots(mock_os, tmpdir):
    """Check the figure metadata produced from the ``0804.1873`` fixture.

    ``mock_os.path.abspath`` is pointed at a ``plots`` directory created
    under ``tmpdir``. After the task runs, ``obj.data['figures']`` must hold
    the single expected figure and the info log must read
    ``Added 1 plots.``.
    """
    eprints_schema = load_schema('hep')['properties']['arxiv_eprints']

    tarball_path = pkg_resources.resource_filename(
        __name__, os.path.join('fixtures', '0804.1873.tar.gz'))

    record = {
        'arxiv_eprints': [
            {'categories': ['nucl-ex'], 'value': '0804.1873'},
        ],
    }  # literature/783246

    tarball = AttrDict({'file': AttrDict({'uri': tarball_path})})
    attached = MockFiles({'0804.1873.tar.gz': tarball})
    assert validate(record['arxiv_eprints'], eprints_schema) is None

    obj = MockObj(record, {}, files=attached)
    eng = MockEng()

    plots_dir = tmpdir.mkdir('plots')
    mock_os.path.abspath.return_value = str(plots_dir)

    assert arxiv_plot_extract(obj, eng) is None

    wanted_figure = {
        'url': '/api/files/0b9dd5d1-feae-4ba5-809d-3a029b0bc110/figure1.png',
        'source': 'arxiv',
        'material': 'preprint',
        'key': 'figure1.png',
        'caption':
        'Difference (in MeV) between the theoretical and experimental masses for the 2027 selected nuclei as a function of the mass number.'
    }

    assert [wanted_figure] == obj.data['figures']

    logged_info = obj.log._info.getvalue()

    assert 'Added 1 plots.' == logged_info
コード例 #10
0
def test_arxiv_plot_extract_populates_files_with_plots(mock_os):
    """Check the description stored for the plot extracted from ``0804.1873``.

    After the task runs, ``obj.files['figure1']['description']`` must equal
    the expected caption and the info log must read ``Added 1 plots.``.

    Args:
        mock_os: mock whose ``path.abspath`` return value is pointed at a
            temporary directory created for this test.
    """
    schema = load_schema('hep')
    subschema = schema['properties']['arxiv_eprints']

    filename = pkg_resources.resource_filename(
        __name__, os.path.join('fixtures', '0804.1873.tar.gz'))

    data = {
        'arxiv_eprints': [
            {
                'categories': [
                    'nucl-ex',
                ],
                'value': '0804.1873',
            },
        ],
    }  # literature/783246
    extra_data = {}
    files = MockFiles({
        '0804.1873.tar.gz':
        AttrDict({
            'file': AttrDict({
                'uri': filename,
            }),
        }),
    })
    assert validate(data['arxiv_eprints'], subschema) is None

    obj = MockObj(data, extra_data, files=files)
    eng = MockEng()

    # Create the directory *before* entering ``try``: if ``mkdtemp`` itself
    # failed inside the block, ``finally`` would hit an unbound name and
    # mask the original error with a ``NameError``.
    temporary_dir = mkdtemp()
    try:
        mock_os.path.abspath.return_value = temporary_dir

        assert arxiv_plot_extract(obj, eng) is None

        # ``expected`` holds the literal and ``result`` the observed value,
        # so that a failure report reads the right way round.
        expected = ('00000 Difference (in MeV) between the theoretical and '
                    'experimental masses for the 2027 selected nuclei as a '
                    'function of the mass number.')
        result = obj.files['figure1']['description']

        assert expected == result

        expected = 'Added 1 plots.'
        result = obj.log._info.getvalue()

        assert expected == result
    finally:
        rmtree(temporary_dir)
コード例 #11
0
def test_arxiv_author_list_does_not_produce_latex():
    """Check that the extracted author name contains a plain unicode letter.

    After the task runs on the ``1802.03388.tar.gz`` fixture,
    ``obj.data['authors']`` must equal the single expected author, whose
    ``full_name`` starts with a non-ASCII character.
    """
    properties = load_schema('hep')['properties']

    tarball_path = pkg_resources.resource_filename(
        __name__, os.path.join('fixtures', '1802.03388.tar.gz'))

    record = {
        'arxiv_eprints': [
            {'categories': ['hep-ex'], 'value': '1802.03388'},
        ],
    }
    validate(record['arxiv_eprints'], properties['arxiv_eprints'])

    tarball = AttrDict({'file': AttrDict({'uri': tarball_path})})
    attached = MockFiles({'1802.03388.tar.gz': tarball})

    wanted_authors = [
        {
            'affiliations': [{'value': 'Lund U.'}],
            'ids': [
                {'value': 'INSPIRE-00061248', 'schema': 'INSPIRE ID'},
            ],
            'full_name': u'Åkesson, Torsten Paul Ake',
        },
    ]
    validate(wanted_authors, properties['authors'])

    obj = MockObj(record, {}, files=attached)
    eng = MockEng()

    extract_authors = arxiv_author_list()

    assert extract_authors(obj, eng) is None
    assert obj.data.get('authors') == wanted_authors
コード例 #12
0
def test_arxiv_author_list_logs_on_error(mock_os, mock_untar):
    """Check the error message logged when untarring raises ``InvalidTarball``.

    The task must return ``None`` and the error log must equal the
    ``Invalid tarball ...`` message naming both the file URI and the arXiv
    identifier.

    Args:
        mock_os: mock whose ``path.abspath`` return value is pointed at a
            temporary directory created for this test.
        mock_untar: mock configured with ``InvalidTarball`` as side effect.
    """
    mock_untar.side_effect = InvalidTarball

    schema = load_schema('hep')
    subschema = schema['properties']['arxiv_eprints']

    data = {
        'arxiv_eprints': [
            {
                'categories': [
                    'hep-th',
                ],
                'value': '1605.07707',
            },
        ],
    }  # synthetic data
    extra_data = {}
    files = MockFiles({
        '1605.07707.tar.gz':
        AttrDict({
            'file':
            AttrDict({
                'uri': 'http://export.arxiv.org/e-print/1605.07707',
            })
        })
    })
    assert validate(data['arxiv_eprints'], subschema) is None

    obj = MockObj(data, extra_data, files=files)
    eng = MockEng()

    default_arxiv_author_list = arxiv_author_list()

    # Create the directory *before* entering ``try``: if ``mkdtemp`` itself
    # failed inside the block, ``finally`` would hit an unbound name and
    # mask the original error with a ``NameError``.
    temporary_dir = mkdtemp()
    try:
        mock_os.path.abspath.return_value = temporary_dir

        assert default_arxiv_author_list(obj, eng) is None

        expected = 'Invalid tarball http://export.arxiv.org/e-print/1605.07707 for arxiv_id 1605.07707'
        result = obj.log._error.getvalue()

        assert expected == result
    finally:
        rmtree(temporary_dir)
コード例 #13
0
def test_arxiv_plot_extract_handles_duplicate_plot_names(mock_os):
    """Check the number of figures and files produced from ``1711.10662``.

    After the task runs, ``obj.data['figures']`` must contain 66 entries
    and ``obj.files.keys`` must contain 67 entries.

    Args:
        mock_os: mock whose ``path.abspath`` return value is pointed at a
            temporary directory created for this test.
    """
    schema = load_schema('hep')
    subschema = schema['properties']['arxiv_eprints']

    filename = pkg_resources.resource_filename(
        __name__, os.path.join('fixtures', '1711.10662.tar.gz'))

    data = {
        'arxiv_eprints': [
            {
                'categories': [
                    'cs.CV',
                ],
                'value': '1711.10662',
            },
        ],
    }  # holdingpen/807096
    extra_data = {}
    files = MockFiles({
        '1711.10662.tar.gz':
        AttrDict({
            'file': AttrDict({
                'uri': filename,
            }),
        }),
    })
    assert validate(data['arxiv_eprints'], subschema) is None

    obj = MockObj(data, extra_data, files=files)
    eng = MockEng()

    # Create the directory *before* entering ``try``: if ``mkdtemp`` itself
    # failed inside the block, ``finally`` would hit an unbound name and
    # mask the original error with a ``NameError``.
    temporary_dir = mkdtemp()
    try:
        mock_os.path.abspath.return_value = temporary_dir

        assert arxiv_plot_extract(obj, eng) is None

        assert len(obj.data['figures']) == 66
        assert len(obj.files.keys) == 67

    finally:
        rmtree(temporary_dir)
コード例 #14
0
def test_prepare_files():
    """Check the ``_fft`` entry produced for a PDF on a record without eprints.

    The resulting ``_fft`` list must validate against the schema and carry
    the ``INSPIRE-PUBLIC`` type, and the info log must read
    ``Non-user PDF files added to FFT.``.
    """
    fft_schema = load_schema('hep')['properties']['_fft']

    pdf_file = AttrDict({
        'obj': AttrDict({
            'file': AttrDict({'uri': '/data/foo.pdf'}),
        }),
    })
    attached = MockFiles({'foo.pdf': pdf_file})

    obj = MockObj({}, {}, files=attached)
    eng = MockEng()

    assert prepare_files(obj, eng) is None

    wanted_fft = [
        {
            'path': '/data/foo.pdf',
            'type': 'INSPIRE-PUBLIC',
            'filename': 'foo',
            'format': '.pdf',
        },
    ]
    produced = obj.data

    assert validate(produced['_fft'], fft_schema) is None
    assert wanted_fft == produced['_fft']

    logged_info = obj.log._info.getvalue()

    assert 'Non-user PDF files added to FFT.' == logged_info
コード例 #15
0
def test_arxiv_plot_extract_retries_on_io_error(mock_os, tmpdir):
    """Check that ``arxiv_plot_extract`` calls ``open`` five times before failing.

    ``open`` as seen by ``inspirehep.modules.workflows.tasks.arxiv`` is
    patched with ``side_effect_open`` (defined outside this block;
    presumably it raises ``IOError`` -- confirm against its definition).
    The task is expected to end up raising ``IOError``, and the patched
    ``open`` must have been called exactly five times.

    Args:
        mock_os: mock whose ``path.abspath`` return value is pointed at a
            ``plots`` directory created under ``tmpdir``.
        tmpdir: pytest temporary-directory fixture.
    """
    schema = load_schema('hep')
    subschema = schema['properties']['arxiv_eprints']

    filename = pkg_resources.resource_filename(
        __name__, os.path.join('fixtures', '1711.10662.tar.gz'))

    data = {
        'arxiv_eprints': [
            {
                'categories': [
                    'cs.CV',
                ],
                'value': '1711.10662',
            },
        ],
    }  # holdingpen/807096
    extra_data = {}
    files = MockFiles({
        '1711.10662.tar.gz':
        AttrDict({
            'file': AttrDict({
                'uri': filename,
            }),
        }),
    })
    assert validate(data['arxiv_eprints'], subschema) is None

    obj = MockObj(data, extra_data, files=files)
    eng = MockEng()

    temporary_dir = tmpdir.mkdir('plots')
    mock_os.path.abspath.return_value = str(temporary_dir)

    with patch(
            'inspirehep.modules.workflows.tasks.arxiv.open') as mock_open:
        mock_open.side_effect = side_effect_open

        with pytest.raises(IOError):
            arxiv_plot_extract(obj, eng)

        # This check must live outside ``pytest.raises``: any statement
        # placed after the raising call inside that block is never
        # executed, so the retry count would silently go unverified.
        assert mock_open.call_count == 5
コード例 #16
0
def test_arxiv_author_list_only_overrides_authors():
    """Check that the author-list task leaves the other record fields intact.

    After the task runs on the ``1703.09986.tar.gz`` fixture, both
    ``arxiv_eprints`` and ``$schema`` must still be present in ``obj.data``
    with the values they had in the input record.
    """
    eprints_schema = load_schema('hep')['properties']['arxiv_eprints']

    tarball_path = pkg_resources.resource_filename(
        __name__, os.path.join('fixtures', '1703.09986.tar.gz'))

    record = {
        '$schema': 'http://localhost:5000/hep.json',
        'arxiv_eprints': [
            {'categories': ['hep-ex'], 'value': '1703.09986'},
        ],
    }  # record/1519995
    validate(record['arxiv_eprints'], eprints_schema)

    tarball = AttrDict({'file': AttrDict({'uri': tarball_path})})
    attached = MockFiles({'1703.09986.tar.gz': tarball})

    obj = MockObj(record, {}, files=attached)
    eng = MockEng()

    extract_authors = arxiv_author_list()
    extract_authors(obj, eng)

    # Each untouched field must still exist and keep its input value.
    assert 'arxiv_eprints' in obj.data
    assert obj.data['arxiv_eprints'] == record['arxiv_eprints']
    assert '$schema' in obj.data
    assert obj.data['$schema'] == record['$schema']
コード例 #17
0
def test_arxiv_plot_extract_logs_when_images_are_invalid(mock_process_tarball):
    """Check the exact error message logged after a ``DelegateError``.

    The ``mock_process_tarball`` argument is configured with
    ``DelegateError`` as its side effect. The task must return ``None``
    and the error log must equal the ``Error extracting plots ...``
    message.
    """
    mock_process_tarball.side_effect = DelegateError

    eprints_schema = load_schema('hep')['properties']['arxiv_eprints']

    record = {
        'arxiv_eprints': [
            {'categories': ['physics.ins-det'], 'value': '1612.00624'},
        ],
    }  # synthetic data

    remote_tarball = AttrDict({
        'file': AttrDict({
            'uri': 'http://export.arxiv.org/e-print/1612.00624',
        }),
    })
    attached = MockFiles({'1612.00624.tar.gz': remote_tarball})
    assert validate(record['arxiv_eprints'], eprints_schema) is None

    obj = MockObj(record, {}, files=attached)
    eng = MockEng()

    assert arxiv_plot_extract(obj, eng) is None

    logged_errors = obj.log._error.getvalue()

    assert 'Error extracting plots for 1612.00624. Report and skip.' == logged_errors
コード例 #18
0
def test_arxiv_plot_extract_logs_when_tarball_is_invalid(mock_process_tarball):
    """Check the exact info message logged after an ``InvalidTarball`` error.

    The ``mock_process_tarball`` argument is configured with
    ``InvalidTarball`` as its side effect. The task must return ``None``
    and the info log must equal the ``Invalid tarball ...`` message.
    """
    mock_process_tarball.side_effect = InvalidTarball

    eprints_schema = load_schema('hep')['properties']['arxiv_eprints']

    record = {
        'arxiv_eprints': [
            {'categories': ['physics.ins-det'], 'value': '1612.00626'},
        ],
    }  # synthetic data

    remote_tarball = AttrDict({
        'file': AttrDict({
            'uri': 'http://export.arxiv.org/e-print/1612.00626',
        }),
    })
    attached = MockFiles({'1612.00626.tar.gz': remote_tarball})
    assert validate(record['arxiv_eprints'], eprints_schema) is None

    obj = MockObj(record, {}, files=attached)
    eng = MockEng()

    assert arxiv_plot_extract(obj, eng) is None

    wanted_message = 'Invalid tarball http://export.arxiv.org/e-print/1612.00626 for arxiv_id 1612.00626'
    logged_info = obj.log._info.getvalue()

    assert wanted_message == logged_info
コード例 #19
0
def test_prepare_files_skips_empty_files():
    """Check that a ``foo.pdf`` entry with no content is left alone.

    After ``prepare_files`` runs, the record data must still be empty and
    nothing may have been written to the info log.
    """
    attached = MockFiles({'foo.pdf': AttrDict({})})

    obj = MockObj({}, {}, files=attached)
    eng = MockEng()

    assert prepare_files(obj, eng) is None

    # No data may be added to the record.
    assert {} == obj.data

    # No info message may be emitted.
    assert '' == obj.log._info.getvalue()