def test_arxiv_plot_extract_logs_when_tarball_is_invalid(mock_process_tarball):
    """Check the info log when ``mock_process_tarball`` raises ``InvalidTarball``.

    ``arxiv_plot_extract`` must still return ``None`` and the info log must
    contain exactly the "Invalid tarball" message for the eprint.
    """
    mock_process_tarball.side_effect = InvalidTarball

    eprints_schema = load_schema('hep')['properties']['arxiv_eprints']

    record = {
        'arxiv_eprints': [
            {
                'categories': ['physics.ins-det'],
                'value': '1612.00626',
            },
        ],
    }  # synthetic data
    no_extra_data = {}
    attached = MockFiles({
        '1612.00626.tar.gz': AttrDict({
            'file': AttrDict({
                'uri': 'http://export.arxiv.org/e-print/1612.00626',
            }),
        }),
    })

    # Sanity-check the input against the schema before exercising the task.
    assert validate(record['arxiv_eprints'], eprints_schema) is None

    workflow_obj = MockObj(record, no_extra_data, files=attached)
    engine = MockEng()

    assert arxiv_plot_extract(workflow_obj, engine) is None

    logged_info = workflow_obj.log._info.getvalue()
    assert 'Invalid tarball http://export.arxiv.org/e-print/1612.00626 for arxiv_id 1612.00626' == logged_info
def test_arxiv_plot_extract_logs_when_images_are_invalid(mock_process_tarball):
    """Check the error log when ``mock_process_tarball`` raises ``DelegateError``.

    ``arxiv_plot_extract`` must still return ``None`` and the error log must
    contain exactly the "Error extracting plots" message for the eprint.
    """
    mock_process_tarball.side_effect = DelegateError

    eprints_schema = load_schema('hep')['properties']['arxiv_eprints']

    record = {
        'arxiv_eprints': [
            {
                'categories': ['physics.ins-det'],
                'value': '1612.00624',
            },
        ],
    }  # synthetic data
    no_extra_data = {}
    attached = MockFiles({
        '1612.00624.tar.gz': AttrDict({
            'file': AttrDict({
                'uri': 'http://export.arxiv.org/e-print/1612.00624',
            }),
        }),
    })

    # Sanity-check the input against the schema before exercising the task.
    assert validate(record['arxiv_eprints'], eprints_schema) is None

    workflow_obj = MockObj(record, no_extra_data, files=attached)
    engine = MockEng()

    assert arxiv_plot_extract(workflow_obj, engine) is None

    logged_error = workflow_obj.log._error.getvalue()
    assert 'Error extracting plots for 1612.00624. Report and skip.' == logged_error
def test_arxiv_plot_extract_logs_when_images_are_invalid(mock_process_tarball):
    """Check the error log mentions the eprint when ``DelegateError`` is raised.

    The attached file points at a path under ``fixtures``; the test only
    requires the arXiv identifier to appear somewhere in the error log.
    """
    mock_process_tarball.side_effect = DelegateError

    eprints_schema = load_schema('hep')['properties']['arxiv_eprints']

    fixture_path = pkg_resources.resource_filename(
        __name__, os.path.join('fixtures', '1612.00624'))

    record = {
        'arxiv_eprints': [
            {
                'categories': ['physics.ins-det'],
                'value': '1612.00624',
            },
        ],
    }  # synthetic data
    no_extra_data = {}
    attached = MockFiles({
        '1612.00624.tar.gz': AttrDict({
            'file': AttrDict({'uri': fixture_path}),
        }),
    })

    # Sanity-check the input against the schema before exercising the task.
    assert validate(record['arxiv_eprints'], eprints_schema) is None

    workflow_obj = MockObj(record, no_extra_data, files=attached)
    engine = MockEng()

    assert arxiv_plot_extract(workflow_obj, engine) is None

    logged_error = workflow_obj.log._error.getvalue()
    assert '1612.00624' in logged_error
def test_arxiv_plot_extract_is_safe_to_rerun(mock_os):
    """Run ``arxiv_plot_extract`` twice on the same object.

    After each run the object must hold exactly one figure and exactly two
    file keys (the tarball and ``figure1.png``), i.e. a second run must not
    duplicate anything.
    """
    schema = load_schema('hep')
    subschema = schema['properties']['arxiv_eprints']

    filename = pkg_resources.resource_filename(
        __name__, os.path.join('fixtures', '0804.1873.tar.gz'))

    data = {
        'arxiv_eprints': [
            {
                'categories': [
                    'nucl-ex',
                ],
                'value': '0804.1873',
            },
        ],
    }  # literature/783246
    extra_data = {}
    files = MockFiles({
        '0804.1873.tar.gz': AttrDict({
            'file': AttrDict({
                'uri': filename,
            }),
        }),
    })

    assert validate(data['arxiv_eprints'], subschema) is None

    obj = MockObj(data, extra_data, files=files)
    eng = MockEng()

    # Create the directory *before* entering ``try``: if ``mkdtemp`` itself
    # fails there is nothing to clean up, and the ``finally`` clause must not
    # trip over an unbound ``temporary_dir`` and hide the real error.
    temporary_dir = mkdtemp()
    try:
        mock_os.path.abspath.return_value = temporary_dir

        for _ in range(2):
            assert arxiv_plot_extract(obj, eng) is None

            expected_figures = [{
                'url': '/api/files/0b9dd5d1-feae-4ba5-809d-3a029b0bc110/figure1.png',
                'source': 'arxiv',
                'material': 'preprint',
                'key': 'figure1.png',
                'caption': 'Difference (in MeV) between the theoretical and experimental masses for the 2027 selected nuclei as a function of the mass number.'
            }]
            result = obj.data['figures']

            assert expected_figures == result

            expected_files = ['0804.1873.tar.gz', 'figure1.png']

            assert expected_files == obj.files.keys
    finally:
        rmtree(temporary_dir)
def test_arxiv_plot_extract_populates_files_with_plots(mock_os, tmpdir):
    """Check that one plot is extracted from the fixture tarball.

    The extracted figure must be recorded in ``obj.data['figures']`` and the
    info log must read exactly ``'Added 1 plots.'``.
    """
    eprints_schema = load_schema('hep')['properties']['arxiv_eprints']

    tarball_path = pkg_resources.resource_filename(
        __name__, os.path.join('fixtures', '0804.1873.tar.gz'))

    record = {
        'arxiv_eprints': [
            {
                'categories': ['nucl-ex'],
                'value': '0804.1873',
            },
        ],
    }  # literature/783246
    no_extra_data = {}
    attached = MockFiles({
        '0804.1873.tar.gz': AttrDict({
            'file': AttrDict({'uri': tarball_path}),
        }),
    })

    # Sanity-check the input against the schema before exercising the task.
    assert validate(record['arxiv_eprints'], eprints_schema) is None

    workflow_obj = MockObj(record, no_extra_data, files=attached)
    engine = MockEng()

    # Point the mocked ``os.path.abspath`` at a pytest-managed directory.
    plots_dir = tmpdir.mkdir('plots')
    mock_os.path.abspath.return_value = str(plots_dir)

    assert arxiv_plot_extract(workflow_obj, engine) is None

    expected_figures = [{
        'key': 'figure1.png',
        'caption': 'Difference (in MeV) between the theoretical and experimental masses for the 2027 selected nuclei as a function of the mass number.',
        'material': 'preprint',
        'source': 'arxiv',
        'url': '/api/files/0b9dd5d1-feae-4ba5-809d-3a029b0bc110/figure1.png',
    }]
    assert expected_figures == workflow_obj.data['figures']

    logged_info = workflow_obj.log._info.getvalue()
    assert 'Added 1 plots.' == logged_info
def test_arxiv_plot_extract_is_safe_to_rerun(mock_os):
    """Run ``arxiv_plot_extract`` twice on the same object.

    After each run the object must hold exactly one figure and exactly two
    file keys (the tarball and ``figure1.png``), i.e. a second run must not
    duplicate anything.
    """
    schema = load_schema('hep')
    subschema = schema['properties']['arxiv_eprints']

    filename = pkg_resources.resource_filename(
        __name__, os.path.join('fixtures', '0804.1873.tar.gz'))

    data = {
        'arxiv_eprints': [
            {
                'categories': [
                    'nucl-ex',
                ],
                'value': '0804.1873',
            },
        ],
    }  # literature/783246
    extra_data = {}
    files = MockFiles({
        '0804.1873.tar.gz': AttrDict({
            'file': AttrDict({
                'uri': filename,
            }),
        }),
    })

    assert validate(data['arxiv_eprints'], subschema) is None

    obj = MockObj(data, extra_data, files=files)
    eng = MockEng()

    # Create the directory *before* entering ``try``: if ``mkdtemp`` itself
    # fails there is nothing to clean up, and the ``finally`` clause must not
    # trip over an unbound ``temporary_dir`` and hide the real error.
    temporary_dir = mkdtemp()
    try:
        mock_os.path.abspath.return_value = temporary_dir

        for _ in range(2):
            assert arxiv_plot_extract(obj, eng) is None

            expected_figures = [{
                'url': '/api/files/0b9dd5d1-feae-4ba5-809d-3a029b0bc110/figure1.png',
                'source': 'arxiv',
                'material': 'preprint',
                'key': 'figure1.png',
                'caption': 'Difference (in MeV) between the theoretical and experimental masses for the 2027 selected nuclei as a function of the mass number.'
            }]
            result = obj.data['figures']

            assert expected_figures == result

            expected_files = ['0804.1873.tar.gz', 'figure1.png']

            assert expected_files == obj.files.keys
    finally:
        rmtree(temporary_dir)
def test_arxiv_plot_extract_retries_on_io_error(mock_os, tmpdir):
    """Check that ``arxiv_plot_extract`` re-raises ``IOError`` after retrying.

    ``open`` inside the task module is patched with ``side_effect_open``
    (defined elsewhere in this file; presumably it raises ``IOError`` --
    confirm). The task is expected to propagate ``IOError`` and to have
    called the patched ``open`` five times.
    """
    schema = load_schema('hep')
    subschema = schema['properties']['arxiv_eprints']

    filename = pkg_resources.resource_filename(
        __name__, os.path.join('fixtures', '1711.10662.tar.gz'))

    data = {
        'arxiv_eprints': [
            {
                'categories': [
                    'cs.CV',
                ],
                'value': '1711.10662',
            },
        ],
    }  # holdingpen/807096
    extra_data = {}
    files = MockFiles({
        '1711.10662.tar.gz': AttrDict({
            'file': AttrDict({
                'uri': filename,
            }),
        }),
    })

    assert validate(data['arxiv_eprints'], subschema) is None

    obj = MockObj(data, extra_data, files=files)
    eng = MockEng()

    temporary_dir = tmpdir.mkdir('plots')
    mock_os.path.abspath.return_value = str(temporary_dir)

    with pytest.raises(IOError):
        with patch(
                'inspirehep.modules.workflows.tasks.arxiv.open') as mock_open:
            mock_open.side_effect = side_effect_open
            arxiv_plot_extract(obj, eng)

    # Checked *after* the ``pytest.raises`` block: any statement placed after
    # the raising call inside that block is skipped, so the retry count would
    # never actually be verified there.
    assert mock_open.call_count == 5
def test_arxiv_plot_extract_populates_files_with_plots(mock_os):
    """Check that one plot is extracted and attached with its caption.

    The file stored under ``'figure1'`` must carry the expected description
    and the info log must read exactly ``'Added 1 plots.'``.
    """
    schema = load_schema('hep')
    subschema = schema['properties']['arxiv_eprints']

    filename = pkg_resources.resource_filename(
        __name__, os.path.join('fixtures', '0804.1873.tar.gz'))

    data = {
        'arxiv_eprints': [
            {
                'categories': [
                    'nucl-ex',
                ],
                'value': '0804.1873',
            },
        ],
    }  # literature/783246
    extra_data = {}
    files = MockFiles({
        '0804.1873.tar.gz': AttrDict({
            'file': AttrDict({
                'uri': filename,
            }),
        }),
    })

    assert validate(data['arxiv_eprints'], subschema) is None

    obj = MockObj(data, extra_data, files=files)
    eng = MockEng()

    # Create the directory *before* entering ``try``: if ``mkdtemp`` itself
    # fails there is nothing to clean up, and the ``finally`` clause must not
    # trip over an unbound ``temporary_dir`` and hide the real error.
    temporary_dir = mkdtemp()
    try:
        mock_os.path.abspath.return_value = temporary_dir

        assert arxiv_plot_extract(obj, eng) is None

        # ``expected`` is the literal, ``result`` is what the task produced.
        expected = ('00000 Difference (in MeV) between the theoretical and '
                    'experimental masses for the 2027 selected nuclei as a '
                    'function of the mass number.')
        result = obj.files['figure1']['description']

        assert expected == result

        expected = 'Added 1 plots.'
        result = obj.log._info.getvalue()

        assert expected == result
    finally:
        rmtree(temporary_dir)
def test_arxiv_plot_extract_handles_duplicate_plot_names(mock_os):
    """Check figure and file counts for the ``1711.10662`` fixture tarball.

    After extraction the object must hold 66 figures and 67 file keys.
    """
    schema = load_schema('hep')
    subschema = schema['properties']['arxiv_eprints']

    filename = pkg_resources.resource_filename(
        __name__, os.path.join('fixtures', '1711.10662.tar.gz'))

    data = {
        'arxiv_eprints': [
            {
                'categories': [
                    'cs.CV',
                ],
                'value': '1711.10662',
            },
        ],
    }  # holdingpen/807096
    extra_data = {}
    files = MockFiles({
        '1711.10662.tar.gz': AttrDict({
            'file': AttrDict({
                'uri': filename,
            }),
        }),
    })

    assert validate(data['arxiv_eprints'], subschema) is None

    obj = MockObj(data, extra_data, files=files)
    eng = MockEng()

    # Create the directory *before* entering ``try``: if ``mkdtemp`` itself
    # fails there is nothing to clean up, and the ``finally`` clause must not
    # trip over an unbound ``temporary_dir`` and hide the real error.
    temporary_dir = mkdtemp()
    try:
        mock_os.path.abspath.return_value = temporary_dir

        assert arxiv_plot_extract(obj, eng) is None

        assert len(obj.data['figures']) == 66
        assert len(obj.files.keys) == 67
    finally:
        rmtree(temporary_dir)
def test_arxiv_plot_extract_handles_duplicate_plot_names(mock_os):
    """Check figure and file counts for the ``1711.10662`` fixture tarball.

    After extraction the object must hold 66 figures and 67 file keys.
    """
    schema = load_schema('hep')
    subschema = schema['properties']['arxiv_eprints']

    filename = pkg_resources.resource_filename(
        __name__, os.path.join('fixtures', '1711.10662.tar.gz'))

    data = {
        'arxiv_eprints': [
            {
                'categories': [
                    'cs.CV',
                ],
                'value': '1711.10662',
            },
        ],
    }  # holdingpen/807096
    extra_data = {}
    files = MockFiles({
        '1711.10662.tar.gz': AttrDict({
            'file': AttrDict({
                'uri': filename,
            }),
        }),
    })

    assert validate(data['arxiv_eprints'], subschema) is None

    obj = MockObj(data, extra_data, files=files)
    eng = MockEng()

    # Create the directory *before* entering ``try``: if ``mkdtemp`` itself
    # fails there is nothing to clean up, and the ``finally`` clause must not
    # trip over an unbound ``temporary_dir`` and hide the real error.
    temporary_dir = mkdtemp()
    try:
        mock_os.path.abspath.return_value = temporary_dir

        assert arxiv_plot_extract(obj, eng) is None

        assert len(obj.data['figures']) == 66
        assert len(obj.files.keys) == 67
    finally:
        rmtree(temporary_dir)
def test_arxiv_plot_extract_logs_when_images_are_invalid(mock_process_tarball):
    """Check the error log when ``mock_process_tarball`` raises ``DelegateError``.

    ``arxiv_plot_extract`` must still return ``None`` and the error log must
    contain exactly the "Error extracting plots" message for the eprint.
    """
    mock_process_tarball.side_effect = DelegateError

    eprints_schema = load_schema('hep')['properties']['arxiv_eprints']

    record = {
        'arxiv_eprints': [
            {
                'categories': ['physics.ins-det'],
                'value': '1612.00624',
            },
        ],
    }  # synthetic data
    no_extra_data = {}
    attached = MockFiles({
        '1612.00624.tar.gz': AttrDict({
            'file': AttrDict({
                'uri': 'http://export.arxiv.org/e-print/1612.00624',
            }),
        }),
    })

    # Sanity-check the input against the schema before exercising the task.
    assert validate(record['arxiv_eprints'], eprints_schema) is None

    workflow_obj = MockObj(record, no_extra_data, files=attached)
    engine = MockEng()

    assert arxiv_plot_extract(workflow_obj, engine) is None

    logged_error = workflow_obj.log._error.getvalue()
    assert 'Error extracting plots for 1612.00624. Report and skip.' == logged_error
def test_arxiv_plot_extract_logs_when_tarball_is_invalid(mock_process_tarball):
    """Check the info log when ``mock_process_tarball`` raises ``InvalidTarball``.

    ``arxiv_plot_extract`` must still return ``None`` and the info log must
    contain exactly the "Invalid tarball" message for the eprint.
    """
    mock_process_tarball.side_effect = InvalidTarball

    eprints_schema = load_schema('hep')['properties']['arxiv_eprints']

    record = {
        'arxiv_eprints': [
            {
                'categories': ['physics.ins-det'],
                'value': '1612.00626',
            },
        ],
    }  # synthetic data
    no_extra_data = {}
    attached = MockFiles({
        '1612.00626.tar.gz': AttrDict({
            'file': AttrDict({
                'uri': 'http://export.arxiv.org/e-print/1612.00626',
            }),
        }),
    })

    # Sanity-check the input against the schema before exercising the task.
    assert validate(record['arxiv_eprints'], eprints_schema) is None

    workflow_obj = MockObj(record, no_extra_data, files=attached)
    engine = MockEng()

    assert arxiv_plot_extract(workflow_obj, engine) is None

    logged_info = workflow_obj.log._info.getvalue()
    assert 'Invalid tarball http://export.arxiv.org/e-print/1612.00626 for arxiv_id 1612.00626' == logged_info
def test_arxiv_plot_extract_no_file(mock_process_tarball):
    """Check behaviour when the object has no attached files at all.

    ``arxiv_plot_extract`` must return ``None``, log an info message
    containing ``'No file named='``, and never call ``mock_process_tarball``.
    """
    eprints_schema = load_schema('hep')['properties']['arxiv_eprints']

    record = {
        'arxiv_eprints': [
            {
                'categories': ['physics.ins-det'],
                'value': '1612.00626',
            },
        ],
    }  # synthetic data
    no_extra_data = {}
    no_files = MockFiles({})

    # Sanity-check the input against the schema before exercising the task.
    assert validate(record['arxiv_eprints'], eprints_schema) is None

    workflow_obj = MockObj(record, no_extra_data, files=no_files)
    engine = MockEng()

    assert arxiv_plot_extract(workflow_obj, engine) is None

    logged_info = workflow_obj.log._info.getvalue()
    assert 'No file named=' in logged_info

    mock_process_tarball.assert_not_called()