def manage_arXiv_graphics(ft_file, bibcode, arx_id, category, update=False, dryrun=False):
    """Unpack an arXiv fulltext archive and convert the images it contains.

    Only the opening stage of the workflow is visible in this excerpt:
    look up the existing record (for updates), unpack the archive and
    convert the image files.

    :param ft_file: archive handed to ``file_ops.untar``
    :param bibcode: bibcode used for the database lookup and in diagnostics
    :param arx_id: not used in the code shown here
    :param category: not used in the code shown here
    :param update: when true, fetch the existing ``GraphicsModel`` row
    :param dryrun: not used in the code shown here
    :return: ``None`` on both early exits shown here (no image files found,
        or image conversion raised)
    """
    # If we're updating, grab the existing database entry
    graphic = None
    if update:
        graphic = db.session.query(GraphicsModel).filter(GraphicsModel.bibcode == bibcode).first()
        if not graphic:
            sys.stderr.write("Note: update for %s, but no existing record found!\n" % bibcode)
    # First get lists of (La)TeX and image files
    tex_files, img_files, xdir = file_ops.untar(ft_file)
    # If we didn't find any image files, skip
    # NOTE(review): xdir is not removed on this path, unlike the failure
    # path below -- confirm whether untar leaves a directory behind here.
    if not img_files:
        return
    figures = []
    # Next convert the image files
    # All the original images that cannot be converted will be
    # removed from the list of originals
    try:
        img_files, converted_images = file_ops.convert_images(img_files)
    except Exception:
        sys.stderr.write("Image conversion barfed for %s. Skipping.\n" % bibcode)
        # Remove the temporary directory. This stays best-effort: a failed
        # cleanup is reported but must not hide the conversion failure.
        try:
            shutil.rmtree(xdir)
        except Exception as err:
            sys.stderr.write("Could not remove %s: %s\n" % (xdir, err))
        return
 def test_extract_captions(self):
     '''Test extracting captions'''
     if not self.app.config.get('GRAPHICS_ENABLE_UPDATES', False):
         return True
     from file_ops import untar
     from file_ops import convert_images
     from invenio_tools import extract_captions
     from invenio_tools import prepare_image_data
     from invenio_tools import extract_context
     # Assuming the previous tests succeeded, we know we can extract
     # the images fro the archive
     archive = "%s/tests/stubdata/arXiv/YY/NN.tar.gz" % PROJECT_HOME
     tex, imgs, sdir = untar(archive)
     # Now use the update machinery to convert the images to PNGs
     converted_images = convert_images(imgs)
     # Pick the right TeX file
     tex_file = [f for f in tex if f.split('.')[-1] == 'tex'][0]
     # Extract captions
     TMP = self.app.config.get('GRAPHICS_TMP_DIR')
     im_data = extract_captions(tex_file, TMP, converted_images)
     # Did we get what we expected
     expected = ('', 'noimgDistance to M~51', '')
     self.assertEqual(im_data[0], expected)
     # Check cleaned data
     cleaned = prepare_image_data(im_data, tex_file, converted_images)
     self.assertEqual(os.path.basename(cleaned[-1][0]), 'figure09.png')
     self.assertEqual(cleaned[-1][1], 'figure05.ps')
     self.assertEqual(cleaned[-1][2], '')
     # Check extracted context
     context = extract_context(tex_file, cleaned)
     expected = ('', 'noimgDistance to M~51', '', [])
     self.assertEqual(context[0], expected)
     # Cleanup the extracted data
     extract_dir = "%s/NN" % TMP
     shutil.rmtree(extract_dir)
 def test_convert_images(self):
     '''Test converting images to PNG files'''
     if not self.app.config.get('GRAPHICS_ENABLE_UPDATES', False):
         return True
     from file_ops import untar
     from file_ops import convert_images
     import magic
     # Assuming the previous tests succeeded, we know we can extract
     # the images fro the archive
     archive = "%s/tests/stubdata/arXiv/YY/NN.tar.gz" % PROJECT_HOME
     tex, imgs, sdir = untar(archive)
     # Now use the update machinery to convert the images to PNGs
     remainder, converted_images = convert_images(imgs)
     # No files should have failed to convert
     self.assertEqual([os.path.basename(i) for i in imgs],
                      [os.path.basename(i) for i in remainder])
     # Did we get the expected PNGs?
     imgs_expected = ['figure01.png',
                      'figure02.png',
                      'figure03.png',
                      'figure04.png',
                      'figure05.png',
                      'figure06.png',
                      'figure07.png',
                      'figure08.png',
                      'figure09.png']
     self.assertEqual([os.path.basename(i) for i in converted_images],
                      imgs_expected)
     # And they really are all PNG files
     res = [magic.from_file(i).find('PNG') > -1 for i in converted_images]
     self.assertTrue(False not in res)
Exemple #4
0
 def test_convert_images(self):
     '''Test converting images to PNG files'''
     if not self.app.config.get('GRAPHICS_ENABLE_UPDATES', False):
         return True
     from file_ops import untar
     from file_ops import convert_images
     import magic
     # Assuming the previous tests succeeded, we know we can extract
     # the images fro the archive
     archive = "%s/tests/stubdata/arXiv/YY/NN.tar.gz" % PROJECT_HOME
     tex, imgs, sdir = untar(archive)
     # Now use the update machinery to convert the images to PNGs
     remainder, converted_images = convert_images(imgs)
     # No files should have failed to convert
     self.assertEqual([os.path.basename(i) for i in imgs],
                      [os.path.basename(i) for i in remainder])
     # Did we get the expected PNGs?
     imgs_expected = [
         'figure01.png', 'figure02.png', 'figure03.png', 'figure04.png',
         'figure05.png', 'figure06.png', 'figure07.png', 'figure08.png',
         'figure09.png'
     ]
     self.assertEqual([os.path.basename(i) for i in converted_images],
                      imgs_expected)
     # And they really are all PNG files
     res = [magic.from_file(i).find('PNG') > -1 for i in converted_images]
     self.assertTrue(False not in res)
Exemple #5
0
 def test_extract_captions(self):
     '''Test extracting captions'''
     if not self.app.config.get('GRAPHICS_ENABLE_UPDATES', False):
         return True
     from file_ops import untar
     from file_ops import convert_images
     from invenio_tools import extract_captions
     from invenio_tools import prepare_image_data
     from invenio_tools import extract_context
     # Assuming the previous tests succeeded, we know we can extract
     # the images fro the archive
     archive = "%s/tests/stubdata/arXiv/YY/NN.tar.gz" % PROJECT_HOME
     tex, imgs, sdir = untar(archive)
     # Now use the update machinery to convert the images to PNGs
     converted_images = convert_images(imgs)
     # Pick the right TeX file
     tex_file = [f for f in tex if f.split('.')[-1] == 'tex'][0]
     # Extract captions
     TMP = self.app.config.get('GRAPHICS_TMP_DIR')
     im_data = extract_captions(tex_file, TMP, converted_images)
     # Did we get what we expected
     expected = ('', 'noimgDistance to M~51', '')
     self.assertEqual(im_data[0], expected)
     # Check cleaned data
     cleaned = prepare_image_data(im_data, tex_file, converted_images)
     self.assertEqual(os.path.basename(cleaned[-1][0]), 'figure09.png')
     self.assertEqual(cleaned[-1][1], 'figure05.ps')
     self.assertEqual(cleaned[-1][2], '')
     # Check extracted context
     context = extract_context(tex_file, cleaned)
     expected = ('', 'noimgDistance to M~51', '', [])
     self.assertEqual(context[0], expected)
     # Cleanup the extracted data
     extract_dir = "%s/NN" % TMP
     shutil.rmtree(extract_dir)
def manage_arXiv_graphics(ft_file,
                          bibcode,
                          arx_id,
                          category,
                          update=False,
                          dryrun=False):
    """Unpack an arXiv fulltext archive and convert the images it contains.

    Only the opening stage of the workflow is visible in this excerpt:
    look up the existing record (for updates), unpack the archive and
    convert the image files.

    :param ft_file: archive handed to ``file_ops.untar``
    :param bibcode: bibcode used for the database lookup and in diagnostics
    :param arx_id: not used in the code shown here
    :param category: not used in the code shown here
    :param update: when true, fetch the existing ``GraphicsModel`` row
    :param dryrun: not used in the code shown here
    :return: ``None`` on both early exits shown here (no image files found,
        or image conversion raised)
    """
    # If we're updating, grab the existing database entry
    graphic = None
    if update:
        graphic = db.session.query(GraphicsModel).filter(
            GraphicsModel.bibcode == bibcode).first()
        if not graphic:
            sys.stderr.write(
                'Note: update for %s, but no existing record found!\n' %
                bibcode)
    # First get lists of (La)TeX and image files
    tex_files, img_files, xdir = file_ops.untar(ft_file)
    # If we didn't find any image files, skip
    # NOTE(review): xdir is not removed on this path, unlike the failure
    # path below -- confirm whether untar leaves a directory behind here.
    if not img_files:
        return
    figures = []
    # Next convert the image files
    # All the original images that cannot be converted will be
    # removed from the list of originals
    try:
        img_files, converted_images = file_ops.convert_images(img_files)
    except Exception:
        sys.stderr.write('Image conversion barfed for %s. Skipping.\n' %
                         bibcode)
        # Remove the temporary directory. This stays best-effort: a failed
        # cleanup is reported but must not hide the conversion failure.
        try:
            shutil.rmtree(xdir)
        except Exception as err:
            sys.stderr.write('Could not remove %s: %s\n' % (xdir, err))
        return