def test_converted_image():
    '''Test that converted_image() produces a TIFF file without error.

    Expects converted_image() to return a (path, error) tuple where the
    path is a string and the error is None on success.
    '''
    # mkstemp() returns an OPEN file descriptor as its first element.
    # The original code discarded it ("_"), leaking one descriptor per
    # call; close it immediately since only the pathname is needed.
    fd, tmpfile = tempfile.mkstemp(dir='/tmp', suffix='.tiff')
    os.close(fd)
    thisdir = path.dirname(os.path.abspath(__file__))
    f1_file = path.join(thisdir, 'data', 'fragments', 'f1.png')
    (a, b) = converted_image(f1_file, 'tif', tmpfile)
    assert isinstance(a, str)
    assert b is None
    delete_existing(tmpfile)
def test_reduced_image_dimensions():
    '''Test that reduced_image_dimensions() resizes to fit a bounding box.

    The 100x100 request yields (100, 31), so the helper evidently
    preserves the source image's aspect ratio rather than stretching.
    '''
    # mkstemp() returns an OPEN file descriptor as its first element.
    # The original code discarded it ("_"), leaking one descriptor per
    # call; close it immediately since only the pathname is needed.
    fd, tmpfile = tempfile.mkstemp(dir='/tmp', suffix='.png')
    os.close(fd)
    thisdir = path.dirname(os.path.abspath(__file__))
    f1_file = path.join(thisdir, 'data', 'fragments', 'f1.png')
    (a, b) = reduced_image_dimensions(f1_file, tmpfile, 100, 100)
    assert isinstance(a, str)
    assert b is None
    assert image_dimensions(tmpfile) == (100, 31)
    delete_existing(tmpfile)
def _save_article_pmc(self, dest_dir, article, xml, zip_articles):
    '''Download a PMC article's PDF, JATS XML, and image into dest_dir.

    Downloads each component in sequence; a failed download or
    validation sets article.status to a 'failed-*' value but does NOT
    abort the remaining steps.  If zip_articles is true and no step
    failed, the collected files are zipped, verified, and the loose
    copies deleted.  NOTE(review): the "xml" parameter is never used in
    this body — confirm whether it can be dropped at the call sites.
    '''
    inform('Writing ' + article.doi)
    # Files that should end up in the per-article ZIP archive.
    to_archive = []
    pdf_file = pmc_pdf_filename(article, dest_dir)
    if __debug__: log(f'downloading PDF to {pdf_file}')
    if not download_file(article.pdf, pdf_file):
        warn(f'Could not download PDF file for {article.doi}')
        article.status = 'failed-pdf-download'
    # Appended unconditionally; on failure the ZIP step below is
    # skipped anyway because status starts with 'failed'.
    to_archive.append(pdf_file)
    jats_file = jats_filename(article, dest_dir)
    if __debug__: log(f'downloading JATS XML to {jats_file}')
    if not download_file(article.jats, jats_file):
        warn(f'Could not download JATS file for {article.doi}')
        article.status = 'failed-jats-download'
    if self.do_validate:
        # DTD validation is optional; self._dtd is presumably loaded
        # elsewhere in this class — not visible here.
        if not valid_xml(jats_file, self._dtd):
            warn(f'Failed to validate JATS for article {article.doi}')
            article.status = 'failed-jats-validation'
    else:
        if __debug__: log(f'skipping DTD validation of {jats_file}')
    to_archive.append(jats_file)
    # We need to store the image with the name that appears in the
    # JATS file. That requires a little extra work to extract.
    image_extension = filename_extension(article.image)
    image_file = image_filename(article, dest_dir, ext=image_extension)
    if article.image:
        if __debug__: log(f'downloading image file to {image_file}')
        if download_file(article.image, image_file):
            with Image.open(image_file) as img:
                # Flatten alpha and force RGB so the TIFF is plain RGB.
                converted_img = image_without_alpha(img)
                converted_img = converted_img.convert('RGB')
                if __debug__: log(f'converting image to TIFF format')
                tiff_file = filename_basename(image_file) + '.tif'
                # Using save() means that only the 1st frame of a
                # multiframe image will be saved.
                converted_img.save(tiff_file, dpi=_TIFF_DPI, compression=None, description=tiff_comments(article))
            to_archive.append(tiff_file)
            # We keep only the uncompressed TIFF version.
            if __debug__: log(f'deleting original image file {image_file}')
            delete_existing(image_file)
        else:
            warn(f'Failed to download image for {article.doi}')
            article.status = 'failed-image-download'
    else:
        if __debug__: log(f'skipping empty image file URL for {article.doi}')
    # Finally, put the files into their own zip archive.
    if zip_articles:
        if not article.status.startswith('failed'):
            zip_file = pmc_zip_filename(article, dest_dir)
            inform(f'Creating ZIP archive file "{zip_file}"')
            archive_files(zip_file, to_archive)
            if __debug__: log(f'verifying ZIP file {zip_file}')
            verify_archive(zip_file, 'zip')
            # Once zipped and verified, the loose files are redundant.
            for file in to_archive:
                if __debug__: log(f'deleting file {file}')
                delete_existing(file)
        else:
            warn(f'ZIP archive for {article.doi} not created due to errors')
def run_services(self, item, index, base_name):
    '''Run all requested services on the image indicated by "item",
    using "index" and "base_name" to construct a download copy of the
    item if it has to be downloaded from a URL first.

    Returns early (with a warning or alert) if the item cannot be
    fetched, the output directory is not writable, normalization fails,
    or every service call came back empty.
    '''
    # Shortcuts to make the code more readable.
    services = self._services

    inform(f'Starting on [white]{item}[/]')
    (item_file, item_fmt) = self._get(item, base_name, index)
    if not item_file:
        return

    dest_dir = self._output_dir if self._output_dir else path.dirname(item_file)
    if not writable(dest_dir):
        alert(f'Cannot write output in {dest_dir}.')
        return

    # Normalize input image to the lowest common denominator.
    image = self._normalized(item, item_fmt, item_file, dest_dir)
    if not image.file:
        warn(f'Skipping {relative(item_file)}')
        return

    # Send the file to the services and get Result tuples back.
    self._senders = []
    if self._num_threads == 1:
        # For 1 thread, avoid thread pool to make debugging easier.
        results = [self._send(image, s) for s in services]
    else:
        # BUG FIX: the executor was previously created without ever
        # being shut down, leaking its worker threads on every call.
        # The "with" block guarantees shutdown (and joins the workers)
        # once all futures have been collected.
        with ThreadPoolExecutor(max_workers=self._num_threads,
                                thread_name_prefix='ServiceThread') as executor:
            for service in services:
                future = executor.submit(self._send, image, service)
                self._senders.append(future)
            results = [future.result() for future in self._senders]

    # If a service failed for some reason (e.g., a network glitch), we
    # get no result back. Remove empty results & go on with the rest.
    results = [x for x in results if x is not None]
    if not results:
        warn(f'Nothing to do for {item}')
        return

    # Create grid file if requested.
    if self._make_grid:
        base = path.basename(filename_basename(item_file))
        grid_file = path.realpath(path.join(dest_dir, base + '.handprint-all.png'))
        inform(f'Creating results grid image: {relative(grid_file)}')
        all_results = [r.annotated for r in results]
        # Arrange the annotated images in a roughly square grid.
        width = math.ceil(math.sqrt(len(all_results)))
        from handprint.images import create_image_grid
        create_image_grid(all_results, grid_file, max_horizontal=width)

    # Clean up after ourselves.
    if not self._extended_results:
        for file in set(image.temp_files | {r.annotated for r in results}):
            if file and path.exists(file):
                delete_existing(file)
    elif image.file != image.item_file:
        # Delete the resized file. While it would help efficiency to
        # reuse it on subsequent runs, the risk is that those runs might
        # target different services and would end up using a different-
        # sized image than if we sized it appropriately for _this_ run.
        delete_existing(image.file)

    inform(f'Done with {relative(item)}')
def _save_article_portico(self, dest_dir, article, xmldict):
    '''Write a Portico article's metadata XML, PDF, JATS, and image.

    Creates the per-article directory (and a "jats" subdirectory when
    the journal uses JATS), writes the unparsed xmldict as XML, then
    downloads the PDF.  If the journal does not use JATS, it stops
    there; otherwise it also downloads (and optionally validates) the
    JATS file and converts the article image to uncompressed TIFF.
    Failures set article.status to a 'failed-*' value but do not abort
    the remaining steps.
    '''
    article_dir = path.join(dest_dir, article.basename)
    jats_dir = path.join(article_dir, 'jats')
    try:
        os.makedirs(article_dir)
        if self.journal.uses_jats:
            os.makedirs(jats_dir)
    except FileExistsError:
        # Directories may already exist from a previous run; reuse them.
        pass
    inform('Writing ' + article.doi)
    xml_file = xml_filename(article, article_dir)
    with open(xml_file, 'w', encoding='utf8') as f:
        if __debug__: log(f'writing XML to {xml_file}')
        f.write(xmltodict.unparse(xmldict, pretty=True))
    pdf_file = pdf_filename(article, article_dir)
    if __debug__: log(f'downloading PDF to {pdf_file}')
    if not download_file(article.pdf, pdf_file):
        warn(f'Could not download PDF file for {article.doi}')
        article.status = 'failed-pdf-download'
    if not self.journal.uses_jats:
        # Nothing more to do.
        return
    jats_file = jats_filename(article, jats_dir)
    if __debug__: log(f'downloading JATS XML to {jats_file}')
    if not download_file(article.jats, jats_file):
        warn(f'Could not download JATS file for {article.doi}')
        article.status = 'failed-jats-download'
    if self.do_validate:
        # DTD validation is optional; self._dtd is presumably loaded
        # elsewhere in this class — not visible here.
        if not valid_xml(jats_file, self._dtd):
            warn(f'Failed to validate JATS for article {article.doi}')
            article.status = 'failed-jats-validation'
    else:
        if __debug__: log(f'skipping DTD validation of {jats_file}')
    # We need to store the image with the name that appears in the
    # JATS file. That requires a little extra work to extract.
    image_extension = filename_extension(article.image)
    image_file = image_filename(article, jats_dir, ext=image_extension)
    if article.image:
        if __debug__: log(f'downloading image file to {image_file}')
        if download_file(article.image, image_file):
            with Image.open(image_file) as img:
                # Flatten alpha and force RGB so the TIFF is plain RGB.
                converted = image_without_alpha(img)
                converted = converted.convert('RGB')
                if __debug__: log(f'converting image to TIFF format')
                tiff_name = filename_basename(image_file) + '.tif'
                comments = tiff_comments(article, self.journal.name)
                # Using save() means only the 1st frame of a multiframe
                # image will be saved.
                converted.save(tiff_name, compression=None, dpi=_TIFF_DPI, description=comments)
            # We keep only the uncompressed TIFF version.
            if __debug__: log(f'deleting original image file {image_file}')
            delete_existing(image_file)
        else:
            warn(f'Failed to download image for {article.doi}')
            article.status = 'failed-image-download'
    else:
        if __debug__: log(f'skipping empty image URL for {article.doi}')