Esempio n. 1
0
 def _resized_image(self, file):
     '''Return a version of "file" whose dimensions fit self._max_dimensions.

     The candidate output name is "file" itself when its name already
     contains '-reduced' (past position 0), otherwise the base name of
     "file" with '-reduced' inserted before the extension.  If that
     candidate already exists, is readable, and is strictly smaller than
     both maximum dimensions, it is reused as-is.  Otherwise the image is
     re-dimensioned via reduced_image_dimensions().

     Returns the path of the usable image, or None if resizing failed (in
     which case the error is reported through self._say).
     '''
     (width_limit, height_limit) = self._max_dimensions
     extension = filename_extension(file)
     say = self._say

     already_reduced = file.find('-reduced') > 0
     target = file if already_reduced else (
         filename_basename(file) + '-reduced' + extension)

     if path.exists(target) and readable(target):
         (width, height) = image_dimensions(target)
         if width < width_limit and height < height_limit:
             say.info('Using reduced image found in {}'.format(
                 relative(target)))
             return target
         # A "-reduced" file exists (perhaps from an earlier run), but it is
         # too big for the current limits, so fall through and redo it.
         if __debug__:
             log('existing resized file larger than {}x{}: {}',
                 width_limit, height_limit, target)

     say.info('Dimensions too large; reducing dimensions: {}'.format(
         relative(file)))
     (resized, error) = reduced_image_dimensions(file, target, width_limit,
                                                 height_limit)
     if not error:
         return resized
     say.error('Failed to re-dimension {}: {}'.format(
         relative(file), error))
     return None
Esempio n. 2
0
 def _smaller_file(self, file):
     '''Return a version of "file" whose size is below self._max_size.

     A false "file" value yields None immediately.  The candidate output
     name is "file" itself when its name already contains '-reduced' (past
     position 0), otherwise the base name with '-reduced' inserted before
     the extension.  An existing candidate whose image_size() is strictly
     below the limit is reused; otherwise reduced_image_size() is called.

     Returns the path of the usable image, or None if resizing failed (in
     which case the error is reported through self._say).
     '''
     if not file:
         return None

     say = self._say
     extension = filename_extension(file)
     already_reduced = file.find('-reduced') > 0
     target = file if already_reduced else (
         filename_basename(file) + '-reduced' + extension)

     if path.exists(target):
         if image_size(target) < self._max_size:
             say.info('Reusing resized image found in {}'.format(
                 relative(target)))
             return target
         # A "-reduced" file exists (perhaps from an earlier run), but it
         # exceeds the size allowed for the current set of services.
         if __debug__:
             log('existing resized file larger than {}b: {}',
                 humanize.intcomma(self._max_size), target)

     say.info('Size too large; reducing size: {}'.format(relative(file)))
     (resized, error) = reduced_image_size(file, target, self._max_size)
     if not error:
         return resized
     say.error('Failed to resize {}: {}'.format(relative(file), error))
     return None
Esempio n. 3
0
def file_after_converting(file, to_format, tool, spinner):
    '''Return the path of "file" converted to "to_format", or None.

    The expected output name is the base name of "file" plus '.' and
    "to_format".  If a file by that name already exists it is reused;
    otherwise converted_image() is invoked.  Progress and failures are
    reported through "spinner".  The "tool" argument is not used here.
    '''
    target = filename_basename(file) + '.' + to_format

    # Reuse the output of a previous conversion when there is one.
    if path.exists(target):
        spinner.update('Using converted image found in {}'.format(
            relative(target)))
        return target

    spinner.update('Converting to {} format: {}'.format(
        to_format, relative(file)))
    (converted, error) = converted_image(file, to_format)
    if converted:
        return converted
    spinner.fail('Failed to convert {}: {}'.format(
        relative(file), error))
    return None
Esempio n. 4
0
def file_after_resizing(file, tool, spinner):
    '''Return the path of a reduced-size version of "file", or None.

    If a previously reduced copy of "file" already exists it is reused;
    otherwise reduced_image() is called with the maximum dimensions reported
    by tool.max_dimensions().  Progress and failures are reported through
    "spinner".  None is returned when resizing does not produce a file.
    '''
    file_ext = filename_extension(file)
    # NOTE(review): this inserts a '.' before the extension; if
    # filename_extension() already returns the leading dot, the resulting
    # name will contain '..' -- confirm against the helper's definition.
    new_file = filename_basename(file) + '-reduced.' + file_ext
    if path.exists(new_file):
        spinner.update('Using reduced image found in {}'.format(
            relative(new_file)))
        return new_file

    spinner.update('Original image too large; reducing size')
    (resized, error) = reduced_image(file, tool.max_dimensions())
    if not resized:
        # The file name and the error are separate format() arguments; the
        # error must not be passed to relative().
        spinner.fail('Failed to resize {}: {}'.format(
            relative(file), error))
        return None
    return resized
Esempio n. 5
0
 def _converted_file(self, file, to_format, dest_dir):
     '''Return the path of "file" converted to "to_format" inside "dest_dir".

     The output path is "dest_dir" joined with the base name of "file" plus
     '.' and "to_format".  An existing file at that path is reused;
     otherwise converted_image() is asked to write it.  Returns None when
     the conversion reports an error (the error is shown via self._say).
     '''
     say = self._say
     stem = path.basename(filename_basename(file))
     target = path.join(dest_dir, stem + '.' + to_format)

     # Reuse the output of a previous conversion when there is one.
     if path.exists(target):
         say.info('Using already converted image in {}'.format(
             relative(target)))
         return target

     say.info('Converting to {} format: {}'.format(
         to_format, relative(file)))
     (converted, error) = converted_image(file, to_format, target)
     if not error:
         return converted
     say.error('Failed to convert {}: {}'.format(
         relative(file), error))
     return None
Esempio n. 6
0
def converted_image(orig_file, to_format, dest_file=None):
    '''Convert the image in "orig_file" to the format "to_format".

    Returns a tuple of (new_file, error).  The value of 'error' will be None
    if no error occurred; otherwise, the value will be a string summarizing the
    error that occurred and 'new_file' will be set to None.

    If "dest_file" is None, the output path is built from
    filename_basename(orig_file) plus '.' and the canonical name of the
    destination format.  For PDF input only the first page is rendered.
    Exceptions raised by the PDF library are not caught here; exceptions
    raised while converting other formats are returned as the error string.
    '''
    dest_format = canonical_format_name(to_format)
    if dest_file is None:
        dest_file = filename_basename(orig_file) + '.' + dest_format
    # PIL is unable to read PDF files, so in that particular case, we have to
    # convert it using another tool.
    if filename_extension(orig_file) == '.pdf':
        import fitz
        doc = fitz.open(orig_file)
        try:
            if len(doc) < 1:
                if __debug__:
                    log('fitz says there is no image image in {}', orig_file)
                return (None,
                        'Cannot find an image inside {}'.format(orig_file))
            if len(doc) >= 2:
                if __debug__:
                    log('{} has > 1 images; using only 1st', orig_file)
            # FIXME: if there's more than 1 image, we could extract the rest.
            # Doing so will require some architectural changes first.
            if __debug__: log('extracting 1st image from {}', orig_file)
            page = doc[0]
            pix = page.getPixmap(alpha=False)
            if __debug__: log('writing {}', dest_file)
            pix.writeImage(dest_file, dest_format)
            return (dest_file, None)
        finally:
            # Release the document whether or not the extraction succeeded.
            doc.close()
    else:
        # When converting images, PIL may issue a DecompressionBombWarning but
        # it's not a concern in our application.  Ignore it.
        with warnings.catch_warnings():
            warnings.simplefilter('ignore')
            try:
                with Image.open(orig_file) as im:
                    if __debug__: log('converting {} to RGB', orig_file)
                    # convert() returns a new image rather than modifying
                    # "im" in place, so the result must be kept.
                    rgb = im.convert('RGB')
                # The source file is closed at this point and the pixel data
                # lives in "rgb", so writing is safe even when dest_file is
                # the same path as orig_file.
                if __debug__: log('saving converted image to {}', dest_file)
                rgb.save(dest_file, dest_format)
                return (dest_file, None)
            except Exception as ex:
                return (None, str(ex))
Esempio n. 7
0
    def targets_from_arguments(self, files, from_file):
        '''Return the list of targets (file paths and URLs) to process.

        If "from_file" is given, targets are read from it, one per line, and
        "files" is ignored.  Otherwise each item of "files" is examined: URLs
        and regular files with an extension in ACCEPTED_FORMATS are taken
        directly, directories are searched for files with accepted
        extensions, and anything else produces a warning via self._say.

        Names containing '-reduced' or 'all-results' are dropped, and when
        both a file in the _OUTPUT_EXT format and the same base name in
        another format are present, only the _OUTPUT_EXT one is kept.
        '''
        targets = []
        if from_file:
            if __debug__: log('Opening {}', from_file)
            with open(from_file) as f:
                lines = [line.rstrip('\n') for line in f]
            if __debug__:
                log('Read {} lines from {}.', len(lines), from_file)
            # A blank line cannot name a file, directory or URL; skip it
            # rather than passing an empty target downstream.
            targets = [line for line in lines if line]
        else:
            for item in files:
                if is_url(item):
                    targets.append(item)
                elif path.isfile(item) and filename_extension(
                        item) in ACCEPTED_FORMATS:
                    targets.append(item)
                elif path.isdir(item):
                    # It's a directory, so look for files within.
                    # Ignore files that appear to be the previous output of Handprint.
                    # (These are files that end in, e.g., ".google.png")
                    handprint_endings = [
                        '.' + x + _OUTPUT_EXT for x in services_list()
                    ]
                    # Use a separate name so the "files" argument being
                    # iterated over is not rebound inside its own loop.
                    found = files_in_directory(item,
                                               extensions=ACCEPTED_FORMATS)
                    found = filter_by_extensions(found, handprint_endings)
                    targets += found
                else:
                    self._say.warn('"{}" not a file or directory'.format(item))

        # Filter files we created in past runs.
        targets = [x for x in targets
                   if '-reduced' not in x and 'all-results' not in x]

        # If there is both a file in the format we generate and another
        # format of that file, ignore the other formats and just use ours.
        # A set makes each membership test constant-time.
        present = set(targets)
        keep = []
        for item in targets:
            ext = filename_extension(item)
            base = filename_basename(item)
            if ext != _OUTPUT_EXT and (base + _OUTPUT_EXT in present):
                # png version of file is also present => skip this other version
                continue
            keep.append(item)
        return keep
Esempio n. 8
0
    def targets_from_arguments(self, files, from_file):
        '''Return the list of targets (file paths and URLs) to process.

        If "from_file" is given, targets are the non-empty lines of that
        file and "files" is ignored.  Otherwise each item of "files" is
        examined: URLs and regular files with an extension in
        ACCEPTED_FORMATS are taken directly, directories are searched for
        files with accepted extensions, and anything else produces a warning.

        Names containing '.handprint' are dropped, and when both a file in
        the _OUTPUT_EXT format and the same base name in another format are
        present, only the _OUTPUT_EXT one is kept.
        '''
        targets = []
        if from_file:
            if __debug__: log('reading {}', from_file)
            # Use a context manager so the file is closed promptly instead
            # of being left for the garbage collector.
            with open(from_file) as f:
                targets = [line for line in f.read().splitlines() if line]
        else:
            for item in files:
                if is_url(item):
                    targets.append(item)
                elif path.isfile(item) and filename_extension(
                        item) in ACCEPTED_FORMATS:
                    targets.append(item)
                elif path.isdir(item):
                    # It's a directory, so look for files within.
                    targets += files_in_directory(item,
                                                  extensions=ACCEPTED_FORMATS)
                else:
                    warn('"{}" not a file or directory', item)

        # Filter files created in past runs.
        targets = [name for name in targets if '.handprint' not in name]

        # If there is both a file in the format we generate and another
        # format of that file, ignore the other formats and just use ours.
        # A set makes each membership test constant-time.
        present = set(targets)
        keep = []
        for item in targets:
            ext = filename_extension(item)
            base = filename_basename(item)
            if ext != _OUTPUT_EXT and (base + _OUTPUT_EXT in present):
                # png version of file is also present => skip this other version
                continue
            keep.append(item)
        return keep
Esempio n. 9
0
    def run_services(self, item, index, base_name):
        '''Process one input "item" with every service in self._services.

        The item is first obtained via self._get() using "base_name" and
        "index", then normalized via self._normalized(), and finally handed
        to self._send() once per service (serially when self._num_threads is
        1, otherwise through a thread pool).  Optionally a grid image of all
        results is created.  Returns None in all cases; problems that cause
        the item to be skipped are reported through self._say.  Exceptions
        are reported and then re-raised.
        '''
        say = self._say
        services = self._services
        output_dir = self._output_dir

        try:
            label = styled(item, 'white') if say.use_color() else item
            say.info('Starting on {}'.format(label))

            (file, orig_fmt) = self._get(item, base_name, index)
            if not file:
                return

            # Results go next to the input unless an output dir was given.
            dest_dir = output_dir or path.dirname(file)
            if not writable(dest_dir):
                say.error('Cannot write output in {}.'.format(dest_dir))
                return

            # Sanity check: there is nothing to do for an empty file.
            if path.getsize(file) <= 0:
                say.warn('Skipping zero-length file {}'.format(relative(file)))
                return

            # The grid file name must be derived now, from the original
            # file, because "file" is replaced by the normalized one below.
            stem = path.basename(filename_basename(file))
            grid_file = path.realpath(
                path.join(dest_dir, stem + '.all-results.png'))

            # Bring the input to the lowest common denominator.
            (normalized, intermediates) = self._normalized(file, orig_fmt,
                                                           dest_dir)
            if not normalized:
                say.warn('Skipping {}'.format(relative(file)))
                return
            file = normalized

            # Temporary files we create are candidates for later removal.
            to_delete = set(intermediates) if intermediates else set()

            # With a single thread, bypass the pool entirely; this makes
            # debugging easier.
            if self._num_threads == 1:
                outputs = [self._send(file, svc, dest_dir)
                           for svc in services]
            else:
                with ThreadPoolExecutor(
                        max_workers=self._num_threads) as pool:
                    outputs = list(
                        pool.map(self._send, repeat(file), iter(services),
                                 repeat(dest_dir)))

            # A service that failed (e.g., because of a network glitch)
            # gives back None; drop those and continue with the rest.
            outputs = [out for out in outputs if out is not None]
            to_delete.update(outputs)

            if self._make_grid:
                say.info('Creating results grid image: {}'.format(
                    relative(grid_file)))
                create_image_grid(outputs, grid_file, max_horizontal=2)

                # Clean up unless the individual results were asked for.
                if not self._extended_results:
                    for image_file in to_delete:
                        delete_existing(image_file)

            say.info('Done with {}'.format(relative(item)))
        except (KeyboardInterrupt, UserCancelled):
            say.warn('Interrupted')
            raise
        except Exception:
            say.error('Stopping due to a problem')
            raise