def _resized_image(self, file):
    '''Return the path of a version of "file" that fits the dimension limits.

    The limits are the (max_width, max_height) pair in self._max_dimensions.
    If "file" is itself a "-reduced" file, or a readable "-reduced" sibling
    of it already exists, and that file is within the limits, it is reused
    and its path is returned.  Otherwise reduced_image_dimensions() is asked
    to write a reduced copy, and the value it reports is returned.

    Returns None, after reporting the problem through self._say, if the
    reduction reports an error.
    '''
    (max_width, max_height) = self._max_dimensions
    file_ext = filename_extension(file)
    say = self._say
    if file.find('-reduced') > 0:
        new_file = file
    else:
        new_file = filename_basename(file) + '-reduced' + file_ext
    if path.exists(new_file) and readable(new_file):
        (image_width, image_height) = image_dimensions(new_file)
        # Dimensions equal to the limits are within the limits.  A strict
        # "<" would reject a previously reduced image whose side was scaled
        # to land exactly on a limit, so it would be regenerated every run.
        # NOTE(review): assumes reduced_image_dimensions() can produce sides
        # exactly equal to the limits -- confirm against its implementation.
        if image_width <= max_width and image_height <= max_height:
            say.info('Using reduced image found in {}'.format(
                relative(new_file)))
            return new_file
        else:
            # We found a "-reduced" file, perhaps from a previous run, but
            # for the current set of services, dimension are too large.
            if __debug__: log('existing resized file larger than {}x{}: {}',
                              max_width, max_height, new_file)
    say.info('Dimensions too large; reducing dimensions: {}'.format(
        relative(file)))
    (resized, error) = reduced_image_dimensions(file, new_file,
                                                max_width, max_height)
    if error:
        say.error('Failed to re-dimension {}: {}'.format(
            relative(file), error))
        return None
    return resized
def run(self, services, files, make_grid, compare):
    '''Run service(s) on files.

    Resolves the images to process from "files" (or from the file list
    stored in self._from_file), reports what is about to happen, then hands
    each target in turn to a Manager.  Raises RuntimeError if there is
    nothing to process.
    '''
    # Work out what we are going to operate on before announcing anything.
    targets = self.targets_from_arguments(files, self._from_file)
    if not targets:
        raise RuntimeError('No images to process; quitting.')

    num_services = len(services)
    num_targets = len(targets)
    service_suffix = 's' if num_services > 1 else ''
    image_suffix = 's' if num_targets > 1 else ''
    inform('Will apply {} service{} ({}) to {} image{}.',
           num_services, service_suffix, ', '.join(services),
           num_targets, image_suffix)
    if self._extended:
        inform('Will save extended results.')
    inform('Will use up to {} process threads.', self._threads)

    def separator():
        # Horizontal rule printed around items when there are several.
        inform(styled('━' * 70, 'dark'))

    # Get to work.
    if __debug__: log('initializing manager and starting processes')
    manager = Manager(services, self._threads, self._output_dir,
                      make_grid, compare, self._extended)
    several = num_targets > 1
    base_name = self._base_name
    for position, target in enumerate(targets, start=1):
        if several:
            separator()
        manager.run_services(target, position, base_name)
    # Close off the output of the last item with a final rule.
    if several:
        separator()
def _smaller_file(self, file):
    '''Return the path of a version of "file" no larger than self._max_size.

    Returns None if "file" is empty/None.  If "file" is itself a "-reduced"
    file, or a "-reduced" sibling already exists, and that file is within
    the size limit, it is reused.  Otherwise reduced_image_size() is asked
    to write a smaller copy, and the value it reports is returned.

    Returns None, after reporting the problem through self._say, if the
    reduction reports an error.
    '''
    if not file:
        return None
    say = self._say
    file_ext = filename_extension(file)
    if file.find('-reduced') > 0:
        new_file = file
    else:
        new_file = filename_basename(file) + '-reduced' + file_ext
    if path.exists(new_file):
        # A file of exactly the maximum size is acceptable: this matches
        # reduced_image_size(), which treats "size <= max_size" as already
        # small enough, so the two checks no longer disagree at the boundary.
        if image_size(new_file) <= self._max_size:
            say.info('Reusing resized image found in {}'.format(
                relative(new_file)))
            return new_file
        else:
            # We found a "-reduced" file, perhaps from a previous run, but
            # for the current set of services, it's larger than allowed.
            if __debug__: log('existing resized file larger than {}b: {}',
                              humanize.intcomma(self._max_size), new_file)
    say.info('Size too large; reducing size: {}'.format(relative(file)))
    (resized, error) = reduced_image_size(file, new_file, self._max_size)
    if error:
        say.error('Failed to resize {}: {}'.format(relative(file), error))
        return None
    return resized
def reduced_image(file, max_dimensions):
    '''Resizes the image and writes a new file named "ORIGINAL-reduced.EXT".

    "max_dimensions" is a (width, height) pair.  The image is scaled by the
    smaller of the two ratios max/actual, so the aspect ratio is kept and
    neither side exceeds its limit.

    Returns a tuple of (new_file, error).  The value of 'error' will be None
    if no error occurred; otherwise, the value will be a string summarizing
    the error that occurred and 'new_file' will be set to None.
    '''
    # Other code in this file treats the value of filename_extension() as
    # already carrying its leading period.  Strip it here so the destination
    # name never ends up with a doubled "." ("name-reduced..jpg"), whichever
    # form the helper returns.
    extension = filename_extension(file).lstrip('.')
    dest_file = filename_basename(file) + '-reduced.' + extension
    with warnings.catch_warnings():
        # Catch warnings from image conversion, like DecompressionBombWarning
        warnings.simplefilter('ignore')
        try:
            # The context manager releases the source image's file handle
            # even if resizing or saving fails.
            with Image.open(file) as im:
                dims = im.size
                width_ratio = max_dimensions[0] / dims[0]
                length_ratio = max_dimensions[1] / dims[1]
                ratio = min(width_ratio, length_ratio)
                new_dims = (round(dims[0] * ratio), round(dims[1] * ratio))
                if __debug__: log('Resizing image to {}', new_dims)
                resized = im.resize(new_dims, Image.HAMMING)
                resized.save(dest_file)
            if __debug__: log('Saved converted image to {}', dest_file)
            return (dest_file, None)
        except Exception as ex:
            return (None, str(ex))
def _image_from_file(self, file_path):
    '''Helper function for subclasses to read image files.

    Returns a tuple, (image, error), where "error" is a TRResult with a
    non-empty error field value if an error occurred, and "image" is the
    bytes of the image if it was successfully read.  Exactly one of the two
    elements is None.

    An error result is produced when the file is unreadable, empty, larger
    in bytes than self.max_size(), or (when self.max_dimensions() returns a
    value) wider or taller than those dimensions.
    '''

    def error_result(error_text):
        # Package an error message in the (image, error) shape we return.
        return (None, TRResult(path = file_path, data = {}, text = '',
                               error = error_text, boxes = []))

    if not readable(file_path):
        return error_result('Unable to read file: {}'.format(file_path))
    if __debug__: log('reading image file {} for {}', file_path, self.name())
    # Use a context manager so the file handle is closed promptly instead of
    # being left for the garbage collector.
    with open(file_path, 'rb') as image_file:
        image = image_file.read()
    if len(image) == 0:
        return error_result('Empty file: {}'.format(file_path))
    if len(image) > self.max_size():
        text = 'Exceeds {} byte limit for service: {}'.format(
            self.max_size(), file_path)
        return error_result(text)
    width, height = imagesize.get(file_path)
    if __debug__: log('image size is width = {}, height = {}', width, height)
    if self.max_dimensions():
        max_width, max_height = self.max_dimensions()
        if width > max_width or height > max_height:
            text = 'Image dimensions {}x{} exceed {} limits: {}'.format(
                width, height, self.name(), file_path)
            return error_result(text)
    return (image, None)
def __init__(self, base_name, extended, from_file, output_dir, threads):
    '''Initialize internal state and prepare for running services.

    Raises ServiceFailure if no network connection is detected, and
    RuntimeError if "from_file" is missing or unreadable or if an existing
    "output_dir" is not writable.  If "output_dir" is given but is not an
    existing directory, it is created.
    '''
    if not network_available():
        raise ServiceFailure('No network.')
    if from_file:
        if not path.exists(from_file):
            raise RuntimeError('File not found: {}'.format(from_file))
        if not readable(from_file):
            raise RuntimeError('File not readable: {}'.format(from_file))
    if output_dir:
        if path.isdir(output_dir):
            if not writable(output_dir):
                raise RuntimeError(
                    'Directory not writable: {}'.format(output_dir))
        else:
            # makedirs() also creates missing parent directories, so a
            # nested output path works; mkdir() would fail in that case.
            os.makedirs(output_dir)
            if __debug__: log('created output_dir directory {}', output_dir)
    self._base_name = base_name
    self._extended = extended
    self._from_file = from_file
    self._output_dir = output_dir
    self._threads = threads
def init_credentials(self, credentials_dir = None):
    '''Initializes the credentials to use for accessing this service.

    Stores the value of MicrosoftCredentials(credentials_dir).creds() in
    self._credentials.  Any exception raised while doing so is re-raised as
    AuthenticationFailure carrying the original message.
    '''
    if __debug__: log('Getting credentials from {}', credentials_dir)
    try:
        self._credentials = MicrosoftCredentials(credentials_dir).creds()
    except Exception as ex:
        # Chain explicitly so the traceback shows the underlying failure as
        # the direct cause of the authentication error.
        raise AuthenticationFailure(str(ex)) from ex
def targets_from_arguments(images, from_file, given_urls, say):
    '''Return the list of targets (files or URLs) to process.

    If "from_file" is given, targets are the lines of that file, passed
    through filter_urls() unless "given_urls" is true.  Otherwise, if
    "given_urls" is true, "images" is returned unchanged.  Otherwise
    "images" is treated as files and directories: files with an accepted
    format and the accepted-format contents of directories are collected,
    and anything else produces a warning via "say".
    '''
    if from_file:
        if __debug__: log('Opening {}', from_file)
        with open(from_file) as stream:
            raw_lines = stream.readlines()
        listed = [entry.rstrip('\n') for entry in raw_lines]
        if __debug__: log('Read {} lines from {}.', len(listed), from_file)
        return listed if given_urls else filter_urls(listed, say)

    if given_urls:
        # We assume that the arguments are URLs and take them as-is.
        return images

    # We were given files and/or directories.  Look for image files.
    # Ignore files that appear to be the previous output of Handprint.
    # These are files that end in, e.g., ".google.jpg"
    handprint_endings = ['.' + x + '.jpg' for x in KNOWN_METHODS.keys()]
    candidates = filter_endings(filter_urls(images, say), handprint_endings)
    found = []
    for candidate in candidates:
        is_image_file = (path.isfile(candidate)
                         and filename_extension(candidate) in ACCEPTED_FORMATS)
        if is_image_file:
            found.append(candidate)
        elif path.isdir(candidate):
            contents = files_in_directory(candidate,
                                          extensions=ACCEPTED_FORMATS)
            found += filter_endings(contents, handprint_endings)
        else:
            say.warn('"{}" not a file or directory'.format(candidate))
    return found
def _save_output(self, result, file):
    '''Write "result" to the path "file".

    "result" may be None (a warning is issued and nothing is written), a
    str (written in text mode), an io.BytesIO (copied in binary mode), or a
    2-tuple (data, error).  For a tuple with a true "error", the error is
    reported and nothing is written; otherwise "data" is written following
    the same rules.  Any other type raises InternalError.
    '''
    say = self._say

    # Nothing to write at all.
    if result is None:
        say.warn('No data for {}'.format(file))
        return

    payload = result
    if isinstance(result, tuple):
        # Assumes 2 elements: data, and error
        (payload, problem) = result
        if problem:
            say.error('Error: {}'.format(problem))
            say.warn('Unable to write {}'.format(file))
            return

    if __debug__: log('writing output to file {}', relative(file))
    if isinstance(payload, str):
        with open(file, 'w') as out:
            out.write(payload)
        return
    if isinstance(payload, io.BytesIO):
        with open(file, 'wb') as out:
            shutil.copyfileobj(payload, out)
        return
    # There's no other type in the code, so if we get here ...
    raise InternalError(
        'Unexpected data in save_output() -- please report this.')
def init_credentials(self):
    '''Initializes the credentials to use for accessing this service.

    Constructs a GoogleCredentials object; its result is not stored here.
    Any exception raised by that constructor is re-raised as AuthFailure
    carrying the original message.
    '''
    # Log outside the "try" so that only a failure of the credentials setup
    # itself is reported as an authentication failure.
    if __debug__: log('initializing credentials')
    try:
        GoogleCredentials()
    except Exception as ex:
        # Chain explicitly so the underlying cause stays in the traceback.
        raise AuthFailure(str(ex)) from ex
def rename(f):
    '''Rename file "f" to "f.bak" on a best-effort basis.

    Returns None whether or not the rename succeeds: failures are
    deliberately not propagated to the caller.
    '''
    backup = f + '.bak'
    # If we fail, we just give up instead of throwing an exception.
    try:
        if __debug__: log('Renaming existing file {}', f)
        os.rename(f, backup)
    except Exception as ex:
        # Catch Exception rather than using a bare "except:" so that
        # KeyboardInterrupt and SystemExit still propagate, and record why
        # the rename failed instead of discarding the reason.
        if __debug__: log('unable to rename {} to {}: {}', f, backup, ex)
        return
def copy_file(src, dst):
    '''Copy a file from "src" to "dst".

    First tries shutil.copy2(); if that raises, falls back to shutil.copy().
    An exception from the fallback propagates to the caller.
    '''
    if __debug__: log('copying file {} to {}', src, dst)
    try:
        shutil.copy2(src, dst, follow_symlinks=True)
    except Exception:
        # Not a bare "except:": a KeyboardInterrupt or SystemExit during the
        # first attempt must stop the program, not trigger a second copy.
        if __debug__: log('shutils.copy2() failed; trying copy()')
        shutil.copy(src, dst, follow_symlinks=True)
def save_credentials(self, service, supplied_file):
    '''Install "supplied_file" as the credentials file for "service".

    The file is copied into Credentials.creds_dir under the name given by
    credentials_filename(service).  The directory is created first if it
    does not already exist.
    '''
    creds_dir = Credentials.creds_dir
    directory_missing = not path.isdir(creds_dir)
    if directory_missing:
        if __debug__: log('creating credentials dir: {}.', creds_dir)
        make_dir(creds_dir)
    target_name = credentials_filename(service)
    copy_file(supplied_file, path.join(creds_dir, target_name))
def network_available():
    '''Return True if it appears we have a network connection, False if not.

    The check is an HTTP GET of https://www.caltech.edu.  Any failure that
    Requests reports for that request (connection error, timeout, ...) is
    taken to mean the network is unavailable.
    '''
    try:
        # Always give a timeout: without one a stalled connection would
        # block this probe indefinitely.
        requests.get("https://www.caltech.edu", timeout=10)
        return True
    except requests.RequestException:
        # RequestException is the base class of ConnectionError and Timeout,
        # so a timed-out probe returns False instead of raising.
        if __debug__: log('Could not connect to https://www.caltech.edu')
        return False
def make_dir(dir_path):
    '''Creates directory 'dir_path' (including intermediate directories).

    Does nothing beyond a debug log message if 'dir_path' is already a
    directory.  Exceptions from the creation attempt are not caught.
    '''
    already_there = path.isdir(dir_path)
    if already_there:
        if __debug__: log('reusing existing directory {}', dir_path)
        return
    if __debug__: log('creating directory {}', dir_path)
    # If this gets an exception, let it bubble up to caller.
    os.makedirs(dir_path)
def init_credentials(self, credentials_dir = None):
    '''Initializes the credentials to use for accessing this service.

    Constructs a GoogleCredentials object for "credentials_dir"; the object
    itself is not kept.  Any exception raised while doing so is re-raised
    as AuthenticationFailure carrying the original message.
    '''
    # Haven't been able to get this to work by reading the credentials:
    # self.credentials = GoogleCredentials(credentials_dir).creds()
    if __debug__: log('Getting credentials from {}', credentials_dir)
    try:
        GoogleCredentials(credentials_dir)
    except Exception as ex:
        # Chain explicitly so the traceback shows the underlying failure as
        # the direct cause of the authentication error.
        raise AuthenticationFailure(str(ex)) from ex
def convert_image(file, from_format, to_format):
    '''Returns a tuple of (success, output file, error message).

    Writes a copy of image "file" in format "to_format", named after the
    original with its extension replaced by "to_format".  On success the
    tuple is (True, dest_file, ''); on failure it is (False, None, message).
    The "from_format" argument is accepted but not used.
    '''
    dest_file = filename_basename(file) + '.' + to_format
    try:
        # The context manager releases the source image's file handle even
        # if the save fails.
        with Image.open(file) as im:
            im.save(dest_file, to_format)
        if __debug__: log('Saved converted image to {}', dest_file)
        return (True, dest_file, '')
    except Exception as err:
        return (False, None, str(err))
def open_file(file):
    '''Open document with default application in Python.

    Uses "open" on macOS, os.startfile() on Windows, and "xdg-open" on other
    POSIX systems.  Does nothing on any other platform.
    '''
    # Code originally from https://stackoverflow.com/a/435669/743730
    if __debug__: log('opening file {}', file)
    if sys.platform.startswith('darwin'):
        launcher = 'open'
    elif os.name == 'nt':
        # Windows has a direct API for this; no external command needed.
        os.startfile(file)
        return
    elif os.name == 'posix':
        launcher = 'xdg-open'
    else:
        return
    subprocess.call((launcher, file))
def network_available():
    '''Return True if it appears we have a network connection, False if not.

    The check is an attempt to open http://www.google.com.  Any exception
    raised by that attempt is taken to mean the network is unavailable.
    '''
    url = "http://www.google.com"
    try:
        # The "with" closes the response on the way out.  (Cleanup placed
        # after the return statements would never run.)  The timeout keeps
        # a stalled connection from blocking this probe indefinitely.
        with urllib.request.urlopen(url, timeout=10):
            return True
    except Exception:
        # Report the URL actually probed.
        if __debug__: log('could not connect to {}', url)
        return False
def reduced_image_size(orig_file, dest_file, max_size):
    '''Write a scaled-down copy of image "orig_file" to "dest_file".

    If image_size(orig_file) is already no larger than "max_size", nothing
    is written and (orig_file, None) is returned.  Otherwise both image
    dimensions are multiplied by max_size / current size and the result is
    saved to "dest_file".

    Returns a tuple of (new_file, error).  'error' is None on success;
    otherwise it is a string describing the exception that occurred and
    'new_file' is None.
    '''
    with warnings.catch_warnings():
        # Catch warnings from image conversion, like DecompressionBombWarning
        warnings.simplefilter('ignore')
        try:
            current_size = image_size(orig_file)
            if current_size <= max_size:
                if __debug__: log('file already smaller than requested: {}',
                                  orig_file)
                return (orig_file, None)

            scale = max_size / current_size
            if __debug__: log('resize ratio = {}', scale)
            source = Image.open(orig_file)
            (width, height) = source.size
            target_dims = (round(width * scale), round(height * scale))
            if __debug__: log('resizing image to {}', target_dims)
            shrunk = source.resize(target_dims, Image.HAMMING)
            if __debug__: log('saving resized image to {}', dest_file)
            overwriting_source = (orig_file == dest_file)
            if overwriting_source:
                # Rewind the source image before its file is overwritten.
                source.seek(0)
            shrunk.save(dest_file)
            return (dest_file, None)
        except Exception as ex:
            return (None, str(ex))
def net(get_or_post, url, polling=False, **kwargs):
    '''Gets or posts the 'url' with optional keyword arguments provided.

    'get_or_post' selects the HTTP method: the string 'get' performs a GET
    and any other value performs a POST.  The keyword arguments are passed
    through to the Requests call.

    Returns a tuple of (response, exception), where the first element is
    the response from the get or post http call, and the second element is
    an exception object if an exception occurred.  If no exception occurred,
    the second element will be None.  This allows the caller to inspect the
    response even in cases where an error status was returned.  If the
    request itself failed, so that no response was received, the first
    element is None.

    If keyword 'polling' is True, certain statuses like 404 are ignored and
    the response is returned; otherwise, they are considered errors.
    '''
    # Bind the response name before the request is attempted.  If the call
    # below raises, the handlers still have a value (None) to return rather
    # than failing on an unbound local variable.
    req = None
    try:
        if __debug__: log('HTTP {} {}', get_or_post, url)
        http_method = requests.get if get_or_post == 'get' else requests.post
        req = http_method(url, **kwargs)
    except requests.exceptions.ConnectionError as ex:
        if ex.args and isinstance(ex.args[0], urllib3.exceptions.MaxRetryError):
            return (req, NetworkFailure('Unable to resolve destination host'))
        else:
            return (req, NetworkFailure(str(ex)))
    except requests.exceptions.InvalidSchema as ex:
        return (req, NetworkFailure('Unsupported network protocol'))
    except Exception as ex:
        return (req, ex)

    # Interpret the response.
    code = req.status_code
    error = None
    if code in [404, 410] and not polling:
        error = NetworkFailure("No content found at this location")
    elif code in [401, 402, 403, 407, 451, 511]:
        error = AuthenticationFailure(
            "Access is forbidden or requires authentication")
    elif code in [405, 406, 409, 411, 412, 414, 417, 428, 431, 505, 510]:
        error = ServiceFailure(
            "Server sent {} -- please report this".format(code))
    elif code in [415, 416]:
        error = ServiceFailure("Server rejected the request")
    elif code == 429:
        error = RateLimitExceeded(
            "Server blocking further requests due to rate limits")
    elif code == 503:
        error = ServiceFailure("Server is unavailable -- try again later")
    elif code in [500, 501, 502, 506, 507, 508]:
        error = ServiceFailure("Internal server error")
    elif not (200 <= code < 400):
        error = NetworkFailure("Unable to resolve URL")
    return (req, error)
def __init__(self):
    '''Locate the Google credentials file and publish its location.

    Raises AuthFailure if the file does not exist or is not readable.
    Otherwise sets the GOOGLE_APPLICATION_CREDENTIALS environment variable
    to the path of the file.
    '''
    creds_name = credentials_filename('google')
    creds_path = path.join(self.credentials_dir(), creds_name)
    if __debug__: log('credentials file for google is {}', creds_path)
    if not path.exists(creds_path):
        raise AuthFailure('Credentials for Google have not been installed')
    if not readable(creds_path):
        raise AuthFailure(
            'Google credentials file unreadable: {}'.format(creds_path))
    # Reading the file and keeping its parsed JSON contents has not worked
    # so far; only the environment variable approach has been working.
    os.environ['GOOGLE_APPLICATION_CREDENTIALS'] = creds_path
def __init__(self):
    '''Load the Amazon credentials file into self.credentials.

    The file is expected to contain JSON; its parsed content is stored
    as-is.  Raises AuthFailure if the file does not exist, is not readable,
    or cannot be opened and parsed.
    '''
    cfile = path.join(self.credentials_dir(), credentials_filename('amazon'))
    if __debug__: log('credentials file for amazon is {}', cfile)
    if not path.exists(cfile):
        raise AuthFailure('Credentials for Amazon have not been installed')
    elif not readable(cfile):
        raise AuthFailure(
            'Amazon credentials file unreadable: {}'.format(cfile))
    try:
        with open(cfile, 'r') as file:
            self.credentials = json.load(file)
    except Exception as ex:
        # The file being parsed is the credentials file, so say so in the
        # message, and chain the cause to keep it in the traceback.
        raise AuthFailure(
            'Unable to parse Amazon credentials file: {}'.format(
                str(ex))) from ex
def converted_image(file, to_format):
    '''Converts image "file" to "to_format" and writes it as a new file.

    The new file is named after the original with its extension replaced
    by "to_format".

    Returns a tuple of (new_file, error).  The value of 'error' will be None
    if no error occurred; otherwise, the value will be a string summarizing
    the error that occurred and 'new_file' will be set to None.
    '''
    dest_file = filename_basename(file) + '.' + to_format
    # When converting images, PIL may issue a DecompressionBombWarning but
    # it's not a concern in our application.  Ignore it.
    with warnings.catch_warnings():
        warnings.simplefilter('ignore')
        try:
            # The context manager releases the source image's file handle
            # even if the save fails.
            with Image.open(file) as im:
                im.save(dest_file, canonical_format_name(to_format))
            if __debug__: log('Saved converted image to {}', dest_file)
            return (dest_file, None)
        except Exception as ex:
            return (None, str(ex))
def targets_from_arguments(self, files, from_file):
    '''Return the list of targets (files or URLs) to process.

    If "from_file" is given, the candidate targets are the lines of that
    file.  Otherwise each item of "files" is examined: URLs and files with
    an accepted format are taken directly, directories are searched for
    accepted-format files, and anything else produces a warning.

    The candidates are then filtered: names containing "-reduced" or
    "all-results" are dropped, and a file in some other format is dropped
    when a file with the same base name in our output format is also among
    the candidates.
    '''
    targets = []
    if from_file:
        if __debug__: log('Opening {}', from_file)
        with open(from_file) as f:
            targets = f.readlines()
        targets = [line.rstrip('\n') for line in targets]
        if __debug__: log('Read {} lines from {}.', len(targets), from_file)
    else:
        for item in files:
            if is_url(item):
                targets.append(item)
            elif path.isfile(item) and filename_extension(
                    item) in ACCEPTED_FORMATS:
                targets.append(item)
            elif path.isdir(item):
                # It's a directory, so look for files within.
                # Ignore files that appear to be the previous output of
                # Handprint.  (These are files that end in, e.g.,
                # ".google.png")
                handprint_endings = [
                    '.' + x + _OUTPUT_EXT for x in services_list()
                ]
                # Use a separate name for the directory contents so the
                # parameter being iterated over is not rebound mid-loop.
                found = files_in_directory(item, extensions=ACCEPTED_FORMATS)
                targets += filter_by_extensions(found, handprint_endings)
            else:
                self._say.warn('"{}" not a file or directory'.format(item))

    # Filter files we created in past runs.
    targets = [x for x in targets
               if '-reduced' not in x and 'all-results' not in x]

    # If there is both a file in the format we generate and another
    # format of that file, ignore the other formats and just use ours.
    # A set makes each membership test constant-time instead of a scan of
    # the whole list for every item.
    known = set(targets)
    keep = []
    for item in targets:
        ext = filename_extension(item)
        base = filename_basename(item)
        if ext != _OUTPUT_EXT and (base + _OUTPUT_EXT) in known:
            # png version of file is also present => skip this other version
            continue
        keep.append(item)
    return keep
def __init__(self):
    '''Load the Microsoft subscription key into self.credentials.

    The credentials file is expected to contain JSON with a
    'subscription_key' entry; the value of that entry is what is stored.
    Raises AuthFailure if the file does not exist, is not readable, cannot
    be parsed, or lacks that entry.
    '''
    cfile = path.join(self.credentials_dir(),
                      credentials_filename('microsoft'))
    if __debug__: log('credentials file for microsoft is {}', cfile)
    if not path.exists(cfile):
        raise AuthFailure(
            'Credentials for Microsoft have not been installed')
    elif not readable(cfile):
        raise AuthFailure(
            'Microsoft credentials file unreadable: {}'.format(cfile))
    try:
        with open(cfile, 'r') as file:
            creds = json.load(file)
        self.credentials = creds['subscription_key']
    except Exception as ex:
        # The file being parsed is the credentials file, so say so in the
        # message, and chain the cause to keep it in the traceback.
        raise AuthFailure(
            'Unable to parse Microsoft credentials file: {}'.format(
                str(ex))) from ex
def handprint_path():
    '''Returns the path to where Handprint is installed.

    On Windows the value is read from the 'Path' entry of the registry key
    _HANDPRINT_REG_PATH under HKEY_LOCAL_MACHINE, with a fixed default used
    if the registry cannot be read.  On other platforms it is the parent of
    the directory returned by module_path().
    '''
    # The path returned by module.__path__ is to the directory containing
    # the __init__.py file.  What we want here is the path to the
    # installation of the Handprint binary.
    if sys.platform.startswith('win'):
        from winreg import OpenKey, CloseKey, QueryValueEx, HKEY_LOCAL_MACHINE
        try:
            if __debug__: log('reading Windows registry entry')
            key = OpenKey(HKEY_LOCAL_MACHINE, _HANDPRINT_REG_PATH)
            try:
                value, regtype = QueryValueEx(key, 'Path')
            finally:
                # Release the registry handle even if the query fails.
                CloseKey(key)
            # Pass the value as an argument, as the other log() calls in
            # this file do, rather than formatting it into the template.
            if __debug__: log('path to windows installation: {}', value)
            return value
        except OSError:
            # Kind of a problem.  Punt and return a default value.
            # (WindowsError is an alias of OSError on Windows.)  The raw
            # string keeps the backslashes literal; "\P" and "\H" in an
            # ordinary string are invalid escape sequences.
            return path.abspath(r'C:\Program Files\Handprint')
    else:
        return path.abspath(path.join(module_path(), '..'))
def _get(self, item, base_name, index):
    '''Resolve "item" to a local image file.

    If "item" is a URL, it is checked for image content and downloaded to a
    file named "<base_name>-<index>.<subtype>" in the output directory (or
    the current directory if no output directory is set), and the URL is
    recorded in a companion ".url" file.  Otherwise "item" is treated as a
    file path relative to the current directory.

    Returns a tuple (file, format), where "format" is the content subtype
    for URLs or the file extension without its first character for files.
    Returns (None, None), after issuing a warning, if a URL cannot be
    opened, does not hold an image, or cannot be downloaded.
    '''
    # Shortcuts to make the code more readable.
    output_dir = self._output_dir
    say = self._say

    # For URLs, we download the corresponding files and name them with
    # the base_name.
    if is_url(item):
        # First make sure the URL actually points to an image.
        if __debug__: log('testing if URL contains an image: {}', item)
        try:
            response = urllib.request.urlopen(item)
        except Exception as ex:
            say.warn('Skipping URL due to error: {}'.format(ex))
            return (None, None)
        # Only the headers are needed from this probe, so close the
        # connection as soon as they have been read instead of leaking it.
        with response:
            main_type = response.headers.get_content_maintype()
            orig_fmt = response.headers.get_content_subtype()
        if main_type != 'image':
            say.warn('Did not find an image at {}'.format(item))
            return (None, None)
        base = '{}-{}'.format(base_name, index)
        # If we weren't given an output dir, then for URLs, we have no
        # choice but to use the current dir to download the file.
        # Important: don't change self._output_dir because if other
        # inputs *are* files, then those files will need other output dirs.
        if not output_dir:
            output_dir = os.getcwd()
        file = path.realpath(path.join(output_dir, base + '.' + orig_fmt))
        if not download_file(item, file, say):
            say.warn('Unable to download {}'.format(item))
            return (None, None)
        url_file = path.realpath(path.join(output_dir, base + '.url'))
        with open(url_file, 'w') as f:
            f.write(url_file_content(item))
        say.info('Wrote URL to {}'.format(relative(url_file)))
    else:
        file = path.realpath(path.join(os.getcwd(), item))
        orig_fmt = filename_extension(file)[1:]

    if __debug__: log('{} has original format {}', relative(file), orig_fmt)
    return (file, orig_fmt)
def annotated_image(file, text_boxes, service):
    '''Return a PNG rendering of image "file" with text annotations.

    The image is drawn in grayscale colormap under a title giving
    service.name().  For each item in "text_boxes", item.text is drawn near
    the first (x, y) pair of item.boundingBox, which is read as a flat
    sequence of alternating x and y values.

    Returns an io.BytesIO positioned at the start of the PNG data.
    '''
    service_name = service.name()
    fig, axes = plt.subplots(nrows=1, ncols=1, figsize=(20, 20))
    try:
        axes.get_xaxis().set_visible(False)
        axes.get_yaxis().set_visible(False)
        axes.set_title(service_name, color='r', fontweight='bold',
                       fontsize=22)
        if __debug__: log('reading image file for {}: {}',
                          service_name, relative(file))
        img = mpimg.imread(file)
        axes.imshow(img, cmap="gray")
        props = dict(facecolor='white', alpha=0.7)
        if text_boxes:
            if __debug__: log('adding {} annotations for {}',
                              len(text_boxes), service_name)
            for item in text_boxes:
                coords = item.boundingBox
                vertices = [(coords[i], coords[i + 1])
                            for i in range(0, len(coords), 2)]
                # Offset the label slightly up and to the left of the first
                # vertex, without going outside the image.
                x = max(0, vertices[0][0] - 4)
                y = max(0, vertices[0][1] - 8)
                # Draw on the axes created above rather than on whatever
                # pyplot currently considers the active axes.
                axes.text(x, y, item.text, color='r', fontsize=11,
                          va="top", bbox=props)
        if __debug__: log('generating png for {} for {}',
                          service_name, relative(file))
        buf = io.BytesIO()
        fig.savefig(buf, format='png', dpi=300, bbox_inches='tight',
                    pad_inches=0)
        buf.flush()
        buf.seek(0)
        return buf
    finally:
        # Always close the figure, including when reading or rendering
        # fails, so figures are not left registered with pyplot.
        plt.close(fig)
def targets_from_arguments(images, from_file, given_urls, say):
    '''Return the list of targets (files or URLs) to process.

    If "from_file" is given, targets are the lines of that file, passed
    through filter_urls() unless "given_urls" is true.  Otherwise, if
    "given_urls" is true, "images" is returned unchanged.  Otherwise
    "images" is treated as files and directories: files with an accepted
    format and the accepted-format contents of directories are collected,
    and anything else produces a warning via "say".
    '''
    if from_file:
        with open(from_file) as stream:
            raw_lines = stream.readlines()
        listed = [entry.rstrip('\n') for entry in raw_lines]
        if __debug__: log('Read {} lines from "{}".', len(listed), from_file)
        return listed if given_urls else filter_urls(listed, say)

    if given_urls:
        # We assume that the arguments are URLs and take them as-is.
        return images

    # We were given files and/or directories.  Look for image files.
    found = []
    for candidate in filter_urls(images, say):
        is_image_file = (path.isfile(candidate)
                         and filename_extension(candidate) in ACCEPTED_FORMATS)
        if is_image_file:
            found.append(candidate)
        elif path.isdir(candidate):
            found += files_in_directory(candidate,
                                        extensions = ACCEPTED_FORMATS)
        else:
            say.warn('"{}" not a file or directory'.format(candidate))
    return found