def dispatch(self, file_key):
    """ Callback method for getObject from s3.

    On success, forwards the raw object bytes to ``self.callback``.
    On failure, builds a failed LoaderResult: 404 from S3 maps to
    ERROR_NOT_FOUND, anything else to ERROR_UPSTREAM; non-404 failures
    are retried up to ``self.max_retry`` times by re-issuing the get
    with this method as the callback.
    """
    # A usable response is a dict with a 'Body' and no 'Error' key.
    if not file_key or 'Error' in file_key or 'Body' not in file_key:
        logger.error(
            "ERROR retrieving image from S3 {0}: {1}".
            format(self.key, str(file_key)))
        # If we got here, there was a failure.
        # We will return 404 if S3 returned a 404, otherwise 502.
        result = LoaderResult()
        result.successful = False
        if not file_key:
            # No response at all (e.g. transport failure): no metadata to
            # inspect and nothing to retry on — report upstream error.
            result.error = LoaderResult.ERROR_UPSTREAM
            self.callback(result)
            return
        response_metadata = file_key.get('ResponseMetadata', {})
        status_code = response_metadata.get('HTTPStatusCode')
        if status_code == 404:
            # Definitive miss: do not retry a not-found object.
            result.error = LoaderResult.ERROR_NOT_FOUND
            self.callback(result)
            return
        if self.retries_counter < self.max_retry:
            # Transient-looking failure: bump the counter and re-request;
            # this same method handles the retried response.
            self.__increment_retry_counter()
            self.bucket_loader.get(self.key, callback=self.dispatch)
        else:
            # Retries exhausted — surface as an upstream (502-style) error.
            result.error = LoaderResult.ERROR_UPSTREAM
            self.callback(result)
    else:
        # Success: hand the object's raw bytes to the callback.
        self.callback(file_key['Body'].read())
def load(context, path, callback):
    """Read *path* from below FILE_LOADER_ROOT_PATH and hand a LoaderResult
    to *callback*.

    Paths resolving outside the configured root yield ERROR_NOT_FOUND.
    """
    root = context.config.FILE_LOADER_ROOT_PATH
    requested = abspath(join(root.rstrip('/'), path.lstrip('/')))

    result = LoaderResult()

    # Path-traversal guard: the resolved path must stay under the root.
    if not requested.startswith(abspath(root)):
        result.error = LoaderResult.ERROR_NOT_FOUND
        result.successful = False
        callback(result)
        return

    # Backwards compatibility: try the raw path first, then the
    # percent-unquoted variant.
    if not exists(requested):
        requested = unquote(requested)

    if exists(requested):
        with open(requested, 'rb') as source:
            stats = fstat(source.fileno())
            result.successful = True
            result.buffer = source.read()
            result.metadata.update(
                size=stats.st_size,
                updated_at=datetime.utcfromtimestamp(stats.st_mtime))
    else:
        result.error = LoaderResult.ERROR_NOT_FOUND
        result.successful = False
    callback(result)
def load(context, path, callback):
    """Read *path* from below FILE_LOADER_ROOT_PATH and hand a LoaderResult
    to *callback*.

    Paths resolving outside the configured root yield ERROR_NOT_FOUND.
    """
    file_path = join(
        context.config.FILE_LOADER_ROOT_PATH.rstrip('/'),
        path.lstrip('/'))
    file_path = abspath(file_path)
    inside_root_path = file_path.startswith(
        abspath(context.config.FILE_LOADER_ROOT_PATH))
    result = LoaderResult()
    if not inside_root_path:
        # Path-traversal guard: resolved path escaped the root.
        result.error = LoaderResult.ERROR_NOT_FOUND
        result.successful = False
        callback(result)
        return
    # keep backwards compatibility, try the actual path first
    # if not found, unquote it and try again
    if not exists(file_path):
        file_path = unquote(file_path)
    if exists(file_path):
        # BUG FIX: open in binary mode — loaders serve raw image bytes; text
        # mode would corrupt the data and raise decode errors on Python 3.
        with open(file_path, 'rb') as f:
            stats = fstat(f.fileno())
            result.successful = True
            result.buffer = f.read()
            result.metadata.update(
                size=stats.st_size,
                updated_at=datetime.utcfromtimestamp(stats.st_mtime))
    else:
        result.error = LoaderResult.ERROR_NOT_FOUND
        result.successful = False
    callback(result)
def dispatch(self, file_key):
    """Handle the S3 getObject response: forward the body on success,
    otherwise report or retry the failure."""
    # Success fast path: a dict carrying a 'Body' and no 'Error'.
    if file_key and 'Error' not in file_key and 'Body' in file_key:
        self.callback(file_key['Body'].read())
        return

    logger.error("ERROR retrieving image from S3 {0}: {1}".format(
        self.key, str(file_key)))

    # If we got here, there was a failure.
    # We will return 404 if S3 returned a 404, otherwise 502.
    failure = LoaderResult()
    failure.successful = False

    if not file_key:
        # No response object at all: nothing to inspect, report upstream.
        failure.error = LoaderResult.ERROR_UPSTREAM
        self.callback(failure)
        return

    status = file_key.get('ResponseMetadata', {}).get('HTTPStatusCode')
    if status == 404:
        # Definitive miss — no retry.
        failure.error = LoaderResult.ERROR_NOT_FOUND
        self.callback(failure)
        return

    if self.retries_counter < self.max_retry:
        # Possibly transient failure: retry with this method as callback.
        self.__increment_retry_counter()
        self.bucket_loader.get(self.key, callback=self.dispatch)
    else:
        failure.error = LoaderResult.ERROR_UPSTREAM
        self.callback(failure)
def handle_data(file_key):
    """Completion callback for an S3 getObject call.

    Forwards the object bytes to ``callback`` on success; otherwise builds
    a failed LoaderResult (404 from S3 -> ERROR_NOT_FOUND, anything else
    -> ERROR_UPSTREAM).
    """
    if not file_key or 'Error' in file_key or 'Body' not in file_key:
        # BUG FIX: logger.warn is a deprecated alias — use warning().
        logger.warning("ERROR retrieving image from S3 {0}: {1}".format(
            key, str(file_key)))
        # If we got here, there was a failure. We will return 404 if S3 returned a 404, otherwise 502.
        result = LoaderResult()
        result.successful = False
        if file_key and file_key.get('ResponseMetadata', {}).get('HTTPStatusCode') == 404:
            result.error = LoaderResult.ERROR_NOT_FOUND
        else:
            result.error = LoaderResult.ERROR_UPSTREAM
        callback(result)
    else:
        callback(file_key['Body'].read())
def return_contents(response, url, context, req_start=None):
    """Convert a fetched HTTP *response* for *url* into a LoaderResult.

    Emits fetch/status/timing metrics along the way. Returns the
    LoaderResult (this variant returns instead of invoking a callback).

    :param response: HTTP response object (tornado-style: .code, .error,
        .body, .headers, .time_info)
    :param string url: Originally requested URL (used for metric tags)
    :param Context context: Thumbor's context (metrics sink)
    :param req_start: datetime when the request started, or None to skip
        fetch-latency metrics
    """
    res = urlparse(url)
    if req_start:
        finish = datetime.datetime.now()
        # Per-host fetch latency; dots in the host are unsafe in metric names.
        context.metrics.timing(
            "original_image.fetch.{0}.{1}".format(response.code,
                                                  res.netloc.replace(".", "_")),
            (finish - req_start).total_seconds() * 1000,
        )
        context.metrics.incr("original_image.fetch.{0}.{1}".format(
            response.code, res.netloc.replace(".", "_")))
    result = LoaderResult()
    context.metrics.incr("original_image.status." + str(response.code))
    context.metrics.incr("original_image.status.{0}.{1}".format(
        response.code, res.netloc.replace(".", "_")))
    if response.error:
        result.successful = False
        if response.code == 599:
            # Return a Gateway Timeout status downstream if upstream times out
            result.error = LoaderResult.ERROR_TIMEOUT
        else:
            result.error = LoaderResult.ERROR_NOT_FOUND
        logger.warning(u"ERROR retrieving image %s: %s", url,
                       str(response.error))
    elif response.body is None or len(response.body) == 0:
        # A 2xx with no payload is still a failure for an image loader.
        result.successful = False
        result.error = LoaderResult.ERROR_UPSTREAM
        logger.warning(u"ERROR retrieving image %s: Empty response.", url)
    else:
        if response.time_info:
            # Mirror curl-style time_info phases as timing metrics.
            for metric_name in response.time_info:
                context.metrics.timing(
                    "original_image.time_info." + metric_name,
                    response.time_info[metric_name] * 1000,
                )
            context.metrics.timing(
                "original_image.time_info.bytes_per_second",
                len(response.body) // response.time_info["total"],
            )
        result.buffer = response.body
        result.metadata.update(response.headers)
        context.metrics.incr("original_image.response_bytes",
                             len(response.body))
    return result
def handle_data(file_key):
    """Completion callback for an S3 getObject call.

    Forwards the object bytes to ``callback`` on success; otherwise builds
    a failed LoaderResult (404 from S3 -> ERROR_NOT_FOUND, anything else
    -> ERROR_UPSTREAM).
    """
    if not file_key or 'Error' in file_key or 'Body' not in file_key:
        # BUG FIX: logger.warn is a deprecated alias — use warning().
        logger.warning("ERROR retrieving image from S3 {0}: {1}".format(
            key, str(file_key)))
        # If we got here, there was a failure. We will return 404 if S3 returned a 404, otherwise 502.
        result = LoaderResult()
        result.successful = False
        if file_key and file_key.get('ResponseMetadata', {}).get('HTTPStatusCode') == 404:
            result.error = LoaderResult.ERROR_NOT_FOUND
        else:
            result.error = LoaderResult.ERROR_UPSTREAM
        callback(result)
    else:
        callback(file_key['Body'].read())
def return_contents_error(data, callback, *args, **kwargs):
    """Map an error line emitted by the fetch helper onto a failed
    LoaderResult and hand it to *callback*.

    Only the first line of *data* is inspected/logged. Empty input is a
    no-op (callback is NOT invoked).
    """
    if not data:
        return
    data = data.split('\n')[0]
    logger.error(data)
    result = LoaderResult()
    result.successful = False
    if 'Failed to resolve hostname' in data:
        result.error = LoaderResult.ERROR_UPSTREAM
    elif 'timed out' in data:
        # BUG FIX: the original repeated the 'Failed to resolve hostname'
        # check here, which made this TIMEOUT branch unreachable.
        # NOTE(review): 'timed out' is the presumed intended trigger —
        # confirm against the actual error output of the fetch tool.
        result.error = LoaderResult.ERROR_TIMEOUT
    else:
        result.error = LoaderResult.ERROR_NOT_FOUND
    callback(result)
def load(context, path, callback):
    """Route *path* by media type: extract a frame for videos, the first
    page for PDFs, and delegate everything else to file_loader.

    On extraction failure (no frame/page produced) a failed
    ERROR_NOT_FOUND LoaderResult is passed to *callback*.
    """
    file_path = join(context.config.FILE_LOADER_ROOT_PATH.rstrip('/'),
                     unquote(path).lstrip('/'))
    file_path = abspath(file_path)
    # NOTE(review): unlike sibling loaders, the root is not abspath()-ed
    # before the prefix check — presumably FILE_LOADER_ROOT_PATH is already
    # absolute; confirm, otherwise the traversal guard is weakened.
    inside_root_path = file_path.startswith(
        context.config.FILE_LOADER_ROOT_PATH)
    if inside_root_path and is_video(file_path):
        # Extract a frame from the video and load it instead of the original path
        logger.warning('processing video... %s', file_path)
        with get_video_frame(context, file_path) as image_path:
            if image_path:
                callback(read_file(image_path))
                return
    elif inside_root_path and is_pdf(file_path):
        # extract first page of pdf and load it
        logger.warning('processing pdf... %s', file_path)
        with get_pdf_page(context, file_path) as image_path:
            if image_path:
                callback(read_file(image_path))
                return
    else:
        # First attempt to load with file_loader
        file_loader.load(context, path, callback)
        return
    # If we got here, there was a failure
    result = LoaderResult()
    result.error = LoaderResult.ERROR_NOT_FOUND
    result.successful = False
    callback(result)
def load(context, path, callback):
    """
    Loads a file and passes a result to *callback*.

    If the requested file is a video, a single frame is extracted with
    ffmpeg and returned in place of the raw file contents.

    :param Context context: Thumbor's context
    :param string path: Path to load
    :param callable callback: Callback method once done
    """
    root = context.config.FILE_LOADER_ROOT_PATH
    file_path = abspath(join(root.rstrip('/'), unquote(path).lstrip('/')))

    if file_path.startswith(root) and exists(file_path):
        if not is_video(file_path):
            # Plain file: return its contents directly.
            callback(read_file(file_path))
            return
        # Video: hand back one extracted frame instead of the raw bytes.
        with get_video_frame(context, file_path) as frame_path:
            if frame_path:
                callback(read_file(frame_path))
                return

    # Missing file, path escaped the root, or frame extraction failed.
    result = LoaderResult()
    result.error = LoaderResult.ERROR_NOT_FOUND
    result.successful = False
    callback(result)
def load(context, path, callback):
    """
    Loads a file. In case the requested file is a video, instead of
    loading its contents this method extracts a frame from the video
    using ffmpeg, and returns the image.
    :param Context context: Thumbor's context
    :param string path: Path to load
    :param callable callback: Callback method once done
    """
    file_path = join(context.config.FILE_LOADER_ROOT_PATH.rstrip("/"),
                     unquote(path).lstrip("/"))
    file_path = abspath(file_path)
    # NOTE(review): the root is not abspath()-ed before this prefix check —
    # presumably FILE_LOADER_ROOT_PATH is already absolute; confirm.
    inside_root_path = file_path.startswith(context.config.FILE_LOADER_ROOT_PATH)
    if inside_root_path and exists(file_path):
        if is_video(file_path):
            # Extract a frame from the video and load it instead of the original path
            with get_video_frame(context, file_path) as image_path:
                if image_path:
                    callback(read_file(image_path))
                    return
        else:
            # Plain file: return the raw contents.
            callback(read_file(file_path))
            return
    # If we got here, there was a failure
    result = LoaderResult()
    result.error = LoaderResult.ERROR_NOT_FOUND
    result.successful = False
    callback(result)
def load(self, path, callback):
    """Fetch an image from GridFS keyed by the leading ObjectId segment of
    *path*, passing a LoaderResult to *callback*."""
    db = __conn__(self)
    segments = path.split("/")
    collection = self.config.MONGO_ORIGIN_SERVER_COLLECTION
    fs = gridfs.GridFS(db, collection=collection)
    result = LoaderResult()
    # Only the first path segment is used; it must be a valid ObjectId
    # that actually exists in the store.
    if ObjectId.is_valid(segments[0]) and fs.exists(ObjectId(segments[0])):
        result.successful = True
        result.buffer = fs.get(ObjectId(segments[0])).read()
    else:
        result.error = LoaderResult.ERROR_NOT_FOUND
        result.successful = False
    callback(result)
def callback_wrapper(result):
    """Wrap a raw loader payload in a LoaderResult before invoking the
    outer callback; ``None`` is reported as not-found."""
    wrapped = LoaderResult()
    if result is None:
        wrapped.error = LoaderResult.ERROR_NOT_FOUND
        wrapped.successful = False
    else:
        wrapped.successful = True
        wrapped.buffer = result
    callback(wrapped)
def return_contents(response, url, callback, context, req_start=None):
    """Convert a fetched HTTP *response* for *url* into a LoaderResult and
    pass it to *callback*, emitting status/timing metrics.

    :param response: HTTP response (tornado-style: .code, .error, .body,
        .headers, .time_info)
    :param string url: Requested URL (metric tag and log context)
    :param callable callback: Receives the LoaderResult
    :param Context context: Thumbor's context (metrics sink)
    :param req_start: request start datetime, or None to skip fetch timing
    """
    if req_start:
        finish = datetime.datetime.now()
        res = urlparse(url)
        # Dots in the host are unsafe in metric names.
        context.metrics.timing(
            'original_image.fetch.{0}.{1}'.format(response.code,
                                                  res.netloc.replace('.', '_')),
            (finish - req_start).total_seconds() * 1000,
        )
    result = LoaderResult()
    context.metrics.incr('original_image.status.' + str(response.code))
    if response.error:
        result.successful = False
        if response.code == 599:
            # Return a Gateway Timeout status downstream if upstream times out
            result.error = LoaderResult.ERROR_TIMEOUT
        else:
            result.error = LoaderResult.ERROR_NOT_FOUND
        logger.warning(u"ERROR retrieving image {0}: {1}".format(
            url, str(response.error)))
    elif response.body is None or len(response.body) == 0:
        result.successful = False
        result.error = LoaderResult.ERROR_UPSTREAM
        # BUG FIX: logger.warn is a deprecated alias (and inconsistent with
        # the warning() call above) — use warning().
        logger.warning(u"ERROR retrieving image {0}: Empty response.".format(url))
    else:
        if response.time_info:
            for x in response.time_info:
                context.metrics.timing('original_image.time_info.' + x,
                                       response.time_info[x] * 1000)
            context.metrics.timing(
                'original_image.time_info.bytes_per_second',
                len(response.body) // response.time_info['total'])
        result.buffer = response.body
        result.metadata.update(response.headers)
        context.metrics.incr('original_image.response_bytes',
                             len(response.body))
    callback(result)
def return_contents(response, url, callback, context, req_start=None):
    """Convert a fetched HTTP *response* for *url* into a LoaderResult and
    pass it to *callback*, emitting status/timing metrics.

    :param response: HTTP response (tornado-style: .code, .error, .body,
        .headers, .time_info)
    :param string url: Requested URL (metric tag and log context)
    :param callable callback: Receives the LoaderResult
    :param Context context: Thumbor's context (metrics sink)
    :param req_start: request start datetime, or None to skip fetch timing
    """
    if req_start:
        finish = datetime.datetime.now()
        res = urlparse(url)
        context.metrics.timing(
            'original_image.fetch.{0}.{1}'.format(response.code, res.netloc),
            (finish - req_start).total_seconds() * 1000,
        )
    result = LoaderResult()
    context.metrics.incr('original_image.status.' + str(response.code))
    if response.error:
        result.successful = False
        if response.code == 599:
            # Return a Gateway Timeout status downstream if upstream times out
            result.error = LoaderResult.ERROR_TIMEOUT
        else:
            result.error = LoaderResult.ERROR_NOT_FOUND
        # BUG FIX: logger.warn is a deprecated alias — use warning().
        logger.warning(u"ERROR retrieving image {0}: {1}".format(
            url, str(response.error)))
    elif response.body is None or len(response.body) == 0:
        result.successful = False
        result.error = LoaderResult.ERROR_UPSTREAM
        logger.warning(u"ERROR retrieving image {0}: Empty response.".format(url))
    else:
        if response.time_info:
            for x in response.time_info:
                context.metrics.timing('original_image.time_info.' + x,
                                       response.time_info[x] * 1000)
            # BUG FIX: use floor division — under Python 3 true division
            # emitted a float here, unlike the sibling implementation.
            context.metrics.timing(
                'original_image.time_info.bytes_per_second',
                len(response.body) // response.time_info['total'])
        result.buffer = response.body
        result.metadata.update(response.headers)
        context.metrics.incr('original_image.response_bytes', len(
            response.body))
    callback(result)
def callback_wrapper(result):
    """Fall back to the HTTP loader when the file loader fails and the
    path looks like a URL; otherwise report not-found."""
    if result.successful:
        callback(result)
    # If file_loader failed try http_loader.
    # BUG FIX (idiom): membership test instead of path.find('http') != -1.
    elif 'http' in path:
        http_loader.load(context, path, callback)
    else:
        # Use a fresh name instead of rebinding the incoming parameter.
        failure = LoaderResult()
        failure.error = LoaderResult.ERROR_NOT_FOUND
        failure.successful = False
        callback(failure)
def load(context, url, callback, normalize_url_func=_normalize_url):
    """Load *url*: extract a frame for videos, otherwise delegate to
    load_sync. Frame-extraction failure yields ERROR_NOT_FOUND."""
    # BUG FIX: logger.warn is deprecated; warning() with lazy %-args also
    # defers string formatting until the record is actually emitted.
    logger.warning('>>> %s URL | ', url)
    if is_video(url):
        # Extract a frame from the video and load it instead of the original path
        with get_video_frame(context, url, normalize_url_func) as image_path:
            if image_path:
                callback(read_file(image_path))
                return
        # If we got here, there was a failure
        result = LoaderResult()
        result.error = LoaderResult.ERROR_NOT_FOUND
        result.successful = False
        callback(result)
        return
    return load_sync(context, url, callback, normalize_url_func)
def load(context, path, callback):
    """Serve *path* from a Google Cloud Storage bucket, memoizing bucket
    handles per project in the module-level ``buckets`` cache."""
    bucket_id = context.config.get("CLOUD_STORAGE_BUCKET_ID")
    project_id = context.config.get("CLOUD_STORAGE_PROJECT_ID")

    bucket = buckets[project_id].get(bucket_id)
    if bucket is None:
        # First use of this project/bucket pair: resolve and cache it.
        bucket = storage.Client(project_id).get_bucket(bucket_id)
        buckets[project_id][bucket_id] = bucket

    blob = bucket.get_blob(path)
    if blob:
        callback(blob.download_as_string())
        return

    # No such object in the bucket.
    result = LoaderResult()
    result.successful = False
    result.error = LoaderResult.ERROR_NOT_FOUND
    callback(result)
def load(context, path, callback):
    """Fetch *path* from the configured S3 bucket via boto and pass a
    LoaderResult to *callback*."""
    result = LoaderResult()
    connection = boto.connect_s3(
        aws_access_key_id=context.config.AWS_S3_ACCESS_KEY,
        aws_secret_access_key=context.config.AWS_S3_SECRET_KEY,
        host=context.config.AWS_HOST,
        calling_format=boto.s3.connection.OrdinaryCallingFormat(),
    )
    # validate=False avoids an extra round-trip checking the bucket itself.
    bucket = connection.get_bucket(context.config.AWS_S3_BUCKET, validate=False)
    key = bucket.get_key(path)
    if key is None:
        result.successful = False
        result.error = LoaderResult.ERROR_NOT_FOUND
    else:
        result.buffer = key.get_contents_as_string()
        result.successful = True
    callback(result)
async def load(context, path):
    """Try the file loader first; fall back to the HTTP loader when the
    path passes domain validation, otherwise return a bad-request result."""
    file_result = await file_loader.load(context, path)
    if file_result.successful:
        return file_result

    # File loader failed: attempt HTTP, but only for allowed domains.
    if http_loader.validate(context, path):
        return await http_loader.load(context, path)

    rejection = LoaderResult()
    rejection.successful = False
    rejection.error = LoaderResult.ERROR_BAD_REQUEST
    rejection.extras["reason"] = "Unallowed domain"
    rejection.extras["source"] = path
    return rejection
def load(context, path, callback):
    """Always load the bundled fixture image, ignoring *path*, and pass a
    LoaderResult to *callback* (test/stub loader)."""
    file_path = abspath(join(dirname(__file__), "fixtures/images/image.jpg"))
    result = LoaderResult()
    if exists(file_path):
        # BUG FIX: open in binary mode — JPEG data is binary; text mode
        # corrupts it and raises decode errors on Python 3.
        with open(file_path, 'rb') as f:
            stats = fstat(f.fileno())
            result.successful = True
            result.buffer = f.read()
            result.metadata.update(
                size=stats.st_size,
                updated_at=datetime.utcfromtimestamp(stats.st_mtime)
            )
    else:
        result.error = LoaderResult.ERROR_NOT_FOUND
        result.successful = False
    callback(result)
def load(context, path, callback):
    """Load *path* from FILE_LOADER_ROOT_PATH, returning only the first
    LOADER_EXCERPT_LENGTH bytes in the result buffer.

    When the file is at least excerpt-sized, the full payload is stashed
    in a temp file on the context and its cleanup is scheduled on the
    tornado IOLoop after HTTP_LOADER_TEMP_FILE_TIMEOUT.
    """
    file_path = join(context.config.FILE_LOADER_ROOT_PATH.rstrip('/'),
                     path.lstrip('/'))
    file_path = abspath(file_path)
    inside_root_path = file_path.startswith(context.config.FILE_LOADER_ROOT_PATH)
    result = LoaderResult()
    if inside_root_path and exists(file_path):
        # BUG FIX: open in binary mode — originals are binary media, and
        # NamedTemporaryFile.write() below requires bytes on Python 3.
        with open(file_path, 'rb') as f:
            stats = fstat(f.fileno())
            result.successful = True
            response = f.read()
            excerpt_length = context.config.LOADER_EXCERPT_LENGTH
            result.buffer = response[:excerpt_length]
            if len(result.buffer) == excerpt_length:
                # File is at least excerpt-sized: keep the full payload in a
                # temp file and schedule its removal.
                temp = NamedTemporaryFile(delete=False)
                temp.write(response)
                temp.close()
                context.wikimedia_original_file = temp
                tornado.ioloop.IOLoop.instance().call_later(
                    context.config.HTTP_LOADER_TEMP_FILE_TIMEOUT,
                    partial(
                        cleanup_temp_file,
                        context.wikimedia_original_file.name
                    )
                )
            result.metadata.update(
                size=stats.st_size,
                updated_at=datetime.utcfromtimestamp(stats.st_mtime)
            )
    else:
        # NOTE(review): raw integer 404 rather than
        # LoaderResult.ERROR_NOT_FOUND — kept as-is since downstream may
        # compare on the integer; consider migrating to the constant.
        result.error = 404
        result.successful = False
    callback(result)
def load(context, path, callback):
    """Look for *path* under each directory in TC_MULTIDIR_PATHS, in order,
    and pass the first hit (or a not-found result) to *callback*."""
    result = LoaderResult()

    for base_dir in context.config.TC_MULTIDIR_PATHS:
        candidate = abspath(join(base_dir.rstrip('/'), path.lstrip('/')))
        if not candidate.startswith(abspath(base_dir)):
            # Traversal attempt against this root: skip it.
            continue
        # keep backwards compatibility, try the actual path first
        # if not found, unquote it and try again
        if not exists(candidate):
            candidate = unquote(candidate)
        if exists(candidate):
            with open(candidate, 'rb') as source:
                stats = fstat(source.fileno())
                result.successful = True
                result.buffer = source.read()
                result.metadata.update(
                    size=stats.st_size,
                    updated_at=datetime.utcfromtimestamp(stats.st_mtime))
            callback(result)
            return
        logger.debug('TC_MULTIDIR: File {0} not found in {1}'.format(
            path, base_dir))
        # else loop and try next directory

    if not context.config.TC_MULTIDIR_PATHS:
        logger.error('TC_MULTIDIR: No paths set in configuration TC_MULTIDIR_PATHS')

    # no file found
    result.error = LoaderResult.ERROR_NOT_FOUND
    result.successful = False
    callback(result)
def load(context, url, callback):
    """Route *url* to the appropriate loader: S3 for matching bucket URLs,
    HTTP for web URLs, and the file loader otherwise.

    NOTE(review): several branches are marked TEMP — the S3 branch
    currently bypasses the EFS-first path (callback_wrapper is defined but
    unused), and the final branch builds an unused not-found result before
    delegating to FileLoader. Presumably transitional; confirm intent
    before cleaning up.
    """
    url = quote_url(url)
    match = S3_RE.match(url)

    def callback_wrapper(result):
        # Fallback wrapper: on EFS miss, retry the same object from S3.
        # Currently unused (see TEMP note below).
        if result.successful:
            callback(result)
        else:
            logger.info('s3 {0}'.format(
                os.path.join(match.group('bucket').rstrip('/'),
                             match.group('path').lstrip('/')))
            )
            # If not on efs, try s3
            S3Loader.load(context,
                          os.path.join(match.group('bucket').rstrip('/'),
                                       match.group('path').lstrip('/')),
                          callback)

    # If melody s3 file, first try to load from efs
    if match:
        logger.info('BOTO {0}'.format(match.group('path')))
        # TEMP try s3 direct
        S3Loader.load(context,
                      os.path.join(match.group('bucket').rstrip('/'),
                                   match.group('path').lstrip('/')),
                      callback)
        # FileLoader.load(context, match.group('path'), callback_wrapper)
    # else get from the internet
    elif HTTP_RE.match(url):
        logger.info('WEB {0}'.format(url))
        HttpLoader.load(context, url, callback)
    else:
        logger.info('FILE {0}'.format(url))
        result = LoaderResult()
        result.successful = False
        result.error = LoaderResult.ERROR_NOT_FOUND
        # callback(result)
        # TEMP enable file loader
        FileLoader.load(context, url, callback)
def load_override(context, path, callback):
    """Unconditionally report a 409 failure to *callback*, ignoring *path*."""
    refusal = LoaderResult()
    refusal.successful = False
    refusal.error = 409
    callback(refusal)
def get_not_found_result():
    # type: () -> LoaderResult
    """Build and return a failed LoaderResult flagged as ERROR_NOT_FOUND."""
    missing = LoaderResult()
    missing.successful = False
    missing.error = LoaderResult.ERROR_NOT_FOUND
    return missing
async def load_override(*_):
    """Async override that always fails with a 409, ignoring all arguments."""
    refusal = LoaderResult()
    refusal.error = 409
    refusal.successful = False
    return refusal
def _read_image_result(result, file_path, min_size):
    """Populate *result* from *file_path*; files of *min_size* bytes or
    fewer are treated as empty and reported as ERROR_UPSTREAM."""
    # BUG FIX: binary mode — image bytes; text mode corrupts them on Py3.
    with open(file_path, 'rb') as f:
        stats = fstat(f.fileno())
        if stats.st_size <= min_size:
            logger.warning(u"%s: cette image source est vide...", file_path)
            result.successful = False
            result.error = LoaderResult.ERROR_UPSTREAM
        else:
            result.successful = True
            result.buffer = f.read()
            result.metadata.update(
                size=stats.st_size,
                updated_at=datetime.utcfromtimestamp(stats.st_mtime))


def load(context, path, callback):
    """Load *path* from PIC_LOADER_ROOT_PATH, falling back to
    PIC_LOADER_FALLBACK_PATH, and pass a LoaderResult to *callback*.

    Near-empty files (<=1 byte in the primary root, <=4 bytes in the
    fallback) are reported as upstream errors.
    """
    file_path = abspath(join(context.config.PIC_LOADER_ROOT_PATH.rstrip('/'),
                             path.lstrip('/')))
    file_path_two = abspath(join(
        context.config.PIC_LOADER_FALLBACK_PATH.rstrip('/'),
        path.lstrip('/')))
    inside_root_path = file_path.startswith(
        abspath(context.config.PIC_LOADER_ROOT_PATH))
    inside_root_path_two = file_path_two.startswith(
        abspath(context.config.PIC_LOADER_FALLBACK_PATH))

    result = LoaderResult()

    # Give up only when the path escapes BOTH roots (traversal guard).
    if not inside_root_path and not inside_root_path_two:
        result.error = LoaderResult.ERROR_NOT_FOUND
        result.successful = False
        callback(result)
        return

    # keep backwards compatibility, try the actual path first
    # if not found, unquote it and try again
    if not exists(file_path):
        file_path = unquote(file_path)
    if not exists(file_path_two):
        file_path_two = unquote(file_path_two)

    if exists(file_path):
        _read_image_result(result, file_path, 1)
    elif exists(file_path_two):
        _read_image_result(result, file_path_two, 4)
    else:
        result.error = LoaderResult.ERROR_NOT_FOUND
        result.successful = False
    callback(result)
def load_sync(context, url, callback):
    """Fetch the original from Swift (container/path taken from the
    context), stash the full payload in a temp file, and pass a
    LoaderResult whose buffer holds only the MIME-detection excerpt.

    On ClientException -> ERROR_NOT_FOUND; on ConnectionError ->
    ERROR_UPSTREAM. Read timings and counters are recorded either way.
    """
    logger.debug('[SWIFT_LOADER] load_sync: %s' % url, extra=log_extra(context))
    result = LoaderResult()
    container = context.wikimedia_original_container
    path = context.wikimedia_original_filepath
    try:
        logger.debug(
            '[SWIFT_LOADER] fetching %s from container %s' % (path, container),
            extra=log_extra(context)
        )
        start = datetime.datetime.now()
        # Silence swiftclient's own error logging for the duration of the
        # request; re-enabled after the call (and in the except paths).
        logging.disable(logging.ERROR)
        headers, response = swift(context).get_object(
            container,
            path
        )
        logging.disable(logging.NOTSET)
        record_timing(context, datetime.datetime.now() - start,
                      'swift.original.read.success',
                      'Thumbor-Swift-Original-Success-Time')
        context.metrics.incr('swift_loader.status.success')
        # XXX hack: If the file is an STL, we overwrite the first five bytes
        # with the word "solid", to trick the MIME detection pipeline.
        extension = path[-4:].lower()
        isSTL = extension == '.stl'
        # Full payload goes to a temp file; delete=False because cleanup is
        # scheduled separately on the IOLoop below.
        f = NamedTemporaryFile(delete=False)
        logger.debug(
            '[SWIFT_LOADER] writing %d bytes to temp file' % len(response),
            extra=log_extra(context)
        )
        f.write(response)
        f.close()
        excerpt_length = context.config.LOADER_EXCERPT_LENGTH
        # First kb of the body for MIME detection
        body = response[:excerpt_length]
        # See above - text STLs have this string here anyway, and
        # binary STLs ignore the first 80 bytes, so this string will
        # be ignored.
        # NOTE(review): 'solid' + body[5:] assumes a str body (Python 2);
        # under Python 3 response would be bytes and this concatenation
        # would raise TypeError — confirm target runtime.
        if isSTL:
            body = 'solid' + body[5:]
        if len(body) == excerpt_length:
            # Excerpt-sized body: keep the temp file around on the context
            # and schedule its deletion after the configured timeout.
            logger.debug('[SWIFT_LOADER] return_contents: %s' % f.name,
                         extra=log_extra(context))
            context.wikimedia_original_file = f
            tornado.ioloop.IOLoop.instance().call_later(
                context.config.HTTP_LOADER_TEMP_FILE_TIMEOUT,
                partial(
                    cleanup_temp_file,
                    context,
                    context.wikimedia_original_file.name
                )
            )
        else:
            # Whole file fit in the excerpt: temp file is not needed.
            logger.debug('[SWIFT_LOADER] return_contents: small body')
            cleanup_temp_file(context, f.name)
        result.buffer = body
    except ClientException as e:
        # Swift-level miss (e.g. object not found).
        record_timing(context, datetime.datetime.now() - start,
                      'swift.original.read.miss',
                      'Thumbor-Swift-Original-Miss-Time')
        logging.disable(logging.NOTSET)
        result.successful = False
        result.error = LoaderResult.ERROR_NOT_FOUND
        logger.error('[SWIFT_LOADER] get_object failed: %s %r' % (url, e),
                     extra=log_extra(context))
        context.metrics.incr('swift_loader.status.client_exception')
    except requests.ConnectionError as e:
        # Transport-level failure reaching Swift.
        record_timing(context, datetime.datetime.now() - start,
                      'swift.original.read.exception',
                      'Thumbor-Swift-Original-Exception-Time')
        logging.disable(logging.NOTSET)
        result.successful = False
        result.error = LoaderResult.ERROR_UPSTREAM
        logger.error('[SWIFT_LOADER] get_object failed: %s %r' % (url, e),
                     extra=log_extra(context))
        context.metrics.incr('swift_loader.status.connection_error')
    callback(result)
def load(context, path):
    """Synchronous variant: load *path* from PIC_LOADER_ROOT_PATH with a
    fallback root and return a LoaderResult (no callback).

    Files at or above the configured size limit are rejected with
    COULD_NOT_LOAD_IMAGE.
    """
    file_path = abspath(join(context.config.PIC_LOADER_ROOT_PATH.rstrip('/'),
                             path.lstrip('/')))
    file_path_two = abspath(join(
        context.config.PIC_LOADER_FALLBACK_PATH.rstrip('/'),
        path.lstrip('/')))
    inside_root_path = file_path.startswith(
        abspath(context.config.PIC_LOADER_ROOT_PATH))
    inside_root_path_two = file_path_two.startswith(
        abspath(context.config.PIC_LOADER_FALLBACK_PATH))

    # BUG FIX: the original called os.path.exists() on PIC_LOADER_MAX_SIZE
    # (an integer), which tests a file descriptor, not whether the option
    # is set — and the branch was inverted. Use the configured limit when
    # truthy, else default to 16 MiB.
    oversize = context.config.PIC_LOADER_MAX_SIZE or 16777216

    result = LoaderResult()

    # Give up only when the path escapes BOTH roots (traversal guard).
    if not inside_root_path and not inside_root_path_two:
        result.error = LoaderResult.ERROR_NOT_FOUND
        result.successful = False
        return result

    # keep backwards compatibility, try the actual path first
    # if not found, unquote it and try again
    if not exists(file_path):
        file_path = unquote(file_path)
    if not exists(file_path_two):
        file_path_two = unquote(file_path_two)

    def _fill_from(path_on_disk):
        # BUG FIX: binary mode — image bytes; text mode corrupts them on Py3.
        with open(path_on_disk, 'rb') as f:
            stats = fstat(f.fileno())
            if stats.st_size >= oversize:
                result.error = LoaderResult.COULD_NOT_LOAD_IMAGE
                result.successful = False
            else:
                result.successful = True
                result.buffer = f.read()
                result.metadata.update(
                    size=stats.st_size,
                    updated_at=datetime.utcfromtimestamp(stats.st_mtime))

    if exists(file_path):
        _fill_from(file_path)
    elif exists(file_path_two):
        _fill_from(file_path_two)
    else:
        result.error = LoaderResult.ERROR_NOT_FOUND
        result.successful = False
    return result
def return_contents(response, url, callback, context, req_start=None):
    """Convert a fetched HTTP *response* for *url* into a LoaderResult and
    pass it to *callback*, optionally re-encoding JPEGs to 72 DPI via
    ImageMagick's ``convert`` when NORMALIZE_TO_72DPI is enabled.
    """
    if req_start:
        finish = datetime.datetime.now()
        res = urlparse(url)
        context.metrics.timing(
            'original_image.fetch.{0}.{1}'.format(response.code, res.netloc),
            (finish - req_start).total_seconds() * 1000,
        )
    result = LoaderResult()
    context.metrics.incr('original_image.status.' + str(response.code))
    if response.error:
        result.successful = False
        if response.code == 599:
            # Return a Gateway Timeout status downstream if upstream times out
            result.error = LoaderResult.ERROR_TIMEOUT
        else:
            result.error = LoaderResult.ERROR_NOT_FOUND
        logger.warn(u"ERROR retrieving image {0}: {1}".format(
            url, str(response.error)))
    elif response.body is None or len(response.body) == 0:
        result.successful = False
        result.error = LoaderResult.ERROR_UPSTREAM
        logger.warn(u"ERROR retrieving image {0}: Empty response.".format(url))
    else:
        if response.time_info:
            for x in response.time_info:
                context.metrics.timing('original_image.time_info.' + x,
                                       response.time_info[x] * 1000)
            context.metrics.timing(
                'original_image.time_info.bytes_per_second',
                len(response.body) / response.time_info['total'])
        result.buffer = response.body
        result.metadata.update(response.headers)
        context.metrics.incr('original_image.response_bytes',
                             len(response.body))
        if context.config.NORMALIZE_TO_72DPI and context.config.CONVERT_PATH:
            if not exists(context.config.CONVERT_PATH):
                logger.warn(
                    'imagemagick/convert enabled but binary CONVERT_PATH does not exist'
                )
            else:
                try:
                    # Only JPEGs are normalized; other types pass through.
                    if "jpeg" in result.metadata['Content-Type']:
                        # NOTE(review): single shell string with shell=True —
                        # safe only while CONVERT_PATH is trusted config, but
                        # a list with shell=False would be more robust.
                        command = [
                            context.config.CONVERT_PATH +
                            ' - -density 72,72 -strip - ',
                        ]
                        normalize_dpi_cmd = Popen(command, stdin=PIPE,
                                                  stdout=PIPE, stderr=PIPE,
                                                  close_fds=True, shell=True)
                        # Pipe the original through convert; stdout is the
                        # normalized image.
                        normalize_dpi_stdout, normalize_dpi_stderr = normalize_dpi_cmd.communicate(
                            input=response.body)
                        if normalize_dpi_cmd.returncode != 0:
                            # Keep the original buffer on failure.
                            logger.warn(
                                'dpi normalization finished with non-zero return code (%d): %s'
                                % (normalize_dpi_cmd.returncode,
                                   normalize_dpi_stderr))
                        else:
                            result.buffer = normalize_dpi_stdout
                except KeyError:
                    # Response had no Content-Type header.
                    logger.warn('Content-Type KeyError on %s' % url)
    callback(result)