def load_file(context, file_path, callback):
    """Read ``file_path`` from disk and pass a ``LoaderResult`` to ``callback``.

    The file is only served when it lies inside
    ``context.config.FILE_LOADER_ROOT_PATH``; every other case is reported as
    ``LoaderResult.ERROR_NOT_FOUND``.

    :param context: object exposing ``config.FILE_LOADER_ROOT_PATH``
    :param string file_path: path of the file to read
    :param callable callback: invoked exactly once with the ``LoaderResult``
    """
    from os.path import join  # local import: the block did not use join before

    # A trailing separator on both sides turns the prefix test into a
    # directory-boundary test, so "/srv/images_private" is not accepted for a
    # root of "/srv/images".
    root = join(abspath(context.config.FILE_LOADER_ROOT_PATH), '')

    def inside_root(candidate):
        return join(candidate, '').startswith(root)

    result = LoaderResult()

    if not inside_root(file_path):
        result.error = LoaderResult.ERROR_NOT_FOUND
        result.successful = False
        callback(result)
        return

    # keep backwards compatibility, try the actual path first
    # if not found, unquote it and try again
    if not exists(file_path):
        # Unquoting can introduce ".." segments (e.g. "%2e%2e"), so the
        # decoded path is normalised and validated again below.
        file_path = abspath(unquote(file_path))

    if inside_root(file_path) and exists(file_path):
        # Binary mode: the buffer holds raw image bytes, which text mode
        # would decode (Python 3) or newline-translate (Windows).
        with open(file_path, 'rb') as f:
            stats = fstat(f.fileno())

            result.successful = True
            result.buffer = f.read()

            result.metadata.update(
                size=stats.st_size,
                updated_at=datetime.datetime.utcfromtimestamp(stats.st_mtime))
    else:
        result.error = LoaderResult.ERROR_NOT_FOUND
        result.successful = False

    callback(result)
async def dummy_http_load(context, url, normalize_url_func=None):  # pylint: disable=unused-argument
    """Stand-in HTTP loader that ignores its arguments and reports success.

    :returns: a successful ``LoaderResult`` whose buffer is the string ``"http"``
    """
    return LoaderResult(buffer="http", successful=True)
async def load(context, path):
    """Load ``path`` from below ``context.config.FILE_LOADER_ROOT_PATH``.

    :param context: object exposing ``config.FILE_LOADER_ROOT_PATH``
    :param string path: path relative to the loader root
    :returns: a ``LoaderResult``; on success it carries the file bytes plus
        ``size`` and ``updated_at`` metadata, otherwise
        ``LoaderResult.ERROR_NOT_FOUND``
    """
    file_path = join(context.config.FILE_LOADER_ROOT_PATH.rstrip("/"), path.lstrip("/"))
    file_path = abspath(file_path)

    # Compare with a trailing separator on both sides so that a sibling
    # directory sharing the root's name as a prefix is not accepted.
    root = join(abspath(context.config.FILE_LOADER_ROOT_PATH), "")

    result = LoaderResult()

    if not join(file_path, "").startswith(root):
        result.error = LoaderResult.ERROR_NOT_FOUND
        result.successful = False
        return result

    # keep backwards compatibility, try the actual path first
    # if not found, unquote it and try again
    if not exists(file_path):
        # Unquoting may produce ".." segments; normalise and re-validate so
        # an encoded traversal cannot leave the root.
        file_path = abspath(unquote(file_path))

    if join(file_path, "").startswith(root) and exists(file_path):
        with open(file_path, "rb") as source_file:
            stats = fstat(source_file.fileno())

            result.successful = True
            result.buffer = source_file.read()

            result.metadata.update(
                size=stats.st_size,
                updated_at=datetime.utcfromtimestamp(stats.st_mtime),
            )
    else:
        result.error = LoaderResult.ERROR_NOT_FOUND
        result.successful = False

    return result
def load(context, path, callback):
    """Load ``path``, substituting a rendered image for videos and PDFs.

    Videos and PDFs found inside ``context.config.FILE_LOADER_ROOT_PATH`` are
    replaced by the image produced by ``get_video_frame`` / ``get_pdf_page``.
    Everything else is delegated to ``file_loader.load``.

    :param context: object exposing ``config.FILE_LOADER_ROOT_PATH``
    :param string path: requested path (URL-quoted)
    :param callable callback: invoked with the resulting ``LoaderResult``
    """
    file_path = join(context.config.FILE_LOADER_ROOT_PATH.rstrip('/'), unquote(path).lstrip('/'))
    file_path = abspath(file_path)

    # The root is normalised like file_path and compared on a directory
    # boundary. A relative or "../"-containing root would otherwise never
    # match, and a sibling directory with the same prefix would.
    root = join(abspath(context.config.FILE_LOADER_ROOT_PATH), '')
    inside_root_path = join(file_path, '').startswith(root)

    if inside_root_path and is_video(file_path):
        # Extract a frame from the video and load it instead of the original path
        logger.warning('processing video... %s', file_path)
        with get_video_frame(context, file_path) as image_path:
            if image_path:
                callback(read_file(image_path))
                return
    elif inside_root_path and is_pdf(file_path):
        # extract first page of pdf and load it
        logger.warning('processing pdf... %s', file_path)
        with get_pdf_page(context, file_path) as image_path:
            if image_path:
                callback(read_file(image_path))
                return
    else:
        # First attempt to load with file_loader
        file_loader.load(context, path, callback)
        return

    # If we got here, there was a failure
    result = LoaderResult()
    result.error = LoaderResult.ERROR_NOT_FOUND
    result.successful = False
    callback(result)
def load(context, url, callback, normalize_url_func=_normalize_url):
    """Fetch ``url`` through ``ffmpeg`` and pass a ``LoaderResult`` to ``callback``.

    On success the byte count and timing are reported to ``context.metrics``.
    On failure ``result.error`` holds the ffmpeg stderr text or the exception
    message, replaced by ``LoaderResult.ERROR_NOT_FOUND`` when ffmpeg
    reported a 404.

    :param context: object exposing ``metrics``
    :param string url: source URL
    :param callable callback: invoked with the ``LoaderResult``
    :param callable normalize_url_func: applied to ``url`` before fetching
    :returns: whatever ``callback`` returns
    """
    result = LoaderResult()
    start = time.perf_counter()

    try:
        result.buffer = ffmpeg(context, normalize_url_func(url))
    except subprocess.CalledProcessError as err:
        result.successful = False
        # stderr is None when the process was started without capturing it,
        # and is already text when it was run in text mode.
        stderr = err.stderr or b''
        if isinstance(stderr, bytes):
            stderr = stderr.decode('utf-8', errors='replace')
        result.error = stderr.strip()

        logger.warning('ERROR retrieving image %s: %s', url, result.error)
        if result.error.lower().endswith(
                'Server returned 404 not found'.lower()):
            result.error = LoaderResult.ERROR_NOT_FOUND
    except Exception as err:
        result.successful = False
        result.error = str(err)
        logger.warning('ERROR retrieving image %s: %s', url, err)
    else:
        total_time = (time.perf_counter() - start)
        total_bytes = len(result.buffer)

        result.metadata.update({
            'size': total_bytes,
            # 'updated_at': datetime.datetime.utcnow(),
        })

        context.metrics.incr('original_image.status.200')
        context.metrics.incr('original_image.response_bytes', total_bytes)
        context.metrics.timing(f'original_image.fetch.{url}', total_time * 1000)
        # A zero elapsed time (coarse clock) must not abort a successful load.
        if total_time > 0:
            context.metrics.timing('original_image.time_info.bytes_per_second',
                                   total_bytes / total_time)

    return callback(result)
def return_contents(response, url, callback, context, req_start=None):
    """Turn an HTTP ``response`` into a ``LoaderResult`` and pass it to ``callback``.

    Status, timing and size metrics are reported to ``context.metrics``.

    :param response: HTTP response exposing ``code``, ``error``, ``body`` and
        ``time_info``
    :param string url: URL that was fetched (used for metrics and logging)
    :param callable callback: invoked once with the ``LoaderResult``
    :param context: object exposing ``metrics``
    :param req_start: optional ``datetime`` at which the request started
    """
    if req_start:
        finish = datetime.datetime.now()
        res = urlparse(url)
        context.metrics.timing(
            'original_image.fetch.{0}.{1}'.format(response.code, res.netloc),
            (finish - req_start).total_seconds() * 1000
        )

    result = LoaderResult()
    context.metrics.incr('original_image.status.' + str(response.code))

    if response.error:
        result.successful = False
        if response.code == 599:
            # Return a Gateway Timeout status downstream if upstream times out
            result.error = LoaderResult.ERROR_TIMEOUT
        else:
            result.error = LoaderResult.ERROR_NOT_FOUND

        logger.warning(u"ERROR retrieving image %s: %s", url, str(response.error))

    elif response.body is None or len(response.body) == 0:
        result.successful = False
        result.error = LoaderResult.ERROR_UPSTREAM

        logger.warning(u"ERROR retrieving image %s: Empty response.", url)
    else:
        if response.time_info:
            for x in response.time_info:
                context.metrics.timing('original_image.time_info.' + x, response.time_info[x] * 1000)
            # 'total' can be absent or zero; skip the rate metric rather than
            # fail an otherwise successful fetch.
            total = response.time_info.get('total')
            if total:
                context.metrics.timing('original_image.time_info.bytes_per_second',
                                       len(response.body) / total)
        result.buffer = response.body
        context.metrics.incr('original_image.response_bytes', len(response.body))

    callback(result)
def load(context, path, callback):
    """
    Loads a file. In case the requested file is a video, instead of loading
    its contents this method extracts a frame from the video using ffmpeg,
    and returns the image.
    :param Context context: Thumbor's context
    :param string path: Path to load
    :param callable callback: Callback method once done
    """
    file_path = join(context.config.FILE_LOADER_ROOT_PATH.rstrip('/'), unquote(path).lstrip('/'))
    file_path = abspath(file_path)

    # Normalise the root the same way as file_path and compare on a directory
    # boundary, so a relative root still matches and a sibling directory with
    # the same name prefix does not.
    root = join(abspath(context.config.FILE_LOADER_ROOT_PATH), '')
    inside_root_path = join(file_path, '').startswith(root)

    if inside_root_path and exists(file_path):
        if is_video(file_path):
            # Extract a frame from the video and load it instead of the original path
            with get_video_frame(context, file_path) as image_path:
                if image_path:
                    callback(read_file(image_path))
                    return
        else:
            callback(read_file(file_path))
            return

    # If we got here, there was a failure
    result = LoaderResult()
    result.error = LoaderResult.ERROR_NOT_FOUND
    result.successful = False
    callback(result)
def load(context, url, callback):
    """
    Loads image
    :param Context context: Thumbor's context
    :param string url: Path to load
    :param callable callback: Callback method once done
    """
    # URLs selected by _use_http_loader go straight to the HTTP loader.
    if _use_http_loader(context, url):
        http_loader.load_sync(
            context, url, callback,
            normalize_url_func=http_loader._normalize_url)
        return

    bucket, key = _get_bucket_and_key(context, url)

    # Buckets rejected by _validate_bucket are reported as not found.
    if not _validate_bucket(context, bucket):
        callback(LoaderResult(successful=False,
                              error=LoaderResult.ERROR_NOT_FOUND))
        return

    settings = context.config
    bucket_loader = Bucket(
        bucket,
        settings.get('TC_AWS_REGION'),
        settings.get('TC_AWS_ENDPOINT'),
    )
    handle_data = HandleDataFunc.as_func(
        key,
        callback=callback,
        bucket_loader=bucket_loader,
        max_retry=settings.get('TC_AWS_MAX_RETRY'),
    )

    bucket_loader.get(key, callback=handle_data)
def dispatch(self, file_key):
    """
    Callback method for getObject from s3

    Returns the object's body on success. On failure returns a
    ``LoaderResult`` flagged unsuccessful: not-found when S3 answered 404,
    upstream error when the response is empty or retries are exhausted.
    """
    failed = not file_key or 'Error' in file_key or 'Body' not in file_key
    if not failed:
        return file_key['Body'].read()

    logger.error(
        "ERROR retrieving image from S3 {0}: {1}".
        format(self.key, str(file_key)))

    # If we got here, there was a failure.
    # We will return 404 if S3 returned a 404, otherwise 502.
    failure = LoaderResult()
    failure.successful = False

    if not file_key:
        failure.error = LoaderResult.ERROR_UPSTREAM
        return failure

    status_code = file_key.get('ResponseMetadata', {}).get('HTTPStatusCode')
    if status_code == 404:
        failure.error = LoaderResult.ERROR_NOT_FOUND
        return failure

    if self.retries_counter < self.max_retry:
        self.__increment_retry_counter()
        # NOTE(review): this branch returns None and the value produced by
        # the retried get() is not propagated from here - confirm that
        # callers expect this.
        self.bucket_loader.get(self.key, callback=self.dispatch)
    else:
        failure.error = LoaderResult.ERROR_UPSTREAM
        return failure
def _parse_time_status(context, url, callback, process, status):
    """Continue with ``_parse_time`` when the process exited with status 0.

    A non-zero ``status`` is reported to ``callback`` as an unsuccessful
    ``LoaderResult``. Otherwise the process's stdout is read until it closes
    and handed to ``_parse_time``.
    """
    if status == 0:
        on_output = partial(_parse_time, context, url, callback)
        process.stdout.read_until_close(on_output)
        return

    failure = LoaderResult()
    failure.successful = False
    callback(failure)
def callback_wrapper(result):
    """Wrap a raw buffer (or ``None``) in a ``LoaderResult`` for ``callback``.

    ``None`` becomes an unsuccessful not-found result; any other value
    becomes the buffer of a successful result.
    """
    wrapped = LoaderResult()

    if result is None:
        wrapped.error = LoaderResult.ERROR_NOT_FOUND
        wrapped.successful = False
    else:
        wrapped.successful = True
        wrapped.buffer = result

    callback(wrapped)
def read_file(file_path):
    """
    Read the given file path and its metadata. Returns a LoaderResult.

    The ``size`` and ``updated_at`` metadata come from ``fstat`` on the open
    file.
    """
    # Binary mode: the content is raw image bytes, which text mode would
    # decode (Python 3) or newline-translate (Windows).
    with open(file_path, 'rb') as f:
        stats = fstat(f.fileno())
        return LoaderResult(
            buffer=f.read(),
            successful=True,
            metadata=dict(
                size=stats.st_size,
                updated_at=datetime.utcfromtimestamp(stats.st_mtime)))
def _process_output(callback, destination_name, status):
    """Read the produced file into a ``LoaderResult`` and pass it to ``callback``.

    The result is successful only when ``status`` is 0, in which case its
    buffer holds the bytes of ``destination_name``. The file is removed in
    every case so that failed runs do not leave it behind.

    :param callable callback: invoked with the ``LoaderResult``
    :param string destination_name: path of the file to read and delete
    :param int status: exit status of the producing process
    """
    result = LoaderResult()
    result.successful = status == 0

    try:
        if result.successful:
            with open(destination_name, 'rb') as f:
                result.buffer = f.read()
    finally:
        # Best-effort cleanup: the file may never have been created when the
        # producing process failed, so a failed removal is not an error.
        try:
            os.remove(destination_name)
        except OSError:
            pass

    callback(result)
def callback_wrapper(result):
    """Forward a successful result; otherwise fall back to the HTTP loader.

    The fallback is only attempted when ``path`` contains ``'http'``. Any
    other failure is reported to ``callback`` as not found.
    """
    if result.successful:
        callback(result)
        return

    # If file_loader failed try http_loader
    if 'http' in path:
        http_loader.load(context, path, callback)
        return

    not_found = LoaderResult()
    not_found.error = LoaderResult.ERROR_NOT_FOUND
    not_found.successful = False
    callback(not_found)
def return_contents(response, url, context, req_start=None):
    """Build a ``LoaderResult`` from an HTTP ``response`` and record metrics.

    :param response: HTTP response exposing ``code``, ``error``, ``body``,
        ``headers`` and ``time_info``
    :param string url: URL that was fetched (used for metrics and logging)
    :param context: object exposing ``metrics``
    :param req_start: optional ``datetime`` at which the request started
    :returns: the ``LoaderResult``; on success its buffer is the response
        body and its metadata is updated with the response headers
    """
    res = urlparse(url)
    # Dots are replaced in the host name before it is used in metric names.
    host = res.netloc.replace(".", "_")

    if req_start:
        finish = datetime.datetime.now()
        context.metrics.timing(
            "original_image.fetch.{0}.{1}".format(response.code, host),
            (finish - req_start).total_seconds() * 1000,
        )
        context.metrics.incr("original_image.fetch.{0}.{1}".format(
            response.code, host))

    result = LoaderResult()
    context.metrics.incr("original_image.status." + str(response.code))
    context.metrics.incr("original_image.status.{0}.{1}".format(
        response.code, host))

    if response.error:
        result.successful = False
        if response.code == 599:
            # Return a Gateway Timeout status downstream if upstream times out
            result.error = LoaderResult.ERROR_TIMEOUT
        else:
            result.error = LoaderResult.ERROR_NOT_FOUND

        logger.warning(u"ERROR retrieving image %s: %s", url, str(response.error))

    elif response.body is None or len(response.body) == 0:
        result.successful = False
        result.error = LoaderResult.ERROR_UPSTREAM

        logger.warning(u"ERROR retrieving image %s: Empty response.", url)
    else:
        if response.time_info:
            for metric_name in response.time_info:
                context.metrics.timing(
                    "original_image.time_info." + metric_name,
                    response.time_info[metric_name] * 1000,
                )
            # "total" can be absent or zero; skip the rate metric rather than
            # fail an otherwise successful fetch.
            total = response.time_info.get("total")
            if total:
                context.metrics.timing(
                    "original_image.time_info.bytes_per_second",
                    len(response.body) // total,
                )

        result.buffer = response.body
        result.metadata.update(response.headers)
        context.metrics.incr("original_image.response_bytes", len(response.body))

    return result
def load(self, path, callback):
    """Load a GridFS file whose id is the first segment of ``path``.

    :param self: object exposing ``config.MONGO_ORIGIN_SERVER_COLLECTION``;
        it is also passed to ``__conn__`` to obtain the database
    :param string path: ``"<object id>/..."``; only the first segment is used
    :param callable callback: invoked with the ``LoaderResult``
    """
    image_id = path.split("/")[0]
    result = LoaderResult()

    # ObjectId() raises on malformed ids; a bad id in the request is a
    # missing image, not a server error.
    if not ObjectId.is_valid(image_id):
        result.error = LoaderResult.ERROR_NOT_FOUND
        result.successful = False
        callback(result)
        return

    db = __conn__(self)
    storage = self.config.MONGO_ORIGIN_SERVER_COLLECTION
    images = gridfs.GridFS(db, collection=storage)
    object_id = ObjectId(image_id)

    if images.exists(object_id):
        result.successful = True
        result.buffer = images.get(object_id).read()
    else:
        result.error = LoaderResult.ERROR_NOT_FOUND
        result.successful = False

    callback(result)
def handle_data(file_key):
    """Pass the S3 response on to ``callback``.

    A usable response (truthy, no ``'Error'`` key, has a ``'Body'``) yields
    the body's bytes. Otherwise ``callback`` receives an unsuccessful
    ``LoaderResult``: not-found when S3 answered 404, upstream error in every
    other case.
    """
    if not file_key or 'Error' in file_key or 'Body' not in file_key:
        # logger.warn is a deprecated alias; arguments are passed lazily so
        # the message is only built when the record is emitted.
        logger.warning("ERROR retrieving image from S3 %s: %s", key, str(file_key))

        # If we got here, there was a failure. We will return 404 if S3 returned a 404, otherwise 502.
        result = LoaderResult()
        result.successful = False

        if file_key and file_key.get('ResponseMetadata', {}).get('HTTPStatusCode') == 404:
            result.error = LoaderResult.ERROR_NOT_FOUND
        else:
            result.error = LoaderResult.ERROR_UPSTREAM

        callback(result)
    else:
        callback(file_key['Body'].read())
def load(context, path, callback):
    """Fetch the blob at ``path`` from the configured storage bucket.

    The bucket is looked up in the module-level ``buckets`` mapping by
    project id and bucket id, and stored there after the first fetch. A
    missing blob is reported to ``callback`` as a not-found ``LoaderResult``;
    a found blob is passed on as its downloaded content.
    """
    settings = context.config
    bucket_id = settings.get("CLOUD_STORAGE_BUCKET_ID")
    project_id = settings.get("CLOUD_STORAGE_PROJECT_ID")

    bucket = buckets[project_id].get(bucket_id, None)
    if bucket is None:
        bucket = storage.Client(project_id).get_bucket(bucket_id)
        buckets[project_id][bucket_id] = bucket

    blob = bucket.get_blob(path)
    if not blob:
        missing = LoaderResult()
        missing.successful = False
        missing.error = LoaderResult.ERROR_NOT_FOUND
        callback(missing)
        return

    callback(blob.download_as_string())
def load(context, url, callback, normalize_url_func=_normalize_url):
    """Load ``url``, substituting an extracted frame when it is a video.

    Videos are passed to ``get_video_frame``; if no frame path is produced
    the load is reported as not found. Any other URL is delegated to
    ``load_sync``.

    :param context: passed through to the helpers
    :param string url: source URL
    :param callable callback: invoked with the ``LoaderResult``
    :param callable normalize_url_func: passed through to the helpers
    """
    # logger.warn is a deprecated alias; the URL is passed as a lazy argument.
    logger.warning('>>> %s URL | ', url)

    if is_video(url):
        # Extract a frame from the video and load it instead of the original path
        with get_video_frame(context, url, normalize_url_func) as image_path:
            if image_path:
                callback(read_file(image_path))
                return

        # If we got here, there was a failure
        result = LoaderResult()
        result.error = LoaderResult.ERROR_NOT_FOUND
        result.successful = False
        callback(result)
        return

    load_sync(context, url, callback, normalize_url_func)
def load(context, path, callback):
    """Load the fixture image ``fixtures/images/image.jpg``.

    ``context`` and ``path`` are ignored; the file is located relative to
    this module. ``callback`` receives a ``LoaderResult`` with the file bytes
    and ``size`` / ``updated_at`` metadata, or a not-found result when the
    fixture is missing.
    """
    file_path = abspath(join(dirname(__file__), "fixtures/images/image.jpg"))
    result = LoaderResult()

    if exists(file_path):
        # Binary mode: a JPEG is not text, and text mode would decode it
        # (Python 3) or newline-translate it (Windows).
        with open(file_path, 'rb') as f:
            stats = fstat(f.fileno())

            result.successful = True
            result.buffer = f.read()

            result.metadata.update(
                size=stats.st_size,
                updated_at=datetime.utcfromtimestamp(stats.st_mtime)
            )
    else:
        result.error = LoaderResult.ERROR_NOT_FOUND
        result.successful = False

    callback(result)
async def load(context, path):
    """Try the file loader first, then the HTTP loader.

    :returns: the file loader's result when it succeeded, a bad-request
        ``LoaderResult`` (reason "Unallowed domain") when
        ``http_loader.validate`` rejects ``path``, otherwise the HTTP
        loader's result
    """
    # First attempt to load with file_loader
    file_result = await file_loader.load(context, path)
    if file_result.successful:
        return file_result

    # If file_loader failed try http_loader
    if http_loader.validate(context, path):
        return await http_loader.load(context, path)

    rejected = LoaderResult()
    rejected.successful = False
    rejected.error = LoaderResult.ERROR_BAD_REQUEST
    rejected.extras["reason"] = "Unallowed domain"
    rejected.extras["source"] = path
    return rejected
def load(context, path, callback):
    """Serve ``path`` from the first ``TC_MULTIDIR_PATHS`` directory holding it.

    Directories are tried in configuration order. A candidate is only used
    when it resolves to a location inside the directory being tried.

    :param context: object exposing ``config.TC_MULTIDIR_PATHS``
    :param string path: requested path, relative to each directory
    :param callable callback: invoked once with the ``LoaderResult``
    """
    result = LoaderResult()

    for next_dir in context.config.TC_MULTIDIR_PATHS:
        # Trailing separators make the prefix test a directory-boundary test.
        root = join(abspath(next_dir), '')
        file_path = abspath(join(next_dir.rstrip('/'), path.lstrip('/')))

        if join(file_path, '').startswith(root):
            # keep backwards compatibility, try the actual path first
            # if not found, unquote it and try again
            if not exists(file_path):
                # Unquoting may produce ".." segments; normalise and
                # re-validate so an encoded traversal cannot leave the root.
                file_path = abspath(unquote(file_path))

            if join(file_path, '').startswith(root) and exists(file_path):
                with open(file_path, 'rb') as f:
                    stats = fstat(f.fileno())

                    result.successful = True
                    result.buffer = f.read()

                    result.metadata.update(
                        size=stats.st_size,
                        updated_at=datetime.utcfromtimestamp(stats.st_mtime))
                callback(result)
                return

        logger.debug('TC_MULTIDIR: File {0} not found in {1}'.format(path, next_dir))
        # else loop and try next directory

    if not context.config.TC_MULTIDIR_PATHS:
        logger.error('TC_MULTIDIR: No paths set in configuration TC_MULTIDIR_PATHS')

    # no file found
    result.error = LoaderResult.ERROR_NOT_FOUND
    result.successful = False
    callback(result)
def load(context, url, callback):
    """Dispatch ``url`` to the S3, HTTP or file loader.

    The URL is first passed through ``quote_url``. URLs matching ``S3_RE``
    go to ``S3Loader``, URLs matching ``HTTP_RE`` go to ``HttpLoader``, and
    everything else goes to ``FileLoader``.

    :param context: passed through to the selected loader
    :param string url: requested URL
    :param callable callback: passed through to the selected loader
    """
    url = quote_url(url)
    match = S3_RE.match(url)

    # NOTE(review): this wrapper is only referenced by the commented-out
    # FileLoader call below, so it is currently unused.
    def callback_wrapper(result):
        if result.successful:
            callback(result)
        else:
            logger.info('s3 {0}'.format(
                os.path.join(match.group('bucket').rstrip('/'), match.group('path').lstrip('/')))
            )

            # If not on efs, try s3
            S3Loader.load(context, os.path.join(match.group('bucket').rstrip('/'), match.group('path').lstrip('/')), callback)

    # If melody s3 file, first try to load from efs
    if match:
        logger.info('BOTO {0}'.format(match.group('path')))

        # TEMP try s3 direct
        S3Loader.load(context, os.path.join(match.group('bucket').rstrip('/'), match.group('path').lstrip('/')), callback)

        # FileLoader.load(context, match.group('path'), callback_wrapper)
    # else get from the internet
    elif HTTP_RE.match(url):
        logger.info('WEB {0}'.format(url))
        HttpLoader.load(context, url, callback)
    else:
        logger.info('FILE {0}'.format(url))
        # NOTE(review): this not-found result is built but never delivered
        # while the callback(result) line below stays commented out.
        result = LoaderResult()
        result.successful = False
        result.error = LoaderResult.ERROR_NOT_FOUND
        # callback(result)

        # TEMP enable file loader
        FileLoader.load(context, url, callback)
async def load(context, url):
    """
    Loads image
    :param Context context: Thumbor's context
    :param string url: Path to load
    """
    # URLs selected by _use_http_loader go straight to the HTTP loader.
    if _use_http_loader(context, url):
        return await http_loader.load(
            context, url, normalize_url_func=http_loader._normalize_url)

    bucket, key = _get_bucket_and_key(context, url)

    # Buckets rejected by _validate_bucket are reported as not found.
    if not _validate_bucket(context, bucket):
        return LoaderResult(successful=False,
                            error=LoaderResult.ERROR_NOT_FOUND)

    settings = context.config
    loader = Bucket(
        bucket,
        settings.get('TC_AWS_REGION'),
        settings.get('TC_AWS_ENDPOINT'),
    )
    handle_data = HandleDataFunc.as_func(
        key,
        bucket_loader=loader,
        max_retry=settings.get('TC_AWS_MAX_RETRY'),
    )

    file_key = await loader.get(key)
    return handle_data(file_key)
def get_not_found_result():
    # type: () -> LoaderResult
    """Return a fresh, unsuccessful ``LoaderResult`` flagged as not found."""
    not_found = LoaderResult()
    not_found.successful = False
    not_found.error = LoaderResult.ERROR_NOT_FOUND
    return not_found
def return_contents(response, url, callback, context, req_start=None):
    """Turn an HTTP ``response`` into a ``LoaderResult`` and pass it to ``callback``.

    Status, timing and size metrics are reported to ``context.metrics``.
    When ``NORMALIZE_TO_72DPI`` and ``CONVERT_PATH`` are configured and the
    response's Content-Type contains "jpeg", the body is piped through the
    configured ``convert`` binary and the result buffer is replaced by its
    output. If that command exits non-zero the original body is kept.

    :param response: HTTP response exposing ``code``, ``error``, ``body``,
        ``headers`` and ``time_info``
    :param string url: URL that was fetched (used for metrics and logging)
    :param callable callback: invoked once with the ``LoaderResult``
    :param context: object exposing ``metrics`` and ``config``
    :param req_start: optional ``datetime`` at which the request started
    """
    if req_start:
        finish = datetime.datetime.now()
        res = urlparse(url)
        context.metrics.timing(
            'original_image.fetch.{0}.{1}'.format(response.code, res.netloc),
            (finish - req_start).total_seconds() * 1000,
        )

    result = LoaderResult()
    context.metrics.incr('original_image.status.' + str(response.code))

    if response.error:
        result.successful = False
        if response.code == 599:
            # Return a Gateway Timeout status downstream if upstream times out
            result.error = LoaderResult.ERROR_TIMEOUT
        else:
            result.error = LoaderResult.ERROR_NOT_FOUND

        logger.warning(u"ERROR retrieving image %s: %s", url, str(response.error))

    elif response.body is None or len(response.body) == 0:
        result.successful = False
        result.error = LoaderResult.ERROR_UPSTREAM

        logger.warning(u"ERROR retrieving image %s: Empty response.", url)
    else:
        if response.time_info:
            for x in response.time_info:
                context.metrics.timing('original_image.time_info.' + x,
                                       response.time_info[x] * 1000)
            # 'total' can be absent or zero; skip the rate metric rather than
            # fail an otherwise successful fetch.
            total = response.time_info.get('total')
            if total:
                context.metrics.timing(
                    'original_image.time_info.bytes_per_second',
                    len(response.body) / total)

        result.buffer = response.body
        result.metadata.update(response.headers)
        context.metrics.incr('original_image.response_bytes',
                             len(response.body))

        if context.config.NORMALIZE_TO_72DPI and context.config.CONVERT_PATH:
            if not exists(context.config.CONVERT_PATH):
                logger.warning(
                    'imagemagick/convert enabled but binary CONVERT_PATH does not exist'
                )
            else:
                # Only the header lookup is guarded, so a KeyError raised
                # elsewhere is not mistaken for a missing Content-Type.
                try:
                    content_type = result.metadata['Content-Type']
                except KeyError:
                    logger.warning('Content-Type KeyError on %s', url)
                else:
                    if "jpeg" in content_type:
                        # NOTE(review): the command runs through the shell
                        # with CONVERT_PATH interpolated; this is only safe
                        # while that setting is trusted configuration.
                        command = [
                            context.config.CONVERT_PATH +
                            ' - -density 72,72 -strip - ',
                        ]

                        normalize_dpi_cmd = Popen(command,
                                                  stdin=PIPE,
                                                  stdout=PIPE,
                                                  stderr=PIPE,
                                                  close_fds=True,
                                                  shell=True)
                        normalize_dpi_stdout, normalize_dpi_stderr = normalize_dpi_cmd.communicate(
                            input=response.body)

                        if normalize_dpi_cmd.returncode != 0:
                            logger.warning(
                                'dpi normalization finished with non-zero return code (%d): %s',
                                normalize_dpi_cmd.returncode,
                                normalize_dpi_stderr)
                        else:
                            result.buffer = normalize_dpi_stdout

    callback(result)
async def load_override(*_):
    """Loader stub: ignore all arguments and return a failure with error 409."""
    failure = LoaderResult()
    failure.error = 409
    failure.successful = False
    return failure
def load_override(context, path, callback):
    """Loader stub: always pass ``callback`` a failure with error 409.

    ``context`` and ``path`` are ignored.
    """
    failure = LoaderResult()
    failure.error = 409
    failure.successful = False
    callback(failure)
from unittest import TestCase from tests.base import TestCase as AsyncTestCase from preggy import expect import thumbor from thumbor.context import Context from thumbor.config import Config from thumbor.loaders import LoaderResult import thumbor.loaders.file_loader_http_fallback as loader STORAGE_PATH = abspath(join(dirname(__file__), '../fixtures/images/')) result = LoaderResult() result.successful = True def dummy_file_load(context, url, callback, normalize_url_func={}): result.buffer = 'file' callback(result) def dummy_http_load(context, url, callback, normalize_url_func={}): result.buffer = 'http' callback(result) class FileLoaderHttpFallbackFileTestCase(TestCase): def setUp(self):
def load(context, path):
    """Load ``path`` from the primary picture root, then from the fallback root.

    ``PIC_LOADER_ROOT_PATH`` is tried before ``PIC_LOADER_FALLBACK_PATH``.
    The first root under which the file exists decides the outcome: files of
    ``PIC_LOADER_MAX_SIZE`` bytes or more (default 16777216) yield
    ``LoaderResult.COULD_NOT_LOAD_IMAGE``, smaller files are returned with
    ``size`` and ``updated_at`` metadata. A candidate that resolves outside
    its own root is never opened.

    :param context: object exposing ``config``
    :param string path: requested path, relative to each root
    :returns: the ``LoaderResult``
    """
    config = context.config

    # The limit used to be passed to os.path.exists(), which treats an int
    # as a file descriptor; use the configured value directly and fall back
    # to the default when it is unset or zero.
    max_size = getattr(config, 'PIC_LOADER_MAX_SIZE', None) or 16777216

    roots = (config.PIC_LOADER_ROOT_PATH, config.PIC_LOADER_FALLBACK_PATH)
    result = LoaderResult()

    for root in roots:
        # Trailing separators make the prefix test a directory-boundary test.
        root_dir = join(abspath(root), '')
        candidate = abspath(join(root.rstrip('/'), path.lstrip('/')))

        # keep backwards compatibility, try the actual path first
        # if not found, unquote it and try again
        if not exists(candidate):
            # Unquoting may produce ".." segments, hence the abspath() before
            # the containment check below.
            candidate = abspath(unquote(candidate))

        if not join(candidate, '').startswith(root_dir) or not exists(candidate):
            continue

        # Binary mode: the buffer holds raw image bytes.
        with open(candidate, 'rb') as f:
            stats = fstat(f.fileno())
            if stats.st_size >= max_size:
                result.error = LoaderResult.COULD_NOT_LOAD_IMAGE
                result.successful = False
            else:
                result.successful = True
                result.buffer = f.read()
                result.metadata.update(
                    size=stats.st_size,
                    updated_at=datetime.utcfromtimestamp(stats.st_mtime))
        return result

    result.error = LoaderResult.ERROR_NOT_FOUND
    result.successful = False
    return result