Exemple #1
0
def load_file(context, file_path, callback):
    """Read ``file_path`` from disk and hand a ``LoaderResult`` to ``callback``.

    The path must start with the absolute form of
    ``context.config.FILE_LOADER_ROOT_PATH``; otherwise a not-found result is
    reported. If the literal path does not exist, its percent-decoded form is
    tried as a fallback.

    :param context: object exposing ``config.FILE_LOADER_ROOT_PATH``
    :param file_path: path of the file to read
    :param callback: callable invoked exactly once with the ``LoaderResult``
    """
    root_path = abspath(context.config.FILE_LOADER_ROOT_PATH)
    inside_root_path = file_path.startswith(root_path)

    result = LoaderResult()

    if not inside_root_path:
        result.error = LoaderResult.ERROR_NOT_FOUND
        result.successful = False
        callback(result)
        return

    # keep backwards compatibility, try the actual path first
    # if not found, unquote it and try again
    if not exists(file_path):
        # Percent-decoding can introduce ".." segments (e.g. "%2e%2e"), so the
        # decoded path is normalised and checked against the root again.
        file_path = abspath(unquote(file_path))
        inside_root_path = file_path.startswith(root_path)

    if inside_root_path and exists(file_path):
        # Binary mode: the payload is image data, not text, and must not be
        # decoded or have its line endings translated.
        with open(file_path, 'rb') as f:
            stats = fstat(f.fileno())

            result.successful = True
            result.buffer = f.read()

            result.metadata.update(
                size=stats.st_size,
                updated_at=datetime.datetime.utcfromtimestamp(stats.st_mtime))
    else:
        result.error = LoaderResult.ERROR_NOT_FOUND
        result.successful = False

    callback(result)
Exemple #2
0
async def dummy_http_load(context, url, normalize_url_func=None):  # pylint: disable=unused-argument
    """Stand-in HTTP loader that ignores its arguments.

    Always returns a successful ``LoaderResult`` whose buffer is the literal
    string ``"http"``.
    """
    return LoaderResult(buffer="http", successful=True)
Exemple #3
0
async def load(context, path):
    """Load ``path`` from below ``FILE_LOADER_ROOT_PATH`` as a ``LoaderResult``.

    The requested path is joined to the configured root and normalised. A
    path that resolves outside the root, or that does not exist, yields an
    unsuccessful result with ``LoaderResult.ERROR_NOT_FOUND``. On success the
    result carries the raw bytes plus ``size`` and ``updated_at`` metadata.

    :param context: object exposing ``config.FILE_LOADER_ROOT_PATH``
    :param path: requested path, relative to the root
    :return: the populated ``LoaderResult``
    """
    # NOTE(review): this is a plain string-prefix test, so a sibling directory
    # sharing the root's prefix (e.g. "<root>-other") also passes - confirm
    # whether that needs tightening.
    root_path = abspath(context.config.FILE_LOADER_ROOT_PATH)
    file_path = abspath(
        join(context.config.FILE_LOADER_ROOT_PATH.rstrip("/"), path.lstrip("/"))
    )

    result = LoaderResult()

    if not file_path.startswith(root_path):
        result.error = LoaderResult.ERROR_NOT_FOUND
        result.successful = False
        return result

    # keep backwards compatibility, try the actual path first
    # if not found, unquote it and try again
    if not exists(file_path):
        # Percent-decoding can introduce ".." segments (e.g. "%2e%2e"), so the
        # decoded path is normalised and checked against the root again.
        file_path = abspath(unquote(file_path))
        if not file_path.startswith(root_path):
            result.error = LoaderResult.ERROR_NOT_FOUND
            result.successful = False
            return result

    if exists(file_path):
        with open(file_path, "rb") as source_file:
            stats = fstat(source_file.fileno())

            result.successful = True
            result.buffer = source_file.read()

            result.metadata.update(
                size=stats.st_size,
                updated_at=datetime.utcfromtimestamp(stats.st_mtime),
            )
    else:
        result.error = LoaderResult.ERROR_NOT_FOUND
        result.successful = False

    return result
Exemple #4
0
def load(context, path, callback):
    """Load ``path``, substituting an extracted image for videos and PDFs.

    When the resolved path starts with ``FILE_LOADER_ROOT_PATH`` and is
    recognised as a video or a PDF, an image is extracted from it and read
    instead of the file itself. Every other request is delegated to
    ``file_loader.load``. If extraction yields no image, a not-found
    ``LoaderResult`` is passed to ``callback``.

    :param context: object exposing ``config.FILE_LOADER_ROOT_PATH``
    :param path: requested path (percent-decoded before resolving)
    :param callback: callable receiving the ``LoaderResult``
    """
    root = context.config.FILE_LOADER_ROOT_PATH
    file_path = abspath(join(root.rstrip('/'), unquote(path).lstrip('/')))

    if not file_path.startswith(root):
        # First attempt to load with file_loader
        file_loader.load(context, path, callback)
        return

    if is_video(file_path):
        # Extract a frame from the video and load it instead of the original path
        log_message, extract = 'processing video... %s', get_video_frame
    elif is_pdf(file_path):
        # extract first page of pdf and load it
        log_message, extract = 'processing pdf... %s', get_pdf_page
    else:
        # First attempt to load with file_loader
        file_loader.load(context, path, callback)
        return

    logger.warning(log_message, file_path)
    with extract(context, file_path) as image_path:
        if image_path:
            callback(read_file(image_path))
            return

    # If we got here, there was a failure
    failure = LoaderResult()
    failure.successful = False
    failure.error = LoaderResult.ERROR_NOT_FOUND
    callback(failure)
Exemple #5
0
def load(context, url, callback, normalize_url_func=_normalize_url):
    """Fetch ``url`` through ``ffmpeg`` and pass a ``LoaderResult`` to ``callback``.

    On success the result buffer holds the ``ffmpeg`` output and size/timing
    metrics are emitted on ``context.metrics``. On failure the result is
    marked unsuccessful and ``error`` is set to the process's stderr text,
    to ``LoaderResult.ERROR_NOT_FOUND`` when stderr ends with the 404 marker,
    or to the exception text for any other error.

    :param context: object exposing ``metrics``; also forwarded to ``ffmpeg``
    :param url: source URL
    :param callback: callable receiving the ``LoaderResult``
    :param normalize_url_func: callable applied to ``url`` before fetching
    :return: whatever ``callback`` returns
    """
    result = LoaderResult()
    start = time.perf_counter()

    try:
        result.buffer = ffmpeg(context, normalize_url_func(url))
    except subprocess.CalledProcessError as err:
        result.successful = False
        # stderr is None when the process ran without capturing it; decode
        # leniently so a stray byte cannot raise from inside this handler.
        stderr = err.stderr or b''
        if isinstance(stderr, bytes):
            stderr = stderr.decode('utf-8', errors='replace')
        result.error = stderr.strip()

        logger.warning('ERROR retrieving image %s: %s', url, result.error)
        if result.error.lower().endswith(
                'Server returned 404 not found'.lower()):
            result.error = LoaderResult.ERROR_NOT_FOUND
    except Exception as err:
        # Best-effort boundary: any failure becomes an unsuccessful result.
        result.successful = False
        result.error = str(err)
        logger.warning('ERROR retrieving image %s: %s', url, err)
    else:
        total_time = (time.perf_counter() - start)
        total_bytes = len(result.buffer)

        result.metadata.update({
            'size': total_bytes,
            # 'updated_at': datetime.datetime.utcnow(),
        })

        context.metrics.incr('original_image.status.200')
        context.metrics.incr('original_image.response_bytes', total_bytes)
        context.metrics.timing(f'original_image.fetch.{url}',
                               total_time * 1000)
        # Skip the throughput metric instead of dividing by zero when the
        # clock reports no elapsed time.
        if total_time > 0:
            context.metrics.timing(
                'original_image.time_info.bytes_per_second',
                total_bytes / total_time)

    return callback(result)
Exemple #6
0
def return_contents(response, url, callback, context, req_start=None):
    """Turn an HTTP ``response`` into a ``LoaderResult`` and pass it to ``callback``.

    Emits fetch-time, status and size metrics on ``context.metrics``.
    A response with an error becomes ``ERROR_TIMEOUT`` (code 599) or
    ``ERROR_NOT_FOUND``; an empty body becomes ``ERROR_UPSTREAM``; otherwise
    the body is stored as the result buffer.

    :param response: object exposing ``code``, ``error``, ``body``, ``time_info``
    :param url: URL that was fetched (used for logging and metric names)
    :param callback: callable receiving the ``LoaderResult``
    :param context: object exposing ``metrics``
    :param req_start: optional ``datetime`` at which the request started
    """
    if req_start:
        finish = datetime.datetime.now()
        res = urlparse(url)
        context.metrics.timing(
            'original_image.fetch.{0}.{1}'.format(response.code, res.netloc),
            (finish - req_start).total_seconds() * 1000
        )

    result = LoaderResult()
    context.metrics.incr('original_image.status.' + str(response.code))
    if response.error:
        result.successful = False
        if response.code == 599:
            # Return a Gateway Timeout status downstream if upstream times out
            result.error = LoaderResult.ERROR_TIMEOUT
        else:
            result.error = LoaderResult.ERROR_NOT_FOUND

        # ``warning`` replaces the deprecated ``warn`` alias; lazy %-args keep
        # the emitted message identical.
        logger.warning(u"ERROR retrieving image %s: %s", url, str(response.error))

    elif response.body is None or len(response.body) == 0:
        result.successful = False
        result.error = LoaderResult.ERROR_UPSTREAM

        logger.warning(u"ERROR retrieving image %s: Empty response.", url)
    else:
        if response.time_info:
            for x in response.time_info:
                context.metrics.timing('original_image.time_info.' + x, response.time_info[x] * 1000)
            context.metrics.timing('original_image.time_info.bytes_per_second', len(response.body) / response.time_info['total'])
        result.buffer = response.body
        context.metrics.incr('original_image.response_bytes', len(response.body))

    callback(result)
Exemple #7
0
def load(context, path, callback):
    """
    Loads a file. When the requested file is a video, a frame is extracted
    from it and that image is returned instead of the file's own contents.
    A path outside the configured root, a missing file, or a failed frame
    extraction results in a not-found LoaderResult.
    :param Context context: Thumbor's context
    :param string path: Path to load
    :param callable callback: Callback method once done
    """
    root = context.config.FILE_LOADER_ROOT_PATH
    file_path = abspath(join(root.rstrip('/'), unquote(path).lstrip('/')))

    if file_path.startswith(root) and exists(file_path):
        if not is_video(file_path):
            callback(read_file(file_path))
            return

        # Extract a frame from the video and load it instead of the original path
        with get_video_frame(context, file_path) as image_path:
            if image_path:
                callback(read_file(image_path))
                return

    # If we got here, there was a failure
    failure = LoaderResult()
    failure.successful = False
    failure.error = LoaderResult.ERROR_NOT_FOUND
    callback(failure)
Exemple #8
0
def load(context, url, callback):
    """
    Loads an image, either through the HTTP loader or from a bucket.
    A bucket that fails validation yields a not-found LoaderResult.
    :param Context context: Thumbor's context
    :param string url: Path to load
    :param callable callback: Callback method once done
    """
    if _use_http_loader(context, url):
        http_loader.load_sync(context, url, callback, normalize_url_func=http_loader._normalize_url)
        return

    bucket, key = _get_bucket_and_key(context, url)

    if _validate_bucket(context, bucket):
        config = context.config
        bucket_loader = Bucket(
            bucket,
            config.get('TC_AWS_REGION'),
            config.get('TC_AWS_ENDPOINT'),
        )
        handle_data = HandleDataFunc.as_func(
            key,
            callback=callback,
            bucket_loader=bucket_loader,
            max_retry=config.get('TC_AWS_MAX_RETRY'),
        )
        bucket_loader.get(key, callback=handle_data)
        return

    callback(LoaderResult(successful=False,
                          error=LoaderResult.ERROR_NOT_FOUND))
Exemple #9
0
    def dispatch(self, file_key):
        """ Callback method for getObject from s3

        Returns the object's body bytes when ``file_key`` is a usable
        response. Otherwise logs the failure and returns an unsuccessful
        LoaderResult (not found for a 404, upstream error when the response
        is empty or retries are exhausted), or schedules another fetch.
        """
        if file_key and 'Error' not in file_key and 'Body' in file_key:
            return file_key['Body'].read()

        logger.error(
            "ERROR retrieving image from S3 {0}: {1}".format(
                self.key, str(file_key)))

        # If we got here, there was a failure.
        # We will return 404 if S3 returned a 404, otherwise 502.
        result = LoaderResult()
        result.successful = False

        if not file_key:
            result.error = LoaderResult.ERROR_UPSTREAM
            return result

        status_code = file_key.get('ResponseMetadata', {}).get('HTTPStatusCode')
        if status_code == 404:
            result.error = LoaderResult.ERROR_NOT_FOUND
            return result

        if self.retries_counter >= self.max_retry:
            result.error = LoaderResult.ERROR_UPSTREAM
            return result

        # NOTE(review): the retry branch falls off the end and returns None;
        # the outcome of the re-issued get() is not returned - confirm callers
        # expect that.
        self.__increment_retry_counter()
        self.bucket_loader.get(self.key, callback=self.dispatch)
def _parse_time_status(context, url, callback, process, status):
    """Continue with output parsing on exit status 0, otherwise report failure.

    With a zero ``status`` the process's stdout is read until close and
    handed to ``_parse_time``. Any other status passes an unsuccessful
    ``LoaderResult`` (with no error code set) to ``callback``.
    """
    if status == 0:
        on_output = partial(_parse_time, context, url, callback)
        process.stdout.read_until_close(on_output)
        return

    failure = LoaderResult()
    failure.successful = False
    callback(failure)
Exemple #11
0
    def callback_wrapper(result):
        """Wrap a raw payload (or ``None``) in a LoaderResult for ``callback``.

        ``None`` is reported as not found; anything else becomes the buffer
        of a successful result.
        """
        found = result is not None

        r = LoaderResult()
        r.successful = found
        if found:
            r.buffer = result
        else:
            r.error = LoaderResult.ERROR_NOT_FOUND

        callback(r)
Exemple #12
0
def read_file(file_path):
    """
    Read the given file path and its metadata. Returns a LoaderResult.

    The result is marked successful; its buffer holds the file's raw bytes
    and its metadata holds ``size`` and ``updated_at`` taken from ``fstat``.
    """
    # Binary mode: the payload is image data, not text, and must not be
    # decoded or have its line endings translated.
    with open(file_path, 'rb') as f:
        stats = fstat(f.fileno())
        return LoaderResult(buffer=f.read(),
                            successful=True,
                            metadata=dict(size=stats.st_size,
                                          updated_at=datetime.utcfromtimestamp(
                                              stats.st_mtime)))
def _process_output(callback, destination_name, status):
    """Report the outcome of a process that wrote to ``destination_name``.

    With a zero ``status`` the file is read into the result buffer and then
    deleted. Any other status yields an unsuccessful result and leaves the
    file untouched. The result is always passed to ``callback``.
    """
    succeeded = status == 0

    result = LoaderResult()
    result.successful = succeeded

    if succeeded:
        with open(destination_name, 'rb') as output:
            result.buffer = output.read()
        os.remove(destination_name)

    callback(result)
 def callback_wrapper(result):
     """Forward a successful result; otherwise fall back to the HTTP loader.

     The fallback is attempted only when ``path`` contains ``'http'``;
     any other failure is reported as not found.
     """
     if result.successful:
         callback(result)
         return

     # If file_loader failed try http_loader
     if 'http' in path:
         http_loader.load(context, path, callback)
         return

     not_found = LoaderResult()
     not_found.successful = False
     not_found.error = LoaderResult.ERROR_NOT_FOUND
     callback(not_found)
Exemple #15
0
def return_contents(response, url, context, req_start=None):
    """Convert an HTTP ``response`` into a ``LoaderResult`` and return it.

    Emits per-host fetch, status, timing and size metrics on
    ``context.metrics``. A response with an error maps to ``ERROR_TIMEOUT``
    (code 599) or ``ERROR_NOT_FOUND``; an empty body maps to
    ``ERROR_UPSTREAM``; otherwise the body becomes the buffer and the
    response headers are merged into the result metadata.
    """
    # Dots are replaced so the host forms a single metric-name segment.
    host_key = urlparse(url).netloc.replace(".", "_")

    if req_start:
        elapsed = datetime.datetime.now() - req_start
        fetch_metric = "original_image.fetch.{0}.{1}".format(
            response.code, host_key)
        context.metrics.timing(fetch_metric, elapsed.total_seconds() * 1000)
        context.metrics.incr(fetch_metric)

    result = LoaderResult()
    context.metrics.incr("original_image.status." + str(response.code))
    context.metrics.incr(
        "original_image.status.{0}.{1}".format(response.code, host_key))

    if response.error:
        result.successful = False
        # Return a Gateway Timeout status downstream if upstream times out
        result.error = (
            LoaderResult.ERROR_TIMEOUT
            if response.code == 599
            else LoaderResult.ERROR_NOT_FOUND
        )
        logger.warning(u"ERROR retrieving image %s: %s", url,
                       str(response.error))
        return result

    body = response.body
    if body is None or len(body) == 0:
        result.successful = False
        result.error = LoaderResult.ERROR_UPSTREAM
        logger.warning(u"ERROR retrieving image %s: Empty response.", url)
        return result

    timings = response.time_info
    if timings:
        for metric_name in timings:
            context.metrics.timing(
                "original_image.time_info." + metric_name,
                timings[metric_name] * 1000,
            )
        context.metrics.timing(
            "original_image.time_info.bytes_per_second",
            len(body) // timings["total"],
        )

    result.buffer = body
    result.metadata.update(response.headers)
    context.metrics.incr("original_image.response_bytes", len(body))
    return result
def load(self, path, callback):
    """Load an image from GridFS and pass a ``LoaderResult`` to ``callback``.

    The first ``/``-separated segment of ``path`` is used as the ObjectId
    of the stored file. A missing id yields a not-found result.
    """
    db = __conn__(self)
    image_id = path.split("/")[0]
    images = gridfs.GridFS(
        db, collection=self.config.MONGO_ORIGIN_SERVER_COLLECTION)

    result = LoaderResult()
    if not images.exists(ObjectId(image_id)):
        result.error = LoaderResult.ERROR_NOT_FOUND
        result.successful = False
    else:
        contents = images.get(ObjectId(image_id)).read()
        result.successful = True
        result.buffer = contents

    callback(result)
Exemple #17
0
 def handle_data(file_key):
     """Translate an S3 response into a call to ``callback``.

     A usable response passes the object's body bytes to ``callback``.
     Anything else is logged and reported as an unsuccessful LoaderResult:
     not found for a 404, upstream error otherwise.
     """
     if not file_key or 'Error' in file_key or 'Body' not in file_key:
         # ``warning`` replaces the deprecated ``warn`` alias; lazy %-args
         # keep the emitted message identical.
         logger.warning("ERROR retrieving image from S3 %s: %s",
                        key, str(file_key))
         # If we got here, there was a failure. We will return 404 if S3 returned a 404, otherwise 502.
         result = LoaderResult()
         result.successful = False
         if file_key and file_key.get('ResponseMetadata',
                                      {}).get('HTTPStatusCode') == 404:
             result.error = LoaderResult.ERROR_NOT_FOUND
         else:
             result.error = LoaderResult.ERROR_UPSTREAM
         callback(result)
     else:
         callback(file_key['Body'].read())
def load(context, path, callback):
    """Fetch ``path`` from the configured cloud-storage bucket.

    The bucket handle is looked up in the module-level ``buckets`` mapping
    and created and stored there on first use. A found blob's contents are
    passed to ``callback``; a missing blob yields a not-found LoaderResult.
    """
    config = context.config
    bucket_id = config.get("CLOUD_STORAGE_BUCKET_ID")
    project_id = config.get("CLOUD_STORAGE_PROJECT_ID")

    bucket = buckets[project_id].get(bucket_id)
    if bucket is None:
        bucket = storage.Client(project_id).get_bucket(bucket_id)
        buckets[project_id][bucket_id] = bucket

    blob = bucket.get_blob(path)
    if not blob:
        failure = LoaderResult()
        failure.successful = False
        failure.error = LoaderResult.ERROR_NOT_FOUND
        callback(failure)
        return

    callback(blob.download_as_string())
Exemple #19
0
def load(context, url, callback, normalize_url_func=_normalize_url):
    """Load ``url``, substituting an extracted frame when it is a video.

    For a video URL a frame is extracted and read; if no frame is produced a
    not-found ``LoaderResult`` is passed to ``callback``. Every other URL is
    delegated to ``load_sync``.

    :param context: forwarded to the frame extractor / ``load_sync``
    :param url: URL to load
    :param callback: callable receiving the ``LoaderResult``
    :param normalize_url_func: URL normaliser forwarded to the helpers
    """
    # ``warning`` replaces the deprecated ``warn`` alias; lazy %-args keep
    # the emitted message identical.
    logger.warning('>>> %s URL | ', url)
    if is_video(url):
        # Extract a frame from the video and load it instead of the original path
        with get_video_frame(context, url, normalize_url_func) as image_path:
            if image_path:
                callback(read_file(image_path))
                return

        # If we got here, there was a failure
        result = LoaderResult()
        result.error = LoaderResult.ERROR_NOT_FOUND
        result.successful = False
        callback(result)
        return

    load_sync(context, url, callback, normalize_url_func)
def load(context, path, callback):
    """Pass the bundled ``fixtures/images/image.jpg`` to ``callback``.

    ``context`` and ``path`` are ignored: the same fixture file, located
    relative to this module, is always read. A missing fixture yields a
    not-found ``LoaderResult``.
    """
    file_path = abspath(join(dirname(__file__), "fixtures/images/image.jpg"))
    result = LoaderResult()
    if exists(file_path):
        # Binary mode: a JPEG is not text and must not be decoded or have
        # its line endings translated.
        with open(file_path, 'rb') as f:
            stats = fstat(f.fileno())

            result.successful = True
            result.buffer = f.read()

            result.metadata.update(
                size=stats.st_size,
                updated_at=datetime.utcfromtimestamp(stats.st_mtime)
            )
    else:
        result.error = LoaderResult.ERROR_NOT_FOUND
        result.successful = False

    callback(result)
Exemple #21
0
async def load(context, path):
    """Load ``path`` from the file loader, falling back to the HTTP loader.

    A successful file-loader result is returned as is. Otherwise the HTTP
    loader is used, provided it validates ``path``; a path it rejects yields
    a bad-request ``LoaderResult`` carrying the reason and the source.
    """
    # First attempt to load with file_loader
    file_result = await file_loader.load(context, path)
    if file_result.successful:
        return file_result

    # If file_loader failed try http_loader
    if http_loader.validate(context, path):
        return await http_loader.load(context, path)

    rejected = LoaderResult()
    rejected.successful = False
    rejected.error = LoaderResult.ERROR_BAD_REQUEST
    rejected.extras["reason"] = "Unallowed domain"
    rejected.extras["source"] = path
    return rejected
Exemple #22
0
def load(context, path, callback):
    """Serve ``path`` from the first ``TC_MULTIDIR_PATHS`` directory holding it.

    Directories are tried in configuration order. For each one the literal
    path is tried first, then its percent-decoded form. The first hit is
    read and passed to ``callback``; if no directory contains the file (or
    none is configured) a not-found ``LoaderResult`` is passed instead.

    :param context: object exposing ``config.TC_MULTIDIR_PATHS``
    :param path: requested path, relative to each directory
    :param callback: callable invoked exactly once with the ``LoaderResult``
    """
    result = LoaderResult()

    for next_dir in context.config.TC_MULTIDIR_PATHS:
        root_path = abspath(next_dir)
        file_path = abspath(join(next_dir.rstrip('/'), path.lstrip('/')))

        if file_path.startswith(root_path):
            # keep backwards compatibility, try the actual path first
            # if not found, unquote it and try again
            found = exists(file_path)
            if not found:
                # Percent-decoding can introduce ".." segments, so the decoded
                # path is normalised and checked against the directory again.
                file_path = abspath(unquote(file_path))
                found = file_path.startswith(root_path) and exists(file_path)

            if found:
                with open(file_path, 'rb') as f:
                    stats = fstat(f.fileno())

                    result.successful = True
                    result.buffer = f.read()

                    result.metadata.update(
                        size=stats.st_size,
                        updated_at=datetime.utcfromtimestamp(stats.st_mtime))
                callback(result)
                return

        logger.debug('TC_MULTIDIR: File {0} not found in {1}'.format(path, next_dir))
        # else loop and try next directory

    if not context.config.TC_MULTIDIR_PATHS:
        logger.error('TC_MULTIDIR: No paths set in configuration TC_MULTIDIR_PATHS')

    # no file found
    result.error = LoaderResult.ERROR_NOT_FOUND
    result.successful = False
    callback(result)
Exemple #23
0
def load(context, url, callback):
    """Route ``url`` to the S3, HTTP or file loader.

    URLs matching ``S3_RE`` go to ``S3Loader``, URLs matching ``HTTP_RE`` go
    to ``HttpLoader``, and everything else goes to ``FileLoader``.
    """
    url = quote_url(url)
    match = S3_RE.match(url)

    def s3_location():
        # "<bucket>/<path>" with exactly one separator between the two parts.
        return os.path.join(match.group('bucket').rstrip('/'),
                            match.group('path').lstrip('/'))

    # NOTE(review): currently unused - its only call site is commented out
    # below while S3 is tried directly.
    def callback_wrapper(result):
        if result.successful:
            callback(result)
            return

        logger.info('s3 {0}'.format(s3_location()))

        # If not on efs, try s3
        S3Loader.load(context, s3_location(), callback)

    # If melody s3 file, first try to load from efs
    if match:
        logger.info('BOTO {0}'.format(match.group('path')))

        # TEMP try s3 direct
        S3Loader.load(context, s3_location(), callback)

        # FileLoader.load(context, match.group('path'), callback_wrapper)
        return

    # else get from the internet
    if HTTP_RE.match(url):
        logger.info('WEB {0}'.format(url))
        HttpLoader.load(context, url, callback)
        return

    logger.info('FILE {0}'.format(url))
    # NOTE(review): this not-found result is built but never delivered while
    # the TEMP file-loader path below is enabled.
    result = LoaderResult()
    result.successful = False
    result.error = LoaderResult.ERROR_NOT_FOUND
    # callback(result)
    # TEMP enable file loader
    FileLoader.load(context, url, callback)
Exemple #24
0
async def load(context, url):
    """
    Loads an image, either through the HTTP loader or from a bucket.
    A bucket that fails validation yields a not-found LoaderResult.
    :param Context context: Thumbor's context
    :param string url: Path to load
    """
    if _use_http_loader(context, url):
        return await http_loader.load(context, url, normalize_url_func=http_loader._normalize_url)

    bucket, key = _get_bucket_and_key(context, url)

    if not _validate_bucket(context, bucket):
        return LoaderResult(successful=False,
                            error=LoaderResult.ERROR_NOT_FOUND)

    config = context.config
    loader = Bucket(
        bucket,
        config.get('TC_AWS_REGION'),
        config.get('TC_AWS_ENDPOINT'),
    )
    handle_data = HandleDataFunc.as_func(
        key,
        bucket_loader=loader,
        max_retry=config.get('TC_AWS_MAX_RETRY'),
    )

    return handle_data(await loader.get(key))
Exemple #25
0
def get_not_found_result():
    # type: () -> LoaderResult
    """Build an unsuccessful LoaderResult flagged as not found."""
    not_found = LoaderResult()
    not_found.successful = False
    not_found.error = LoaderResult.ERROR_NOT_FOUND
    return not_found
Exemple #26
0
def return_contents(response, url, callback, context, req_start=None):
    """Turn an HTTP ``response`` into a ``LoaderResult`` and pass it to ``callback``.

    Emits fetch-time, status, timing and size metrics on ``context.metrics``.
    A response with an error maps to ``ERROR_TIMEOUT`` (code 599) or
    ``ERROR_NOT_FOUND``; an empty body maps to ``ERROR_UPSTREAM``. Otherwise
    the body becomes the buffer and the headers are merged into the metadata.

    When ``NORMALIZE_TO_72DPI`` and ``CONVERT_PATH`` are both configured and
    the response is a JPEG, the body is piped through the ``convert`` binary;
    its output replaces the buffer only if the command exits with status 0.

    :param response: object exposing ``code``, ``error``, ``body``,
        ``headers`` and ``time_info``
    :param url: URL that was fetched (used for logging and metric names)
    :param callback: callable receiving the ``LoaderResult``
    :param context: object exposing ``metrics`` and ``config``
    :param req_start: optional ``datetime`` at which the request started
    """
    if req_start:
        finish = datetime.datetime.now()
        res = urlparse(url)
        context.metrics.timing(
            'original_image.fetch.{0}.{1}'.format(response.code, res.netloc),
            (finish - req_start).total_seconds() * 1000,
        )

    result = LoaderResult()
    context.metrics.incr('original_image.status.' + str(response.code))
    if response.error:
        result.successful = False
        if response.code == 599:
            # Return a Gateway Timeout status downstream if upstream times out
            result.error = LoaderResult.ERROR_TIMEOUT
        else:
            result.error = LoaderResult.ERROR_NOT_FOUND

        logger.warning(u"ERROR retrieving image %s: %s",
                       url, str(response.error))

    elif response.body is None or len(response.body) == 0:
        result.successful = False
        result.error = LoaderResult.ERROR_UPSTREAM

        logger.warning(u"ERROR retrieving image %s: Empty response.", url)
    else:
        if response.time_info:
            for x in response.time_info:
                context.metrics.timing('original_image.time_info.' + x,
                                       response.time_info[x] * 1000)
            context.metrics.timing(
                'original_image.time_info.bytes_per_second',
                len(response.body) / response.time_info['total'])

        result.buffer = response.body
        result.metadata.update(response.headers)
        context.metrics.incr('original_image.response_bytes',
                             len(response.body))

        if context.config.NORMALIZE_TO_72DPI and context.config.CONVERT_PATH:
            if not exists(context.config.CONVERT_PATH):
                logger.warning(
                    'imagemagick/convert enabled but binary CONVERT_PATH does not exist'
                )
            elif 'Content-Type' not in result.metadata:
                # Checked explicitly so an unrelated KeyError raised further
                # down is not mistaken for a missing header.
                logger.warning('Content-Type KeyError on %s', url)
            elif "jpeg" in result.metadata['Content-Type']:
                # Argument list without a shell: a CONVERT_PATH containing
                # spaces or shell metacharacters is passed through verbatim.
                command = [
                    context.config.CONVERT_PATH,
                    '-', '-density', '72,72', '-strip', '-',
                ]
                try:
                    normalize_dpi_cmd = Popen(command,
                                              stdin=PIPE,
                                              stdout=PIPE,
                                              stderr=PIPE,
                                              close_fds=True)
                except OSError as err:
                    # Without a shell a non-executable binary raises here;
                    # keep the original buffer, as a failed command would.
                    logger.warning(
                        'dpi normalization could not be started: %s', err)
                else:
                    normalize_dpi_stdout, normalize_dpi_stderr = (
                        normalize_dpi_cmd.communicate(input=response.body))

                    if normalize_dpi_cmd.returncode != 0:
                        logger.warning(
                            'dpi normalization finished with non-zero return code (%d): %s',
                            normalize_dpi_cmd.returncode,
                            normalize_dpi_stderr)
                    else:
                        result.buffer = normalize_dpi_stdout
    callback(result)
 async def load_override(*_):
     """Ignore all arguments and return a failed LoaderResult with error 409."""
     conflict = LoaderResult()
     conflict.error = 409
     conflict.successful = False
     return conflict
Exemple #28
0
 def load_override(context, path, callback):
     """Pass a failed LoaderResult with error 409 to ``callback``.

     ``context`` and ``path`` are ignored.
     """
     conflict = LoaderResult()
     conflict.error = 409
     conflict.successful = False
     callback(conflict)
Exemple #29
0
from unittest import TestCase
from tests.base import TestCase as AsyncTestCase
from preggy import expect

import thumbor

from thumbor.context import Context
from thumbor.config import Config
from thumbor.loaders import LoaderResult

import thumbor.loaders.file_loader_http_fallback as loader

# Absolute path of the image fixtures directory, resolved relative to this module.
STORAGE_PATH = abspath(join(dirname(__file__), '../fixtures/images/'))

# Module-level LoaderResult shared by the dummy loaders below, which only
# overwrite its ``buffer`` before handing it to their callback.
result = LoaderResult()
result.successful = True


def dummy_file_load(context, url, callback, normalize_url_func=None):
    """Fake file loader: tag the shared ``result`` as ``'file'`` and deliver it.

    ``context``, ``url`` and ``normalize_url_func`` are accepted only for
    signature compatibility and are never read. The default is ``None``
    rather than a mutable ``{}`` shared between calls.
    """
    result.buffer = 'file'
    callback(result)


def dummy_http_load(context, url, callback, normalize_url_func=None):
    """Fake HTTP loader: tag the shared ``result`` as ``'http'`` and deliver it.

    ``context``, ``url`` and ``normalize_url_func`` are accepted only for
    signature compatibility and are never read. The default is ``None``
    rather than a mutable ``{}`` shared between calls.
    """
    result.buffer = 'http'
    callback(result)


class FileLoaderHttpFallbackFileTestCase(TestCase):
    def setUp(self):
def load(context, path):
    """Load ``path`` from the primary picture root, then from the fallback root.

    Each root is tried in order (``PIC_LOADER_ROOT_PATH`` first, then
    ``PIC_LOADER_FALLBACK_PATH``). A candidate is used only if it resolves
    inside its own root and exists, either literally or after
    percent-decoding. The first existing candidate decides the outcome: a
    file whose size is at or above the limit yields
    ``LoaderResult.COULD_NOT_LOAD_IMAGE``, otherwise its bytes and
    ``size``/``updated_at`` metadata are returned. If no candidate exists the
    result is ``LoaderResult.ERROR_NOT_FOUND``.

    :param context: object exposing ``config.PIC_LOADER_ROOT_PATH``,
        ``config.PIC_LOADER_FALLBACK_PATH`` and, optionally,
        ``config.PIC_LOADER_MAX_SIZE``
    :param path: requested path, relative to either root
    :return: the populated ``LoaderResult``
    """
    config = context.config

    # The limit is a byte count, not a filesystem path, so it must not go
    # through os.path.exists. A missing, None or zero setting falls back to
    # 16 MiB.
    oversize = getattr(config, 'PIC_LOADER_MAX_SIZE', None) or 16777216

    result = LoaderResult()
    relative_path = path.lstrip('/')

    for root in (config.PIC_LOADER_ROOT_PATH, config.PIC_LOADER_FALLBACK_PATH):
        root_path = abspath(root)
        file_path = abspath(join(root.rstrip('/'), relative_path))

        # Each candidate is validated against its own root, so a path that is
        # only inside the other root is never opened from this one.
        if not file_path.startswith(root_path):
            continue

        # keep backwards compatibility, try the actual path first
        # if not found, unquote it and try again
        if not exists(file_path):
            # Percent-decoding can introduce ".." segments, so the decoded
            # path is normalised and checked against the root again.
            file_path = abspath(unquote(file_path))
            if not file_path.startswith(root_path) or not exists(file_path):
                continue

        # Binary mode: the payload is image data, not text.
        with open(file_path, 'rb') as f:
            stats = fstat(f.fileno())
            if stats.st_size >= oversize:
                result.error = LoaderResult.COULD_NOT_LOAD_IMAGE
                result.successful = False
            else:
                result.successful = True
                result.buffer = f.read()

                result.metadata.update(size=stats.st_size,
                                       updated_at=datetime.utcfromtimestamp(
                                           stats.st_mtime))
        return result

    result.error = LoaderResult.ERROR_NOT_FOUND
    result.successful = False
    return result