Exemple #1
0
    def dispatch(self, file_key):
        """Handle the response of an S3 ``getObject`` request.

        A response that is non-empty, has no ``'Error'`` entry and carries a
        ``'Body'`` is treated as a success: the body is read and handed to
        ``self.callback``.  Anything else is logged and reported through a
        ``LoaderResult`` flagged as unsuccessful, unless another attempt is
        scheduled via ``self.bucket_loader.get``.

        :param file_key: response mapping of the S3 request (may be empty)
        """
        if file_key and 'Error' not in file_key and 'Body' in file_key:
            self.callback(file_key['Body'].read())
            return

        logger.error(
            "ERROR retrieving image from S3 {0}: {1}".format(
                self.key, str(file_key)))

        failure = LoaderResult()
        failure.successful = False

        if file_key:
            metadata = file_key.get('ResponseMetadata', {})

            # A 404 reported by S3 is final: no retry is attempted.
            if metadata.get('HTTPStatusCode') == 404:
                failure.error = LoaderResult.ERROR_NOT_FOUND
                self.callback(failure)
                return

            # Any other failure is retried until max_retry is reached.
            if self.retries_counter < self.max_retry:
                self.__increment_retry_counter()
                self.bucket_loader.get(self.key, callback=self.dispatch)
                return

        # Empty response, or retries exhausted.
        failure.error = LoaderResult.ERROR_UPSTREAM
        self.callback(failure)
Exemple #2
0
def load(context, path, callback):
    """Read *path* from below ``FILE_LOADER_ROOT_PATH`` and report via *callback*.

    The literal path is tried first; when it does not exist, its
    percent-decoded form is tried instead (backwards compatibility).

    :param context: object exposing ``config.FILE_LOADER_ROOT_PATH``
    :param path: requested path, joined onto the root directory
    :param callback: called exactly once with a ``LoaderResult``; on success
        it carries the file bytes plus ``size`` and ``updated_at`` metadata,
        otherwise ``ERROR_NOT_FOUND``
    """
    from os.path import sep

    root_path = abspath(context.config.FILE_LOADER_ROOT_PATH)
    root_prefix = root_path.rstrip(sep) + sep

    def is_inside_root(candidate):
        # Compare on a directory boundary: a bare startswith() would also
        # accept siblings such as "<root>_other/file".
        return candidate == root_path or candidate.startswith(root_prefix)

    file_path = abspath(join(context.config.FILE_LOADER_ROOT_PATH.rstrip('/'),
                             path.lstrip('/')))

    result = LoaderResult()

    if not is_inside_root(file_path):
        result.error = LoaderResult.ERROR_NOT_FOUND
        result.successful = False
        callback(result)
        return

    # keep backwards compatibility, try the actual path first
    # if not found, unquote it and try again
    if not exists(file_path):
        # Decoding can reveal "../" segments (e.g. "%2e%2e"), so the path is
        # normalised again and re-validated below before it is opened.
        file_path = abspath(unquote(file_path))

    if is_inside_root(file_path) and exists(file_path):
        with open(file_path, 'rb') as f:
            stats = fstat(f.fileno())

            result.successful = True
            result.buffer = f.read()

            result.metadata.update(size=stats.st_size,
                                   updated_at=datetime.utcfromtimestamp(
                                       stats.st_mtime))
    else:
        result.error = LoaderResult.ERROR_NOT_FOUND
        result.successful = False

    callback(result)
Exemple #3
0
def load(context, path, callback):
    """Read *path* from below ``FILE_LOADER_ROOT_PATH`` and report via *callback*.

    The literal path is tried first; when it does not exist, its
    percent-decoded form is tried instead (backwards compatibility).

    :param context: object exposing ``config.FILE_LOADER_ROOT_PATH``
    :param path: requested path, joined onto the root directory
    :param callback: called exactly once with a ``LoaderResult``; on success
        it carries the file bytes plus ``size`` and ``updated_at`` metadata,
        otherwise ``ERROR_NOT_FOUND``
    """
    from os.path import sep

    root_path = abspath(context.config.FILE_LOADER_ROOT_PATH)
    root_prefix = root_path.rstrip(sep) + sep

    def is_inside_root(candidate):
        # Compare on a directory boundary: a bare startswith() would also
        # accept siblings such as "<root>_other/file".
        return candidate == root_path or candidate.startswith(root_prefix)

    file_path = abspath(join(
        context.config.FILE_LOADER_ROOT_PATH.rstrip('/'), path.lstrip('/')))

    result = LoaderResult()

    if not is_inside_root(file_path):
        result.error = LoaderResult.ERROR_NOT_FOUND
        result.successful = False
        callback(result)
        return

    # keep backwards compatibility, try the actual path first
    # if not found, unquote it and try again
    if not exists(file_path):
        # Decoding can reveal "../" segments (e.g. "%2e%2e"), so the path is
        # normalised again and re-validated below before it is opened.
        file_path = abspath(unquote(file_path))

    if is_inside_root(file_path) and exists(file_path):
        # Binary mode: the buffer holds raw file bytes, which text mode would
        # try to decode (and translate newlines on some platforms).
        with open(file_path, 'rb') as f:
            stats = fstat(f.fileno())

            result.successful = True
            result.buffer = f.read()

            result.metadata.update(
                size=stats.st_size,
                updated_at=datetime.utcfromtimestamp(stats.st_mtime))
    else:
        result.error = LoaderResult.ERROR_NOT_FOUND
        result.successful = False

    callback(result)
Exemple #4
0
    def dispatch(self, file_key):
        """Callback invoked with the outcome of an S3 ``getObject`` call.

        Successful responses have their ``'Body'`` read and forwarded to
        ``self.callback``.  Failed ones are logged and turned into an
        unsuccessful ``LoaderResult``, except when a retry is started through
        ``self.bucket_loader.get`` (in which case nothing is delivered yet).

        :param file_key: response mapping of the S3 request (may be empty)
        """
        failed = not file_key or 'Error' in file_key or 'Body' not in file_key
        if not failed:
            self.callback(file_key['Body'].read())
            return

        logger.error("ERROR retrieving image from S3 {0}: {1}".format(
            self.key, str(file_key)))

        outcome = LoaderResult()
        outcome.successful = False

        if not file_key:
            # Nothing came back at all.
            outcome.error = LoaderResult.ERROR_UPSTREAM
        elif file_key.get('ResponseMetadata', {}).get('HTTPStatusCode') == 404:
            # S3 reported a missing object: final, never retried.
            outcome.error = LoaderResult.ERROR_NOT_FOUND
        elif self.retries_counter < self.max_retry:
            self.__increment_retry_counter()
            self.bucket_loader.get(self.key, callback=self.dispatch)
            return
        else:
            # Retries exhausted.
            outcome.error = LoaderResult.ERROR_UPSTREAM

        self.callback(outcome)
Exemple #5
0
 def handle_data(file_key):
     """Forward the outcome of an S3 fetch to the enclosing ``callback``.

     A response with a ``'Body'`` and no ``'Error'`` entry is read and passed
     on as-is.  Otherwise an unsuccessful ``LoaderResult`` is delivered:
     ``ERROR_NOT_FOUND`` when the response metadata reports HTTP 404,
     ``ERROR_UPSTREAM`` in every other case (including an empty response).

     :param file_key: response mapping of the S3 request (may be empty)
     """
     if not file_key or 'Error' in file_key or 'Body' not in file_key:
         # logger.warning(): ``warn`` is a deprecated alias in the stdlib
         # logging API.
         logger.warning("ERROR retrieving image from S3 {0}: {1}".format(key, str(file_key)))
         result = LoaderResult()
         result.successful = False
         if file_key and file_key.get('ResponseMetadata', {}).get('HTTPStatusCode') == 404:
             result.error = LoaderResult.ERROR_NOT_FOUND
         else:
             result.error = LoaderResult.ERROR_UPSTREAM
         callback(result)
     else:
         callback(file_key['Body'].read())
Exemple #6
0
def return_contents(response, url, context, req_start=None):
    """Convert an HTTP *response* into a ``LoaderResult`` and record metrics.

    :param response: HTTP response; ``code``, ``error``, ``body``,
        ``headers`` and ``time_info`` are read from it
    :param url: URL that was fetched (its host is embedded in metric names)
    :param context: object exposing ``metrics.timing`` and ``metrics.incr``
    :param req_start: optional ``datetime`` marking the start of the request;
        when given, the elapsed fetch time is reported
    :returns: the ``LoaderResult``; ``ERROR_TIMEOUT`` for code 599,
        ``ERROR_NOT_FOUND`` for any other response error, ``ERROR_UPSTREAM``
        for an empty body, otherwise the body and headers
    """
    # Dots in the host would be split into separate metric path segments.
    host_key = urlparse(url).netloc.replace(".", "_")

    if req_start:
        finish = datetime.datetime.now()
        fetch_metric = "original_image.fetch.{0}.{1}".format(
            response.code, host_key)
        context.metrics.timing(
            fetch_metric,
            (finish - req_start).total_seconds() * 1000,
        )
        context.metrics.incr(fetch_metric)

    result = LoaderResult()
    context.metrics.incr("original_image.status." + str(response.code))
    context.metrics.incr("original_image.status.{0}.{1}".format(
        response.code, host_key))
    if response.error:
        result.successful = False
        if response.code == 599:
            # Return a Gateway Timeout status downstream if upstream times out
            result.error = LoaderResult.ERROR_TIMEOUT
        else:
            result.error = LoaderResult.ERROR_NOT_FOUND

        logger.warning(u"ERROR retrieving image %s: %s", url,
                       str(response.error))

    elif response.body is None or len(response.body) == 0:
        result.successful = False
        result.error = LoaderResult.ERROR_UPSTREAM

        logger.warning(u"ERROR retrieving image %s: Empty response.", url)
    else:
        if response.time_info:
            for metric_name in response.time_info:
                context.metrics.timing(
                    "original_image.time_info." + metric_name,
                    response.time_info[metric_name] * 1000,
                )
            # Skip the throughput metric when the total time is missing or
            # zero instead of failing the whole load on a division error.
            total_time = response.time_info.get("total")
            if total_time:
                context.metrics.timing(
                    "original_image.time_info.bytes_per_second",
                    len(response.body) // total_time,
                )
        result.buffer = response.body
        result.metadata.update(response.headers)
        context.metrics.incr("original_image.response_bytes",
                             len(response.body))

    return result
Exemple #7
0
 def handle_data(file_key):
     """Forward the outcome of an S3 fetch to the enclosing ``callback``.

     A response with a ``'Body'`` and no ``'Error'`` entry is read and passed
     on as-is.  Otherwise an unsuccessful ``LoaderResult`` is delivered:
     ``ERROR_NOT_FOUND`` when the response metadata reports HTTP 404,
     ``ERROR_UPSTREAM`` in every other case (including an empty response).

     :param file_key: response mapping of the S3 request (may be empty)
     """
     if not file_key or 'Error' in file_key or 'Body' not in file_key:
         # logger.warning(): ``warn`` is a deprecated alias in the stdlib
         # logging API.
         logger.warning("ERROR retrieving image from S3 {0}: {1}".format(
             key, str(file_key)))
         result = LoaderResult()
         result.successful = False
         if file_key and file_key.get('ResponseMetadata',
                                      {}).get('HTTPStatusCode') == 404:
             result.error = LoaderResult.ERROR_NOT_FOUND
         else:
             result.error = LoaderResult.ERROR_UPSTREAM
         callback(result)
     else:
         callback(file_key['Body'].read())
Exemple #8
0
def return_contents_error(data, callback, *args, **kwargs):
    """Translate an error message into a failed ``LoaderResult``.

    Only the first line of *data* is logged and inspected.  Nothing happens
    (and *callback* is not called) when *data* is empty.

    :param data: error text; may be empty or ``None``
    :param callback: called with the unsuccessful ``LoaderResult``
    :param args: accepted for signature compatibility, unused
    :param kwargs: accepted for signature compatibility, unused
    """
    if not data:
        return
    data = data.split('\n')[0]
    logger.error(data)

    result = LoaderResult()
    result.successful = False
    if 'Failed to resolve hostname' in data:
        result.error = LoaderResult.ERROR_UPSTREAM
    # The original ``elif`` repeated the hostname check above, so
    # ERROR_TIMEOUT could never be selected.
    # NOTE(review): the exact timeout wording of the upstream tool is not
    # visible here; matching "timed out" is an assumption - confirm.
    elif 'timed out' in data.lower():
        result.error = LoaderResult.ERROR_TIMEOUT
    else:
        result.error = LoaderResult.ERROR_NOT_FOUND
    callback(result)
Exemple #9
0
def load(context, path, callback):
    """Load *path*, substituting a rendered image for videos and PDFs.

    Videos and PDFs located below ``FILE_LOADER_ROOT_PATH`` are replaced by
    the image produced by ``get_video_frame`` / ``get_pdf_page``.  Every
    other request is delegated to ``file_loader.load``.

    :param context: object exposing ``config.FILE_LOADER_ROOT_PATH``
    :param path: requested (possibly percent-encoded) path
    :param callback: receives the loaded contents, or a ``LoaderResult``
        with ``ERROR_NOT_FOUND`` when no image could be extracted
    """
    from os.path import sep

    root_path = abspath(context.config.FILE_LOADER_ROOT_PATH)
    file_path = join(context.config.FILE_LOADER_ROOT_PATH.rstrip('/'),
                     unquote(path).lstrip('/'))
    file_path = abspath(file_path)

    # Compare against the normalised root on a directory boundary; a bare
    # startswith() on the raw setting also matches "<root>_other/..." and
    # never matches when the setting is relative or not normalised.
    inside_root_path = (
        file_path == root_path or
        file_path.startswith(root_path.rstrip(sep) + sep))

    if inside_root_path and is_video(file_path):
        # Extract a frame from the video and load it instead of the original path
        logger.warning('processing video... %s', file_path)
        with get_video_frame(context, file_path) as image_path:
            if image_path:
                callback(read_file(image_path))
                return
    elif inside_root_path and is_pdf(file_path):
        # extract first page of pdf and load it
        logger.warning('processing pdf... %s', file_path)
        with get_pdf_page(context, file_path) as image_path:
            if image_path:
                callback(read_file(image_path))
                return
    else:
        # First attempt to load with file_loader
        file_loader.load(context, path, callback)
        return

    # If we got here, there was a failure
    result = LoaderResult()
    result.error = LoaderResult.ERROR_NOT_FOUND
    result.successful = False
    callback(result)
Exemple #10
0
def load(context, path, callback):
    """
    Loads a file. In case the requested file is a video, instead of loading
    its contents this method extracts a frame from the video (through
    ``get_video_frame``) and returns that image.
    :param Context context: Thumbor's context
    :param string path: Path to load, relative to FILE_LOADER_ROOT_PATH
    :param callable callback: Callback method once done; receives the file
        contents, or a LoaderResult with ERROR_NOT_FOUND on failure
    """
    from os.path import sep

    root_path = abspath(context.config.FILE_LOADER_ROOT_PATH)
    file_path = join(context.config.FILE_LOADER_ROOT_PATH.rstrip('/'), unquote(path).lstrip('/'))
    file_path = abspath(file_path)

    # Compare against the normalised root on a directory boundary; a bare
    # startswith() on the raw setting also matches "<root>_other/...".
    inside_root_path = (
        file_path == root_path or
        file_path.startswith(root_path.rstrip(sep) + sep))

    if inside_root_path and exists(file_path):

        if is_video(file_path):
            # Extract a frame from the video and load it instead of the original path
            with get_video_frame(context, file_path) as image_path:
                if image_path:
                    callback(read_file(image_path))
                    return
        else:
            callback(read_file(file_path))
            return

    # If we got here, there was a failure
    result = LoaderResult()
    result.error = LoaderResult.ERROR_NOT_FOUND
    result.successful = False
    callback(result)
def load(context, path, callback):
    """
    Loads a file. In case the requested file is a video, instead of loading
    its contents this method extracts a frame from the video (through
    ``get_video_frame``) and returns that image.
    :param Context context: Thumbor's context
    :param string path: Path to load, relative to FILE_LOADER_ROOT_PATH
    :param callable callback: Callback method once done; receives the file
        contents, or a LoaderResult with ERROR_NOT_FOUND on failure
    """
    from os.path import sep

    root_path = abspath(context.config.FILE_LOADER_ROOT_PATH)
    file_path = join(context.config.FILE_LOADER_ROOT_PATH.rstrip("/"), unquote(path).lstrip("/"))
    file_path = abspath(file_path)

    # Compare against the normalised root on a directory boundary; a bare
    # startswith() on the raw setting also matches "<root>_other/...".
    inside_root_path = (
        file_path == root_path or
        file_path.startswith(root_path.rstrip(sep) + sep))

    if inside_root_path and exists(file_path):

        if is_video(file_path):
            # Extract a frame from the video and load it instead of the original path
            with get_video_frame(context, file_path) as image_path:
                if image_path:
                    callback(read_file(image_path))
                    return
        else:
            callback(read_file(file_path))
            return

    # If we got here, there was a failure
    result = LoaderResult()
    result.error = LoaderResult.ERROR_NOT_FOUND
    result.successful = False
    callback(result)
def load(self, path, callback):
    """Fetch an image from GridFS by the id in the first segment of *path*.

    :param self: object passed to ``__conn__`` and exposing
        ``config.MONGO_ORIGIN_SERVER_COLLECTION``
    :param path: ``/``-separated path whose first segment is the object id
    :param callback: receives a ``LoaderResult`` holding the stored bytes,
        or ``ERROR_NOT_FOUND`` when the id is invalid or unknown
    """
    database = __conn__(self)
    image_id = path.split("/")[0]
    grid = gridfs.GridFS(
        database, collection=self.config.MONGO_ORIGIN_SERVER_COLLECTION)
    result = LoaderResult()

    # Invalid ids and unknown ids are reported the same way.
    if ObjectId.is_valid(image_id) and grid.exists(ObjectId(image_id)):
        stored = grid.get(ObjectId(image_id)).read()
        result.successful = True
        result.buffer = stored
    else:
        result.error = LoaderResult.ERROR_NOT_FOUND
        result.successful = False

    callback(result)
Exemple #13
0
    def callback_wrapper(result):
        """Wrap a raw load outcome in a ``LoaderResult`` for ``callback``.

        ``None`` is reported as ``ERROR_NOT_FOUND``; any other value becomes
        the buffer of a successful result.
        """
        wrapped = LoaderResult()
        if result is None:
            wrapped.error = LoaderResult.ERROR_NOT_FOUND
            wrapped.successful = False
        else:
            wrapped.successful = True
            wrapped.buffer = result

        callback(wrapped)
Exemple #14
0
def return_contents(response, url, callback, context, req_start=None):
    """Convert an HTTP *response* into a ``LoaderResult`` and pass it on.

    :param response: HTTP response; ``code``, ``error``, ``body``,
        ``headers`` and ``time_info`` are read from it
    :param url: URL that was fetched
    :param callback: called once with the ``LoaderResult``
    :param context: object exposing ``metrics.timing`` and ``metrics.incr``
    :param req_start: optional ``datetime`` marking the start of the request;
        when given, the elapsed fetch time is reported
    """
    if req_start:
        finish = datetime.datetime.now()
        res = urlparse(url)
        context.metrics.timing(
            'original_image.fetch.{0}.{1}'.format(response.code,
                                                  res.netloc.replace('.',
                                                                     '_')),
            (finish - req_start).total_seconds() * 1000,
        )

    result = LoaderResult()
    context.metrics.incr('original_image.status.' + str(response.code))
    if response.error:
        result.successful = False
        if response.code == 599:
            # Return a Gateway Timeout status downstream if upstream times out
            result.error = LoaderResult.ERROR_TIMEOUT
        else:
            result.error = LoaderResult.ERROR_NOT_FOUND

        logger.warning(u"ERROR retrieving image {0}: {1}".format(
            url, str(response.error)))

    elif response.body is None or len(response.body) == 0:
        result.successful = False
        result.error = LoaderResult.ERROR_UPSTREAM

        # logger.warning(): ``warn`` is a deprecated alias, and the branch
        # above already uses ``warning``.
        logger.warning(u"ERROR retrieving image {0}: Empty response.".format(url))
    else:
        if response.time_info:
            for x in response.time_info:
                context.metrics.timing('original_image.time_info.' + x,
                                       response.time_info[x] * 1000)
            # Skip the throughput metric when the total time is missing or
            # zero instead of failing the whole load on a division error.
            total_time = response.time_info.get('total')
            if total_time:
                context.metrics.timing(
                    'original_image.time_info.bytes_per_second',
                    len(response.body) // total_time)
        result.buffer = response.body
        result.metadata.update(response.headers)
        context.metrics.incr('original_image.response_bytes',
                             len(response.body))

    callback(result)
Exemple #15
0
def return_contents(response, url, callback, context, req_start=None):
    """Convert an HTTP *response* into a ``LoaderResult`` and pass it on.

    :param response: HTTP response; ``code``, ``error``, ``body``,
        ``headers`` and ``time_info`` are read from it
    :param url: URL that was fetched
    :param callback: called once with the ``LoaderResult``
    :param context: object exposing ``metrics.timing`` and ``metrics.incr``
    :param req_start: optional ``datetime`` marking the start of the request;
        when given, the elapsed fetch time is reported
    """
    if req_start:
        finish = datetime.datetime.now()
        res = urlparse(url)
        context.metrics.timing(
            'original_image.fetch.{0}.{1}'.format(response.code, res.netloc),
            (finish - req_start).total_seconds() * 1000,
        )

    result = LoaderResult()
    context.metrics.incr('original_image.status.' + str(response.code))
    if response.error:
        result.successful = False
        if response.code == 599:
            # Return a Gateway Timeout status downstream if upstream times out
            result.error = LoaderResult.ERROR_TIMEOUT
        else:
            result.error = LoaderResult.ERROR_NOT_FOUND

        # logger.warning(): ``warn`` is a deprecated alias in the stdlib
        # logging API.
        logger.warning(u"ERROR retrieving image {0}: {1}".format(
            url, str(response.error)))

    elif response.body is None or len(response.body) == 0:
        result.successful = False
        result.error = LoaderResult.ERROR_UPSTREAM

        logger.warning(u"ERROR retrieving image {0}: Empty response.".format(url))
    else:
        if response.time_info:
            for x in response.time_info:
                context.metrics.timing('original_image.time_info.' + x,
                                       response.time_info[x] * 1000)
            # Skip the throughput metric when the total time is missing or
            # zero instead of failing the whole load on a division error.
            total_time = response.time_info.get('total')
            if total_time:
                context.metrics.timing(
                    'original_image.time_info.bytes_per_second',
                    len(response.body) / total_time)
        result.buffer = response.body
        result.metadata.update(response.headers)
        context.metrics.incr('original_image.response_bytes', len(
            response.body))

    callback(result)
 def callback_wrapper(result):
     """Pass on a successful result, otherwise fall back to ``http_loader``.

     The fallback is only attempted when the enclosing ``path`` contains
     ``'http'``; any other failure is reported as ``ERROR_NOT_FOUND``.
     """
     if result.successful:
         callback(result)
         return

     # If file_loader failed try http_loader
     if 'http' in path:
         http_loader.load(context, path, callback)
         return

     not_found = LoaderResult()
     not_found.error = LoaderResult.ERROR_NOT_FOUND
     not_found.successful = False
     callback(not_found)
Exemple #17
0
def load(context, url, callback, normalize_url_func=_normalize_url):
    """Load *url*, substituting an extracted frame when it is a video.

    :param context: loader context, forwarded to the helpers
    :param url: URL to load
    :param callback: receives the frame contents, a ``LoaderResult`` with
        ``ERROR_NOT_FOUND`` when no frame could be extracted, or whatever
        ``load_sync`` delivers for non-video URLs
    :param normalize_url_func: URL normaliser forwarded to the helpers
    """
    # logger.warning() with lazy arguments: ``warn`` is a deprecated alias,
    # and the message is only formatted when the record is actually emitted.
    logger.warning('>>> %s URL | ', url)
    if is_video(url):
        # Extract a frame from the video and load it instead of the original path
        with get_video_frame(context, url, normalize_url_func) as image_path:
            if image_path:
                callback(read_file(image_path))
                return

        # If we got here, there was a failure
        result = LoaderResult()
        result.error = LoaderResult.ERROR_NOT_FOUND
        result.successful = False
        callback(result)
        return

    load_sync(context, url, callback, normalize_url_func)
def load(context, path, callback):
    """Download *path* from the configured cloud storage bucket.

    Bucket handles are kept in the module-level ``buckets`` mapping, keyed
    by project id and then bucket id, so a client is only created the first
    time a bucket is used.

    :param context: object whose ``config.get`` yields the bucket/project ids
    :param path: blob name inside the bucket
    :param callback: receives the blob contents, or a ``LoaderResult`` with
        ``ERROR_NOT_FOUND`` when the blob does not exist
    """
    bucket_id = context.config.get("CLOUD_STORAGE_BUCKET_ID")
    project_id = context.config.get("CLOUD_STORAGE_PROJECT_ID")

    bucket = buckets[project_id].get(bucket_id)
    if bucket is None:
        # First use of this bucket: create it and remember the handle.
        bucket = storage.Client(project_id).get_bucket(bucket_id)
        buckets[project_id][bucket_id] = bucket

    blob = bucket.get_blob(path)
    if not blob:
        missing = LoaderResult()
        missing.successful = False
        missing.error = LoaderResult.ERROR_NOT_FOUND
        callback(missing)
        return

    callback(blob.download_as_string())
def load(context, path, callback):
    """Fetch *path* from the configured S3 bucket through boto.

    :param context: object whose ``config`` provides ``AWS_S3_ACCESS_KEY``,
        ``AWS_S3_SECRET_KEY``, ``AWS_HOST`` and ``AWS_S3_BUCKET``
    :param path: key name inside the bucket
    :param callback: receives a ``LoaderResult`` with the key contents, or
        ``ERROR_NOT_FOUND`` when the key does not exist
    """
    result = LoaderResult()
    config = context.config

    connection = boto.connect_s3(
        aws_access_key_id=config.AWS_S3_ACCESS_KEY,
        aws_secret_access_key=config.AWS_S3_SECRET_KEY,
        host=config.AWS_HOST,
        calling_format=boto.s3.connection.OrdinaryCallingFormat(),
    )
    s3_bucket = connection.get_bucket(config.AWS_S3_BUCKET, validate=False)
    s3_key = s3_bucket.get_key(path)

    if s3_key is not None:
        result.buffer = s3_key.get_contents_as_string()
        result.successful = True
    else:
        result.successful = False
        result.error = LoaderResult.ERROR_NOT_FOUND

    callback(result)
Exemple #20
0
async def load(context, path):
    """Load *path* with ``file_loader``, falling back to ``http_loader``.

    :param context: loader context, forwarded to both loaders
    :param path: path or URL to load
    :returns: the successful ``file_loader`` result; otherwise the
        ``http_loader`` result, or a ``LoaderResult`` with
        ``ERROR_BAD_REQUEST`` when ``http_loader.validate`` rejects *path*
    """
    # First attempt to load with file_loader
    file_result = await file_loader.load(context, path)
    if file_result.successful:
        return file_result

    # If file_loader failed try http_loader
    if http_loader.validate(context, path):
        return await http_loader.load(context, path)

    rejected = LoaderResult()
    rejected.successful = False
    rejected.error = LoaderResult.ERROR_BAD_REQUEST
    rejected.extras["reason"] = "Unallowed domain"
    rejected.extras["source"] = path
    return rejected
def load(context, path, callback):
    """Load the fixture ``fixtures/images/image.jpg`` regardless of *path*.

    :param context: unused
    :param path: unused; the fixture next to this module is always read
    :param callback: receives a ``LoaderResult`` with the file bytes plus
        ``size`` and ``updated_at`` metadata, or ``ERROR_NOT_FOUND`` when the
        fixture is missing
    """
    file_path = abspath(join(dirname(__file__), "fixtures/images/image.jpg"))
    result = LoaderResult()
    if exists(file_path):
        # Binary mode: a JPEG is not text, so text mode would try to decode
        # it (and translate newlines on some platforms).
        with open(file_path, 'rb') as f:
            stats = fstat(f.fileno())

            result.successful = True
            result.buffer = f.read()

            result.metadata.update(
                size=stats.st_size,
                updated_at=datetime.utcfromtimestamp(stats.st_mtime)
            )
    else:
        result.error = LoaderResult.ERROR_NOT_FOUND
        result.successful = False

    callback(result)
def load(context, path, callback):
    """Load an excerpt of *path* from below ``FILE_LOADER_ROOT_PATH``.

    Only the first ``LOADER_EXCERPT_LENGTH`` bytes are placed in the result
    buffer.  When the excerpt is exactly that long, the complete contents
    are also written to a temporary file stored on
    ``context.wikimedia_original_file``, and its cleanup is scheduled after
    ``HTTP_LOADER_TEMP_FILE_TIMEOUT``.

    :param context: object exposing ``config``; may gain the
        ``wikimedia_original_file`` attribute
    :param path: requested path, joined onto the root directory
    :param callback: receives the ``LoaderResult``; ``error`` is set to
        ``404`` when the file is missing or outside the root
    """
    from os.path import sep

    root_path = abspath(context.config.FILE_LOADER_ROOT_PATH)
    file_path = join(context.config.FILE_LOADER_ROOT_PATH.rstrip('/'), path.lstrip('/'))
    file_path = abspath(file_path)

    # Compare against the normalised root on a directory boundary; a bare
    # startswith() on the raw setting also matches "<root>_other/...".
    inside_root_path = (
        file_path == root_path or
        file_path.startswith(root_path.rstrip(sep) + sep))

    result = LoaderResult()

    if inside_root_path and exists(file_path):

        # Binary mode: the contents are copied verbatim into a temporary
        # file that NamedTemporaryFile opens in binary mode by default.
        with open(file_path, 'rb') as f:
            stats = fstat(f.fileno())

            result.successful = True
            response = f.read()

            excerpt_length = context.config.LOADER_EXCERPT_LENGTH
            result.buffer = response[:excerpt_length]

            # A full-length excerpt means the buffer may be truncated, so
            # the whole file is kept available on disk.
            if len(result.buffer) == excerpt_length:
                temp = NamedTemporaryFile(delete=False)
                temp.write(response)
                temp.close()

                context.wikimedia_original_file = temp

                tornado.ioloop.IOLoop.instance().call_later(
                    context.config.HTTP_LOADER_TEMP_FILE_TIMEOUT,
                    partial(
                        cleanup_temp_file,
                        context.wikimedia_original_file.name
                    )
                )

            result.metadata.update(
                size=stats.st_size,
                updated_at=datetime.utcfromtimestamp(stats.st_mtime)
            )
    else:
        # NOTE(review): a numeric 404 is used here rather than a
        # LoaderResult error constant - confirm the consumer expects it.
        result.error = 404
        result.successful = False

    callback(result)
Exemple #23
0
def load(context, path, callback):
    """Load *path* from the first ``TC_MULTIDIR_PATHS`` directory holding it.

    Directories are tried in configuration order.  In each one the literal
    path is tried first and its percent-decoded form second.

    :param context: object exposing ``config.TC_MULTIDIR_PATHS``
    :param path: requested path, joined onto each directory in turn
    :param callback: called exactly once with a ``LoaderResult``; on success
        it carries the file bytes plus ``size`` and ``updated_at`` metadata,
        otherwise ``ERROR_NOT_FOUND``
    """
    from os.path import sep

    result = LoaderResult()

    for next_dir in context.config.TC_MULTIDIR_PATHS:

        root_path = abspath(next_dir)
        file_path = abspath(join(next_dir.rstrip('/'), path.lstrip('/')))

        # keep backwards compatibility, try the actual path first
        # if not found, unquote it and try again
        if not exists(file_path):
            # Decoding can reveal "../" segments (e.g. "%2e%2e"), so the
            # path is normalised again before the containment check.
            file_path = abspath(unquote(file_path))

        # Compare on a directory boundary: a bare startswith() would also
        # accept siblings such as "<dir>_other/file".
        inside_root_path = (
            file_path == root_path or
            file_path.startswith(root_path.rstrip(sep) + sep))

        if inside_root_path and exists(file_path):
            with open(file_path, 'rb') as f:
                stats = fstat(f.fileno())

                result.successful = True
                result.buffer = f.read()

                result.metadata.update(
                    size=stats.st_size,
                    updated_at=datetime.utcfromtimestamp(stats.st_mtime))
            callback(result)
            return

        logger.debug('TC_MULTIDIR: File {0} not found in {1}'.format(path, next_dir))
        # else loop and try next directory

    if not context.config.TC_MULTIDIR_PATHS:
        logger.error('TC_MULTIDIR: No paths set in configuration TC_MULTIDIR_PATHS')

    # no file found
    result.error = LoaderResult.ERROR_NOT_FOUND
    result.successful = False
    callback(result)
Exemple #24
0
def load(context, url, callback):
    """Route *url* to the S3, HTTP or file loader.

    URLs matching ``S3_RE`` go to ``S3Loader`` (keyed by bucket and path),
    URLs matching ``HTTP_RE`` go to ``HttpLoader``, everything else goes to
    ``FileLoader``.

    :param context: loader context, forwarded to the chosen loader
    :param url: URL to load; quoted with ``quote_url`` before matching
    :param callback: forwarded to the chosen loader
    """
    url = quote_url(url)
    match = S3_RE.match(url)

    def s3_key():
        # "<bucket>/<path>" built from the S3 match groups.
        return os.path.join(match.group('bucket').rstrip('/'),
                            match.group('path').lstrip('/'))

    def callback_wrapper(result):
        # Currently unused: it belongs to the disabled "try the file loader
        # first, then S3" flow mentioned below.
        if result.successful:
            callback(result)
            return

        logger.info('s3 {0}'.format(s3_key()))

        # If not on efs, try s3
        S3Loader.load(context, s3_key(), callback)

    # If melody s3 file, first try to load from efs
    if match:
        logger.info('BOTO {0}'.format(match.group('path')))

        # TEMP try s3 direct (instead of FileLoader with callback_wrapper)
        S3Loader.load(context, s3_key(), callback)
        return

    # else get from the internet
    if HTTP_RE.match(url):
        logger.info('WEB {0}'.format(url))
        HttpLoader.load(context, url, callback)
        return

    logger.info('FILE {0}'.format(url))
    # Built but not delivered: the direct not-found callback is disabled
    # while the file loader is temporarily enabled.
    not_found = LoaderResult()
    not_found.successful = False
    not_found.error = LoaderResult.ERROR_NOT_FOUND
    # TEMP enable file loader
    FileLoader.load(context, url, callback)
Exemple #25
0
 def load_override(context, path, callback):
     """Always report a failed load with error ``409`` to *callback*.

     *context* and *path* are ignored.
     """
     failure = LoaderResult()
     failure.error = 409
     failure.successful = False
     callback(failure)
Exemple #26
0
def get_not_found_result():
    # type: () -> LoaderResult
    """Return a new unsuccessful ``LoaderResult`` set to ``ERROR_NOT_FOUND``."""
    not_found = LoaderResult()
    not_found.successful = False
    not_found.error = LoaderResult.ERROR_NOT_FOUND
    return not_found
 async def load_override(*_):
     """Return a failed ``LoaderResult`` with error ``409``; arguments are ignored."""
     failure = LoaderResult()
     failure.error = 409
     failure.successful = False
     return failure
Exemple #28
0
 def load_override(context, path, callback):
     """Hand *callback* a failed ``LoaderResult`` whose error is ``409``.

     Neither *context* nor *path* is consulted.
     """
     conflict = LoaderResult()
     conflict.error = 409
     conflict.successful = False
     callback(conflict)
Exemple #29
0
def load(context, path, callback):
    """Load *path* from the primary picture root, else from the fallback root.

    ``PIC_LOADER_ROOT_PATH`` is consulted first and
    ``PIC_LOADER_FALLBACK_PATH`` second.  The first root holding the file
    decides the outcome: a file at or below that root's "empty" size
    threshold is reported as ``ERROR_UPSTREAM`` without trying the other
    root.

    :param context: object exposing both root settings on ``config``
    :param path: requested path, joined onto each root in turn
    :param callback: called exactly once with the ``LoaderResult``;
        ``ERROR_NOT_FOUND`` when neither root holds the file
    """
    from os.path import sep

    def resolve(root):
        """Return the existing file for *path* inside *root*, else None."""
        root_path = abspath(root)
        candidate = abspath(join(root.rstrip('/'), path.lstrip('/')))
        # keep backwards compatibility, try the actual path first
        # if not found, unquote it and try again
        if not exists(candidate):
            candidate = abspath(unquote(candidate))
        # Checked on the final, normalised path and on a directory boundary
        # so that neither "<root>_other" nor a decoded "../" can escape.
        inside = (candidate == root_path or
                  candidate.startswith(root_path.rstrip(sep) + sep))
        return candidate if inside and exists(candidate) else None

    # (root directory, largest size still treated as an empty image)
    sources = (
        (context.config.PIC_LOADER_ROOT_PATH, 1),
        (context.config.PIC_LOADER_FALLBACK_PATH, 4),
    )

    result = LoaderResult()

    for root, empty_size in sources:
        file_path = resolve(root)
        if file_path is None:
            # Outside this root or missing: try the next one.  Previously a
            # primary path outside its root returned without ever calling
            # the callback when the fallback path was valid.
            continue

        # Binary mode: the buffer holds raw image bytes.
        with open(file_path, 'rb') as f:
            stats = fstat(f.fileno())

            if stats.st_size <= empty_size:
                logger.warning(u"%s: cette image source est vide...",
                               file_path)
                result.successful = False
                result.error = LoaderResult.ERROR_UPSTREAM
            else:
                result.successful = True
                result.buffer = f.read()

                result.metadata.update(size=stats.st_size,
                                       updated_at=datetime.utcfromtimestamp(
                                           stats.st_mtime))
        break
    else:
        result.error = LoaderResult.ERROR_NOT_FOUND
        result.successful = False

    callback(result)
def load_sync(context, url, callback):
    """Fetch the original file from Swift and pass an excerpt to *callback*.

    The object at ``context.wikimedia_original_filepath`` in container
    ``context.wikimedia_original_container`` is downloaded and written to a
    temporary file.  Only the first ``LOADER_EXCERPT_LENGTH`` bytes go into
    the result buffer.  When the excerpt is exactly that long, the temporary
    file is kept on ``context.wikimedia_original_file`` and its cleanup is
    scheduled after ``HTTP_LOADER_TEMP_FILE_TIMEOUT``; otherwise it is
    cleaned up immediately.

    :param context: request context (config, metrics, wikimedia_* attributes)
    :param url: only used in log messages
    :param callback: called once with the ``LoaderResult``; the error is
        ``ERROR_NOT_FOUND`` on ``ClientException`` and ``ERROR_UPSTREAM`` on
        ``requests.ConnectionError``
    """
    logger.debug('[SWIFT_LOADER] load_sync: %s' % url, extra=log_extra(context))

    result = LoaderResult()

    container = context.wikimedia_original_container
    path = context.wikimedia_original_filepath

    try:
        logger.debug(
            '[SWIFT_LOADER] fetching %s from container %s' % (path, container),
            extra=log_extra(context)
        )

        start = datetime.datetime.now()

        # Logging up to ERROR is disabled process-wide during the request.
        # try/finally guarantees it is re-enabled whatever get_object raises;
        # previously an exception other than the two handled below left
        # logging disabled.
        logging.disable(logging.ERROR)
        try:
            _headers, response = swift(context).get_object(
                container,
                path
            )
        finally:
            logging.disable(logging.NOTSET)

        record_timing(context, datetime.datetime.now() - start, 'swift.original.read.success', 'Thumbor-Swift-Original-Success-Time')

        context.metrics.incr('swift_loader.status.success')

        # XXX hack: If the file is an STL, we overwrite the first five bytes
        # with the word "solid", to trick the MIME detection pipeline.
        extension = path[-4:].lower()
        isSTL = extension == '.stl'

        f = NamedTemporaryFile(delete=False)
        logger.debug(
            '[SWIFT_LOADER] writing %d bytes to temp file' % len(response),
            extra=log_extra(context)
        )
        f.write(response)
        f.close()

        excerpt_length = context.config.LOADER_EXCERPT_LENGTH

        # First kb of the body for MIME detection
        body = response[:excerpt_length]

        # See above - text STLs have this string here anyway, and
        # binary STLs ignore the first 80 bytes, so this string will
        # be ignored.
        if isSTL:
            body = 'solid' + body[5:]

        if len(body) == excerpt_length:
            # Full-length excerpt: keep the complete file on disk and
            # schedule its removal.
            logger.debug('[SWIFT_LOADER] return_contents: %s' % f.name, extra=log_extra(context))
            context.wikimedia_original_file = f

            tornado.ioloop.IOLoop.instance().call_later(
                context.config.HTTP_LOADER_TEMP_FILE_TIMEOUT,
                partial(
                    cleanup_temp_file,
                    context,
                    context.wikimedia_original_file.name
                )
            )
        else:
            # The excerpt already holds the whole body; drop the temp file.
            logger.debug('[SWIFT_LOADER] return_contents: small body')
            cleanup_temp_file(context, f.name)

        result.buffer = body
    except ClientException as e:
        record_timing(context, datetime.datetime.now() - start, 'swift.original.read.miss', 'Thumbor-Swift-Original-Miss-Time')
        result.successful = False
        result.error = LoaderResult.ERROR_NOT_FOUND
        logger.error('[SWIFT_LOADER] get_object failed: %s %r' % (url, e), extra=log_extra(context))
        context.metrics.incr('swift_loader.status.client_exception')
    except requests.ConnectionError as e:
        record_timing(context, datetime.datetime.now() - start, 'swift.original.read.exception', 'Thumbor-Swift-Original-Exception-Time')
        result.successful = False
        result.error = LoaderResult.ERROR_UPSTREAM
        logger.error('[SWIFT_LOADER] get_object failed: %s %r' % (url, e), extra=log_extra(context))
        context.metrics.incr('swift_loader.status.connection_error')

    callback(result)
def _pic_candidate_paths(root, relative_path):
    """Yield the filesystem candidates for *relative_path* confined to *root*.

    The literal path is yielded first and its percent-decoded form second
    (kept for backwards compatibility with callers that pass quoted paths).
    A candidate is only yielded when, after normalisation, it is located
    strictly below *root*; anything that escapes the root is dropped.
    """
    root = abspath(root)
    # Compare against the root *with* a trailing separator so that a root of
    # "/data/pic" does not accept "/data/pictures/x".
    root_prefix = join(root, '')
    literal = abspath(join(root, relative_path))

    # Re-normalise after unquoting: decoding can introduce new ".." segments
    # (e.g. "%2e%2e%2f"), so containment has to be checked on the final path.
    for candidate in (literal, abspath(unquote(literal))):
        if candidate.startswith(root_prefix):
            yield candidate


def load(context, path):
    """Load an image from the primary picture root, then from the fallback.

    The path is looked up under ``PIC_LOADER_ROOT_PATH`` first and under
    ``PIC_LOADER_FALLBACK_PATH`` second. For each root the literal path is
    tried before its percent-decoded form. The first existing file decides
    the outcome; a file that is too large does not trigger a fallback.

    :param context: request context; only ``context.config`` is read here
        (``PIC_LOADER_ROOT_PATH``, ``PIC_LOADER_FALLBACK_PATH`` and the
        optional ``PIC_LOADER_MAX_SIZE``).
    :param path: requested image path, relative to the loader roots.
    :returns: a ``LoaderResult``. On success ``buffer`` holds the raw file
        bytes and ``metadata`` gains ``size`` and ``updated_at``. A file of
        ``PIC_LOADER_MAX_SIZE`` bytes or more yields
        ``COULD_NOT_LOAD_IMAGE``; a missing file or a path outside both roots
        yields ``ERROR_NOT_FOUND``.
    """
    # The limit is a byte count, not a path: read it as a plain setting and
    # fall back to 16 MiB when it is not configured.
    max_size = getattr(context.config, 'PIC_LOADER_MAX_SIZE', None)
    if max_size is None:
        max_size = 16777216

    result = LoaderResult()
    relative_path = path.lstrip('/')
    roots = (
        context.config.PIC_LOADER_ROOT_PATH,
        context.config.PIC_LOADER_FALLBACK_PATH,
    )

    for root in roots:
        for file_path in _pic_candidate_paths(root, relative_path):
            if not exists(file_path):
                continue

            # Binary mode: image data must not go through text decoding or
            # newline translation.
            with open(file_path, 'rb') as f:
                stats = fstat(f.fileno())
                if stats.st_size >= max_size:
                    result.error = LoaderResult.COULD_NOT_LOAD_IMAGE
                    result.successful = False
                else:
                    result.successful = True
                    result.buffer = f.read()
                    result.metadata.update(
                        size=stats.st_size,
                        updated_at=datetime.utcfromtimestamp(stats.st_mtime))
            return result

    result.error = LoaderResult.ERROR_NOT_FOUND
    result.successful = False
    return result
Exemple #32
0
def _normalize_jpeg_dpi(convert_path, image_bytes):
    """Pipe *image_bytes* through ``convert`` to set 72 DPI and strip metadata.

    :returns: the converted bytes, or ``None`` when the process could not be
        started or exited with a non-zero status (a warning is logged).
    """
    # Argument vector without a shell: the binary path is passed verbatim, so
    # spaces or shell metacharacters in CONVERT_PATH cannot alter the command.
    command = [convert_path, '-', '-density', '72,72', '-strip', '-']
    try:
        process = Popen(command,
                        stdin=PIPE,
                        stdout=PIPE,
                        stderr=PIPE,
                        close_fds=True)
        converted, errors = process.communicate(input=image_bytes)
    except OSError as e:
        # Without a shell a non-executable binary raises instead of returning
        # an exit status; treat it like any other failed conversion.
        logger.warning('dpi normalization could not be executed: %s' % e)
        return None

    if process.returncode != 0:
        logger.warning(
            'dpi normalization finished with non-zero return code (%d): %s'
            % (process.returncode, errors))
        return None

    return converted


def return_contents(response, url, callback, context, req_start=None):
    """Convert an HTTP fetch response into a ``LoaderResult`` and deliver it.

    :param response: HTTP response object; ``code``, ``error``, ``body``,
        ``headers`` and ``time_info`` are read.
    :param url: URL that was fetched, used for metrics and log messages.
    :param callback: called exactly once with the resulting ``LoaderResult``.
    :param context: request context providing ``metrics`` and ``config``.
    :param req_start: optional ``datetime`` taken when the request started;
        when given, the total fetch time is reported as a timing metric.

    Failures map to ``ERROR_TIMEOUT`` (response code 599),
    ``ERROR_NOT_FOUND`` (any other response error) or ``ERROR_UPSTREAM``
    (empty body). On success the body becomes ``result.buffer``; JPEG bodies
    are additionally normalised to 72 DPI when ``NORMALIZE_TO_72DPI`` and
    ``CONVERT_PATH`` are configured.
    """
    if req_start:
        finish = datetime.datetime.now()
        res = urlparse(url)
        context.metrics.timing(
            'original_image.fetch.{0}.{1}'.format(response.code, res.netloc),
            (finish - req_start).total_seconds() * 1000,
        )

    result = LoaderResult()
    context.metrics.incr('original_image.status.' + str(response.code))

    if response.error:
        result.successful = False
        if response.code == 599:
            # Return a Gateway Timeout status downstream if upstream times out
            result.error = LoaderResult.ERROR_TIMEOUT
        else:
            result.error = LoaderResult.ERROR_NOT_FOUND

        logger.warning(u"ERROR retrieving image {0}: {1}".format(
            url, str(response.error)))

    elif not response.body:
        result.successful = False
        result.error = LoaderResult.ERROR_UPSTREAM

        logger.warning(
            u"ERROR retrieving image {0}: Empty response.".format(url))

    else:
        if response.time_info:
            for x in response.time_info:
                context.metrics.timing('original_image.time_info.' + x,
                                       response.time_info[x] * 1000)
            # Throughput is only defined for a known, non-zero total time;
            # skip it instead of raising KeyError / ZeroDivisionError.
            total_time = response.time_info.get('total')
            if total_time:
                context.metrics.timing(
                    'original_image.time_info.bytes_per_second',
                    len(response.body) / total_time)

        result.buffer = response.body
        result.metadata.update(response.headers)
        context.metrics.incr('original_image.response_bytes',
                             len(response.body))

        if context.config.NORMALIZE_TO_72DPI and context.config.CONVERT_PATH:
            if not exists(context.config.CONVERT_PATH):
                logger.warning(
                    'imagemagick/convert enabled but binary CONVERT_PATH does not exist'
                )
            else:
                # Only the header lookup can raise KeyError, so only that is
                # guarded.
                try:
                    content_type = result.metadata['Content-Type']
                except KeyError:
                    logger.warning('Content-Type KeyError on %s' % url)
                else:
                    if "jpeg" in content_type:
                        normalized = _normalize_jpeg_dpi(
                            context.config.CONVERT_PATH, response.body)
                        # Keep the original body if normalisation failed.
                        if normalized is not None:
                            result.buffer = normalized

    callback(result)
Exemple #33
0
def get_not_found_result():
    # type: () -> LoaderResult
    """Build a failed ``LoaderResult`` flagged with ``ERROR_NOT_FOUND``."""
    not_found = LoaderResult()
    not_found.successful = False
    not_found.error = LoaderResult.ERROR_NOT_FOUND
    return not_found