Exemple #1
0
    def _load_object(self, data):
        """
        Loads the object in /tmp in case of object processing
        """
        extra_get_args = {}

        if 'url' in data:
            url = data['url']
            logger.info('Getting dataset from {}'.format(url.path))
            if url.data_byte_range is not None:
                range_str = 'bytes={}-{}'.format(*url.data_byte_range)
                extra_get_args['Range'] = range_str
                logger.info('Chunk: {} - Range: {}'.format(
                    url.part, extra_get_args['Range']))
            resp = requests.get(url.path, headers=extra_get_args, stream=True)
            url.data_stream = resp.raw

        if 'obj' in data:
            obj = data['obj']
            logger.info('Getting dataset from {}://{}/{}'.format(
                obj.backend, obj.bucket, obj.key))

            if obj.backend == self.internal_storage.backend:
                storage = self.internal_storage.storage
            else:
                storage = Storage(lithops_config=self.lithops_config,
                                  storage_backend=obj.backend)

            if obj.data_byte_range is not None:
                extra_get_args['Range'] = 'bytes={}-{}'.format(
                    *obj.data_byte_range)
                logger.info('Chunk: {} - Range: {}'.format(
                    obj.part, extra_get_args['Range']))
                sb = storage.get_object(obj.bucket,
                                        obj.key,
                                        stream=True,
                                        extra_get_args=extra_get_args)
                wsb = WrappedStreamingBodyPartition(sb, obj.chunk_size,
                                                    obj.data_byte_range)
                obj.data_stream = wsb
            else:
                sb = storage.get_object(obj.bucket,
                                        obj.key,
                                        stream=True,
                                        extra_get_args=extra_get_args)
                obj.data_stream = sb
Exemple #2
0
    def _load_object(self, data):
        """
        Loads the object in /tmp in case of object processing
        """
        extra_get_args = {}

        obj = data['obj']

        if hasattr(obj, 'bucket') and not hasattr(obj, 'path'):
            logger.info('Getting dataset from {}://{}/{}'.format(
                obj.backend, obj.bucket, obj.key))

            if obj.backend == self.internal_storage.backend:
                storage = self.internal_storage.storage
            else:
                storage = Storage(config=self.lithops_config,
                                  backend=obj.backend)

            if obj.data_byte_range is not None:
                extra_get_args['Range'] = 'bytes={}-{}'.format(
                    *obj.data_byte_range)
                logger.info('Chunk: {} - Range: {}'.format(
                    obj.part, extra_get_args['Range']))
                sb = storage.get_object(obj.bucket,
                                        obj.key,
                                        stream=True,
                                        extra_get_args=extra_get_args)
                wsb = WrappedStreamingBodyPartition(sb, obj.chunk_size,
                                                    obj.data_byte_range)
                obj.data_stream = wsb
            else:
                sb = storage.get_object(obj.bucket,
                                        obj.key,
                                        stream=True,
                                        extra_get_args=extra_get_args)
                obj.data_stream = sb

        elif hasattr(obj, 'url'):
            logger.info('Getting dataset from {}'.format(obj.url))
            if obj.data_byte_range is not None:
                range_str = 'bytes={}-{}'.format(*obj.data_byte_range)
                extra_get_args['Range'] = range_str
                logger.info('Chunk: {} - Range: {}'.format(
                    obj.part, extra_get_args['Range']))
            resp = requests.get(obj.url, headers=extra_get_args, stream=True)
            obj.data_stream = resp.raw

        elif hasattr(obj, 'path'):
            logger.info('Getting dataset from {}'.format(obj.path))
            with open(obj.path, "rb") as f:
                if obj.data_byte_range is not None:
                    extra_get_args['Range'] = 'bytes={}-{}'.format(
                        *obj.data_byte_range)
                    logger.info('Chunk: {} - Range: {}'.format(
                        obj.part, extra_get_args['Range']))
                    first_byte, last_byte = obj.data_byte_range
                    f.seek(first_byte)
                    buffer = io.BytesIO(f.read(last_byte - first_byte + 1))
                    sb = WrappedStreamingBodyPartition(buffer, obj.chunk_size,
                                                       obj.data_byte_range)
                else:
                    sb = io.BytesIO(f.read())
            obj.data_stream = sb