def _load_object(self, data): """ Loads the object in /tmp in case of object processing """ extra_get_args = {} if 'url' in data: url = data['url'] logger.info('Getting dataset from {}'.format(url.path)) if url.data_byte_range is not None: range_str = 'bytes={}-{}'.format(*url.data_byte_range) extra_get_args['Range'] = range_str logger.info('Chunk: {} - Range: {}'.format( url.part, extra_get_args['Range'])) resp = requests.get(url.path, headers=extra_get_args, stream=True) url.data_stream = resp.raw if 'obj' in data: obj = data['obj'] logger.info('Getting dataset from {}://{}/{}'.format( obj.backend, obj.bucket, obj.key)) if obj.backend == self.internal_storage.backend: storage = self.internal_storage.storage else: storage = Storage(lithops_config=self.lithops_config, storage_backend=obj.backend) if obj.data_byte_range is not None: extra_get_args['Range'] = 'bytes={}-{}'.format( *obj.data_byte_range) logger.info('Chunk: {} - Range: {}'.format( obj.part, extra_get_args['Range'])) sb = storage.get_object(obj.bucket, obj.key, stream=True, extra_get_args=extra_get_args) wsb = WrappedStreamingBodyPartition(sb, obj.chunk_size, obj.data_byte_range) obj.data_stream = wsb else: sb = storage.get_object(obj.bucket, obj.key, stream=True, extra_get_args=extra_get_args) obj.data_stream = sb
def _load_object(self, data): """ Loads the object in /tmp in case of object processing """ extra_get_args = {} obj = data['obj'] if hasattr(obj, 'bucket') and not hasattr(obj, 'path'): logger.info('Getting dataset from {}://{}/{}'.format( obj.backend, obj.bucket, obj.key)) if obj.backend == self.internal_storage.backend: storage = self.internal_storage.storage else: storage = Storage(config=self.lithops_config, backend=obj.backend) if obj.data_byte_range is not None: extra_get_args['Range'] = 'bytes={}-{}'.format( *obj.data_byte_range) logger.info('Chunk: {} - Range: {}'.format( obj.part, extra_get_args['Range'])) sb = storage.get_object(obj.bucket, obj.key, stream=True, extra_get_args=extra_get_args) wsb = WrappedStreamingBodyPartition(sb, obj.chunk_size, obj.data_byte_range) obj.data_stream = wsb else: sb = storage.get_object(obj.bucket, obj.key, stream=True, extra_get_args=extra_get_args) obj.data_stream = sb elif hasattr(obj, 'url'): logger.info('Getting dataset from {}'.format(obj.url)) if obj.data_byte_range is not None: range_str = 'bytes={}-{}'.format(*obj.data_byte_range) extra_get_args['Range'] = range_str logger.info('Chunk: {} - Range: {}'.format( obj.part, extra_get_args['Range'])) resp = requests.get(obj.url, headers=extra_get_args, stream=True) obj.data_stream = resp.raw elif hasattr(obj, 'path'): logger.info('Getting dataset from {}'.format(obj.path)) with open(obj.path, "rb") as f: if obj.data_byte_range is not None: extra_get_args['Range'] = 'bytes={}-{}'.format( *obj.data_byte_range) logger.info('Chunk: {} - Range: {}'.format( obj.part, extra_get_args['Range'])) first_byte, last_byte = obj.data_byte_range f.seek(first_byte) buffer = io.BytesIO(f.read(last_byte - first_byte + 1)) sb = WrappedStreamingBodyPartition(buffer, obj.chunk_size, obj.data_byte_range) else: sb = io.BytesIO(f.read()) obj.data_stream = sb