def _create_data_stream(self, data):
    """
    Attach a readable data stream to every dataset described in ``data``.

    ``data`` is a mapping that may contain a ``'url'`` entry, an ``'obj'``
    entry, or both. For each entry present the remote content is requested
    with ``stream=True`` and the resulting stream is stored on the entry's
    ``data_stream`` attribute. The method returns nothing.

    When an entry's ``data_byte_range`` is not None, only that range is
    requested by sending ``Range: bytes=<first>-<second>``. A ranged object
    stream is additionally wrapped in ``WrappedStreamingBodyPartition``,
    built from the stream, ``obj.chunk_size`` and the byte range.
    """
    if 'url' in data:
        url = data['url']
        logger.info('Getting dataset from {}'.format(url.path))

        # Headers are built per source so a range requested for the URL can
        # never be carried over to the object handled below.
        url_headers = {}
        if url.data_byte_range is not None:
            url_headers['Range'] = 'bytes={}-{}'.format(*url.data_byte_range)
            logger.info('Chunk: {} - Range: {}'.format(url.part, url_headers['Range']))

        resp = requests.get(url.path, headers=url_headers, stream=True)
        # NOTE(review): the HTTP status is not inspected here; an error
        # response body would be exposed as the dataset stream.
        url.data_stream = resp.raw

    if 'obj' in data:
        obj = data['obj']
        storage_handler = Storage(self.pywren_config, obj.storage_backend).get_storage_handler()
        logger.info('Getting dataset from {}://{}/{}'.format(obj.storage_backend, obj.bucket, obj.key))

        if obj.data_byte_range is None:
            # Whole object: plain streaming body, no extra request arguments.
            obj.data_stream = storage_handler.get_object(obj.bucket, obj.key, stream=True)
        else:
            obj_get_args = {'Range': 'bytes={}-{}'.format(*obj.data_byte_range)}
            logger.info('Chunk: {} - Range: {}'.format(obj.part, obj_get_args['Range']))
            sb = storage_handler.get_object(obj.bucket, obj.key, stream=True,
                                            extra_get_args=obj_get_args)
            obj.data_stream = WrappedStreamingBodyPartition(sb, obj.chunk_size,
                                                            obj.data_byte_range)
def _load_object(self, data):
    """
    Attach a readable data stream to every dataset described in ``data``.

    ``data`` is a mapping that may contain a ``'url'`` entry, an ``'obj'``
    entry, or both. For each entry present the remote content is requested
    with ``stream=True`` and the resulting stream is stored on the entry's
    ``data_stream`` attribute. Nothing is written to local disk by this
    method and nothing is returned.

    Objects are read through ``self.internal_storage.storage`` when their
    backend equals ``self.internal_storage.backend``; otherwise a new
    ``Storage`` is created for ``obj.backend``.

    When an entry's ``data_byte_range`` is not None, only that range is
    requested by sending ``Range: bytes=<first>-<second>``. A ranged object
    stream is additionally wrapped in ``WrappedStreamingBodyPartition``,
    built from the stream, ``obj.chunk_size`` and the byte range.
    """
    if 'url' in data:
        url = data['url']
        logger.info('Getting dataset from {}'.format(url.path))

        # Headers are built per source: sharing one dict with the object
        # branch would leak the URL's Range into an un-ranged object fetch
        # when ``data`` carries both entries.
        url_headers = {}
        if url.data_byte_range is not None:
            url_headers['Range'] = 'bytes={}-{}'.format(*url.data_byte_range)
            logger.info('Chunk: {} - Range: {}'.format(
                url.part, url_headers['Range']))

        resp = requests.get(url.path, headers=url_headers, stream=True)
        # NOTE(review): the HTTP status is not inspected here; an error
        # response body would be exposed as the dataset stream.
        url.data_stream = resp.raw

    if 'obj' in data:
        obj = data['obj']
        logger.info('Getting dataset from {}://{}/{}'.format(
            obj.backend, obj.bucket, obj.key))

        # Reuse the already-initialised internal storage when it targets the
        # same backend as the object.
        if obj.backend == self.internal_storage.backend:
            storage = self.internal_storage.storage
        else:
            storage = Storage(pywren_config=self.pywren_config,
                              storage_backend=obj.backend)

        obj_get_args = {}
        is_partition = obj.data_byte_range is not None
        if is_partition:
            obj_get_args['Range'] = 'bytes={}-{}'.format(
                *obj.data_byte_range)
            logger.info('Chunk: {} - Range: {}'.format(
                obj.part, obj_get_args['Range']))

        sb = storage.get_object(obj.bucket, obj.key, stream=True,
                                extra_get_args=obj_get_args)
        if is_partition:
            sb = WrappedStreamingBodyPartition(sb, obj.chunk_size,
                                               obj.data_byte_range)
        obj.data_stream = sb