def run_tests(test_to_run, config=None, mode=None, backend=None, storage=None):
    global CONFIG, STORAGE_CONFIG, STORAGE

    mode = mode or get_mode(backend, config)
    config_ow = {'lithops': {'mode': mode}}
    if storage:
        config_ow['lithops']['storage'] = storage
    if backend:
        config_ow[mode] = {'backend': backend}

    CONFIG = default_config(config, config_ow)
    STORAGE_CONFIG = extract_storage_config(CONFIG)
    STORAGE = Storage(storage_config=STORAGE_CONFIG)

    suite = unittest.TestSuite()
    if test_to_run == 'all':
        suite.addTest(unittest.makeSuite(TestLithops))
    else:
        try:
            suite.addTest(TestLithops(test_to_run))
        except ValueError:
            print("unknown test, use: --help")
            sys.exit()

    runner = unittest.TextTestRunner()
    runner.run(suite)
def run_tests(tests, config=None, group=None, backend=None, storage=None,
              fail_fast=False, keep_datasets=False):
    global CONFIG, STORAGE_CONFIG, STORAGE

    config_ow = {'lithops': {}}
    if storage:
        config_ow['lithops']['storage'] = storage
    if backend:
        config_ow['lithops']['backend'] = backend

    CONFIG = default_config(config, config_ow)
    STORAGE_CONFIG = extract_storage_config(CONFIG)
    STORAGE = Storage(storage_config=STORAGE_CONFIG)
    init_test_variables()

    suite = unittest.TestSuite()
    config_suite(suite, tests, group)

    words_in_data_set = upload_data_sets()  # uploads datasets and returns their word count
    main_util.init_config(CONFIG, STORAGE, STORAGE_CONFIG, words_in_data_set, TEST_FILES_URLS)

    runner = unittest.TextTestRunner(verbosity=2, failfast=fail_fast)
    tests_results = runner.run(suite)

    # Remove previously uploaded datasets from storage.
    if not keep_datasets:
        clean_tests(STORAGE, STORAGE_CONFIG, PREFIX)

    if not tests_results.wasSuccessful():
        # Fail the GitHub workflow action to reject the merge to the repository
        sys.tracebacklimit = 0  # avoid displaying redundant traceback info
        raise Exception("--------Test procedure failed. Merge rejected--------")
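# A minimal sketch (not from the original sources) of the config-to-Storage bootstrap
# that both test runners above share. It assumes a working Lithops installation;
# default_config / extract_storage_config / Storage are the calls used above, while
# the put_object/get_object round trip and the 'hello.txt' key are illustrative.
from lithops.config import default_config, extract_storage_config
from lithops.storage import Storage

config = default_config()
storage = Storage(storage_config=extract_storage_config(config))

# Round-trip a small object through the configured backend bucket.
storage.put_object(storage.bucket, 'hello.txt', b'hello world')
assert storage.get_object(storage.bucket, 'hello.txt') == b'hello world'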
def _fill_optional_args(self, function, data):
    """
    Fills in those reserved, optional parameters that might be written to
    the function signature
    """
    func_sig = inspect.signature(function)

    if 'ibm_cos' in func_sig.parameters:
        if 'ibm_cos' in self.lithops_config:
            if self.internal_storage.backend == 'ibm_cos':
                ibm_boto3_client = self.internal_storage.get_client()
            else:
                ibm_boto3_client = Storage(
                    lithops_config=self.lithops_config,
                    storage_backend='ibm_cos').get_client()
            data['ibm_cos'] = ibm_boto3_client
        else:
            raise Exception(
                'Cannot create the ibm_cos client: missing configuration')

    if 'storage' in func_sig.parameters:
        data['storage'] = self.internal_storage.storage

    if 'rabbitmq' in func_sig.parameters:
        if 'rabbitmq' in self.lithops_config:
            rabbit_amqp_url = self.lithops_config['rabbitmq'].get('amqp_url')
            params = pika.URLParameters(rabbit_amqp_url)
            connection = pika.BlockingConnection(params)
            data['rabbitmq'] = connection
        else:
            raise Exception(
                'Cannot create the rabbitmq client: missing configuration')

    if 'id' in func_sig.parameters:
        data['id'] = int(self.call_id)
def __init__(self, ds_config, db_config, use_db_cache=True,
             use_ds_cache=True, hybrid_impl='auto'):
    self.config = default_config()
    self.ds_config = ds_config
    self.db_config = db_config
    self.use_db_cache = use_db_cache
    self.use_ds_cache = use_ds_cache

    if hybrid_impl == 'auto':
        self.hybrid_impl = (
            self.config['lithops']['mode'] == 'localhost'
            or self.config['lithops']['mode'] == 'serverless'
            and 'ibm_vpc' in self.config
        )
        if self.hybrid_impl:
            logger.info('Using the Hybrid implementation')
        else:
            logger.info('Using the pure Serverless implementation')
    else:
        self.hybrid_impl = hybrid_impl

    lithops_bucket = self.config['lithops']['storage_bucket']
    self.ds_bucket = self.config.get('storage', {}).get('ds_bucket', lithops_bucket)

    self.lithops_executor = lithops.FunctionExecutor(config=self.config, runtime_memory=2048)
    if self.hybrid_impl:
        if self.config['lithops']['mode'] == 'localhost':
            self.lithops_vm_executor = self.lithops_executor
        else:
            self.lithops_vm_executor = lithops.StandaloneExecutor(config=self.config)

    self.storage = Storage(config=self.config)

    cache_namespace = 'vm' if self.hybrid_impl else 'function'
    self.cacher = PipelineCacher(
        self.storage, lithops_bucket, cache_namespace,
        self.ds_config["name"], self.db_config["name"]
    )
    if not self.use_db_cache or not self.use_ds_cache:
        self.cacher.clean(database=not self.use_db_cache, dataset=not self.use_ds_cache)

    stats_path_cache_key = ':ds/:db/stats_path.cache'
    if self.cacher.exists(stats_path_cache_key):
        self.stats_path = self.cacher.load(stats_path_cache_key)
        PipelineStats.path = self.stats_path
        logger.info(f'Using cached {self.stats_path} for statistics')
    else:
        PipelineStats.init()
        self.stats_path = PipelineStats.path
        self.cacher.save(self.stats_path, stats_path_cache_key)
        logger.info(f'Initialised {self.stats_path} for statistics')

    self.ds_segm_size_mb = 128
    self.image_gen_config = {
        "q": 99,
        "do_preprocessing": False,
        "nlevels": 30,
        "ppm": 3.0
    }
def __init__(
    self,
    imzml_file: str,
    ibd_file: str,
    moldb_files: Union[List[int], List[str]],
    ds_config: DSConfig,
    sm_config: Optional[Dict] = None,
    use_cache=True,
    out_dir: Optional[str] = None,
    executor: Optional[Executor] = None,
):
    sm_config = sm_config or SMConfig.get_conf()
    self.storage = Storage(config=sm_config['lithops'])
    sm_storage = sm_config['lithops']['sm_storage']

    self.imzml_cobj = _upload_if_needed(
        imzml_file, self.storage, sm_storage, 'imzml', use_db_mutex=False)
    self.ibd_cobj = _upload_if_needed(
        ibd_file, self.storage, sm_storage, 'imzml', use_db_mutex=False)

    if isinstance(moldb_files[0], int):
        self.moldb_defs = _upload_moldbs_from_db(moldb_files, self.storage, sm_storage)
    else:
        self.moldb_defs = _upload_moldbs_from_files(moldb_files, self.storage, sm_storage)

    self.ds_config = ds_config
    self.out_dir = Path(out_dir) if out_dir else Path('./result_pngs')

    if use_cache:
        cache_key: Optional[str] = jsonhash({
            'imzml': imzml_file,
            'ibd': ibd_file,
            'dbs': moldb_files,
            'ds': ds_config
        })
    else:
        cache_key = None

    self.pipe = Pipeline(
        self.imzml_cobj,
        self.ibd_cobj,
        self.moldb_defs,
        self.ds_config,
        executor=executor,
        cache_key=cache_key,
        use_db_cache=use_cache,
        use_db_mutex=False,
        lithops_config=sm_config['lithops'],
    )
def find_shared_objects(calls):
    # find and annotate repeated arguments
    record = {}
    for i, call in enumerate(calls):
        for j, arg in enumerate(call[1]):
            if id(arg) in record:
                record[id(arg)].append((i, j))
            else:
                record[id(arg)] = [arg, (i, j)]

        for k, v in call[2].items():
            if id(v) in record:
                record[id(v)].append((i, k))
            else:
                record[id(v)] = [v, (i, k)]

    # If we found multiple occurrences of one object, then
    # store it in shared memory, pass a proxy as a value
    calls = [list(item) for item in calls]

    storage = Storage()
    thread_pool = ThreadPoolExecutor(max_workers=len(record))

    def put_arg_obj(positions):
        obj = positions.pop(0)
        if len(positions) > 1 and consider_sharing(obj):
            logger.debug('Proxying {}'.format(type(obj)))
            obj_bin = pickle.dumps(obj)
            cloud_object = storage.put_cloudobject(obj_bin)

            for pos in positions:
                call_n, idx_or_key = pos
                call = calls[call_n]

                if isinstance(idx_or_key, str):
                    call[2][idx_or_key] = cloud_object
                else:
                    args_as_list = list(call[1])
                    args_as_list[idx_or_key] = cloud_object
                    call[1] = tuple(args_as_list)

                try:
                    call[3].append(idx_or_key)
                except IndexError:
                    call.append([idx_or_key])

    fut = []
    for positions in record.values():
        f = thread_pool.submit(put_arg_obj, positions)
        fut.append(f)
    [f.result() for f in fut]

    return [tuple(item) for item in calls]
def clean_file(file_name):
    file_location = os.path.join(CLEANER_DIR, file_name)

    if file_location in [CLEANER_LOG_FILE, CLEANER_PID_FILE]:
        return

    with open(file_location, 'rb') as pk:
        data = pickle.load(pk)

    if 'jobs_to_clean' in data:
        jobs_to_clean = data['jobs_to_clean']
        storage_config = data['storage_config']
        clean_cloudobjects = data['clean_cloudobjects']
        storage = Storage(storage_config=storage_config)

        for job_key in jobs_to_clean:
            logger.info('Going to clean: {}'.format(job_key))

            prefix = '/'.join([JOBS_PREFIX, job_key])
            clean_bucket(storage, storage.bucket, prefix)

            if clean_cloudobjects:
                prefix = '/'.join([TEMP_PREFIX, job_key])
                clean_bucket(storage, storage.bucket, prefix)

    if 'cos_to_clean' in data:
        logger.info('Going to clean cloudobjects')
        cos_to_clean = data['cos_to_clean']
        storage_config = data['storage_config']
        storage = Storage(storage_config=storage_config)

        for co in cos_to_clean:
            if co.backend == storage.backend:
                logging.info('Cleaning {}://{}/{}'.format(
                    co.backend, co.bucket, co.key))
                storage.delete_object(co.bucket, co.key)

    if os.path.exists(file_location):
        os.remove(file_location)
def __init__(
    self,
    executor: Executor,
    ds: Dataset,
    perf: Profiler,
    sm_config: Optional[Dict] = None,
    use_cache=False,
    store_images=True,
):
    """
    Args
    ========
    use_cache: For development - cache the results after each pipeline step so that
        it's easier to quickly re-run specific steps.
    """
    sm_config = sm_config or SMConfig.get_conf()
    self.sm_storage = sm_config['lithops']['sm_storage']
    self.storage = Storage(sm_config['lithops'])
    self.s3_client = get_s3_client()
    self.ds = ds
    self.perf = perf
    self.store_images = store_images
    self.db = DB()
    self.es = ESExporter(self.db, sm_config)

    self.imzml_cobj, self.ibd_cobj = _upload_imzmls_from_prefix_if_needed(
        self.ds.input_path, self.storage, self.sm_storage, self.s3_client)
    self.moldb_defs = _upload_moldbs_from_db(
        self.ds.config['database_ids'], self.storage, self.sm_storage)

    if use_cache:
        cache_key: Optional[str] = jsonhash({
            'input_path': ds.input_path,
            'ds': ds.config
        })
    else:
        cache_key = None

    self.pipe = Pipeline(
        self.imzml_cobj,
        self.ibd_cobj,
        self.moldb_defs,
        self.ds.config,
        cache_key=cache_key,
        executor=executor,
    )

    self.results_dfs = None
    self.png_cobjs = None
    self.db_formula_image_ids = None
def clean_functions(functions_data):
    file_location = functions_data['file_location']
    data = functions_data['data']

    executor_id = data['fn_to_clean']
    logger.info(f'Going to clean functions from {executor_id}')
    storage_config = data['storage_config']
    storage = Storage(storage_config=storage_config)

    prefix = '/'.join([JOBS_PREFIX, executor_id]) + '/'
    key_list = storage.list_keys(storage.bucket, prefix)
    storage.delete_objects(storage.bucket, key_list)

    if os.path.exists(file_location):
        os.remove(file_location)

    logger.info('Finished')
def _load_object(self, data):
    """
    Loads the object in /tmp in case of object processing
    """
    extra_get_args = {}

    if 'url' in data:
        url = data['url']
        logger.info('Getting dataset from {}'.format(url.path))

        if url.data_byte_range is not None:
            range_str = 'bytes={}-{}'.format(*url.data_byte_range)
            extra_get_args['Range'] = range_str
            logger.info('Chunk: {} - Range: {}'.format(
                url.part, extra_get_args['Range']))

        resp = requests.get(url.path, headers=extra_get_args, stream=True)
        url.data_stream = resp.raw

    if 'obj' in data:
        obj = data['obj']
        logger.info('Getting dataset from {}://{}/{}'.format(
            obj.backend, obj.bucket, obj.key))

        if obj.backend == self.internal_storage.backend:
            storage = self.internal_storage.storage
        else:
            storage = Storage(lithops_config=self.lithops_config,
                              storage_backend=obj.backend)

        if obj.data_byte_range is not None:
            extra_get_args['Range'] = 'bytes={}-{}'.format(*obj.data_byte_range)
            logger.info('Chunk: {} - Range: {}'.format(
                obj.part, extra_get_args['Range']))
            sb = storage.get_object(obj.bucket, obj.key, stream=True,
                                    extra_get_args=extra_get_args)
            wsb = WrappedStreamingBodyPartition(sb, obj.chunk_size,
                                                obj.data_byte_range)
            obj.data_stream = wsb
        else:
            sb = storage.get_object(obj.bucket, obj.key, stream=True,
                                    extra_get_args=extra_get_args)
            obj.data_stream = sb
def clean_cloudobjects(cloudobjects_data):
    file_location = cloudobjects_data['file_location']
    data = cloudobjects_data['data']

    logger.info('Going to clean cloudobjects')
    cos_to_clean = data['cos_to_clean']
    storage_config = data['storage_config']
    storage = Storage(storage_config=storage_config)

    for co in cos_to_clean:
        if co.backend == storage.backend:
            logging.info('Cleaning {}://{}/{}'.format(co.backend, co.bucket, co.key))
            storage.delete_object(co.bucket, co.key)

    if os.path.exists(file_location):
        os.remove(file_location)

    logger.info('Finished')
def _fill_optional_args(self, function, data):
    """
    Fills in those reserved, optional parameters that might be written to
    the function signature
    """
    func_sig = inspect.signature(function)

    if len(data) == 1 and 'future' in data:
        # Function chaining feature
        out = [data.pop('future').result(internal_storage=self.internal_storage)]
        data.update(verify_args(function, out, None)[0])

    if 'ibm_cos' in func_sig.parameters:
        if 'ibm_cos' in self.lithops_config:
            if self.internal_storage.backend == 'ibm_cos':
                ibm_boto3_client = self.internal_storage.get_client()
            else:
                ibm_boto3_client = Storage(config=self.lithops_config,
                                           backend='ibm_cos').get_client()
            data['ibm_cos'] = ibm_boto3_client
        else:
            raise Exception(
                'Cannot create the ibm_cos client: missing configuration')

    if 'storage' in func_sig.parameters:
        data['storage'] = self.internal_storage.storage

    if 'rabbitmq' in func_sig.parameters:
        if 'rabbitmq' in self.lithops_config:
            rabbit_amqp_url = self.lithops_config['rabbitmq'].get('amqp_url')
            params = pika.URLParameters(rabbit_amqp_url)
            connection = pika.BlockingConnection(params)
            data['rabbitmq'] = connection
        else:
            raise Exception(
                'Cannot create the rabbitmq client: missing configuration')

    if 'id' in func_sig.parameters:
        data['id'] = int(self.job.call_id)
def clean_executor_jobs(executor_id, executor_data):
    storage = None
    prefix = '/'.join([JOBS_PREFIX, executor_id])

    for file_data in executor_data:
        file_location = file_data['file_location']
        data = file_data['data']
        storage_config = data['storage_config']
        clean_cloudobjects = data['clean_cloudobjects']

        if not storage:
            storage = Storage(storage_config=storage_config)

        logger.info(
            f'Cleaning jobs {", ".join([job_key for job_key in data["jobs_to_clean"]])}'
        )

        objects = storage.list_keys(storage.bucket, prefix)
        objects_to_delete = [
            key for key in objects
            if '-'.join(key.split('/')[1].split('-')[0:3]) in data['jobs_to_clean']
        ]

        while objects_to_delete:
            storage.delete_objects(storage.bucket, objects_to_delete)
            time.sleep(5)
            objects = storage.list_keys(storage.bucket, prefix)
            objects_to_delete = [
                key for key in objects
                if '-'.join(key.split('/')[1].split('-')[0:3]) in data['jobs_to_clean']
            ]

        if clean_cloudobjects:
            for job_key in data['jobs_to_clean']:
                prefix = '/'.join([TEMP_PREFIX, job_key])
                clean_bucket(storage, storage.bucket, prefix)

        if os.path.exists(file_location):
            os.remove(file_location)

    logger.info('Finished')
def get_arg_obj(idx_or_key):
    if isinstance(idx_or_key, str):
        obj_id = kwargs[idx_or_key]
    else:
        obj_id = args_as_list[idx_or_key]

    if obj_id in cache:
        logger.debug('Get {} (arg {}) from cache'.format(obj_id, idx_or_key))
        obj = cache[obj_id]
    else:
        logger.debug('Get {} (arg {}) from storage'.format(obj_id, idx_or_key))
        storage = Storage()
        obj_bin = storage.get_cloudobject(obj_id)
        obj = pickle.loads(obj_bin)
        cache[obj_id] = obj

    if isinstance(idx_or_key, str):
        kwargs[idx_or_key] = obj
    else:
        args_as_list[idx_or_key] = obj
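# A minimal sketch of the cloudobject round trip that find_shared_objects and
# get_arg_obj above rely on. It assumes a default-configured Lithops backend;
# the pickled payload is illustrative, not taken from the original sources.
import pickle
from lithops.storage import Storage

storage = Storage()                                    # backend/bucket from the default config
cobj = storage.put_cloudobject(pickle.dumps({'answer': 42}))
restored = pickle.loads(storage.get_cloudobject(cobj))
assert restored == {'answer': 42}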
def _load_object(self, data):
    """
    Loads the object in /tmp in case of object processing
    """
    extra_get_args = {}
    obj = data['obj']

    if hasattr(obj, 'bucket') and not hasattr(obj, 'path'):
        logger.info('Getting dataset from {}://{}/{}'.format(
            obj.backend, obj.bucket, obj.key))

        if obj.backend == self.internal_storage.backend:
            storage = self.internal_storage.storage
        else:
            storage = Storage(config=self.lithops_config,
                              backend=obj.backend)

        if obj.data_byte_range is not None:
            extra_get_args['Range'] = 'bytes={}-{}'.format(*obj.data_byte_range)
            logger.info('Chunk: {} - Range: {}'.format(
                obj.part, extra_get_args['Range']))
            sb = storage.get_object(obj.bucket, obj.key, stream=True,
                                    extra_get_args=extra_get_args)
            wsb = WrappedStreamingBodyPartition(sb, obj.chunk_size,
                                                obj.data_byte_range)
            obj.data_stream = wsb
        else:
            sb = storage.get_object(obj.bucket, obj.key, stream=True,
                                    extra_get_args=extra_get_args)
            obj.data_stream = sb

    elif hasattr(obj, 'url'):
        logger.info('Getting dataset from {}'.format(obj.url))
        if obj.data_byte_range is not None:
            range_str = 'bytes={}-{}'.format(*obj.data_byte_range)
            extra_get_args['Range'] = range_str
            logger.info('Chunk: {} - Range: {}'.format(
                obj.part, extra_get_args['Range']))
        resp = requests.get(obj.url, headers=extra_get_args, stream=True)
        obj.data_stream = resp.raw

    elif hasattr(obj, 'path'):
        logger.info('Getting dataset from {}'.format(obj.path))
        with open(obj.path, "rb") as f:
            if obj.data_byte_range is not None:
                extra_get_args['Range'] = 'bytes={}-{}'.format(*obj.data_byte_range)
                logger.info('Chunk: {} - Range: {}'.format(
                    obj.part, extra_get_args['Range']))
                first_byte, last_byte = obj.data_byte_range
                f.seek(first_byte)
                buffer = io.BytesIO(f.read(last_byte - first_byte + 1))
                sb = WrappedStreamingBodyPartition(buffer, obj.chunk_size,
                                                   obj.data_byte_range)
            else:
                sb = io.BytesIO(f.read())
        obj.data_stream = sb
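# Hedged sketch of the ranged read pattern _load_object uses for chunked objects.
# 'my-bucket' and 'data/input.csv' are placeholders, and the Range header assumes
# an S3-compatible backend that honours byte ranges.
from lithops.storage import Storage

storage = Storage()
extra_get_args = {'Range': 'bytes=0-1023'}             # first KiB of the object
stream = storage.get_object('my-bucket', 'data/input.csv',
                            stream=True, extra_get_args=extra_get_args)
first_kib = stream.read()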
def create_partitions(config, internal_storage, map_iterdata, chunk_size, chunk_number):
    """
    Method that creates the partitions of the objects in the Cloud
    """
    logger.debug('Starting partitioner')

    parts_per_object = None

    sbs = set()
    buckets = set()
    prefixes = set()
    obj_names = set()
    urls = set()

    logger.debug("Parsing input data")
    for elem in map_iterdata:
        if 'url' in elem:
            urls.add(elem['url'])
        elif 'obj' in elem:
            if type(elem['obj']) == CloudObject:
                elem['obj'] = '{}://{}/{}'.format(elem['obj'].backend,
                                                  elem['obj'].bucket,
                                                  elem['obj'].key)
            sb, bucket, prefix, obj_name = utils.split_object_url(elem['obj'])
            if sb is None:
                sb = internal_storage.backend
                elem['obj'] = '{}://{}'.format(sb, elem['obj'])
            if obj_name:
                obj_names.add((bucket, prefix))
            elif prefix:
                prefixes.add((bucket, prefix))
            else:
                buckets.add(bucket)
            sbs.add(sb)

    if len(sbs) > 1:
        raise Exception('Currently only one storage backend can be processed at a time. '
                        'Current storage backends: {}'.format(sbs))

    if [bool(prefixes), bool(obj_names), bool(urls), bool(buckets)].count(True) > 1:
        raise Exception('You must provide as input data a list of buckets, '
                        'a list of buckets with an object prefix, a list of keys '
                        'or a list of urls. Intermingled types are not allowed.')

    if not urls:
        # process objects from an object store. No url
        sb = sbs.pop()
        if sb == internal_storage.backend:
            storage = internal_storage.storage
        else:
            storage = Storage(config=config, backend=sb)

        objects = {}
        if obj_names:
            for bucket, prefix in obj_names:
                logger.debug("Listing objects in '{}://{}/'"
                             .format(sb, '/'.join([bucket, prefix])))
                if bucket not in objects:
                    objects[bucket] = []
                prefix = prefix + '/' if prefix else prefix
                objects[bucket].extend(storage.list_objects(bucket, prefix))
        elif prefixes:
            for bucket, prefix in prefixes:
                logger.debug("Listing objects in '{}://{}/'"
                             .format(sb, '/'.join([bucket, prefix])))
                if bucket not in objects:
                    objects[bucket] = []
                prefix = prefix + '/' if prefix else prefix
                objects[bucket].extend(storage.list_objects(bucket, prefix))
        elif buckets:
            for bucket in buckets:
                logger.debug("Listing objects in '{}://{}'".format(sb, bucket))
                objects[bucket] = storage.list_objects(bucket)

        keys_dict = {}
        for bucket in objects:
            keys_dict[bucket] = {}
            for obj in objects[bucket]:
                keys_dict[bucket][obj['Key']] = obj['Size']

    if buckets or prefixes:
        partitions, parts_per_object = _split_objects_from_buckets(
            map_iterdata, keys_dict, chunk_size, chunk_number)
    elif obj_names:
        partitions, parts_per_object = _split_objects_from_keys(
            map_iterdata, keys_dict, chunk_size, chunk_number)
    elif urls:
        partitions, parts_per_object = _split_objects_from_urls(
            map_iterdata, chunk_size, chunk_number)
    else:
        raise ValueError('You did not provide any bucket or object key/url')

    return partitions, parts_per_object
def _split_objects_from_object_storage(map_func_args_list, chunk_size,
                                       chunk_number, internal_storage, config):
    """
    Create partitions from a list of buckets or object keys
    """
    if chunk_number:
        logger.debug('Chunk number set to {}'.format(chunk_number))
    elif chunk_size:
        logger.debug('Chunk size set to {}'.format(chunk_size))
    else:
        logger.debug('Chunk size and chunk number not set')

    sbs = set()
    buckets = set()
    prefixes = set()
    obj_names = set()

    for elem in map_func_args_list:
        if type(elem['obj']) == CloudObject:
            elem['obj'] = '{}://{}/{}'.format(elem['obj'].backend,
                                              elem['obj'].bucket,
                                              elem['obj'].key)
        sb, bucket, prefix, obj_name = utils.split_object_url(elem['obj'])
        if sb is None:
            sb = internal_storage.backend
            elem['obj'] = '{}://{}'.format(sb, elem['obj'])
        if obj_name:
            obj_names.add((bucket, prefix))
        elif prefix:
            prefixes.add((bucket, prefix))
        else:
            buckets.add(bucket)
        sbs.add(sb)

    if len(sbs) > 1:
        raise Exception(
            'Processing objects from multiple storage backends is not supported. '
            'Current storage backends: {}'.format(sbs))
    sb = sbs.pop()

    if sb == internal_storage.backend:
        storage = internal_storage.storage
    else:
        storage = Storage(config=config, backend=sb)

    objects = {}
    if obj_names:
        for bucket, prefix in obj_names:
            logger.debug("Listing objects in '{}://{}'".format(
                sb, '/'.join([bucket, prefix])))
            if bucket not in objects:
                objects[bucket] = []
            prefix = prefix + '/' if prefix else prefix
            objects[bucket].extend(storage.list_objects(bucket, prefix))
            logger.debug("Total objects found: {}".format(len(objects[bucket])))
    elif prefixes:
        for bucket, prefix in prefixes:
            logger.debug("Listing objects in '{}://{}'".format(
                sb, '/'.join([bucket, prefix])))
            if bucket not in objects:
                objects[bucket] = []
            prefix = prefix + '/' if prefix else prefix
            objects[bucket].extend(storage.list_objects(bucket, prefix))
            logger.debug("Total objects found: {}".format(len(objects[bucket])))
    elif buckets:
        for bucket in buckets:
            logger.debug("Listing objects in '{}://{}'".format(sb, bucket))
            objects[bucket] = storage.list_objects(bucket)
            logger.debug("Total objects found: {}".format(len(objects[bucket])))

    if all([len(objects[bucket]) == 0 for bucket in objects]):
        raise Exception(
            f'No objects found in bucket: {", ".join(objects.keys())}')

    keys_dict = {}
    for bucket in objects:
        keys_dict[bucket] = {}
        for obj in objects[bucket]:
            keys_dict[bucket][obj['Key']] = obj['Size']

    partitions = []
    parts_per_object = []

    def create_partition(bucket, key, entry):
        if key.endswith('/'):
            logger.debug(
                f'Discarding object "{key}" as it is a prefix folder (0.0B)')
            return

        obj_size = keys_dict[bucket][key]

        if chunk_number:
            chunk_rest = obj_size % chunk_number
            obj_chunk_size = (obj_size // chunk_number) + \
                round((chunk_rest / chunk_number) + 0.5)
        elif chunk_size:
            obj_chunk_size = chunk_size
        else:
            obj_chunk_size = obj_size

        size = total_partitions = 0

        ci = obj_size
        cz = obj_chunk_size
        parts = ci // cz + (ci % cz > 0)
        logger.debug('Creating {} partitions from object {} ({})'.format(
            parts, key, sizeof_fmt(obj_size)))

        while size < obj_size:
            brange = (size, size + obj_chunk_size + CHUNK_THRESHOLD)
            brange = None if obj_size == obj_chunk_size else brange

            partition = entry.copy()
            partition['obj'] = CloudObject(sb, bucket, key)
            partition['obj'].data_byte_range = brange
            partition['obj'].chunk_size = obj_chunk_size
            partition['obj'].part = total_partitions
            partitions.append(partition)

            total_partitions += 1
            size += obj_chunk_size

        parts_per_object.append(total_partitions)

    for entry in map_func_args_list:
        sb, bucket, prefix, obj_name = utils.split_object_url(entry['obj'])
        if obj_name:
            # each entry is an object key
            key = '/'.join([prefix, obj_name]) if prefix else obj_name
            create_partition(bucket, key, entry)
        else:
            # each entry is a bucket
            for key in keys_dict[bucket]:
                create_partition(bucket, key, entry)

    return partitions, parts_per_object
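# Worked example (illustrative numbers, not from the original source) of the
# partition arithmetic used in create_partition above: a 100 MiB object split
# with a 32 MiB chunk_size.
obj_size = 100 * 1024 ** 2
obj_chunk_size = 32 * 1024 ** 2
parts = obj_size // obj_chunk_size + (obj_size % obj_chunk_size > 0)
assert parts == 4   # three full 32 MiB chunks plus a ~4 MiB tail partition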