def __init__(self, processes=None, initializer=None, initargs=(),
             maxtasksperchild=None, context=None):
    self._ctx = context or get_context()
    # self._setup_queues()
    self._taskqueue = queue.Queue()
    self._cache = {}
    self._state = RUN
    self._maxtasksperchild = maxtasksperchild
    self._initializer = initializer
    self._initargs = initargs

    if processes is not None and processes < 1:
        raise ValueError("Number of processes must be at least 1")

    # initargs, when given, is expected to be a mapping of FunctionExecutor
    # keyword arguments; the empty-tuple default is simply falsy here
    if processes is not None:
        if self._initargs:
            self._executor = FunctionExecutor(workers=processes, **self._initargs)
        else:
            self._executor = FunctionExecutor(workers=processes)
        self._processes = processes
    else:
        if self._initargs:
            self._executor = FunctionExecutor(**self._initargs)
        else:
            self._executor = FunctionExecutor()
        self._processes = self._executor.invoker.workers

    if initializer is not None and not callable(initializer):
        raise TypeError('initializer must be a callable')

    self._pool = []

def __init__(self, processes=None, initializer=None, initargs=None,
             maxtasksperchild=None, context=None):
    if initargs is None:
        initargs = ()

    self._taskqueue = queue.Queue()
    self._cache = {}
    self._state = RUN
    self._maxtasksperchild = maxtasksperchild
    self._initializer = initializer
    self._initargs = initargs
    self._remote_logger = None

    if processes is not None and processes < 1:
        raise ValueError("Number of processes must be at least 1")

    lithops_conf = mp_config.get_parameter(mp_config.LITHOPS_CONFIG)

    if processes is not None:
        self._processes = processes
        self._executor = FunctionExecutor(workers=processes, **lithops_conf)
    else:
        self._executor = FunctionExecutor(**lithops_conf)
        self._processes = self._executor.invoker.workers

    if initializer is not None and not callable(initializer):
        raise TypeError('initializer must be a callable')

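# Usage sketch for the Pool above. This is a minimal, hedged example: it
# assumes the class is exposed as lithops.multiprocessing.Pool (the
# stdlib-compatible drop-in) and that a Lithops backend is configured.
from lithops.multiprocessing import Pool

def square(x):
    return x * x

if __name__ == '__main__':
    with Pool(processes=4) as pool:        # backed by a FunctionExecutor with 4 workers
        print(pool.map(square, range(8)))  # -> [0, 1, 4, 9, 16, 25, 36, 49]
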
def __init__(self, group=None, target=None, name=None,
             args=None, kwargs=None, *, daemon=None):
    assert group is None, 'process grouping is not implemented'
    count = next(_process_counter)

    if args is None:
        args = ()
    if kwargs is None:
        kwargs = {}

    self._config = {}
    self._identity = count
    self._parent_pid = os.getpid()
    self._target = target
    self._args = tuple(args)
    self._kwargs = dict(kwargs)
    self._name = name or (type(self).__name__ + '-' + str(self._identity))
    if daemon is not None:
        self.daemon = daemon

    lithops_config = mp_config.get_parameter(mp_config.LITHOPS_CONFIG)
    self._executor = FunctionExecutor(**lithops_config)
    self._forked = False
    self._sentinel = object()
    self._remote_logger = None
    self._redis = util.get_redis_client()

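# Usage sketch for the Process above (hedged: assumes it is exposed as
# lithops.multiprocessing.Process with the stdlib-like start()/join() API,
# and that the Redis client used for IPC is configured).
from lithops.multiprocessing import Process

def greet(name):
    print('Hello,', name)

if __name__ == '__main__':
    p = Process(target=greet, args=('world',))
    p.start()   # serializes target and args, runs them via the FunctionExecutor
    p.join()
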
def benchmark(workers, memory, loopcount, matn):
    # one (loopcount, matn) task per worker
    iterable = [(loopcount, matn) for i in range(workers)]

    fexec = FunctionExecutor(runtime_memory=memory)

    start_time = time.time()
    worker_futures = fexec.map(compute_flops, iterable)
    results = fexec.get_result()
    end_time = time.time()

    worker_stats = [f.stats for f in worker_futures]
    total_time = end_time - start_time
    print("Total time:", round(total_time, 3))

    # each worker performs 2 * loopcount * matn^3 floating-point operations
    est_flops = workers * 2 * loopcount * matn ** 3
    print('Estimated GFLOPS:', round(est_flops / 1e9 / total_time, 4))

    res = {'start_time': start_time,
           'total_time': total_time,
           'est_flops': est_flops,
           'worker_stats': worker_stats,
           'results': results}

    return res

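# Hypothetical driver for benchmark() above. compute_flops(loopcount, matn)
# is assumed to be defined in this module; the worker, memory and matrix
# sizes are illustrative placeholders only.
if __name__ == '__main__':
    stats = benchmark(workers=10, memory=1024, loopcount=6, matn=1024)
    print('aggregate GFLOPS:',
          round(stats['est_flops'] / 1e9 / stats['total_time'], 2))
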
def get_conn(self, lithops_executor_config):
    """
    Initializes Lithops executor.
    """
    lithops_executor_config['log_level'] = 'DEBUG'
    lithops_executor_config['config'] = self.lithops_config
    return FunctionExecutor(**lithops_executor_config)

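# Hedged usage sketch for get_conn() above. The method belongs to a
# hook-style wrapper class whose self.lithops_config holds a parsed Lithops
# config dict; the LithopsHook stand-in below is hypothetical and exists
# only to make the call runnable.
from lithops import FunctionExecutor

class LithopsHook:
    def __init__(self, lithops_config):
        self.lithops_config = lithops_config

    def get_conn(self, lithops_executor_config):
        lithops_executor_config['log_level'] = 'DEBUG'
        lithops_executor_config['config'] = self.lithops_config
        return FunctionExecutor(**lithops_executor_config)

fexec = LithopsHook({'lithops': {'backend': 'localhost'}}).get_conn({})
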
def validate_command(prefix, image):
    storage_client = Storage()

    with FunctionExecutor(runtime=image) as fexec:
        bucket = fexec.config['lithops']['storage_bucket']
        key_list = storage_client.list_keys(bucket, prefix + '/')

        validate_records_futures = fexec.map(validate_records, key_list,
                                             extra_args=[bucket, prefix],
                                             include_modules=['util'])
        results = fexec.get_result(fs=validate_records_futures)

        for index, r in enumerate(results):
            if not r['success']:
                print(f'Failed to validate partition: {key_list[index]}')
                print(r['stderr'])
                return

        validate_summaries_futures = fexec.map(validate_summaries,
                                               [prefix + summary_postfix],
                                               extra_args=[bucket],
                                               include_modules=['util'])
        results = fexec.get_result(fs=validate_summaries_futures)

        if results[0] == '':
            print('Success!')
        else:
            print(results)

def read(backend, bucket_name, number, keylist_raw, read_times):
    blocksize = 1024 * 1024

    def read_object(key_name, storage):
        m = hashlib.md5()
        bytes_read = 0
        print(key_name)

        start_time = time.time()
        for unused in range(read_times):
            fileobj = storage.get_object(bucket_name, key_name, stream=True)
            try:
                buf = fileobj.read(blocksize)
                while len(buf) > 0:
                    bytes_read += len(buf)
                    # if bytes_read % (blocksize * 10) == 0:
                    #     mb_rate = bytes_read / (time.time() - t1) / 1e6
                    #     print('POS:' + str(bytes_read) + ' MB Rate: ' + str(mb_rate))
                    m.update(buf)
                    buf = fileobj.read(blocksize)
            except Exception as e:
                print(e)
        end_time = time.time()

        mb_rate = bytes_read / (end_time - start_time) / 1e6
        print('MB Rate: ' + str(mb_rate))

        return {'start_time': start_time,
                'end_time': end_time,
                'mb_rate': mb_rate,
                'bytes_read': bytes_read}

    if number == 0:
        keynames = keylist_raw
    else:
        # cycle through the key list until `number` reads are scheduled
        keynames = [keylist_raw[i % len(keylist_raw)] for i in range(number)]

    fexec = FunctionExecutor(backend=backend, runtime_memory=1024)

    start_time = time.time()
    worker_futures = fexec.map(read_object, keynames)
    results = fexec.get_result()
    end_time = time.time()

    total_time = end_time - start_time
    worker_stats = [f.stats for f in worker_futures]

    res = {'start_time': start_time,
           'total_time': total_time,
           'worker_stats': worker_stats,
           'results': results}

    return res

def write(backend, bucket_name, mb_per_file, number, key_prefix):

    def write_object(key_name, storage):
        bytes_n = mb_per_file * 1024 ** 2
        d = RandomDataGenerator(bytes_n)
        print(key_name)
        start_time = time.time()
        storage.put_object(bucket_name, key_name, d)
        end_time = time.time()

        mb_rate = bytes_n / (end_time - start_time) / 1e6
        print('MB Rate: ' + str(mb_rate))

        return {'start_time': start_time,
                'end_time': end_time,
                'mb_rate': mb_rate}

    # create list of random keys
    keynames = [key_prefix + str(uuid.uuid4().hex.upper()) for unused in range(number)]

    fexec = FunctionExecutor(backend=backend, runtime_memory=1024)

    start_time = time.time()
    worker_futures = fexec.map(write_object, keynames)
    results = fexec.get_result()
    end_time = time.time()

    worker_stats = [f.stats for f in worker_futures]
    total_time = end_time - start_time

    res = {'start_time': start_time,
           'total_time': total_time,
           'worker_stats': worker_stats,
           'bucket_name': bucket_name,
           'keynames': keynames,
           'results': results}

    return res

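# End-to-end sketch combining write() and read() above. The bucket name,
# sizes and counts are placeholders, and 'localhost' stands in for whatever
# compute backend is actually configured.
if __name__ == '__main__':
    bucket = 'my-benchmark-bucket'  # change-me
    write_res = write(backend='localhost', bucket_name=bucket,
                      mb_per_file=64, number=8, key_prefix='bench/')
    read_res = read(backend='localhost', bucket_name=bucket, number=0,
                    keylist_raw=write_res['keynames'], read_times=1)
    print('read workers:', len(read_res['results']))
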
def generate_command(number, prefix, partitions, image):
    bucket = None
    with FunctionExecutor(runtime=image) as fexec:
        bucket = fexec.config['lithops']['storage_bucket']
        futures = fexec.map(generate_records, range(partitions),
                            extra_args=[number, prefix],
                            include_modules=['util'])
        results = fexec.get_result(fs=futures)
        # print(results)

    partition_size = record_size * number

    # Check if all files have been uploaded
    storage_client = Storage()
    partition_list = storage_client.list_objects(bucket, prefix + '/')
    assert len(partition_list) == partitions, \
        f'partition_list: {len(partition_list)}; partitions: {partitions}'
    for info in partition_list:
        assert info['Size'] == partition_size, \
            f'partition size: {partition_size} \ninfo: {info}'

    print('Done!')

BUCKET_NAME = 'lithops-sample-data'  # change-me


def my_function(obj_id, storage):
    print(obj_id)
    data = storage.get_cloudobject(obj_id)
    return data.decode()


if __name__ == '__main__':
    obj_key = 'cloudobject1.txt'
    storage = Storage()
    obj_id = storage.put_cloudobject('Hello World!', BUCKET_NAME, obj_key)
    print(obj_id)

    fexec = FunctionExecutor()
    fexec.call_async(my_function, obj_id)
    print(fexec.get_result())

    obj_key = 'cloudobject2.txt'
    storage = fexec.storage
    obj_id = storage.put_cloudobject('Hello World!', BUCKET_NAME, obj_key)
    print(obj_id)

    fexec.call_async(my_function, obj_id)
    print(fexec.get_result())

def __init__(self, process_obj):
    util._flush_std_streams()
    self.returncode = None
    self._executor = FunctionExecutor()
    self._launch(process_obj)

def _create_executor(self):
    if not self.executor:
        from lithops import FunctionExecutor
        self.executor = FunctionExecutor(config=self.config)

def sort_command(input_prefix, output_prefix, max_parallelism, image):
    storage_client = Storage()
    bucket = None
    input_info_list = None

    with FunctionExecutor(runtime=image, workers=max_parallelism) as fexec:
        bucket = fexec.config['lithops']['storage_bucket']
        input_info_list = storage_client.list_objects(bucket, input_prefix + '/')
        input_size = sum(info['Size'] for info in input_info_list)
        (num_shuffles, last_values_per_category) = make_plan(input_size)

        current_values_per_category = 1
        current_prefix = input_prefix
        current_keys_list = [{'keys_list': [key_name],
                              'prefix': input_prefix + '-intermediate0',
                              'category_stack': []}
                             for key_name in storage_client.list_keys(bucket, input_prefix + '/')]

        for current_shuffle in range(num_shuffles):
            # Change values per category of last shuffle
            if current_shuffle == num_shuffles - 1:
                current_values_per_category = last_values_per_category

            radix_sort_futures = fexec.map(radix_sort_by_byte, current_keys_list,
                                           extra_args={'values_per_category': current_values_per_category},
                                           include_modules=['util'])
            radix_sort_results = fexec.get_result(fs=radix_sort_futures)

            # Group intermediate keys by the category stack they belong to
            categories_keys_lists = {}
            for res in radix_sort_results:
                intermediate_keys_list = res['keys_list']
                input_category_stack = res['category_stack']
                for key_name in intermediate_keys_list:
                    category_id = int(key_name.rsplit(sep='/', maxsplit=3)[-3])
                    new_category_stack = input_category_stack + [category_id]
                    new_category_stack_str = '/'.join([str(x) for x in new_category_stack])
                    if new_category_stack_str in categories_keys_lists:
                        categories_keys_lists[new_category_stack_str].append(key_name)
                    else:
                        categories_keys_lists[new_category_stack_str] = [key_name]

            # Partition category lists
            # Attach prefix metadata so that sorter knows what to name files
            each_category_size = input_size / ((256 / current_values_per_category) * (current_shuffle + 1))
            num_partitions_per_category = math.ceil(each_category_size / buffer_size_to_categorize)

            current_keys_list = []
            for category_stack_str, cat_keys_list in categories_keys_lists.items():
                for sub_list in np.array_split(cat_keys_list, num_partitions_per_category):
                    partition_entry = {'keys_list': sub_list,
                                       'prefix': f'{input_prefix}-intermediate{str(current_shuffle + 1)}',
                                       'category_stack': [int(x) for x in category_stack_str.split('/')]}
                    current_keys_list.append(partition_entry)

        consider_last_byte_sorted = False
        if last_values_per_category == 1:
            consider_last_byte_sorted = True

        for entry in current_keys_list:
            entry['prefix'] = output_prefix

        sorted_keys_list = sorted(current_keys_list, key=lambda x: x['category_stack'])
        sort_category_futures = fexec.map(sort_category, sorted_keys_list,
                                          extra_args={'consider_last_byte_sorted': consider_last_byte_sorted},
                                          include_modules=['util'])
        results = fexec.get_result(fs=sort_category_futures)
        # print(results)

    # Check if size of output matches size of input
    output_info_list = storage_client.list_objects(bucket, output_prefix)
    output_size = sum(info['Size'] for info in output_info_list)
    assert input_size == output_size, f'input size: {input_size}, output_size: {output_size}'

    print('Done!')

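# Hedged pipeline sketch tying generate_command, sort_command and
# validate_command above together. The prefixes, record counts and runtime
# image are illustrative placeholders, and validation is assumed to run
# against the sorted output prefix.
if __name__ == '__main__':
    generate_command(number=1000000, prefix='terasort-input',
                     partitions=100, image='user/lithops-runtime:latest')
    sort_command(input_prefix='terasort-input', output_prefix='terasort-output',
                 max_parallelism=128, image='user/lithops-runtime:latest')
    validate_command(prefix='terasort-output', image='user/lithops-runtime:latest')
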
def delete_temp_data(bucket_name, keynames):
    fexec = FunctionExecutor(runtime_memory=1024)
    print('Deleting temp files...')
    fexec.storage.delete_objects(bucket_name, keynames)
    print('Done!')

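# Usage sketch for delete_temp_data() above, e.g. after a write() benchmark
# run. The bucket name and keys are placeholders; in practice keynames would
# come from the 'keynames' field returned by write().
if __name__ == '__main__':
    delete_temp_data('my-benchmark-bucket', ['bench/KEY1', 'bench/KEY2'])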