def test_call_async(self):
    pw = pywren.ibm_cf_executor()
    pw.call_async(self.hello_world, "")
    result = pw.get_result()
    self.assertEqual(result, "Hello World!")

    pw = pywren.ibm_cf_executor()
    pw.call_async(self.simple_map_function, [4, 6])
    result = pw.get_result()
    self.assertEqual(result, 10)

    pw = pywren.ibm_cf_executor()
    pw.call_async(self.simple_map_function, {'x': 2, 'y': 8})
    result = pw.get_result()
    self.assertEqual(result, 10)

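# These snippets assume the usual PyWren-IBM import, e.g.:
#   import pywren_ibm_cloud as pywren
# A minimal sketch of the helpers the tests above rely on (referenced both as
# module-level functions and as test-class methods in this section); the bodies
# below are hypothetical, inferred from the asserted results:

def hello_world(param):
    return "Hello World!"


def simple_map_function(x, y):
    return x + y


def simple_reduce_function(results):
    # map_reduce passes the list of map results to the reducer
    return sum(results)
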
def pywren_test():
    runtime = "spitfiredd/pywren-flaskapi-runtime:3.7"
    iterdata = [1, 2, 3, 4]
    PYWREN_CONFIG = {
        'pywren': {
            'storage_bucket': os.environ.get('PYWREN_STORAGE_BUCKET')
        },
        'ibm_cf': {
            'endpoint': os.environ.get('PYWREN_IBM_CF_ENDPOINT'),
            'namespace': os.environ.get('PYWREN_IBM_CF_NAMESPACE'),
            'api_key': os.environ.get('PYWREN_IBM_CF_API_KEY')
        },
        'ibm_cos': {
            'endpoint': os.environ.get('PYWREN_IBM_COS_ENDPOINT'),
            'api_key': os.environ.get('PYWREN_IBM_COS_API_KEY')
        }
    }

    def my_map_function(x):
        return x + 7

    def my_reduce_function(results):
        total = 0
        for map_result in results:
            total = total + map_result
        return total

    pw = pywren.ibm_cf_executor(config=PYWREN_CONFIG, runtime=runtime)
    pw.map_reduce(my_map_function, iterdata, my_reduce_function)
    result = pw.get_result()
    return {'result': result}

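# Hand-worked check for the job above (assuming the environment variables and
# runtime image are valid): with iterdata [1, 2, 3, 4] the map stage yields
# [8, 9, 10, 11] and the reduce stage sums them to 38.
assert pywren_test() == {'result': 38}  # hypothetical smoke test
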
def test_map_reduce(self):
    iterdata = [[1, 1], [2, 2], [3, 3], [4, 4]]
    pw = pywren.ibm_cf_executor()
    pw.map_reduce(self.simple_map_function, iterdata, self.simple_reduce_function)
    result = pw.get_result()
    self.assertEqual(result, 20)

def partitioner(map_func_args, chunk_size, storage_handler):
    # map_func, reduce_function, throw_except and extra_meta are expected to be
    # available from the enclosing scope.
    logger.info('Starting partitioner() function')
    map_func_keys = map_func_args[0].keys()

    if 'bucket' in map_func_keys and 'key' not in map_func_keys:
        partitions = split_objects_from_bucket(map_func_args, chunk_size, storage_handler)
    elif 'key' in map_func_keys:
        partitions = split_object_from_key(map_func_args, chunk_size, storage_handler)
    elif 'url' in map_func_keys:
        partitions = split_object_from_url(map_func_args, chunk_size)
    else:
        raise ValueError('You did not provide any bucket or object key/url')
    # logger.info(partitions)

    pw = pywren.ibm_cf_executor()
    reduce_future = pw.map_reduce(map_func, partitions, reduce_function,
                                  reducer_wait_local=False,
                                  throw_except=throw_except,
                                  extra_meta=extra_meta)
    return reduce_future

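# The key checks above imply the expected shape of map_func_args: a list of
# dicts naming the data source. Hypothetical examples:
#   [{'bucket': 'my-bucket'}]                  -> partition every object in a bucket
#   [{'key': 'my-bucket/my-object.csv'}]       -> partition a single object
#   [{'url': 'https://example.com/data.csv'}]  -> partition an object behind a URL
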
def test_map_reduce_cos_key(self):
    bucket_name = STORAGE_CONFIG['storage_bucket']
    iterdata = [bucket_name + '/' + key for key in list_test_keys()]
    pw = pywren.ibm_cf_executor(config=CONFIG)
    pw.map_reduce(my_map_function_key, iterdata, my_reduce_function)
    result = pw.get_result()
    self.checkResult(result)

def main():
    path = None
    if len(sys.argv) >= 2:
        path = sys.argv[1]

    backend = cos.Backend(pywren_config['ibm_cos'],
                          pywren_config['pywren']['storage_bucket'])
    pw = pywren.ibm_cf_executor(config=pywren_config)

    if path is None:
        execute(pw, backend, N_SLAVES, MASTER_DELAY, SLAVES_DELAY)
        return

    num_slaves = N_SLAVES
    num_samples = N_SAMPLES
    if len(sys.argv) >= 3:
        num_slaves = int(sys.argv[2])
    if len(sys.argv) >= 4:
        num_samples = int(sys.argv[3])

    with open(path, "w") as f:
        f.write("slaves,time\n")
        for slaves in range(1, num_slaves + 1, int(num_slaves / num_samples)):
            correct, execution_time = execute(pw, backend, slaves, MASTER_DELAY, SLAVES_DELAY)
            print(slaves, getRequests(backend))
            if not correct:
                print("ERROR: Incorrect execution sequence", file=sys.stderr)
                exit(1)
            f.write("{},{}\n".format(slaves, execution_time))

def test_cloudobject(self):
    print('Testing cloudobjects...')
    data_prefix = STORAGE_CONFIG['bucket'] + '/' + PREFIX + '/'
    pw = pywren.ibm_cf_executor(config=CONFIG)
    pw.map_reduce(my_cloudobject_put, data_prefix, my_cloudobject_get)
    result = pw.get_result()
    self.checkResult(result)

def test_map_reduce_cos_bucket_one_reducer_per_object(self):
    print('Testing map_reduce() over a COS bucket with one reducer per object...')
    data_prefix = STORAGE_CONFIG['bucket'] + '/' + PREFIX + '/'
    pw = pywren.ibm_cf_executor(config=CONFIG)
    pw.map_reduce(my_map_function_obj, data_prefix, my_reduce_function,
                  reducer_one_per_object=True)
    result = pw.get_result()
    self.checkResult(result)

def test_map_reduce(self):
    print('Testing map_reduce()...')
    iterdata = [[1, 1], [2, 2], [3, 3], [4, 4]]
    pw = pywren.ibm_cf_executor(config=CONFIG)
    pw.map_reduce(simple_map_function, iterdata, simple_reduce_function)
    result = pw.get_result()
    self.assertEqual(result, 20)

def test_internal_executions(self):
    print('Testing internal executions...')
    pw = pywren.ibm_cf_executor(config=CONFIG)
    pw.map(pywren_inside_pywren_map_function1, range(1, 11))
    result = pw.get_result()
    self.assertEqual(result, [0] + [list(range(i)) for i in range(2, 11)])

    pw = pywren.ibm_cf_executor(config=CONFIG)
    pw.call_async(pywren_inside_pywren_map_function2, 10)
    result = pw.get_result()
    self.assertEqual(result, 10)

    pw = pywren.ibm_cf_executor(config=CONFIG)
    pw.map(pywren_inside_pywren_map_function3, range(1, 11))
    result = pw.get_result()
    self.assertEqual(result, [[0, 0]] + [[list(range(i)), list(range(i))] for i in range(2, 11)])

def test_map_reduce_url(self):
    chunk_size = 4 * 1024**2  # 4MB
    pw = pywren.ibm_cf_executor()
    pw.map_reduce(self.my_map_function_url, TEST_FILES_URLS,
                  self.my_reduce_function, chunk_size)
    result = pw.get_result()
    self.checkResult(initCos(), result + 1)

def test_call_async(self):
    print('Testing call_async()...')
    pw = pywren.ibm_cf_executor(config=CONFIG)
    pw.call_async(hello_world, "")
    result = pw.get_result()
    self.assertEqual(result, "Hello World!")

    pw = pywren.ibm_cf_executor(config=CONFIG)
    pw.call_async(simple_map_function, [4, 6])
    result = pw.get_result()
    self.assertEqual(result, 10)

    pw = pywren.ibm_cf_executor(config=CONFIG)
    pw.call_async(simple_map_function, {'x': 2, 'y': 8})
    result = pw.get_result()
    self.assertEqual(result, 10)

def test_storage_handler(self):
    print('Testing ibm_cos function arg...')
    iterdata = [[key, STORAGE_CONFIG['bucket']] for key in list_test_keys()]
    pw = pywren.ibm_cf_executor(config=CONFIG)
    pw.map_reduce(my_map_function_ibm_cos, iterdata, my_reduce_function)
    result = pw.get_result()
    self.checkResult(result)

def pywren_inside_pywren_map_function2(x):
    def _func(x):
        return x

    pw = pywren.ibm_cf_executor(config=CONFIG)
    pw.call_async(_func, x)
    return pw.get_result()

def pywren_inside_pywren_map_function1(x):
    def _func(x):
        return x

    pw = pywren.ibm_cf_executor(config=CONFIG)
    pw.map(_func, range(x))
    return pw.get_result()

def test_chunks_bucket(self):
    print('Testing chunk_size on a bucket...')
    data_prefix = STORAGE_CONFIG['bucket'] + '/' + PREFIX + '/'
    pw = pywren.ibm_cf_executor(config=CONFIG)
    pw.map_reduce(my_map_function_obj, data_prefix, my_reduce_function,
                  chunk_size=1 * 1024**2)
    result = pw.get_result()
    self.checkResult(result)

def object_partitioner_function(map_func_args, chunk_size, storage):
    """
    Partitioner is a function executed in the Cloud to create partitions from objects
    """
    # map_function_wrapper, reduce_function, extra_env and extra_meta are
    # expected to be available from the enclosing scope.
    logger.info('Starting partitioner() function')
    map_func_keys = map_func_args[0].keys()

    if 'bucket' in map_func_keys and 'key' not in map_func_keys:
        partitions = split_objects_from_bucket(map_func_args, chunk_size, storage)
        if not partitions:
            raise Exception('No objects available within bucket: {}'.format(map_func_args[0]['bucket']))
    elif 'key' in map_func_keys:
        partitions = split_object_from_key(map_func_args, chunk_size, storage)
    elif 'url' in map_func_keys:
        partitions = split_object_from_url(map_func_args, chunk_size)
    else:
        raise ValueError('You did not provide any bucket or object key/url')
    # logger.info(partitions)

    pw = pywren.ibm_cf_executor()
    futures = pw.map_reduce(map_function_wrapper, partitions, reduce_function,
                            reducer_wait_local=False,
                            extra_env=extra_env,
                            extra_meta=extra_meta)
    return futures

def remote_invoker(input_data):
    # map_function, runtime_memory, invoke_pool_threads and extra_env are
    # expected to be available from the enclosing scope.
    pw = pywren.ibm_cf_executor()
    return pw.map(map_function, input_data,
                  runtime_memory=runtime_memory,
                  invoke_pool_threads=invoke_pool_threads,
                  extra_env=extra_env)

def test_map_reduce_cos_bucket(self):
    data_prefix = CONFIG['pywren']['storage_bucket'] + '/' + PREFIX
    chunk_size = 4 * 1024**2  # 4MB
    pw = pywren.ibm_cf_executor()
    pw.map_reduce(self.my_map_function_bucket, data_prefix,
                  self.my_reduce_function, chunk_size)
    result = pw.get_result()
    self.checkResult(initCos(), result)

def remote_invoker(input_data):
    pw = pywren.ibm_cf_executor(runtime=runtime_name,
                                runtime_memory=runtime_memory,
                                rabbitmq_monitor=rabbitmq_monitor)
    return pw.map(map_function, input_data,
                  invoke_pool_threads=invoke_pool_threads,
                  extra_env=extra_env,
                  extra_meta=extra_meta)

def pywren_return_futures_map_function3(x):
    def _func(x):
        return x + 1

    pw = pywren.ibm_cf_executor()
    fut1 = pw.map(_func, range(x))
    fut2 = pw.map(_func, range(x))
    return fut1 + fut2

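# pywren_return_futures_map_function3() returns futures rather than values.
# A minimal driver sketch (hypothetical, using only the executor calls shown
# in this section); the outer get_result() is expected to wait on the
# returned futures:
pw = pywren.ibm_cf_executor(config=CONFIG)
pw.call_async(pywren_return_futures_map_function3, 3)
result = pw.get_result()
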
def test_map_reduce_cos_key_one_reducer_per_object(self):
    print('Testing map_reduce() over COS keys with one reducer per object...')
    bucket_name = STORAGE_CONFIG['bucket']
    iterdata = [bucket_name + '/' + key for key in list_test_keys()]
    pw = pywren.ibm_cf_executor(config=CONFIG)
    pw.map_reduce(my_map_function_obj, iterdata, my_reduce_function,
                  reducer_one_per_object=True)
    result = pw.get_result()
    self.checkResult(result)

def pywren_inside_pywren_map_function3(x):
    def _func(x):
        return x

    pw = pywren.ibm_cf_executor(config=CONFIG)
    fut1 = pw.map(_func, range(x))
    fut2 = pw.map(_func, range(x))
    return [pw.get_result(fut1), pw.get_result(fut2)]

def test_map_reduce_cos_bucket_one_reducer_per_object(self):
    data_prefix = STORAGE_CONFIG['storage_bucket'] + '/' + PREFIX
    pw = pywren.ibm_cf_executor(config=CONFIG)
    pw.map_reduce(my_map_function_bucket, data_prefix, my_reduce_function,
                  reducer_one_per_object=True)
    result = pw.get_result()
    self.checkResult(result)

def test_chunks_bucket(self):
    data_prefix = STORAGE_CONFIG['storage_bucket'] + '/' + PREFIX
    pw = pywren.ibm_cf_executor(config=CONFIG)
    pw.map_reduce(my_map_function_bucket, data_prefix, my_reduce_function,
                  chunk_size=1 * 1024**2)
    result = pw.get_result()
    self.checkResult(result)

def main():
    try:
        nmaps = int(sys.argv[1])
    except (IndexError, ValueError):
        print('Usage:\n\tpython3 main.py NUM_MAPS\n')
        exit(2)
    if nmaps < 1:
        print('ERROR: the number of maps must be at least 1\n')
        exit(3)

    pw = pywren.ibm_cf_executor(rabbitmq_monitor=True)
    params = pika.URLParameters(pw.config['rabbitmq']['amqp_url'])
    connection = pika.BlockingConnection(params)
    channel = connection.channel()
    channel.exchange_declare(exchange='fan_logs', exchange_type='fanout')

    pw.call_async(master, nmaps)

    slave_list = [[nmaps, i] for i in range(nmaps)]
    pw_s = pywren.ibm_cf_executor(rabbitmq_monitor=True)
    pw_s.map(slave, slave_list)
    results = pw_s.get_result()

    # All slaves should observe the same log sequence
    equal = len(set(map(tuple, results))) == 1
    for i, res_list in enumerate(results):
        print(f'List {i}: {res_list}')

    if equal:
        print('\nIT WORKS!')
    else:
        print("\nThe lists don't match...")
    connection.close()

def post(self):
    data = api.payload
    iterdata = data['iterdata']
    pw = pywren.ibm_cf_executor(config=c_app.config.get('PYWREN_CONFIG'),
                                runtime=c_app.config.get('PYWREN_RUNTIME'))
    pw.map_reduce(my_map_function, iterdata, my_reduce_function)
    result = pw.get_result()
    return jsonify({'result': result})

def main():
    args = []
    if len(sys.argv) != 2:
        print("ERROR: Insufficient arguments")
        exit(1)
    try:
        N = int(sys.argv[1])
    except ValueError:
        print("ERROR: The parameter must be an integer")
        exit(2)

    pw = pywren.ibm_cf_executor(rabbitmq_monitor=True)
    params = pika.URLParameters(pw.config['rabbitmq']['amqp_url'])
    connection = pika.BlockingConnection(params)
    channel = connection.channel()  # start a channel
    channel.exchange_declare(exchange='publish_subscribe', exchange_type='fanout')

    pw.call_async(my_function_leader, N)

    pw = pywren.ibm_cf_executor(rabbitmq_monitor=True)
    d = {'N': N}
    for i in range(N):
        d['id'] = i
        args.append(d.copy())
    pw.map(my_function_slave, args)
    results = pw.get_result()
    print(results)

    channel.exchange_delete(exchange='publish_subscribe', if_unused=False)
    connection.close()
    exit(0)

def test_map_reduce_cos_key(self):
    cos = initCos()
    bucket_name = CONFIG['pywren']['storage_bucket']
    iterdata = [bucket_name + '/' + key
                for key in getFilenamesFromCOS(cos, bucket_name, PREFIX)]
    chunk_size = 4 * 1024**2  # 4MB
    pw = pywren.ibm_cf_executor()
    pw.map_reduce(self.my_map_function_key, iterdata,
                  self.my_reduce_function, chunk_size)
    result = pw.get_result()
    self.checkResult(cos, result)

def calculate_centroids(config, input_db, polarity='+', isocalc_sigma=0.001238):
    bucket = config["storage"]["db_bucket"]
    formulas_chunks_prefix = input_db["formulas_chunks"]
    centroids_chunks_prefix = input_db["centroids_chunks"]
    clean_from_cos(config, bucket, centroids_chunks_prefix)

    def calculate_peaks_for_formula(formula_i, formula):
        mzs, ints = isocalc_wrapper.centroids(formula)
        if mzs is not None:
            return list(zip(repeat(formula_i), range(len(mzs)), mzs, ints))
        else:
            return []

    def calculate_peaks_chunk(obj, id, storage):
        print(f'Calculating peaks from formulas chunk {obj.key}')
        chunk_df = pd.read_msgpack(obj.data_stream._raw_stream)
        peaks = [peak
                 for formula_i, formula in chunk_df.formula.items()
                 for peak in calculate_peaks_for_formula(formula_i, formula)]
        peaks_df = pd.DataFrame(peaks, columns=['formula_i', 'peak_i', 'mz', 'int'])
        peaks_df.set_index('formula_i', inplace=True)

        print(f'Storing centroids chunk {id}')
        centroids_chunk_key = f'{centroids_chunks_prefix}/{id}.msgpack'
        storage.put_object(Bucket=bucket, Key=centroids_chunk_key,
                           Body=peaks_df.to_msgpack())
        return peaks_df.shape[0]

    # Import lazily so that the rest of the pipeline still works if the dependency is missing
    from annotation_pipeline.isocalc_wrapper import IsocalcWrapper
    isocalc_wrapper = IsocalcWrapper({
        # These instrument settings are usually customized on a per-dataset basis
        # out of a set of 18 possible combinations, but most of EMBL's datasets
        # are compatible with the following settings:
        'charge': {
            'polarity': polarity,
            'n_charges': 1,
        },
        'isocalc_sigma': float(f"{isocalc_sigma:f}")  # Rounding to match production implementation
    })

    pw = pywren.ibm_cf_executor(config=config)
    memory_capacity_mb = 2048
    futures = pw.map(calculate_peaks_chunk,
                     f'cos://{bucket}/{formulas_chunks_prefix}/',
                     runtime_memory=memory_capacity_mb)
    centroids_chunks_n = pw.get_result(futures)
    append_pywren_stats(futures, memory_mb=memory_capacity_mb,
                        cloud_objects_n=len(futures))

    num_centroids = sum(centroids_chunks_n)
    n_centroids_chunks = len(centroids_chunks_n)
    logger.info(f'Calculated {num_centroids} centroids in {n_centroids_chunks} chunks')
    return num_centroids, n_centroids_chunks
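
# A minimal invocation sketch for calculate_centroids(). The dict shapes are
# inferred from the lookups above; bucket and prefix values are hypothetical:
config = {'storage': {'db_bucket': 'my-db-bucket'}}  # plus the usual pywren/ibm_cos sections
input_db = {'formulas_chunks': 'db/formulas_chunks',
            'centroids_chunks': 'db/centroids_chunks'}
num_centroids, n_chunks = calculate_centroids(config, input_db, polarity='+')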