Example no. 1
    def test_call_async(self):
        pw = pywren.ibm_cf_executor()
        pw.call_async(self.hello_world, "")
        result = pw.get_result()
        self.assertEqual(result, "Hello World!")

        pw = pywren.ibm_cf_executor()
        pw.call_async(self.simple_map_function, [4, 6])
        result = pw.get_result()
        self.assertEqual(result, 10)

        pw = pywren.ibm_cf_executor()
        pw.call_async(self.simple_map_function, {'x': 2, 'y': 8})
        result = pw.get_result()
        self.assertEqual(result, 10)
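
The helper functions are not shown here; a minimal sketch that satisfies the assertions (the names and signatures are inferred from the calls above, not taken from the original test suite):

def hello_world(param):
    # The test passes an empty string and expects this exact value back.
    return "Hello World!"

def simple_map_function(x, y):
    # call_async() passes a list ([4, 6]) as positional arguments and a
    # dict ({'x': 2, 'y': 8}) as keyword arguments; both assertions expect x + y.
    return x + y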
Example no. 2
def pywren_test():
    runtime = "spitfiredd/pywren-flaskapi-runtime:3.7"
    iterdata = [1, 2, 3, 4]

    PYWREN_CONFIG = {
        'pywren': {
            'storage_bucket': os.environ.get('PYWREN_STORAGE_BUCKET')
        },
        'ibm_cf': {
            'endpoint': os.environ.get('PYWREN_IBM_CF_ENDPOINT'),
            'namespace': os.environ.get('PYWREN_IBM_CF_NAMESPACE'),
            'api_key': os.environ.get('PYWREN_IBM_CF_API_KEY')
        },
        'ibm_cos': {
            'endpoint': os.environ.get('PYWREN_IBM_COS_ENDPOINT'),
            'api_key': os.environ.get('PYWREN_IBM_COS_API_KEY')
        }
    }

    def my_map_function(x):
        return x + 7

    def my_reduce_function(results):
        total = 0
        for map_result in results:
            total = total + map_result
        return total

    pw = pywren.ibm_cf_executor(config=PYWREN_CONFIG, runtime=runtime)
    pw.map_reduce(my_map_function, iterdata, my_reduce_function)
    result = pw.get_result()
    return {'result': result}
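
A possible local invocation, assuming the environment variables read by the config above have been exported first (all values below are placeholders):

import os

os.environ['PYWREN_STORAGE_BUCKET'] = 'my-bucket'
os.environ['PYWREN_IBM_CF_ENDPOINT'] = 'https://<region>.functions.cloud.ibm.com'
os.environ['PYWREN_IBM_CF_NAMESPACE'] = 'my-namespace'
os.environ['PYWREN_IBM_CF_API_KEY'] = '<cf-api-key>'
os.environ['PYWREN_IBM_COS_ENDPOINT'] = 'https://<cos-endpoint>'
os.environ['PYWREN_IBM_COS_API_KEY'] = '<cos-api-key>'

print(pywren_test())  # {'result': 38}: the map adds 7 to each of [1, 2, 3, 4], the reduce sums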
Example no. 3
 def test_map_reduce(self):
     iterdata = [[1, 1], [2, 2], [3, 3], [4, 4]]
     pw = pywren.ibm_cf_executor()
     pw.map_reduce(self.simple_map_function, iterdata,
                   self.simple_reduce_function)
     result = pw.get_result()
     self.assertEqual(result, 20)
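
simple_reduce_function is again assumed; a sketch consistent with the expected result (with simple_map_function returning x + y, the four pairs map to 2, 4, 6 and 8):

def simple_reduce_function(results):
    return sum(results)  # 2 + 4 + 6 + 8 == 20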
Example no. 4
        def partitioner(map_func_args, chunk_size, storage_handler):
            logger.info('Starting partitioner() function')
            map_func_keys = map_func_args[0].keys()

            if 'bucket' in map_func_keys and 'key' not in map_func_keys:
                partitions = split_objects_from_bucket(map_func_args,
                                                       chunk_size,
                                                       storage_handler)
            elif 'key' in map_func_keys:
                partitions = split_object_from_key(map_func_args, chunk_size,
                                                   storage_handler)
            elif 'url' in map_func_keys:
                partitions = split_object_from_url(map_func_args, chunk_size)
            else:
                raise ValueError(
                    'You did not provide any bucket or object key/url')

            # logger.info(partitions)

            pw = pywren.ibm_cf_executor()
            reduce_future = pw.map_reduce(map_func,
                                          partitions,
                                          reduce_function,
                                          reducer_wait_local=False,
                                          throw_except=throw_except,
                                          extra_meta=extra_meta)

            return reduce_future
Example no. 5
 def test_map_reduce_cos_key(self):
     bucket_name = STORAGE_CONFIG['storage_bucket']
     iterdata = [bucket_name + '/' + key for key in list_test_keys()]
     pw = pywren.ibm_cf_executor(config=CONFIG)
     pw.map_reduce(my_map_function_key, iterdata, my_reduce_function)
     result = pw.get_result()
     self.checkResult(result)
Example no. 6
def main():
    path = None
    if len(sys.argv) >= 2:
        path = sys.argv[1]

    backend = cos.Backend(pywren_config['ibm_cos'],
                          pywren_config['pywren']['storage_bucket'])
    pw = pywren.ibm_cf_executor(config=pywren_config)

    if path is None:
        execute(pw, backend, N_SLAVES, MASTER_DELAY, SLAVES_DELAY)
        return

    num_slaves = N_SLAVES
    num_samples = N_SAMPLES

    if len(sys.argv) >= 3:
        num_slaves = int(sys.argv[2])

    if len(sys.argv) >= 4:
        num_samples = int(sys.argv[3])

    with open(path, "w") as f:
        f.write("slaves,time\n")
        # Guard against a zero step when num_samples > num_slaves.
        for slaves in range(1, num_slaves + 1, max(1, num_slaves // num_samples)):
            correct, execution_time = execute(pw, backend, slaves,
                                              MASTER_DELAY, SLAVES_DELAY)

            print(slaves, getRequests(backend))

            if not correct:
                print("ERROR: Incorrect execution sequence", file=sys.stderr)
                exit(1)

            f.write("{},{}\n".format(slaves, execution_time))
Example no. 7
 def test_cloudobject(self):
     print('Testing cloudobjects...')
     data_prefix = STORAGE_CONFIG['bucket'] + '/' + PREFIX + '/'
     pw = pywren.ibm_cf_executor(config=CONFIG)
     pw.map_reduce(my_cloudobject_put, data_prefix, my_cloudobject_get)
     result = pw.get_result()
     self.checkResult(result)
Example no. 8
 def test_map_reduce_cos_bucket_one_reducer_per_object(self):
     print('Testing map_reduce() over a COS bucket with one reducer per object...')
     data_prefix = STORAGE_CONFIG['bucket'] + '/' + PREFIX + '/'
     pw = pywren.ibm_cf_executor(config=CONFIG)
     pw.map_reduce(my_map_function_obj, data_prefix, my_reduce_function, reducer_one_per_object=True)
     result = pw.get_result()
     self.checkResult(result)
Example no. 9
 def test_map_reduce(self):
     print('Testing map_reduce()...')
     iterdata = [[1, 1], [2, 2], [3, 3], [4, 4]]
     pw = pywren.ibm_cf_executor(config=CONFIG)
     pw.map_reduce(simple_map_function, iterdata, simple_reduce_function)
     result = pw.get_result()
     self.assertEqual(result, 20)
Example no. 10
    def test_internal_executions(self):
        print('Testing internal executions...')
        pw = pywren.ibm_cf_executor(config=CONFIG)
        pw.map(pywren_inside_pywren_map_function1, range(1, 11))
        result = pw.get_result()
        self.assertEqual(result, [0] + [list(range(i)) for i in range(2, 11)])

        pw = pywren.ibm_cf_executor(config=CONFIG)
        pw.call_async(pywren_inside_pywren_map_function2, 10)
        result = pw.get_result()
        self.assertEqual(result, 10)

        pw = pywren.ibm_cf_executor(config=CONFIG)
        pw.map(pywren_inside_pywren_map_function3, range(1, 11))
        result = pw.get_result()
        self.assertEqual(result, [[0, 0]] + [[list(range(i)), list(range(i))] for i in range(2, 11)])
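
The pywren_inside_pywren_map_function helpers used here are shown in Examples no. 14, 15 and 23 below. Note that, judging by the first assertion, get_result() over a single-element map (x = 1) yields the scalar 0 rather than a one-element list.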
Example no. 11
 def test_map_reduce_url(self):
     chunk_size = 4 * 1024**2  # 4MB
     pw = pywren.ibm_cf_executor()
     pw.map_reduce(self.my_map_function_url, TEST_FILES_URLS,
                   self.my_reduce_function, chunk_size)
     result = pw.get_result()
     self.checkResult(initCos(), result + 1)
Example no. 12
    def test_call_async(self):
        print('Testing call_async()...')
        pw = pywren.ibm_cf_executor(config=CONFIG)
        pw.call_async(hello_world, "")
        result = pw.get_result()
        self.assertEqual(result, "Hello World!")

        pw = pywren.ibm_cf_executor(config=CONFIG)
        pw.call_async(simple_map_function, [4, 6])
        result = pw.get_result()
        self.assertEqual(result, 10)

        pw = pywren.ibm_cf_executor(config=CONFIG)
        pw.call_async(simple_map_function, {'x': 2, 'y': 8})
        result = pw.get_result()
        self.assertEqual(result, 10)
Example no. 13
 def test_storage_handler(self):
     print('Testing ibm_cos function arg...')
     iterdata = [[key, STORAGE_CONFIG['bucket']] for key in list_test_keys()]
     pw = pywren.ibm_cf_executor(config=CONFIG)
     pw.map_reduce(my_map_function_ibm_cos, iterdata, my_reduce_function)
     result = pw.get_result()
     self.checkResult(result)
Example no. 14
def pywren_inside_pywren_map_function2(x):
    def _func(x):
        return x

    pw = pywren.ibm_cf_executor(config=CONFIG)
    pw.call_async(_func, x)
    return pw.get_result()
Example no. 15
def pywren_inside_pywren_map_function1(x):
    def _func(x):
        return x

    pw = pywren.ibm_cf_executor(config=CONFIG)
    pw.map(_func, range(x))
    return pw.get_result()
Example no. 16
 def test_chunks_bucket(self):
     print('Testing chunk_size on a bucket...')
     data_prefix = STORAGE_CONFIG['bucket'] + '/' + PREFIX + '/'
     pw = pywren.ibm_cf_executor(config=CONFIG)
     pw.map_reduce(my_map_function_obj, data_prefix, my_reduce_function, chunk_size=1*1024**2)
     result = pw.get_result()
     self.checkResult(result)
Example no. 17
    def object_partitioner_function(map_func_args, chunk_size, storage):
        """
        Partitioner is a function executed in the Cloud to create partitions from objects
        """
        logger.info('Starting partitioner() function')
        map_func_keys = map_func_args[0].keys()
    
        if 'bucket' in map_func_keys and 'key' not in map_func_keys:
            partitions = split_objects_from_bucket(map_func_args, chunk_size, storage)
            if not partitions:
                raise Exception('No objects available within bucket: {}'.format(map_func_args[0]['bucket']))

        elif 'key' in map_func_keys:
            partitions = split_object_from_key(map_func_args, chunk_size, storage)
        
        elif 'url' in map_func_keys:
            partitions = split_object_from_url(map_func_args, chunk_size)

        else:
            raise ValueError('You did not provide any bucket or object key/url')

        #logger.info(partitions)
        pw = pywren.ibm_cf_executor()
        futures = pw.map_reduce(map_function_wrapper, partitions,
                                reduce_function,
                                reducer_wait_local=False,
                                extra_env=extra_env,
                                extra_meta=extra_meta)
        return futures
Example no. 18
 def remote_invoker(input_data):
     pw = pywren.ibm_cf_executor()
     return pw.map(map_function,
                   input_data,
                   runtime_memory=runtime_memory,
                   invoke_pool_threads=invoke_pool_threads,
                   extra_env=extra_env)
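
Here map_function, runtime_memory, invoke_pool_threads and extra_env are free variables captured from the enclosing scope; remote_invoker returns the futures produced by map() instead of waiting for results.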
Example no. 19
 def test_map_reduce_cos_bucket(self):
     data_prefix = CONFIG['pywren']['storage_bucket'] + '/' + PREFIX
     chunk_size = 4 * 1024**2  # 4MB
     pw = pywren.ibm_cf_executor()
     pw.map_reduce(self.my_map_function_bucket, data_prefix,
                   self.my_reduce_function, chunk_size)
     result = pw.get_result()
     self.checkResult(initCos(), result)
Example no. 20
 def remote_invoker(input_data):
     pw = pywren.ibm_cf_executor(runtime=runtime_name,
                                 runtime_memory=runtime_memory,
                                 rabbitmq_monitor=rabbitmq_monitor)
     return pw.map(map_function, input_data,
                   invoke_pool_threads=invoke_pool_threads,
                   extra_env=extra_env,
                   extra_meta=extra_meta)
Example no. 21
    def pywren_return_futures_map_function3(x):
        def _func(x):
            return x + 1

        pw = pywren.ibm_cf_executor()
        fut1 = pw.map(_func, range(x))
        fut2 = pw.map(_func, range(x))
        return fut1 + fut2
Example no. 22
 def test_map_reduce_cos_key_one_reducer_per_object(self):
     print('Testing map_reduce() over COS keys with one reducer per object...')
     bucket_name = STORAGE_CONFIG['bucket']
     iterdata = [bucket_name + '/' + key for key in list_test_keys()]
     pw = pywren.ibm_cf_executor(config=CONFIG)
     pw.map_reduce(my_map_function_obj, iterdata, my_reduce_function, reducer_one_per_object=True)
     result = pw.get_result()
     self.checkResult(result)
Example no. 23
def pywren_inside_pywren_map_function3(x):
    def _func(x):
        return x

    pw = pywren.ibm_cf_executor(config=CONFIG)
    fut1 = pw.map(_func, range(x))
    fut2 = pw.map(_func, range(x))
    return [pw.get_result(fut1), pw.get_result(fut2)]
Example no. 24
 def test_map_reduce_cos_bucket_one_reducer_per_object(self):
     data_prefix = STORAGE_CONFIG['storage_bucket'] + '/' + PREFIX
     pw = pywren.ibm_cf_executor(config=CONFIG)
     pw.map_reduce(my_map_function_bucket,
                   data_prefix,
                   my_reduce_function,
                   reducer_one_per_object=True)
     result = pw.get_result()
     self.checkResult(result)
Example no. 25
 def test_chunks_bucket(self):
     data_prefix = STORAGE_CONFIG['storage_bucket'] + '/' + PREFIX
     pw = pywren.ibm_cf_executor(config=CONFIG)
     pw.map_reduce(my_map_function_bucket,
                   data_prefix,
                   my_reduce_function,
                   chunk_size=1 * 1024**2)
     result = pw.get_result()
     self.checkResult(result)
Example no. 26
def main():
    try:
        nmaps = int(sys.argv[1])
    except (IndexError, ValueError):
        print('Usage:\n\tpython3 main.py NUM_MAPS\n')
        exit(2)

    if nmaps < 1:
        print('ERROR: the number of maps must be at least 1\n')
        exit(3)

    pw = pywren.ibm_cf_executor(rabbitmq_monitor=True)

    params = pika.URLParameters(pw.config['rabbitmq']['amqp_url'])
    connection = pika.BlockingConnection(params)
    channel = connection.channel()

    channel.exchange_declare(exchange='fan_logs', exchange_type='fanout')

    pw.call_async(master, nmaps)

    slave_list = []

    for i in range(nmaps):
        slave_list.append([nmaps, i])

    pw_s = pywren.ibm_cf_executor(rabbitmq_monitor=True)
    pw_s.map(slave, slave_list)

    results = pw_s.get_result()
    equal = len(set(map(tuple, results))) == 1

    for i, res_list in enumerate(results):
        print(f'List {i}: {res_list}')

    if equal:
        print('\nIT WORKS!')
    else:
        print('\nThe lists don\'t match...')

    connection.close()
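
rabbitmq_monitor=True presumes an AMQP URL in the executor's configuration, which this example reads via pw.config['rabbitmq']['amqp_url']; a hypothetical fragment of that configuration:

# Hypothetical config fragment; only 'amqp_url' is actually read in this example.
RABBITMQ_CONFIG = {
    'rabbitmq': {
        'amqp_url': 'amqp://user:password@rabbitmq-host:5672/vhost'
    }
}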
Example no. 27
    def post(self):

        data = api.payload
        iterdata = data['iterdata']

        pw = pywren.ibm_cf_executor(config=c_app.config.get('PYWREN_CONFIG'),
                                    runtime=c_app.config.get('PYWREN_RUNTIME'))
        pw.map_reduce(my_map_function, iterdata, my_reduce_function)
        result = pw.get_result()
        return jsonify({'result': result})
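
A hypothetical client call for this endpoint (the /map_reduce route and port are assumptions; the snippet does not show how the resource is registered):

import requests

resp = requests.post('http://localhost:5000/map_reduce',
                     json={'iterdata': [1, 2, 3, 4]})
print(resp.json())  # {'result': ...}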
Example no. 28
def main():
    args = []

    if len(sys.argv) != 2:
        print("ERROR: Not enough arguments")
        exit(1)
    try:
        N = int(sys.argv[1])
    except ValueError:
        print("ERROR: The parameter must be an integer")
        exit(2)

    pw = pywren.ibm_cf_executor(rabbitmq_monitor=True)

    params = pika.URLParameters(pw.config['rabbitmq']['amqp_url'])
    connection = pika.BlockingConnection(params)
    channel = connection.channel()  # start a channel

    channel.exchange_declare(exchange='publish_subscribe',
                             exchange_type='fanout')

    pw.call_async(my_function_leader, N)

    pw = pywren.ibm_cf_executor(rabbitmq_monitor=True)

    d = {'N': N}
    for i in range(N):
        d['id'] = i
        args.append(d.copy())

    pw.map(my_function_slave, args)

    results = pw.get_result()

    print(results)

    channel.exchange_delete(exchange='publish_subscribe', if_unused=False)
    connection.close()

    exit(0)
Example no. 29
 def test_map_reduce_cos_key(self):
     cos = initCos()
     bucket_name = CONFIG['pywren']['storage_bucket']
     iterdata = [
         bucket_name + '/' + key
         for key in getFilenamesFromCOS(cos, bucket_name, PREFIX)
     ]
     chunk_size = 4 * 1024**2  # 4MB
     pw = pywren.ibm_cf_executor()
     pw.map_reduce(self.my_map_function_key, iterdata,
                   self.my_reduce_function, chunk_size)
     result = pw.get_result()
     self.checkResult(cos, result)
Example no. 30
def calculate_centroids(config, input_db, polarity='+', isocalc_sigma=0.001238):
    bucket = config["storage"]["db_bucket"]
    formulas_chunks_prefix = input_db["formulas_chunks"]
    centroids_chunks_prefix = input_db["centroids_chunks"]
    clean_from_cos(config, bucket, centroids_chunks_prefix)

    def calculate_peaks_for_formula(formula_i, formula):
        mzs, ints = isocalc_wrapper.centroids(formula)
        if mzs is not None:
            return list(zip(repeat(formula_i), range(len(mzs)), mzs, ints))
        else:
            return []

    def calculate_peaks_chunk(obj, id, storage):
        print(f'Calculating peaks from formulas chunk {obj.key}')
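        # NOTE: pd.read_msgpack was deprecated in pandas 0.25 and removed in
        # pandas 1.0, so this snippet assumes an older pandas release.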
        chunk_df = pd.read_msgpack(obj.data_stream._raw_stream)
        peaks = [peak for formula_i, formula in chunk_df.formula.items()
                 for peak in calculate_peaks_for_formula(formula_i, formula)]
        peaks_df = pd.DataFrame(peaks, columns=['formula_i', 'peak_i', 'mz', 'int'])
        peaks_df.set_index('formula_i', inplace=True)

        print(f'Storing centroids chunk {id}')
        centroids_chunk_key = f'{centroids_chunks_prefix}/{id}.msgpack'
        storage.put_object(Bucket=bucket, Key=centroids_chunk_key, Body=peaks_df.to_msgpack())

        return peaks_df.shape[0]

    from annotation_pipeline.isocalc_wrapper import IsocalcWrapper # Import lazily so that the rest of the pipeline still works if the dependency is missing
    isocalc_wrapper = IsocalcWrapper({
        # These instrument settings are usually customized on a per-dataset basis out of a set of
        # 18 possible combinations, but most of EMBL's datasets are compatible with the following settings:
        'charge': {
            'polarity': polarity,
            'n_charges': 1,
        },
        'isocalc_sigma': float(f"{isocalc_sigma:f}") # Rounding to match production implementation
    })

    pw = pywren.ibm_cf_executor(config=config)
    memory_capacity_mb = 2048
    futures = pw.map(calculate_peaks_chunk, f'cos://{bucket}/{formulas_chunks_prefix}/', runtime_memory=memory_capacity_mb)
    centroids_chunks_n = pw.get_result(futures)
    append_pywren_stats(futures, memory_mb=memory_capacity_mb, cloud_objects_n=len(futures))

    num_centroids = sum(centroids_chunks_n)
    n_centroids_chunks = len(centroids_chunks_n)
    logger.info(f'Calculated {num_centroids} centroids in {n_centroids_chunks} chunks')
    return num_centroids, n_centroids_chunks
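
A hypothetical invocation, assuming only the dictionary keys the function actually reads (a real config would also carry the sections PyWren itself needs):

config = {'storage': {'db_bucket': 'my-db-bucket'}}  # placeholder bucket name
input_db = {
    'formulas_chunks': 'db/formulas_chunks',   # placeholder COS prefixes
    'centroids_chunks': 'db/centroids_chunks',
}
num_centroids, n_chunks = calculate_centroids(config, input_db, polarity='+')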