Code example #1
File: executor.py  Project: ccdatatraits/pywren
    def reduce(self,
               function,
               list_of_futures,
               extra_env=None,
               extra_meta=None):
        """
        Apply a function across all futures.

        # FIXME change to lazy iterator
        """
        #if self.invoker.TIME_LIMIT:
        wait(list_of_futures,
             return_when=ALL_COMPLETED)  # avoid race condition

        def reduce_func(fut_list):
            # FIXME speed this up for big reduce
            accum_list = []
            for f in fut_list:
                accum_list.append(f.result())
            return function(accum_list)

        return self.call_async(reduce_func,
                               list_of_futures,
                               extra_env=extra_env,
                               extra_meta=extra_meta)
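A minimal usage sketch for `reduce`, assuming a configured pywren executor as in the usage shown in code example #4; `times2` and the input range are illustrative placeholders, not part of the original example:

import pywren

def times2(x):
    return x * 2

pwex = pywren.default_executor()
futures = pwex.map(times2, range(10))

# reduce() blocks until the map futures finish, then launches one more
# task that applies the given function to the full list of results.
sum_future = pwex.reduce(sum, futures)
print(sum_future.result())  # 90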
Code example #2
File: wren.py  Project: zff/pywren
def get_all_results(fs):
    """
    Take in a list of futures and block until they are completed,
    call result on each one individually, and return those
    results.

    Will throw an exception if any future threw an exception.
    """
    wait(fs, return_when=ALL_COMPLETED)
    return [f.result() for f in fs]
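A call sketch, assuming pywren exposes this function at package level alongside `default_executor` (as the usage in code example #4 suggests); `foo` and the input range are placeholders:

import pywren

def foo(x):
    return x + 1

pwex = pywren.default_executor()
futures = pwex.map(foo, range(4))

# Blocks until every future completes, then gathers results in order.
results = pywren.get_all_results(futures)
print(results)  # [1, 2, 3, 4]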
Code example #3
def get_all_results(fs):
    """
    Take in a list of futures and block until they are completed,
    call result on each one individually, and return those
    results.

    Will throw an exception if any future threw an exception.
    """
    # Timestamped prints (requires `import datetime`) bracket the blocking
    # wait so its duration can be separated from the result-gathering loop.
    print("waiting", datetime.datetime.now())
    wait(fs, return_when=ALL_COMPLETED)
    print("iterate through results", datetime.datetime.now())
    return [f.result() for f in fs]
Code example #4
def get_all_results(fs):
    """
    Take in a list of futures and block until they are completed,
    call result on each one individually, and return those
    results.

    :param fs: a list of futures.
    :return: A list of the results of each future
    :rtype: list

    Usage
      >>> pwex = pywren.default_executor()
      >>> futures = pwex.map(foo, data)
      >>> results = get_all_results(futures)
    """
    wait(fs, return_when=ALL_COMPLETED)
    return [f.result() for f in fs]
Code example #5
# Assumed module-level imports for this example: gc, os, pickle, shutil,
# numpy as np, pandas as pd, psutil, tqdm, plus pywren's wait and
# ALL_COMPLETED (import paths are not shown in the original snippet).
def featurize_get(infile, outfile):
    # Load the pickled metadata dict: a list of pywren futures plus the
    # path of the input dataframe.
    d = pickle.load(open(infile, 'rb'))
    futures = d['futures']
    data_infile = d['infile']
    data_df = pickle.load(open(data_infile, 'rb'))['df']
    data_weights_lut = dict(zip(data_df.molecule_id, data_df.p))

    # Drop the (large) futures list from the metadata dict.
    del d['futures']

    process = psutil.Process(os.getpid())
    CHUNK_SIZE = 30
    out_filenames = []

    # Recreate a clean output directory derived from the outfile name.
    dir_name = outfile.replace(".done", ".dir")
    shutil.rmtree(dir_name, ignore_errors=True)
    os.makedirs(dir_name)

    mol_filename_map = []

    for chunk_i in tqdm(range(int(np.ceil(len(futures) / CHUNK_SIZE))),
                        desc="chunks of futures"):

        # Wait on one chunk at a time and keep the rest for later, so only
        # CHUNK_SIZE results are held in memory at once.
        to_get, later = futures[:CHUNK_SIZE], futures[CHUNK_SIZE:]
        fut_done, fut_notdone = wait(to_get, return_when=ALL_COMPLETED)
        print(len(fut_done), len(fut_notdone),
              "{:3.1f}GB".format(process.memory_info().rss / 1e9))
        futures = later
        del to_get
        gc.collect()  # free the finished chunk before the next wait

        for f in tqdm(fut_done, desc=f'futures chunk {chunk_i}'):

            for single_mol in f.result():
                conf_indices = single_mol['conf_indices']
                conf_n = len(conf_indices)
                mol_f = single_mol['mol_f']
                mol_id = single_mol['mol_id']

                # Weighted average of the per-conformer features, using the
                # normalized per-conformer probabilities for this molecule.
                p = data_weights_lut[mol_id]
                p = p / np.sum(p)
                mol_feat = np.average(mol_f, axis=0, weights=p)
                mol_filename = f"{dir_name}/{mol_id}.npy"
                np.save(mol_filename, mol_feat)

                mol_filename_map.append({
                    'molecule_id': mol_id,
                    'filename': os.path.relpath(mol_filename),
                })
                # mol_f.shape

                # # do the averaging

                # with db.write_batch() as wb:
                #     for i, conf_idx in enumerate(conf_indices):
                #         bytes_str = util.np_to_bytes(mol_f[i])
                #         id_str = "{:08d}.{:08d}".format(single_mol['mol_id'],
                #                                         conf_idx)
                #         wb.put(id_str.encode('ascii'), bytes_str)

        # chunk_filename = f"{outfile}.{i:08d}"
        # pickle.dump({'i' : i,
        #              'results' : [f.result() for f in fut_done]},
        #             open(chunk_filename, 'wb'))
        # out_filenames.append(chunk_filename)
    # Write the manifest: input path, output dir, and the per-molecule
    # filename map as a dataframe.
    pickle.dump(
        {
            'infile': infile,
            'dir_name': dir_name,
            'mol_filename_df': pd.DataFrame(mol_filename_map),
        }, open(outfile, 'wb'))
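The chunk-and-wait loop above is the memory-relevant part of this example. A stripped-down sketch of the same pattern, assuming pywren's `wait`/`ALL_COMPLETED` are in scope; `handle` is a hypothetical callback standing in for the featurization work:

import gc

CHUNK_SIZE = 30

def drain_in_chunks(futures, handle):
    # Process futures CHUNK_SIZE at a time so only one chunk's results
    # are ever resident in memory.
    while futures:
        chunk, futures = futures[:CHUNK_SIZE], futures[CHUNK_SIZE:]
        done, _ = wait(chunk, return_when=ALL_COMPLETED)
        for f in done:
            handle(f.result())
        # Drop references to the finished chunk so its (potentially large)
        # results can be garbage-collected before the next wait.
        del chunk, done
        gc.collect()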