Example #1
    def reduce(self,
               function,
               list_of_futures,
               extra_env=None,
               extra_meta=None):
        """
        Apply a function across all futures.

        # FIXME change to lazy iterator
        """
        #if self.invoker.TIME_LIMIT:
        wait(list_of_futures,
             return_when=ALL_COMPLETED)  # avoid race condition

        def reduce_func(fut_list):
            # FIXME speed this up for big reduce
            accum_list = []
            for f in fut_list:
                accum_list.append(f.result())
            return function(accum_list)

        return self.call_async(reduce_func,
                               list_of_futures,
                               extra_env=extra_env,
                               extra_meta=extra_meta)
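A minimal usage sketch for the reduce method above, assuming it is a method of the pywren executor (executor setup follows the pattern in Example #4's docstring): map produces a list of futures, and reduce applies a function to all of their gathered results in one more remote call.

import pywren

pwex = pywren.default_executor()
futures = pwex.map(lambda x: x * x, range(10))
# reduce blocks on the map futures, collects their results into a list,
# and invokes sum() on that list as a single async call
total = pwex.reduce(sum, futures)
print(total.result())  # 285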
Example #2
File: wren.py Project: zff/pywren
def get_all_results(fs):
    """
    Take in a list of futures and block until they are completed,
    call result on each one individually, and return those
    results.

    Will throw an exception if any future threw an exception
    """
    wait(fs, return_when=ALL_COMPLETED)
    return [f.result() for f in fs]
Example #3
def get_all_results(fs):
    """
    Take in a list of futures and block until they are completed,
    call result on each one individually, and return those
    results.

    Will throw an exception if any future threw an exception
    """
    print("waiting", datetime.datetime.now())
    wait(fs, return_when=ALL_COMPLETED)
    print("iterate through results", datetime.datetime.now())
    return [f.result() for f in fs]
Example #4
def get_all_results(fs):
    """
    Take in a list of futures and block until they are completed,
    call result on each one individually, and return those
    results.

    :param fs: a list of futures.
    :return: A list of the results of each future
    :rtype: list

    Usage
      >>> pwex = pywren.default_executor()
      >>> futures = pwex.map(foo, data)
      >>> results = get_all_results(futures)
    """
    wait(fs, return_when=ALL_COMPLETED)
    return [f.result() for f in fs]
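pywren's wait mirrors the standard library's concurrent.futures API, so the same gather pattern can be sketched locally as a self-contained stand-in (ThreadPoolExecutor here is an assumption for illustration, not pywren itself):

from concurrent.futures import ThreadPoolExecutor, wait, ALL_COMPLETED

def get_all_results(fs):
    wait(fs, return_when=ALL_COMPLETED)
    return [f.result() for f in fs]  # result() re-raises any task exception

with ThreadPoolExecutor() as ex:
    futures = [ex.submit(pow, 2, n) for n in range(5)]
    print(get_all_results(futures))  # [1, 2, 4, 8, 16]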
Example #5
def featurize_get(infile, outfile):
    # load the pickled futures plus the path of the source DataFrame
    d = pickle.load(open(infile, 'rb'))
    futures = d['futures']
    data_infile = d['infile']
    data_df = pickle.load(open(data_infile, 'rb'))['df']
    # per-molecule weight vectors, keyed by molecule_id
    data_weights_lut = dict(zip(data_df.molecule_id, data_df.p))

    del d['futures']

    process = psutil.Process(os.getpid())
    CHUNK_SIZE = 30
    out_filenames = []

    dir_name = outfile.replace(".done", ".dir")
    shutil.rmtree(
        dir_name,
        ignore_errors=True,
    )
    os.makedirs(dir_name)

    mol_filename_map = []

    for chunk_i in tqdm(range(int(np.ceil(len(futures) / CHUNK_SIZE))),
                        desc="chunks of futures"):

        to_get, later = futures[:CHUNK_SIZE], futures[CHUNK_SIZE:]
        fut_done, fut_notdone = wait(to_get, return_when=ALL_COMPLETED)
        # progress report plus current resident memory
        print(len(fut_done), len(fut_notdone),
              "{:3.1f}GB".format(process.memory_info().rss / 1e9))
        futures = later
        del to_get
        gc.collect()

        for f in tqdm(fut_done, desc=f'futures chunk {chunk_i}'):

            for single_mol in f.result():
                conf_indices = single_mol['conf_indices']
                conf_n = len(conf_indices)
                mol_f = single_mol['mol_f']
                mol_id = single_mol['mol_id']

                # normalize this molecule's conformer weights and take the
                # weighted average of its per-conformer features
                p = data_weights_lut[mol_id]
                p = p / np.sum(p)
                mol_feat = np.average(mol_f, axis=0, weights=p)
                mol_filename = f"{dir_name}/{mol_id}.npy"
                np.save(mol_filename, mol_feat)

                mol_filename_map.append({
                    'molecule_id': mol_id,
                    'filename': os.path.relpath(mol_filename)
                })
                # mol_f.shape

                # # do the averaging

                # with db.write_batch() as wb:
                #     for i, conf_idx in enumerate(conf_indices):
                #         bytes_str = util.np_to_bytes(mol_f[i])
                #         id_str = "{:08d}.{:08d}".format(single_mol['mol_id'],
                #                                         conf_idx)
                #         wb.put(id_str.encode('ascii'), bytes_str)

        # chunk_filename = f"{outfile}.{i:08d}"
        # pickle.dump({'i' : i,
        #              'results' : [f.result() for f in fut_done]},
        #             open(chunk_filename, 'wb'))
        # out_filenames.append(chunk_filename)
    pickle.dump({
        'infile': infile,
        'dir_name': dir_name,
        'mol_filename_df': pd.DataFrame(mol_filename_map),
    }, open(outfile, 'wb'))
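A short sketch of reading featurize_get's output back; the outfile name is hypothetical, but the keys match the pickle written above:

import pickle
import numpy as np

d = pickle.load(open("features.done", "rb"))  # hypothetical outfile name
mol_df = d['mol_filename_df']  # maps molecule_id -> saved .npy path
row = mol_df.iloc[0]
feat = np.load(row['filename'])  # per-molecule weighted-average feature
print(row['molecule_id'], feat.shape)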