def reduce(self, function, list_of_futures, extra_env=None, extra_meta=None):
    """Apply *function* to the collected results of all futures.

    Blocks until every future in *list_of_futures* has completed, then
    submits a new task that gathers each result into a list and applies
    *function* to it.  Returns the future for that reduction task.

    # FIXME change to lazy iterator
    """
    # Resolve all inputs up front so the reduce task never races a
    # still-running producer.
    wait(list_of_futures, return_when=ALL_COMPLETED)

    def _gather_and_apply(fut_list):
        # FIXME speed this up for big reduce
        return function([f.result() for f in fut_list])

    return self.call_async(_gather_and_apply, list_of_futures,
                           extra_env=extra_env, extra_meta=extra_meta)
def get_all_results(fs):
    """Take in a list of futures and block until they are completed.

    Calls ``result()`` on each future individually and returns those
    results in the same order as *fs*.  Re-raises the exception of any
    future that failed.

    :param fs: a list of futures.
    :return: a list with each future's result.
    :rtype: list
    """
    # Fixed docstring: previously said "block until they are repeated",
    # a typo for "completed".
    wait(fs, return_when=ALL_COMPLETED)
    return [f.result() for f in fs]
def get_all_results(fs):
    """Take in a list of futures and block until they are completed.

    Calls ``result()`` on each future individually and returns those
    results in the same order as *fs*.  Re-raises the exception of any
    future that failed.

    :param fs: a list of futures.
    :return: a list with each future's result.
    :rtype: list
    """
    # Removed leftover debug prints ("waiting ..." / "iterate through
    # results ..." timestamps) and fixed the "repeated" -> "completed"
    # docstring typo.
    wait(fs, return_when=ALL_COMPLETED)
    return [f.result() for f in fs]
def get_all_results(fs):
    """Block until every future in *fs* has completed and collect results.

    Calls ``result()`` on each future individually, so any exception
    raised inside a task is re-raised here.

    :param fs: a list of futures.
    :return: A list of the results of each futures
    :rtype: list

    Usage
      >>> pwex = pywren.default_executor()
      >>> futures = pwex.map(foo, data)
      >>> results = get_all_results(futures)
    """
    wait(fs, return_when=ALL_COMPLETED)
    results = []
    for future in fs:
        results.append(future.result())
    return results
def featurize_get(infile, outfile):
    """Collect featurization futures and write per-molecule averaged features.

    Loads a pickle at *infile* containing ``'futures'`` (a list of
    pywren-style futures whose results are lists of per-molecule dicts
    with keys ``conf_indices``, ``mol_f``, ``mol_id``) and ``'infile'``
    (path to a pickle whose ``'df'`` DataFrame holds per-molecule
    conformer weights in columns ``molecule_id`` / ``p`` — assumed to
    be one weight vector per molecule; TODO confirm with the producer).

    Waits for the futures in fixed-size chunks to bound memory, computes
    the weight-averaged feature vector for each molecule, saves one
    ``.npy`` file per molecule under ``outfile.replace('.done', '.dir')``,
    and finally pickles a manifest mapping molecule_id -> feature file
    to *outfile*.
    """
    # Use context managers so file handles are closed promptly
    # (originals leaked handles via pickle.load(open(...))).
    with open(infile, 'rb') as fh:
        d = pickle.load(fh)
    futures = d['futures']
    data_infile = d['infile']
    with open(data_infile, 'rb') as fh:
        data_df = pickle.load(fh)['df']
    data_weights_lut = dict(zip(data_df.molecule_id, data_df.p))
    del d['futures']

    process = psutil.Process(os.getpid())

    # Fetch futures in chunks so completed payloads can be freed before
    # the next batch is pulled down.
    CHUNK_SIZE = 30

    dir_name = outfile.replace(".done", ".dir")
    shutil.rmtree(dir_name, ignore_errors=True)
    os.makedirs(dir_name)

    mol_filename_map = []
    for chunk_i in tqdm(range(int(np.ceil(len(futures) / CHUNK_SIZE))),
                        desc="chunks of futures"):
        to_get, later = futures[:CHUNK_SIZE], futures[CHUNK_SIZE:]
        fut_done, fut_notdone = wait(to_get, return_when=ALL_COMPLETED)
        # BUG FIX: the original nested one print() inside another
        # (print(a, b, print(...))), which emitted the memory string on
        # its own line followed by a spurious "None"; emit one line.
        print(len(fut_done), len(fut_notdone),
              "{:3.1f}GB".format(process.memory_info().rss / 1e9))
        futures = later
        del to_get
        gc.collect()  # drop references to fetched chunks between batches
        for f in tqdm(fut_done, desc=f'futures chunk {chunk_i}'):
            for single_mol in f.result():
                mol_f = single_mol['mol_f']
                mol_id = single_mol['mol_id']
                p = data_weights_lut[mol_id]
                # Normalize weights so they sum to 1 before averaging.
                p = p / np.sum(p)
                mol_feat = np.average(mol_f, axis=0, weights=p)
                mol_filename = f"{dir_name}/{mol_id}.npy"
                np.save(mol_filename, mol_feat)
                mol_filename_map.append({
                    'molecule_id': mol_id,
                    'filename': os.path.relpath(mol_filename)
                })

    # Manifest for downstream consumers: where each molecule's averaged
    # feature vector was written.
    with open(outfile, 'wb') as fh:
        pickle.dump(
            {
                'infile': infile,
                'dir_name': dir_name,
                'mol_filename_df': pd.DataFrame(mol_filename_map),
            }, fh)