async def test_flat_map_square_workers_async_5(nums: tp.List[int]):
    """Compare ``pl.task.flat_map`` fed by an async source with ``cz.mapcat``.

    The pipeline reads ``nums`` through an async generator and expands every
    squared value with an async generator function that awaits between
    items. Results are compared ignoring order; when the reference result is
    empty the pipeline output is awaited but not compared.
    """

    def _square(x):
        return x**2

    def _expand(x):
        yield from (x, x + 1, x + 2)

    async def _expand_async(x):
        yield x
        yield x + 1
        await asyncio.sleep(0.01)
        yield x + 2

    async def _source():
        for item in nums:
            yield item

    # Reference result, computed without pypeln.
    expected = list(cz.mapcat(_expand, map(_square, nums)))

    stage = pl.task.map(_square, _source())
    stage = pl.task.flat_map(
        _expand_async, stage, workers=3, timeout=0.1, maxsize=0
    )
    actual = await stage

    assert not expected or sorted(actual) == sorted(expected)
def run(args):
    """Calculate regulons from modules and write the result to a CSV file.

    Settings are read from the configuration file ``args.config_filename``
    (sections ``data`` and ``parameters``).

    :param args: Object providing the attributes ``config_filename``,
        ``input``, ``output`` and ``num_workers``. A truthy ``input`` or
        ``output`` takes precedence over the ``modules`` / ``output`` entry
        of the configuration file.
    """
    # Set logging level.
    logging_debug_opt = False
    LOGGER.addHandler(create_logging_handler(logging_debug_opt))
    LOGGER.setLevel(logging.DEBUG)

    LOGGER.info("Using configuration {}.".format(args.config_filename))
    cfg = ConfigParser()
    cfg.read(args.config_filename)

    in_fname = cfg['data']['modules'] if not args.input else args.input
    LOGGER.info("Loading modules from {}.".format(in_fname))
    # Loading from YAML is extremely slow. Therefore this is a potential performance improvement.
    # Potential improvements are switching to JSON or to use a CLoader:
    # https://stackoverflow.com/questions/27743711/can-i-speedup-yaml
    if in_fname.endswith('.yaml'):
        modules = load_from_yaml(in_fname)
    else:
        # SECURITY: unpickling can execute arbitrary code; only load module
        # files that come from a trusted source.
        with open(in_fname, 'rb') as f:
            modules = pickle.load(f)
    # Filter out modules with too few genes.
    min_genes = int(cfg['parameters']['min_genes'])
    modules = [m for m in modules if len(m) >= min_genes]

    LOGGER.info("Loading databases.")

    def name(fname):
        # Database name = file name without directory and extension.
        return os.path.splitext(os.path.basename(fname))[0]

    # The 'databases' entry is a ';'-separated list of glob patterns.
    db_fnames = list(mapcat(glob.glob, cfg['data']['databases'].split(";")))
    dbs = [RankingDatabase(fname=fname, name=name(fname)) for fname in db_fnames]

    LOGGER.info("Calculating regulons.")
    motif_annotations_fname = cfg['data']['motif_annotations']
    mode = cfg['parameters']['mode']
    with ProgressBar() if mode == "dask_multiprocessing" else NoProgressBar():
        df = prune2df(dbs, modules, motif_annotations_fname,
                      rank_threshold=int(cfg['parameters']['rank_threshold']),
                      auc_threshold=float(cfg['parameters']['auc_threshold']),
                      nes_threshold=float(cfg['parameters']['nes_threshold']),
                      client_or_address=mode,
                      # ConfigParser values are always strings; convert the
                      # chunk size like the other numeric parameters.
                      module_chunksize=int(cfg['parameters']['chunk_size']),
                      num_workers=args.num_workers)

    LOGGER.info("Writing results to file.")
    df.to_csv(cfg['parameters']['output'] if not args.output else args.output)
def test_flat_map_square_workers(nums):
    """``pl.sync.flat_map`` with ``workers=3`` must yield the same elements
    as ``cz.mapcat``; ordering is ignored."""

    def _square(x):
        return x**2

    def _expand(x):
        yield from (x, x + 1, x + 2)

    # Reference result, computed without pypeln.
    expected = list(cz.mapcat(_expand, (_square(n) for n in nums)))

    squared = pl.sync.map(_square, nums)
    actual = list(pl.sync.flat_map(_expand, squared, workers=3))

    assert sorted(actual) == sorted(expected)
def test_flat_map_square(nums):
    """``pl.sync.flat_map`` without extra workers must reproduce the output
    of ``cz.mapcat`` exactly, including element order."""

    def _expand(x):
        for item in (x, x + 1, x + 2):
            yield item

    def _square(x):
        return x**2

    expected = list(cz.mapcat(_expand, map(_square, nums)))

    stage = pl.sync.map(_square, nums)
    stage = pl.sync.flat_map(_expand, stage)
    actual = list(stage)

    assert actual == expected
def test_flat_map_square_workers(nums: tp.List[int]):
    """``pl.thread.flat_map`` with ``workers=3`` must yield the same elements
    as ``cz.mapcat``; ordering is ignored."""

    def _square(x):
        return x**2

    def _expand(x):
        yield from (x, x + 1, x + 2)

    # Reference result, computed without pypeln.
    expected = list(cz.mapcat(_expand, (_square(n) for n in nums)))

    squared = pl.thread.map(_square, nums)
    actual = list(pl.thread.flat_map(_expand, squared, workers=3))

    assert sorted(actual) == sorted(expected)
def test_flat_map_square(nums: tp.List[int]):
    """``pl.thread.flat_map`` without extra workers must reproduce the output
    of ``cz.mapcat`` exactly, including element order."""

    def _expand(x):
        for item in (x, x + 1, x + 2):
            yield item

    def _square(x):
        return x**2

    expected = list(cz.mapcat(_expand, map(_square, nums)))

    stage = pl.thread.map(_square, nums)
    stage = pl.thread.flat_map(_expand, stage)
    actual = list(stage)

    assert actual == expected
def test_flat_map_square_filter_workers(nums: tp.List[int]):
    """Square, flat-map (``workers=2``) and filter with ``pl.task``; the
    result must hold the same elements as the ``cz`` reference, ignoring
    order."""

    def _square(x):
        return x ** 2

    def _expand(x):
        yield from (x, x + 1, x + 2)

    def _above_one(x):
        return x > 1

    # Reference result, computed without pypeln.
    reference = cz.mapcat(_expand, map(_square, nums))
    expected = list(cz.filter(_above_one, reference))

    stage = pl.task.map(_square, nums)
    stage = pl.task.flat_map(_expand, stage, workers=2)
    stage = pl.task.filter(_above_one, stage)
    actual = list(stage)

    assert sorted(actual) == sorted(expected)
def test_flat_map_square_filter_workers_pipe(nums):
    """Build a ``pl.sync`` map / flat_map / filter pipeline with the ``|``
    operator and compare its elements (order ignored) with the ``cz``
    reference."""

    def _square(x):
        return x**2

    def _expand(x):
        yield from (x, x + 1, x + 2)

    def _above_one(x):
        return x > 1

    # Reference result, computed without pypeln.
    expected = list(
        cz.filter(_above_one, cz.mapcat(_expand, map(_square, nums)))
    )

    # Same left-to-right chain as a single ``a | b | c | d | list`` expression.
    squared = nums | pl.sync.map(_square)
    expanded = squared | pl.sync.flat_map(_expand, workers=3)
    filtered = expanded | pl.sync.filter(_above_one)
    actual = filtered | list

    assert sorted(actual) == sorted(expected)
def test_flat_map_square_filter_workers_pipe(nums: tp.List[int]):
    """Build a ``pl.thread`` map / flat_map / filter pipeline with the ``|``
    operator and compare its elements (order ignored) with the ``cz``
    reference."""

    def _square(x):
        return x**2

    def _expand(x):
        yield from (x, x + 1, x + 2)

    def _above_one(x):
        return x > 1

    # Reference result, computed without pypeln.
    expected = list(
        cz.filter(_above_one, cz.mapcat(_expand, map(_square, nums)))
    )

    # Same left-to-right chain as a single ``a | b | c | d | list`` expression.
    squared = nums | pl.thread.map(_square)
    expanded = squared | pl.thread.flat_map(_expand, workers=3)
    filtered = expanded | pl.thread.filter(_above_one)
    actual = filtered | list

    assert sorted(actual) == sorted(expected)
async def test_flat_map_square_async_2(nums: tp.List[int]):
    """Await a ``pl.task`` pipeline whose flat-map function is an async
    generator; the awaited result must equal the ``cz.mapcat`` reference,
    including element order."""

    def _square(x):
        return x**2

    def _expand(x):
        yield from (x, x + 1, x + 2)

    async def _expand_async(x):
        for item in (x, x + 1, x + 2):
            yield item

    # Reference result, computed without pypeln.
    expected = list(cz.mapcat(_expand, map(_square, nums)))

    stage = pl.task.flat_map(_expand_async, pl.task.map(_square, nums))
    actual = await stage

    assert actual == expected
def test_flat_map_square_workers_async_1(nums: tp.List[int]):
    """Consume a ``pl.task`` pipeline synchronously via ``list``; the
    flat-map function is an async generator run with ``workers=3``, so
    results are compared ignoring order."""

    def _square(x):
        return x**2

    def _expand(x):
        yield from (x, x + 1, x + 2)

    async def _expand_async(x):
        for item in (x, x + 1, x + 2):
            yield item

    # Reference result, computed without pypeln.
    expected = list(cz.mapcat(_expand, map(_square, nums)))

    squared = pl.task.map(_square, nums)
    actual = list(pl.task.flat_map(_expand_async, squared, workers=3))

    assert sorted(actual) == sorted(expected)
async def test_flat_map_square_filter_workers_pipe_3(nums: tp.List[int]):
    """Await a ``pl.task`` pipeline built with ``|`` whose filter predicate
    is a coroutine function; elements are compared with the ``cz``
    reference ignoring order."""

    def _square(x):
        return x ** 2

    def _expand(x):
        yield from (x, x + 1, x + 2)

    async def gt1(x):
        return x > 1

    # Reference result, computed without pypeln and with a plain predicate.
    reference = cz.mapcat(_expand, map(_square, nums))
    expected = list(cz.filter(lambda value: value > 1, reference))

    # Same left-to-right chain as a single ``a | b | c | d`` expression.
    squared = nums | pl.task.map(_square)
    expanded = squared | pl.task.flat_map(_expand, workers=3)
    pipeline = expanded | pl.task.filter(gt1)
    actual = await pipeline

    assert sorted(actual) == sorted(expected)
async def test_flat_map_square_async_4(nums: tp.List[int]):
    """Await a ``pl.task`` pipeline fed by an async generator, where the
    flat-map function is a coroutine returning a list; the result must equal
    the ``cz.mapcat`` reference, including element order."""

    def _square(x):
        return x**2

    def _expand(x):
        yield from (x, x + 1, x + 2)

    async def _expand_async(x):
        # A coroutine returning a list, not an async generator.
        expanded = [x, x + 1, x + 2]
        return expanded

    async def _source():
        for item in nums:
            yield item

    # Reference result, computed without pypeln.
    expected = list(cz.mapcat(_expand, map(_square, nums)))

    stage = pl.task.map(_square, _source())
    stage = pl.task.flat_map(_expand_async, stage)
    actual = await stage

    assert actual == expected
def _wrapped(total):
    """Split ``total`` with ``partition_all(num, ...)``, apply ``func`` to
    every batch and return the concatenated results as a list.

    ``num`` and ``func`` are not defined here; they are resolved from an
    enclosing scope that is not part of this block.
    """
    return list(mapcat(func, partition_all(num, total)))
def systems_get(systems):
    """Get data for systems.

    ``systems`` is split with ``partition_all(100, ...)``; every batch is
    passed to ``_systems_get`` and the per-batch results are concatenated
    into a single list.
    """
    chunks = partition_all(100, systems)
    return list(mapcat(_systems_get, chunks))