def _get_chunk_end_seg(sizes, max_workers, memsize):
    """Pick the index splitting segment sizes into per-chunk and per-job dimensions.

    Args:
        sizes (list of int): number of conformations for each segment
        max_workers (int): number of parallel workers available
        memsize (float): rough memory budget for a single chunk

    Returns:
        int: index end such that sizes[:end] is enumerated within a chunk and
            sizes[end:] determines the number of chunks distributed to workers
    """
    end = len(sizes) - 1
    while end > 1 and (util.bigprod(sizes[end:]) < max_workers or
                       memsize <= 64 * util.bigprod(sizes[:end])):
        end -= 1
    return end
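
# Hedged illustration (not part of the original module): how the split index
# from _get_chunk_end_seg is used. sizes[:end] gives the per-chunk enumeration
# (chunksize) and sizes[end:] gives the number of chunks handed to workers
# (nchunks); end moves left until nchunks >= max_workers and the chunk product
# stays under the rough memsize budget. Names below are hypothetical.
def _example_chunk_end_split():
    sizes = [40, 30, 20, 10, 5]  # hypothetical per-segment conformation counts
    end = _get_chunk_end_seg(sizes, max_workers=8, memsize=1e6)
    chunk_dims, job_dims = sizes[:end], sizes[end:]
    print('chunk dims:', chunk_dims, 'job dims:', job_dims)
    return end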
def _grow(segments, criteria, accumulator, **kw):
    """Grow worms over all segments in parallel, feeding results to accumulator.

    Args:
        segments: segments to grow through
        criteria: geometric criteria used to score candidate worms
        accumulator: collects per-chunk results from the workers
        kw: passthru args (executor, executor_args, njob, end, thresh,
            matchlast, every_other, max_results, nworker, verbosity)
    """
    # terrible hack... transferring the poses is too expensive
    tmp = {spl: (spl.body, spl.chains)
           for seg in segments for spl in seg.spliceables}
    for seg in segments:
        for spl in seg.spliceables:
            spl.body, spl.chains = None, None  # poses not pickleable...

    sizes = [len(s) for s in segments]
    ntot = util.bigprod(sizes)
    with kw["executor"](**kw["executor_args"]) as pool:
        context = (
            sizes[kw["end"]:],
            kw["njob"],
            segments,
            kw["end"],
            criteria,
            kw["thresh"],
            kw["matchlast"],
            kw["every_other"],
            kw["max_results"],
        )
        args = [range(kw["njob"])] + [it.repeat(context)]
        util.tqdm_parallel_map(
            pool=pool,
            function=_grow_chunks,
            accumulator=accumulator,
            map_func_args=args,
            batch_size=kw["nworker"] * 8,
            unit="K worms",
            ascii=0,
            desc="growing worms",
            unit_scale=int(ntot / kw["njob"] / 1000 / kw["every_other"]),
            disable=kw["verbosity"] < 0,
        )

    # put the poses back...
    for seg in segments:
        for spl in seg.spliceables:
            spl.body, spl.chains = tmp[spl]
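
# Hedged sketch (illustration only): the keyword set _grow reads from **kw.
# Keys mirror how grow() below assembles _grow_args; the values here are
# hypothetical placeholders, not recommended settings.
def _example_grow_kwargs():
    return dict(
        executor=util.InProcessExecutor,  # executor class, instantiated with executor_args
        executor_args=dict(),
        njob=1,                 # number of map tasks submitted to the pool
        end=1,                  # chunk/job split index from _get_chunk_end_seg
        thresh=2,               # score threshold forwarded to _grow_chunks
        matchlast=None,
        every_other=1,          # subsampling stride over the enumeration
        max_results=int(1e6),
        nworker=1,              # worker count; batch_size is nworker * 8
        verbosity=0,            # progress bar is disabled when verbosity < 0
    )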
def grow(segments,
         criteria,
         *,
         thresh=2,
         expert=0,
         memsize=1e6,
         executor=None,
         executor_args=None,
         max_workers=None,
         verbosity=2,
         chunklim=None,
         max_samples=int(1e12),
         max_results=int(1e6),
         cart_resl=2.0,
         ori_resl=10.0,
         xindex_cache_file=None):
    """Enumerate, score, and filter all worms buildable from the given segments.

    Args:
        segments (Segments or list): segments defining the worm topology
        criteria (WormCriteria or CriteriaList): geometric criteria to satisfy
        thresh (int, optional): score threshold for keeping results
        expert (int, optional): passed through to _check_topology
        memsize (float, optional): rough per-chunk memory budget used to pick
            the chunk/job split
        executor (None, optional): executor class; defaults to
            util.InProcessExecutor
        executor_args (None, optional): kwargs for the executor; incompatible
            with max_workers
        max_workers (None, optional): number of parallel workers; defaults to
            util.cpu_count()
        verbosity (int, optional): amount of progress/debug output
        chunklim (None, optional): not currently used by grow
        max_samples (int, optional): cap on total samples; larger systems are
            subsampled via every_other
        max_results (int, optional): cap on accumulated results
        cart_resl (float, optional): cartesian resolution of the xindex binner
            (hash-based protocol only)
        ori_resl (float, optional): orientation resolution of the xindex binner
            (hash-based protocol only)
        xindex_cache_file (None, optional): path to a pickle used to cache the
            xindex/binner between runs

    Returns:
        Worms: scores, indices, and positions of accepted worms, or None if
            there are no results

    Raises:
        ValueError: if executor_args conflicts with max_workers, or the system
            is too big to enumerate
    """
    if True:  # setup
        os.environ['OMP_NUM_THREADS'] = '1'
        os.environ['MKL_NUM_THREADS'] = '1'
        os.environ['NUMEXPR_NUM_THREADS'] = '1'
        if isinstance(segments, list):
            segments = Segments(segments)
        # if isinstance(executor, (ProcessPoolExecutor, ThreadPoolExecutor)):
        #     raise ValueError('please use dask.distributed executor')
        if verbosity > 0:
            print('grow, from', criteria.from_seg, 'to', criteria.to_seg)
            for i, seg in enumerate(segments):
                print(' segment', i, 'enter:', seg.entrypol, 'exit:', seg.exitpol)
                for sp in seg.spliceables:
                    print('   ', sp)
        elif verbosity == 0:
            print('grow, nseg:', len(segments))
        if verbosity > 2:
            global __print_best
            __print_best = True
        if not isinstance(criteria, CriteriaList):
            criteria = CriteriaList(criteria)
        if max_workers is not None and max_workers <= 0:
            max_workers = util.cpu_count()
        if executor_args is None and max_workers is None:
            executor_args = dict()
        elif executor_args is None:
            executor_args = dict(max_workers=max_workers)
        elif executor_args is not None and max_workers is not None:
            raise ValueError('executor_args incompatible with max_workers')
        max_results = int(max_results)
        if executor is None:
            executor = util.InProcessExecutor
            max_workers = 1
        if max_workers is None:
            max_workers = util.cpu_count()
        nworker = max_workers or util.cpu_count()

    if criteria.origin_seg is None:
        matchlast = _check_topology(segments, criteria, expert)
        sizes = [len(s) for s in segments]
        end = _get_chunk_end_seg(sizes, max_workers, memsize)
        ntot, chunksize, nchunks = (util.bigprod(x)
                                    for x in (sizes, sizes[:end], sizes[end:]))
        if max_samples is not None:
            max_samples = np.clip(chunksize * max_workers, max_samples, ntot)
        every_other = max(1, int(ntot / max_samples)) if max_samples else 1
        njob = int(np.sqrt(nchunks / every_other) / 128) * nworker
        njob = np.clip(nworker, njob, nchunks)

        actual_ntot = int(ntot / every_other)
        actual_nchunk = int(nchunks / every_other)
        actual_perjob = int(ntot / every_other / njob)
        actual_chunkperjob = int(nchunks / every_other / njob)
        if verbosity >= 0:
            print('tot: {:,} chunksize: {:,} nchunks: {:,} nworker: {} njob: {}'
                  .format(ntot, chunksize, nchunks, nworker, njob))
            print('worm/job: {:,} chunk/job: {} sizes={} every_other={}'.format(
                int(ntot / njob), int(nchunks / njob), sizes, every_other))
            print('max_samples: {:,} max_results: {:,}'.format(
                max_samples, max_results))
            print('actual tot:        {:,}'.format(int(actual_ntot)))
            print('actual nchunks:    {:,}'.format(int(actual_nchunk)))
            print('actual worms/job:  {:,}'.format(int(actual_perjob)))
            print('actual chunks/job: {:,}'.format(int(actual_chunkperjob)))
        _grow_args = dict(executor=executor,
                          executor_args=executor_args,
                          njob=njob,
                          end=end,
                          thresh=thresh,
                          matchlast=matchlast,
                          every_other=every_other,
                          max_results=max_results,
                          nworker=nworker,
                          verbosity=verbosity)
        if njob > 1e9 or nchunks >= 2**63 or every_other >= 2**63:
            print('too big?!?')
            print('    njob', njob)
            print('    nchunks', nchunks, nchunks / 2**63)
            print('    every_other', every_other, every_other / 2**63)
            raise ValueError('system too big')
        accum = SimpleAccumulator(max_results=max_results, max_tmp_size=1e5)
        _grow(segments, criteria, accum, **_grow_args)
        result = accum.final_result()
        if result is None:
            return None
        scores, lowidx, lowpos = result
        lowposlist = [lowpos[:, i] for i in range(len(segments))]
        score_check = criteria.score(segpos=lowposlist, verbosity=verbosity)
        assert np.allclose(score_check, scores)
        detail = dict(ntot=ntot,
                      chunksize=chunksize,
                      nchunks=nchunks,
                      nworker=nworker,
                      njob=njob,
                      sizes=sizes,
                      end=end)

    else:  # hash-based protocol...
        assert len(criteria) == 1
        _check_topology(segments, criteria, expert)
        splitpoint = criteria.from_seg
        tail, head = segments.split_at(splitpoint)
        print('HASH PROTOCOL splitting at segment', splitpoint)
        print('    full:', [len(s) for s in segments])

        headsizes = [len(s) for s in head]
        headend = _get_chunk_end_seg(headsizes, max_workers, memsize)
        ntot, chunksize, nchunks = (util.bigprod(x) for x in (
            headsizes, headsizes[:headend], headsizes[headend:]))
        if max_samples is not None:
            max_samples = np.clip(chunksize * max_workers, max_samples, ntot)
        every_other = max(1, int(ntot / max_samples)) if max_samples else 1
        njob = int(np.sqrt(nchunks / every_other) / 16 / nworker) * nworker
        njob = np.clip(nworker, njob, nchunks)
        _grow_args = dict(executor=executor,
                          executor_args=executor_args,
                          njob=njob,
                          end=headend,
                          thresh=thresh,
                          matchlast=0,
                          every_other=every_other,
                          max_results=1e9,
                          nworker=nworker,
                          verbosity=verbosity)

        t1 = 0
        if xindex_cache_file and os.path.exists(xindex_cache_file):
            print('!' * 100)
            print('reading xindex, xbinner from', xindex_cache_file)
            with open(xindex_cache_file, 'rb') as inp:
                xindex, binner = pickle.load(inp)
        else:
            # if 1:
            accum1 = MakeXIndexAccumulator(headsizes,
                                           from_seg=0,
                                           to_seg=-1,
                                           cart_resl=cart_resl,
                                           ori_resl=ori_resl)
            headcriteria = Cyclic(criteria[0].nfold,
                                  from_seg=0,
                                  to_seg=-1,
                                  tol=criteria[0].tol * 2.0,
                                  lever=criteria[0].lever)
            print('STEP ONE: growing head into xindex')
            print('    ntot            {:,}'.format(ntot))
            print('    headsizes       {}'.format(headsizes))
            print('    headend         {:,}'.format(headend))
            print('    njob            {:,}'.format(njob))
            print('    nchunks         {:,}'.format(nchunks))
            print('    chunksize       {:,}'.format(chunksize))
            print('    thresh          {:,}'.format(thresh))
            print('    matchlast       {:,}'.format(0))
            print('    every_other     {:,}'.format(every_other))
            print('    max_results     {:,}'.format(max_results))
            print('    nworker         {:,}'.format(nworker))
            print('    act. ntot       {:,}'.format(int(ntot / every_other)))
            print('    act. nchunks    {:,}'.format(int(nchunks / every_other)))
            print('    act. worms/job  {:,}'.format(
                int(ntot / every_other / njob)))
            print('    act. chunks/job {:,}'.format(
                int(nchunks / every_other / njob)))

            import time
            t1 = time.time()
            _grow(head, headcriteria, accum1, **_grow_args)
            xindex, binner = accum1.final_result()
            t1 = time.time() - t1
            print('!' * 100)
            print("TIME PHASE ONE", t1)
            print('!' * 100)

            if xindex_cache_file:
                print('!' * 100)
                print('dumping xindex to', xindex_cache_file)
                print('!' * 100)
                with open(xindex_cache_file, 'wb') as out:
                    pickle.dump((xindex, binner), out)

        ################### PHASE TWO ####################

        tailcriteria = XIndexedCriteria(xindex, binner, criteria[0].nfold,
                                        from_seg=-1)
        accum2 = XIndexedAccumulator(segments, tail, splitpoint, head,
                                     xindex, binner, criteria[0].nfold,
                                     from_seg=criteria.from_seg,
                                     to_seg=criteria.to_seg,
                                     max_results=max_results * 200)

        tailsizes = [len(s) for s in tail]
        tailend = _get_chunk_end_seg(tailsizes, max_workers, memsize)
        ntot, chunksize, nchunks = (util.bigprod(x) for x in (
            tailsizes, tailsizes[:tailend], tailsizes[tailend:]))
        if max_samples is not None:
            max_samples = np.clip(chunksize * max_workers, max_samples, ntot)
        every_other = max(1, int(ntot / max_samples * 20)) if max_samples else 1
        njob = int(np.ceil(np.sqrt(nchunks / every_other) / 32 / nworker))
        njob = np.clip(nworker, njob * nworker, nchunks)
        _grow_args = dict(executor=executor,
                          executor_args=executor_args,
                          njob=njob,
                          end=tailend,
                          thresh=thresh,
                          matchlast=None,
                          every_other=every_other,
                          max_results=max_results,
                          nworker=nworker,
                          verbosity=verbosity)

        print('STEP TWO: using xindex, nbins: {:,} nentries: {:,}'.format(
            len(xindex), sum(len(x) for x in xindex.values())))
        print('    ntot            {:,}'.format(ntot))
        print('    tailsizes       {}'.format(tailsizes))
        print('    tailend         {:,}'.format(tailend))
        print('    njob            {:,}'.format(njob))
        print('    nchunks         {:,}'.format(nchunks))
        print('    chunksize       {:,}'.format(chunksize))
        print('    thresh          {:,}'.format(thresh))
        print('    matchlast       None')
        print('    every_other     {:,}'.format(every_other))
        print('    max_results     {:,}'.format(max_results))
        print('    nworker         {:,}'.format(nworker))
        print('    act. ntot       {:,}'.format(int(ntot / every_other)))
        print('    act. nchunks    {:,}'.format(int(nchunks / every_other)))
        print('    act. worms/job  {:,}'.format(int(ntot / every_other / njob)))
        print('    act. chunks/job {:,}'.format(
            int(nchunks / every_other / njob)))
        print('    executor       ', type(executor()))

        import time
        t2 = time.time()
        _grow(tail, tailcriteria, accum2, **_grow_args)
        # import cProfile
        # cProfile.runctx('_grow(tail, tailcriteria, accum2, **_grow_args)',
        #                 locals(), globals(), 'grow2.stats')
        # import pstats
        # pst = pstats.Stats('grow2.stats')
        # pst.strip_dirs().sort_stats('time').print_stats(20)
        lowidx = accum2.final_result()
        t2 = time.time() - t2
        print('!' * 100)
        print("TIME PHASE ONE", t1)
        print("TIME PHASE TWO", t2)
        # print('   best 28 cores 1608.94K/s small 1min job 681k/.s')
        print('!' * 100)

        if lowidx is None:
            print('grow: no results')
            return
        print('refold segments')
        lowpos = _refold_segments(segments, lowidx)
        lowposlist = [lowpos[:, i] for i in range(len(segments))]
        print('score refolded segments')
        scores = criteria.score(segpos=lowposlist, verbosity=verbosity)
        print('organize results')
        nlow = sum(scores <= thresh)
        order = np.argsort(scores)[:nlow]
        scores = scores[order]
        lowpos = lowpos[order]
        lowidx = lowidx[order]
        detail = dict(ntot=ntot,
                      chunksize=chunksize,
                      nchunks=nchunks,
                      nworker=nworker,
                      njob=njob,
                      sizes=tailsizes,
                      end=tailend)

    return Worms(segments, scores, lowidx, lowpos, criteria, detail)
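
# Hedged usage sketch (illustration only): one way grow() might be invoked.
# Construction of 'segments' is assumed to happen elsewhere in the package,
# and Cyclic(3) is a hypothetical 3-fold criteria mirroring the Cyclic calls
# used in the hash-based protocol above.
def _example_grow_usage(segments):
    criteria = Cyclic(3)
    worms = grow(segments, criteria, thresh=2, verbosity=0)
    if worms is None:
        print('grow: no results under threshold')
    else:
        print('grow returned a', type(worms).__name__)
    return worms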