Example #1
def _get_chunk_end_seg(sizes, max_workers, memsize):
    """Choose the segment index at which to split the search into chunks.

    Starting from the last segment, the split point moves left until the
    tail product bigprod(sizes[end:]) (the number of chunks) reaches
    max_workers and the head product bigprod(sizes[:end]) (samples per
    chunk, at roughly 64 bytes each) fits within memsize, stopping at
    index 1 at the latest.

    Args:
        sizes (list[int]): number of splice alternatives per segment
        max_workers (int): minimum number of chunks to keep workers busy
        memsize (float): approximate per-chunk memory budget in bytes

    Returns:
        int: split index; sizes[:end] is enumerated within a chunk and
        sizes[end:] across chunks
    """
    end = len(sizes) - 1
    while end > 1 and (util.bigprod(sizes[end:]) < max_workers
                       or memsize <= 64 * util.bigprod(sizes[:end])):
        end -= 1
    return end
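
The split arithmetic is easy to check by hand. Below is a hypothetical
standalone demo, with math.prod standing in for util.bigprod (assumed to
be an exact integer product):

# Hypothetical standalone demo of _get_chunk_end_seg; `bigprod` here
# stands in for util.bigprod (assumed to be an exact integer product).
from math import prod as bigprod

def get_chunk_end_seg(sizes, max_workers, memsize):
    end = len(sizes) - 1
    while end > 1 and (bigprod(sizes[end:]) < max_workers
                       or memsize <= 64 * bigprod(sizes[:end])):
        end -= 1
    return end

sizes = [40, 30, 20, 10]          # splice alternatives per segment
end = get_chunk_end_seg(sizes, max_workers=8, memsize=1e6)
print(end)                        # -> 2
print(bigprod(sizes[:end]))       # 1,200 samples per chunk (~64 B each)
print(bigprod(sizes[end:]))       # 200 chunks to spread over 8 workers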
Example #2
def _grow(segments, criteria, accumulator, **kw):
   """Strip unpicklable poses from the spliceables, fan _grow_chunks jobs
   out over the executor, then restore the poses.

   Args:
      segments: Segments to grow
      criteria: acceptance criteria for the splices
      accumulator: collects results coming back from the worker chunks
      kw: passthru args (executor, executor_args, njob, end, thresh,
         matchlast, every_other, max_results, nworker, verbosity)
   """
   # terrible hack... transferring the poses to the workers is too expensive
   tmp = {spl: (spl.body, spl.chains) for seg in segments for spl in seg.spliceables}
   for seg in segments:
      for spl in seg.spliceables:
         spl.body, spl.chains = None, None  # poses not pickleable...

   sizes = [len(s) for s in segments]
   ntot = util.bigprod(sizes)
   with kw["executor"](**kw["executor_args"]) as pool:
      context = (
         sizes[kw["end"]:],
         kw["njob"],
         segments,
         kw["end"],
         criteria,
         kw["thresh"],
         kw["matchlast"],
         kw["every_other"],
         kw["max_results"],
      )
      args = [range(kw["njob"]), it.repeat(context)]
      util.tqdm_parallel_map(
         pool=pool,
         function=_grow_chunks,
         accumulator=accumulator,
         map_func_args=args,
         batch_size=kw["nworker"] * 8,
         unit="K worms",
         ascii=0,
         desc="growing worms",
         unit_scale=int(ntot / kw["njob"] / 1000 / kw["every_other"]),
         disable=kw["verbosity"] < 0,
      )

   # put the poses back...
   for seg in segments:
      for spl in seg.spliceables:
         spl.body, spl.chains = tmp[spl]
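
The stash-and-restore trick above generalizes to any unpicklable
attribute: keep a local reference, null the attribute before the objects
cross the process boundary, and restore it afterwards. A minimal sketch
with a hypothetical Item class (the real worms Spliceable differs):

import threading
from concurrent.futures import ProcessPoolExecutor

class Item:
    def __init__(self):
        self.lock = threading.Lock()   # locks cannot be pickled

def work(item):
    return item.lock is None           # workers see the stripped copy

if __name__ == '__main__':
    items = [Item() for _ in range(3)]
    stash = {id(it): it.lock for it in items}  # keep local references
    for it in items:
        it.lock = None                 # now the items pickle cleanly
    try:
        with ProcessPoolExecutor() as pool:
            print(list(pool.map(work, items)))  # [True, True, True]
    finally:
        for it in items:
            it.lock = stash[id(it)]    # restore after the pool is done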
Example #3
def grow(segments,
         criteria,
         *,
         thresh=2,
         expert=0,
         memsize=1e6,
         executor=None,
         executor_args=None,
         max_workers=None,
         verbosity=2,
         chunklim=None,
         max_samples=int(1e12),
         max_results=int(1e6),
         cart_resl=2.0,
         ori_resl=10.0,
         xindex_cache_file=None):
    """TODO: Summary

    Args:
        segments (TYPE): Description
        criteria (TYPE): Description
        thresh (int, optional): Description
        expert (int, optional): Description
        memsize (float, optional): Description
        executor (None, optional): Description
        executor_args (None, optional): Description
        max_workers (None, optional): Description
        verbosity (int, optional): Description
        chunklim (None, optional): Description
        max_samples (TYPE, optional): Description
        max_results (TYPE, optional): Description
        cart_resl (float, optional): Description
        ori_resl (float, optional): Description
        xindex_cache_file (None, optional): Description

    Returns:
        TYPE: Description

    Raises:
        ValueError: Description
    """
    if True:  # setup
        os.environ['OMP_NUM_THREADS'] = '1'
        os.environ['MKL_NUM_THREADS'] = '1'
        os.environ['NUMEXPR_NUM_THREADS'] = '1'
        if isinstance(segments, list):
            segments = Segments(segments)
        # if isinstance(executor, (ProcessPoolExecutor, ThreadPoolExecutor)):
        # raise ValueError('please use dask.distributed executor')
        if verbosity > 0:
            print('grow, from', criteria.from_seg, 'to', criteria.to_seg)
            for i, seg in enumerate(segments):
                print(' segment', i, 'enter:', seg.entrypol, 'exit:',
                      seg.exitpol)
                for sp in seg.spliceables:
                    print('   ', sp)
        elif verbosity == 0:
            print('grow, nseg:', len(segments))
        if verbosity > 2:
            global __print_best
            __print_best = True
        if not isinstance(criteria, CriteriaList):
            criteria = CriteriaList(criteria)
        if max_workers is not None and max_workers <= 0:
            max_workers = util.cpu_count()
        if executor_args is None and max_workers is None:
            executor_args = dict()
        elif executor_args is None:
            executor_args = dict(max_workers=max_workers)
        elif executor_args is not None and max_workers is not None:
            raise ValueError('executor_args incompatible with max_workers')

    max_results = int(max_results)

    if executor is None:
        executor = util.InProcessExecutor
        max_workers = 1
    if max_workers is None: max_workers = util.cpu_count()
    nworker = max_workers or util.cpu_count()

    if criteria.origin_seg is None:

        matchlast = _check_topology(segments, criteria, expert)
        sizes = [len(s) for s in segments]
        end = _get_chunk_end_seg(sizes, max_workers, memsize)
        ntot, chunksize, nchunks = (util.bigprod(x)
                                    for x in (sizes, sizes[:end], sizes[end:]))
        if max_samples is not None:
            max_samples = np.clip(chunksize * max_workers, max_samples, ntot)
        every_other = max(1, int(ntot / max_samples)) if max_samples else 1
        njob = int(np.sqrt(nchunks / every_other) / 128) * nworker
        njob = np.clip(nworker, njob, nchunks)

        actual_ntot = int(ntot / every_other)
        actual_nchunk = int(nchunks / every_other)
        actual_perjob = int(ntot / every_other / njob)
        actual_chunkperjob = int(nchunks / every_other / njob)
        if verbosity >= 0:
            print('tot: {:,} chunksize: {:,} nchunks: {:,} nworker: {} njob: {}'
                  .format(ntot, chunksize, nchunks, nworker, njob))
            print(
                'worm/job: {:,} chunk/job: {} sizes={} every_other={}'.format(
                    int(ntot / njob), int(nchunks / njob), sizes, every_other))
            print('max_samples: {:,} max_results: {:,}'.format(
                max_samples, max_results))
            print('actual tot:        {:,}'.format(int(actual_ntot)))
            print('actual nchunks:    {:,}'.format(int(actual_nchunk)))
            print('actual worms/job:  {:,}'.format(int(actual_perjob)))
            print('actual chunks/job: {:,}'.format(int(actual_chunkperjob)))
        _grow_args = dict(executor=executor,
                          executor_args=executor_args,
                          njob=njob,
                          end=end,
                          thresh=thresh,
                          matchlast=matchlast,
                          every_other=every_other,
                          max_results=max_results,
                          nworker=nworker,
                          verbosity=verbosity)
        if njob > 1e9 or nchunks >= 2**63 or every_other >= 2**63:
            print('too big?!?')
            print('    njob', njob)
            print('    nchunks', nchunks, nchunks / 2**63)
            print('    every_other', every_other, every_other / 2**63)
            raise ValueError('system too big')
        accum = SimpleAccumulator(max_results=max_results, max_tmp_size=1e5)
        _grow(segments, criteria, accum, **_grow_args)
        result = accum.final_result()
        if result is None: return None
        scores, lowidx, lowpos = result
        lowposlist = [lowpos[:, i] for i in range(len(segments))]
        score_check = criteria.score(segpos=lowposlist, verbosity=verbosity)
        assert np.allclose(score_check, scores)
        detail = dict(ntot=ntot,
                      chunksize=chunksize,
                      nchunks=nchunks,
                      nworker=nworker,
                      njob=njob,
                      sizes=sizes,
                      end=end)

    else:  # hash-based protocol...

        assert len(criteria) == 1
        _check_topology(segments, criteria, expert)

        splitpoint = criteria.from_seg
        tail, head = segments.split_at(splitpoint)

        print('HASH PROTOCOL splitting at segment', splitpoint)
        print('    full:', [len(s) for s in segments])

        headsizes = [len(s) for s in head]
        headend = _get_chunk_end_seg(headsizes, max_workers, memsize)
        ntot, chunksize, nchunks = (util.bigprod(x)
                                    for x in (headsizes, headsizes[:headend],
                                              headsizes[headend:]))
        if max_samples is not None:
            max_samples = np.clip(chunksize * max_workers, max_samples, ntot)
        every_other = max(1, int(ntot / max_samples)) if max_samples else 1
        njob = int(np.sqrt(nchunks / every_other) / 16 / nworker) * nworker
        njob = np.clip(nworker, njob, nchunks)
        _grow_args = dict(executor=executor,
                          executor_args=executor_args,
                          njob=njob,
                          end=headend,
                          thresh=thresh,
                          matchlast=0,
                          every_other=every_other,
                          max_results=1e9,
                          nworker=nworker,
                          verbosity=verbosity)
        t1 = 0
        if xindex_cache_file and os.path.exists(xindex_cache_file):
            print('!' * 100)
            print('reading xindex, xbinner from', xindex_cache_file)
            xindex, binner = pickle.load(open(xindex_cache_file, 'rb'))
        else:
            accum1 = MakeXIndexAccumulator(headsizes,
                                           from_seg=0,
                                           to_seg=-1,
                                           cart_resl=cart_resl,
                                           ori_resl=ori_resl)
            headcriteria = Cyclic(criteria[0].nfold,
                                  from_seg=0,
                                  to_seg=-1,
                                  tol=criteria[0].tol * 2.0,
                                  lever=criteria[0].lever)
            print('STEP ONE: growing head into xindex')
            print('    ntot            {:,}'.format(ntot))
            print('    headsizes       {}'.format(headsizes))
            print('    headend         {:,}'.format(headend))
            print('    njob            {:,}'.format(njob))
            print('    nchunks         {:,}'.format(nchunks))
            print('    chunksize       {:,}'.format(chunksize))
            print('    thresh          {:,}'.format(thresh))
            print('    matchlast       {:,}'.format(0))
            print('    every_other     {:,}'.format(every_other))
            print('    max_results     {:,}'.format(max_results))
            print('    nworker         {:,}'.format(nworker))
            print('    act. ntot       {:,}'.format(int(ntot / every_other)))
            print('    act. nchunks    {:,}'.format(int(nchunks /
                                                        every_other)))
            print('    act. worms/job  {:,}'.format(
                int(ntot / every_other / njob)))
            print('    act. chunks/job {:,}'.format(
                int(nchunks / every_other / njob)))

            import time
            t1 = time.time()
            _grow(head, headcriteria, accum1, **_grow_args)
            xindex, binner = accum1.final_result()
            t1 = time.time() - t1
            print('!' * 100)
            print("TIME PHASE ONE", t1)
            print('!' * 100)

            if xindex_cache_file:
                print('!' * 100)
                print('dumping xindex to', xindex_cache_file)
                print('!' * 100)
                pickle.dump((xindex, binner), open(xindex_cache_file, 'wb'))

        ################### PHASE TWO ####################

        tailcriteria = XIndexedCriteria(xindex,
                                        binner,
                                        criteria[0].nfold,
                                        from_seg=-1)
        accum2 = XIndexedAccumulator(segments,
                                     tail,
                                     splitpoint,
                                     head,
                                     xindex,
                                     binner,
                                     criteria[0].nfold,
                                     from_seg=criteria.from_seg,
                                     to_seg=criteria.to_seg,
                                     max_results=max_results * 200)

        tailsizes = [len(s) for s in tail]
        tailend = _get_chunk_end_seg(tailsizes, max_workers, memsize)
        ntot, chunksize, nchunks = (util.bigprod(x)
                                    for x in (tailsizes, tailsizes[:tailend],
                                              tailsizes[tailend:]))
        if max_samples is not None:
            max_samples = np.clip(chunksize * max_workers, max_samples, ntot)
        every_other = max(1, int(ntot / max_samples *
                                 20)) if max_samples else 1
        njob = int(np.ceil(np.sqrt(nchunks / every_other) / 32 / nworker))
        njob = np.clip(nworker, njob * nworker, nchunks)

        _grow_args = dict(executor=executor,
                          executor_args=executor_args,
                          njob=njob,
                          end=tailend,
                          thresh=thresh,
                          matchlast=None,
                          every_other=every_other,
                          max_results=max_results,
                          nworker=nworker,
                          verbosity=verbosity)

        print('STEP TWO: using xindex, nbins: {:,} nentries: {:,}'.format(
            len(xindex), sum(len(x) for x in xindex.values())))
        print('    ntot            {:,}'.format(ntot))
        print('    tailsizes       {}'.format(tailsizes))
        print('    tailend         {:,}'.format(tailend))
        print('    njob            {:,}'.format(njob))
        print('    nchunks         {:,}'.format(nchunks))
        print('    chunksize       {:,}'.format(chunksize))
        print('    thresh          {:,}'.format(thresh))
        print('    matchlast       None')
        print('    every_other     {:,}'.format(every_other))
        print('    max_results     {:,}'.format(max_results))
        print('    nworker         {:,}'.format(nworker))
        print('    act. ntot       {:,}'.format(int(ntot / every_other)))
        print('    act. nchunks    {:,}'.format(int(nchunks / every_other)))
        print('    act. worms/job  {:,}'.format(int(ntot / every_other /
                                                    njob)))
        print('    act. chunks/job {:,}'.format(
            int(nchunks / every_other / njob)))
        print('    executor       ', type(executor()))

        import time
        t2 = time.time()

        _grow(tail, tailcriteria, accum2, **_grow_args)
        # import cProfile
        # cProfile.runctx('_grow(tail, tailcriteria, accum2, **_grow_args)',
        #                 locals(), globals(), 'grow2.stats')
        # import pstats
        # pst = pstats.Stats('grow2.stats')
        # pst.strip_dirs().sort_stats('time').print_stats(20)
        lowidx = accum2.final_result()
        t2 = time.time() - t2

        print('!' * 100)
        print("TIME PHASE ONE", t1)
        print("TIME PHASE TWO", t2)
        # print('   best 28 cores 1608.94K/s small 1min job 681k/.s')
        print('!' * 100)

        if lowidx is None:
            print('grow: no results')
            return

        print('refold segments')
        lowpos = _refold_segments(segments, lowidx)
        lowposlist = [lowpos[:, i] for i in range(len(segments))]
        print('score refolded segments')
        scores = criteria.score(segpos=lowposlist, verbosity=verbosity)
        print('organize results')
        nlow = sum(scores <= thresh)
        order = np.argsort(scores)[:nlow]
        scores = scores[order]
        lowpos = lowpos[order]
        lowidx = lowidx[order]
        detail = dict(ntot=ntot,
                      chunksize=chunksize,
                      nchunks=nchunks,
                      nworker=nworker,
                      njob=njob,
                      sizes=tailsizes,
                      end=tailend)

    return Worms(segments, scores, lowidx, lowpos, criteria, detail)
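
The job sizing in the non-hash branch of grow can be reproduced with
plain integer arithmetic. A worked sketch with illustrative numbers
(math.prod stands in for util.bigprod, min/max for np.clip):

from math import prod as bigprod, sqrt

sizes = [40, 30, 20, 10]
end, nworker, max_samples = 2, 8, int(1e5)

ntot = bigprod(sizes)                      # 240,000 total samples
chunksize = bigprod(sizes[:end])           # 1,200 samples per chunk
nchunks = bigprod(sizes[end:])             # 200 chunks
max_samples = min(max(chunksize * nworker, max_samples), ntot)
every_other = max(1, ntot // max_samples)  # skip factor -> 2
njob = int(sqrt(nchunks / every_other) / 128) * nworker
njob = min(max(nworker, njob), nchunks)    # clip into [nworker, nchunks]
print(ntot, chunksize, nchunks, every_other, njob)
# -> 240000 1200 200 2 8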