Example #1
    def _cm_breaking(infr, cm_list=None, review_cfg={}):
        """
            >>> review_cfg = {}
        """
        if cm_list is None:
            cm_list = infr.cm_list
        ranks_top = review_cfg.get('ranks_top', None)
        ranks_bot = review_cfg.get('ranks_bot', None)

        # Construct K-broken graph
        edges = []

        if ranks_bot is None:
            ranks_bot = 0

        for count, cm in enumerate(cm_list):
            score_list = cm.annot_score_list
            # indices in descending-score order, then the rank of each position
            rank_list = ub.argsort(score_list)[::-1]
            sortx = ub.argsort(rank_list)

            top_sortx = sortx[:ranks_top]
            bot_sortx = sortx[len(sortx) - ranks_bot:]
            short_sortx = list(ub.unique(top_sortx + bot_sortx))

            daid_list = list(ub.take(cm.daid_list, short_sortx))
            for daid in daid_list:
                u, v = (cm.qaid, daid)
                if v < u:
                    u, v = v, u
                edges.append((u, v))
        return edges
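
A note on the double argsort above: ub.argsort(score_list)[::-1] gives the
indices in descending-score order, and argsorting that order gives each
position's rank. A minimal standalone sketch of the idiom (not tied to the
infr object above):

    import ubelt as ub

    scores = [0.1, 0.9, 0.5]
    order = ub.argsort(scores)[::-1]   # indices in descending-score order: [1, 2, 0]
    ranks = ub.argsort(order)          # rank of each original position: [2, 0, 1]
    assert [scores[i] for i in order] == [0.9, 0.5, 0.1]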
Example #2
def suggest_spelling_correction(name, all_names, top=10):
    import xdev
    import ubelt as ub
    distances = xdev.edit_distance(name, all_names)
    idxs = ub.argsort(distances)[0:top]
    candidates = list(ub.take(all_names, idxs))
    print('did you mean one of: {}?'.format(ub.repr2(candidates, nl=1)))
    return candidates
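
A quick usage sketch (assumes xdev and ubelt are installed; the name list is
made up for illustration):

    all_names = ['argsort', 'argmax', 'argmin', 'take', 'group_items']
    suggest_spelling_correction('argsrot', all_names, top=3)
    # did you mean one of: ['argsort', ...]?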
Example #3
    def _build_index(self):
        """ construct lookup tables """
        # Most of the categories should have been given integer ids
        max_id = max(
            it.chain([0],
                     nx.get_node_attributes(self.graph, 'id').values()))
        # Fill in id-values for any node that doesn't have one
        node_to_id = {}
        for node, attrs in sorted(self.graph.nodes.items()):
            node_to_id[node] = attrs.get('id', max_id + 1)
            max_id = max(max_id, node_to_id[node])
        id_to_node = ub.invert_dict(node_to_id)

        # Compress ids into a flat index space (sorted by node ids)
        idx_to_node = ub.argsort(node_to_id)
        node_to_idx = {node: idx for idx, node in enumerate(idx_to_node)}

        # Find the sets of nodes that need to be softmax-ed together
        node_groups = list(traverse_siblings(self.graph))
        idx_groups = [
            sorted([node_to_idx[n] for n in group]) for group in node_groups
        ]

        # Set instance attributes
        self.id_to_node = id_to_node
        self.node_to_id = node_to_id
        self.idx_to_node = idx_to_node
        self.node_to_idx = node_to_idx
        self.idx_groups = idx_groups
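
The line idx_to_node = ub.argsort(node_to_id) relies on the dictionary
support in ub.argsort: for a mapping it returns the keys ordered by their
values. A minimal sketch:

    import ubelt as ub

    node_to_id = {'cat': 7, 'dog': 2, 'bird': 5}
    assert ub.argsort(node_to_id) == ['dog', 'bird', 'cat']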
Example #4
    def from_data(xpu, item, **kwargs):
        """
        Creates an XPU to represent the processing device a Tensor or Variable
        is on

        Example:
            >>> xpu = XPU.from_data(torch.randn(3))
            >>> assert not xpu.is_gpu()
            >>> if torch.cuda.is_available():
            >>>     xpu = XPU.from_data(torch.randn(3).cuda())
            >>>     assert xpu.is_gpu()
            >>>     for i in range(torch.cuda.device_count()):
            >>>         xpu = XPU.from_data(torch.randn(3).cuda(i))
            >>>         assert xpu.is_gpu()
            >>>         assert xpu.main_device == i
        """
        if hasattr(item, 'is_cuda'):
            if item.is_cuda:
                return XPU(item.get_device())
            else:
                return XPU(None)
        elif hasattr(item, 'state_dict'):
            state_dict = item.state_dict()
            hist = ub.dict_hist(v.get_device() if v.is_cuda else None
                                for v in state_dict.values())
            device_num = ub.argsort(hist)[-1]
            return XPU(device_num)
        else:
            raise TypeError(type(item))
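
The state_dict branch picks the device holding the most tensors: ub.dict_hist
counts devices and the last key from ub.argsort is the one with the highest
count. A standalone sketch with made-up device ids (None meaning CPU):

    import ubelt as ub

    devices = [None, 0, 0, 1, 1, 1]     # hypothetical per-tensor devices
    hist = ub.dict_hist(devices)        # {None: 1, 0: 2, 1: 3}
    most_common = ub.argsort(hist)[-1]  # keys ordered by count; last is the mode
    assert most_common == 1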
Example #5
    def get_summary(self, profile_block_list, maxlines=20):
        """
        Args:
            profile_block_list (List[str]):
            maxlines (int):

        Returns:
            str:

        References:
            https://github.com/rkern/line_profiler
        """
        import ubelt as ub
        time_list = [self.get_block_totaltime(block) for block in profile_block_list]
        time_list = [time if time is not None else -1 for time in time_list]

        @ub.memoize
        def readlines(fpath):
            # use a context manager so the file handle is not leaked
            with open(fpath, 'r') as file:
                return file.readlines()

        blockid_list = [self.get_block_id(block, readlines=readlines)
                        for block in profile_block_list]
        sortx = ub.argsort(time_list)
        sorted_time_list = list(ub.take(time_list, sortx))
        sorted_blockid_list = list(ub.take(blockid_list, sortx))

        aligned_blockid_list = _align_lines(sorted_blockid_list, ':')
        summary_lines = [('%6.2f seconds - ' % time) + line
                         for time, line in
                         zip(sorted_time_list, aligned_blockid_list)]

        summary_text = '\n'.join(summary_lines[-maxlines:])
        return summary_text
Example #6
    def _BROKEN_rank_epochs(monitor):
        """
        FIXME:
            broken - implement better rank aggregation with custom weights

        Example:
            >>> monitor = demodata_monitor()
            >>> monitor._BROKEN_rank_epochs()
        """
        rankings = {}
        for key, value in monitor.best_epochs(smooth=False).items():
            rankings[key + '_raw'] = value

        for key, value in monitor.best_epochs(smooth=True).items():
            rankings[key + '_smooth'] = value

        # Borda-like weighted rank aggregation.
        # Probably could do something better.
        epoch_to_weight = ub.ddict(lambda: 0)
        for key, ranking in rankings.items():
            # weights = np.linspace(0, 1, num=len(ranking))[::-1]
            weights = np.logspace(0, 2, num=len(ranking))[::-1] / 100
            for epoch, w in zip(ranking, weights):
                epoch_to_weight[epoch] += w

        agg_ranking = ub.argsort(epoch_to_weight)[::-1]
        return agg_ranking
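
To make the weighting concrete, here is a toy run of the same aggregation
with two hand-made rankings (epochs listed best-first):

    import numpy as np
    import ubelt as ub

    rankings = {'loss_raw': [3, 1, 2], 'acc_raw': [1, 2, 3]}
    epoch_to_weight = ub.ddict(lambda: 0)
    for ranking in rankings.values():
        weights = np.logspace(0, 2, num=len(ranking))[::-1] / 100  # [1.0, 0.1, 0.01]
        for epoch, w in zip(ranking, weights):
            epoch_to_weight[epoch] += w
    agg_ranking = ub.argsort(epoch_to_weight)[::-1]
    assert agg_ranking == [1, 3, 2]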
Example #7
def find_unused_gpu(min_memory=0):
    """
    Finds GPU with the lowest memory usage by parsing output of nvidia-smi

    Args:
        min_memory (int): disregards GPUs with fewer than `min_memory` free MB

    Returns:
        int or None: gpu num if a match is found otherwise None

    CommandLine:
        python -c "from netharn import device; print(device.find_unused_gpu(300))"

    Example:
        >>> if torch.cuda.is_available():
        >>>     item = find_unused_gpu()
        >>>     assert item is None or isinstance(item, int)
    """
    gpus = gpu_info()
    if not gpus:
        return None
    gpu_avail_mem = {n: gpu['mem_avail'] for n, gpu in gpus.items()}
    usage_order = ub.argsort(gpu_avail_mem)
    gpu_num = usage_order[-1]
    if gpu_avail_mem[gpu_num] < min_memory:
        return None
    else:
        return gpu_num
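
The selection here is just a dictionary argmax: the last key in value-sorted
order is the GPU with the most free memory. A sketch with fabricated numbers:

    import ubelt as ub

    gpu_avail_mem = {0: 1024, 1: 4096, 2: 2048}   # hypothetical free MB per GPU
    best = ub.argsort(gpu_avail_mem)[-1]
    assert best == max(gpu_avail_mem, key=gpu_avail_mem.get) == 1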
Example #8
    def get_summary(self, profile_block_list, maxlines=20):
        """
        References:
            https://github.com/rkern/line_profiler
        """
        time_list = [self.get_block_totaltime(block) for block in profile_block_list]
        time_list = [time if time is not None else -1 for time in time_list]
        blockid_list = [self.get_block_id(block) for block in profile_block_list]
        sortx = ub.argsort(time_list)
        sorted_time_list = list(ub.take(time_list, sortx))
        sorted_blockid_list = list(ub.take(blockid_list, sortx))

        import utool as ut
        aligned_blockid_list = ut.util_str.align_lines(sorted_blockid_list, ':')
        summary_lines = [('%6.2f seconds - ' % time) + line
                         for time, line in
                         zip(sorted_time_list, aligned_blockid_list)]
        #summary_header = ut.codeblock(
        #    '''
        #    CLEANED PROFILE OUTPUT

        #    The Pystone timings are not from kernprof, so they may include kernprof
        #    overhead, whereas kernprof timings do not (unless the line being
        #    profiled is also decorated with kernprof)

        #    The kernprof times are reported in Timer Units

        #    ''')
        # summary_lines_ = ut.listclip(summary_lines, maxlines, fromback=True)
        summary_text = '\n'.join(summary_lines[-maxlines:])
        return summary_text
Example #9
    def fix_conference_places(bibman):

        pubman = constants_tex_fixes.PubManager()

        needed = set()

        for entry in bibman.cleaned.values():
            if entry['pub_type'] == 'conference':
                accro, year = (entry['pub_accro'], entry['year'])
                pub = pubman.find(accro)
                if pub.places is None or int(year) not in pub.places:
                    needed.add((accro, year))
                else:
                    place = pub.places[int(year)]
                    print('place = {!r}'.format(place))
                    entry['address'] = place

        if needed:
            needed = list(needed)
            used_years = ub.group_items(needed, ut.take_column(needed, 0))
            for k, v in list(used_years.items()):
                used_years[k] = sorted(v)

            sortby = ub.map_vals(lambda vs: (len(vs), max(e[1] for e in vs)),
                                 used_years)
            used_years = ut.order_dict_by(used_years, ub.argsort(sortby))
            print('NEED CONFERENCE LOCATIONS')
            print(ub.repr2(used_years, nl=2))
Example #10
def _sort_itemstrs(items, itemstrs):
    """
    Equivalent to `sorted(items)` except if `items` are unorderable, then
    string values are used to define an ordering.
    """
    # First try to sort items by their normal values.
    # If that doesn't work, then sort by their string values.
    import ubelt as ub
    try:
        # Set ordering is not unique. Sort by string values instead.
        if _peek_isinstance(items, (set, frozenset)):
            raise TypeError
        sortx = ub.argsort(items)
    except TypeError:
        sortx = ub.argsort(itemstrs)
    itemstrs = [itemstrs[x] for x in sortx]
    return itemstrs
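
A quick sketch of the fallback path (hypothetical values): mixed types raise
a TypeError under direct sorting, so the string representations define the
order instead:

    import ubelt as ub

    items = [10, 'b', 3.5]
    itemstrs = [repr(x) for x in items]
    try:
        sortx = ub.argsort(items)       # TypeError: int vs str
    except TypeError:
        sortx = ub.argsort(itemstrs)
    assert [itemstrs[x] for x in sortx] == ["'b'", '10', '3.5']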
Example #11
def _convert_dict(data):
    # Note: `self` is closed over from the enclosing hasher scope; this is a
    # nested helper in the original source.
    try:
        ordered_ = sorted(data.items())
        # what raises a TypeError differs between Python 2 and 3
    except TypeError:
        import ubelt as ub
        sortx = ub.argsort(data, key=str)
        ordered_ = [(k, data[k]) for k in sortx]
    hashable = b''.join(_hashable_sequence(ordered_, extensions=self))
    prefix = b'DICT'
    return prefix, hashable
Example #12
def bench_closures():
    """
    Is it faster to use a closure or pass in the variables explicitly?
    """
    import ubelt as ub
    import timerit
    import numpy as np

    # Test a nested func with vs without a closure
    def rand_complex(*shape):
        # np.complex was deprecated and removed in modern numpy; use complex128
        real = np.random.rand(*shape).astype(np.complex128)
        imag = np.random.rand(*shape).astype(np.complex128) * 1j
        mat = real + imag
        return mat

    s = int(ub.argval('--s', default='1'))
    mat1 = rand_complex(s, s)
    mat2 = rand_complex(s, s)
    N = 1000
    offset = 100

    def nested_closure():
        mat3 = mat1 @ mat2
        for i in range(N):
            mat3 += i + offset

    def nested_explicit(mat1, mat2, N, offset):
        mat3 = mat1 @ mat2
        for i in range(N):
            mat3 += i + offset

    ti = timerit.Timerit(int(2**11),
                         bestof=int(2**8),
                         verbose=int(ub.argval('--verbose', default='1')))

    for timer in ti.reset('nested_explicit'):
        with timer:
            nested_explicit(mat1, mat2, N, offset)

    for timer in ti.reset('nested_closure'):
        with timer:
            nested_closure()

    print('rankings = {}'.format(ub.repr2(ti.rankings, precision=9, nl=2)))
    print('consistency = {}'.format(ub.repr2(ti.consistency, precision=9,
                                             nl=2)))

    positions = ub.ddict(list)
    for m1, v1 in ti.rankings.items():
        for pos, label in enumerate(ub.argsort(v1), start=0):
            positions[label].append(pos)
    average_position = ub.map_vals(lambda x: sum(x) / len(x), positions)
    print('average_position = {}'.format(ub.repr2(average_position)))
Example #13
def _convert_set(data):
    try:
        # what raises a TypeError differs between Python 2 and 3
        ordered_ = sorted(data)
    except TypeError:
        import ubelt as ub
        data_ = list(data)
        sortx = ub.argsort(data_, key=str)
        ordered_ = [data_[k] for k in sortx]
    hashable = b''.join(_hashable_sequence(ordered_, extensions=self))
    prefix = b'SET'
    return prefix, hashable
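
Sketch of the same fallback on a mixed-type set (hypothetical data): the
direct sort fails on Python 3, so elements are ordered by their string form
via key=str:

    import ubelt as ub

    data = {'b', 10}
    try:
        ordered_ = sorted(data)            # TypeError: int vs str
    except TypeError:
        data_ = list(data)
        sortx = ub.argsort(data_, key=str)
        ordered_ = [data_[k] for k in sortx]
    assert ordered_ == [10, 'b']           # '10' < 'b' as strings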
Example #14
def find_unused_gpu(min_memory=0):
    """
    Finds the least-used GPU (fewest compute processes, then most free memory)
    by parsing output of nvidia-smi

    Args:
        min_memory (int): disregards GPUs with fewer than `min_memory` free MB

    Returns:
        int or None: gpu num if a match is found otherwise None

    CommandLine:
        python -c "from netharn import device; print(device.find_unused_gpu(300))"

        CUDA_VISIBLE_DEVICES=1 python -c "from netharn import device; print(device.find_unused_gpu(300))"

    Example:
        >>> if torch.cuda.is_available():
        >>>     item = find_unused_gpu()
        >>>     assert item is None or isinstance(item, int)
    """

    # Notes on slurm:
    # If we are running in slurm, then we should be able to see these
    # environment vars
    # SLURM_STEP_GPUS
    # GPU_DEVICE_ORDINAL
    # Also respect CUDA_VISIBLE_DEVICES
    try:
        gpus = gpu_info()
    except NvidiaSMIError:
        gpus = None

    if not gpus:
        return None

    # Order GPUs by most available memory
    # gpu_avail_mem = {n: -gpu['mem_avail'] for n, gpu in gpus.items()}

    # Order GPUs by fewest compute processes, and then by available memory
    gpu_avail_mem = {
        n: (gpu['num_compute_procs'], -gpu['mem_avail'])
        for n, gpu in gpus.items()
    }
    ranked_order = ub.argsort(gpu_avail_mem)

    for gpu_num in ranked_order:
        gpu = gpus[gpu_num]
        if gpu['mem_avail'] >= min_memory:
            return gpu_num
    return None
Example #15
    def images_with_keypoints():
        keypoint_gids = set()
        for aid, ann in merged.anns.items():
            if ann['roi_shape'] == 'keypoints':
                keypoint_gids.add(ann['image_id'])

        relevant = ub.dict_subset(merged.gid_to_aids, keypoint_gids)
        relevant = {
            gid:
            [a for a in aids if merged.anns[a]['roi_shape'] == 'keypoints']
            for gid, aids in relevant.items()
        }

        gid_list = ub.argsort(ub.map_vals(len, relevant))[::-1]
        return gid_list
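
The final line orders image ids by how many keypoint annotations they have,
most first: ub.map_vals(len, ...) turns the lists into counts, and the
reversed dict-argsort orders the keys. A standalone sketch with fabricated
ids:

    import ubelt as ub

    relevant = {1: ['a'], 2: ['b', 'c', 'd'], 3: ['e', 'f']}   # gid -> aids
    counts = ub.map_vals(len, relevant)                        # {1: 1, 2: 3, 3: 2}
    assert ub.argsort(counts)[::-1] == [2, 3, 1]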
Example #16
def _convert_dict(data):
    try:
        ordered_ = sorted(data.items())
        # what raises a TypeError differs between Python 2 and 3
    except TypeError:
        import ubelt as ub
        sortx = ub.argsort(data, key=str)
        ordered_ = [(k, data[k]) for k in sortx]
    # See: [util_hash.Note.1]
    hashable = b''.join(
        _hashable_sequence(
            ordered_,
            extensions=self,
            types=_COMPATIBLE_HASHABLE_SEQUENCE_TYPES_DEFAULT))
    prefix = b'DICT'
    return prefix, hashable
Example #17
def find_unused_gpu(min_memory=0):
    """
    Finds GPU with the lowest memory usage by parsing output of nvidia-smi

    python -c "from pysseg.util import gpu_util; print(gpu_util.find_unused_gpu())"
    """
    gpus = gpu_info()
    if gpus is None:
        return None
    gpu_avail_mem = {n: gpu['mem_avail'] for n, gpu in gpus.items()}
    usage_order = ub.argsort(gpu_avail_mem)
    gpu_num = usage_order[-1]
    if gpu_avail_mem[gpu_num] < min_memory:
        return None
    else:
        return gpu_num
Example #18
def _convert_set(data):
    try:
        # what raises a TypeError differs between Python 2 and 3
        ordered_ = sorted(data)
    except TypeError:
        import ubelt as ub
        data_ = list(data)
        sortx = ub.argsort(data_, key=str)
        ordered_ = [data_[k] for k in sortx]
    # See: [util_hash.Note.1]
    hashable = b''.join(
        _hashable_sequence(
            ordered_,
            extensions=self,
            types=_COMPATIBLE_HASHABLE_SEQUENCE_TYPES_DEFAULT))
    prefix = b'SET'
    return prefix, hashable
Example #19
def read_tensorboard_scalars(train_dpath, verbose=1, cache=1):
    """
    Reads all tensorboard scalar events in a directory.
    Caches them because reading events of interest from protobuf can be slow.
    """
    import glob
    from os.path import join
    try:
        from tensorboard.backend.event_processing import event_accumulator
    except ImportError:
        raise ImportError('tensorboard is not installed')
    event_paths = sorted(glob.glob(join(train_dpath, 'events.out.tfevents*')))
    # make a hash so we will re-read if we need to
    cfgstr = ub.hash_data(list(map(ub.hash_file, event_paths)))
    # cfgstr = ub.hash_data(list(map(basename, event_paths)))
    cacher = ub.Cacher('tb_scalars',
                       enabled=cache,
                       dpath=ub.ensuredir((train_dpath, '_cache')),
                       cfgstr=cfgstr)
    datas = cacher.tryload()
    if datas is None:
        datas = {}
        for p in ub.ProgIter(list(reversed(event_paths)),
                             desc='read tensorboard',
                             enabled=verbose):
            ea = event_accumulator.EventAccumulator(p)
            ea.Reload()
            for key in ea.scalars.Keys():
                if key not in datas:
                    datas[key] = {'xdata': [], 'ydata': [], 'wall': []}
                subdatas = datas[key]
                events = ea.scalars.Items(key)
                for e in events:
                    subdatas['xdata'].append(int(e.step))
                    subdatas['ydata'].append(float(e.value))
                    subdatas['wall'].append(float(e.wall_time))

        # Order all information by its wall time
        for key, subdatas in datas.items():
            sortx = ub.argsort(subdatas['wall'])
            for d, vals in subdatas.items():
                subdatas[d] = list(ub.take(vals, sortx))
        cacher.save(datas)
    return datas
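
The reorder step at the end sorts every per-key series by wall time while
keeping xdata, ydata, and wall aligned. A minimal sketch of the pattern:

    import ubelt as ub

    subdatas = {'xdata': [2, 0, 1], 'ydata': [0.2, 0.0, 0.1], 'wall': [20.0, 5.0, 10.0]}
    sortx = ub.argsort(subdatas['wall'])          # [1, 2, 0]
    for d, vals in subdatas.items():
        subdatas[d] = list(ub.take(vals, sortx))
    assert subdatas['xdata'] == [0, 1, 2]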
Example #20
def main():
    import ubelt as ub
    from ubelt import util_list
    from ubelt.util_list import take
    import random
    from math import e

    # # Data
    N = 100
    array = [random.random() for _ in range(N)]
    indices = [random.randint(0, N - 1) for _ in range(int(N // e))]

    ti = ub.Timerit(2 ** 11, bestof=2 ** 8, verbose=1)

    for timer in ti.reset('take'):
        with timer:
            list(take(array, indices))

    for timer in ti.reset('util_list.take'):
        with timer:
            list(util_list.take(array, indices))

    for timer in ti.reset('ub.take'):
        with timer:
            list(ub.take(array, indices))

    print('---')

    # import pandas as pd
    # df = pd.DataFrame(rankings)
    # print('df =\n{}'.format(df))

    print('rankings = {}'.format(ub.repr2(ti.rankings, precision=9, nl=2)))
    print('consistency = {}'.format(ub.repr2(ti.consistency, precision=9, nl=2)))

    positions = ub.ddict(list)
    for m1, v1 in ti.rankings.items():
        for pos, label in enumerate(ub.argsort(v1), start=0):
            positions[label].append(pos)
    average_position = ub.map_vals(lambda x: sum(x) / len(x), positions)
    print('average_position = {}'.format(ub.repr2(average_position)))
Example #21
    def best_epochs(monitor):
        rankings = {}

        def _rank(key, metrics, type='min'):
            values = [m[key] for m in metrics]
            if type == 'max':
                sortx = np.argsort(values)[::-1]
            elif type == 'min':
                sortx = np.argsort(values)
            else:
                raise KeyError(type)
            ranked_epochs = np.array(monitor.epochs)[sortx]
            return ranked_epochs

        for key in monitor.min_keys:
            rankings[key + '_raw'] = _rank(key, monitor.raw_metrics, 'min')
            rankings[key + '_smooth'] = _rank(key, monitor.smooth_metrics, 'min')

        for key in monitor.max_keys:
            rankings[key + '_raw'] = _rank(key, monitor.raw_metrics, 'max')
            rankings[key + '_smooth'] = _rank(key, monitor.smooth_metrics, 'max')

        for key in monitor.max_keys:
            values = [m[key] for m in monitor.raw_metrics]
            sortx = np.argsort(values)[::-1]
            ranked_epochs = np.array(monitor.epochs)[sortx]
            rankings[key] = ranked_epochs

        # Borda-like weighted rank aggregation.
        # Probably could do something better.
        epoch_to_weight = ub.ddict(lambda: 0)
        for key, ranking in rankings.items():
            # weights = np.linspace(0, 1, num=len(ranking))[::-1]
            weights = np.logspace(0, 2, num=len(ranking))[::-1] / 100
            for epoch, w in zip(ranking, weights):
                epoch_to_weight[epoch] += w

        agg_ranking = ub.argsort(epoch_to_weight)[::-1]
        return agg_ranking
Example #22
    def sort_entries(bibman):
        def freq_group(items, groupids):
            groups = ub.group_items(items, groupids)
            hist = ub.map_vals(len, groups)
            for k in ub.argsort(hist):
                yield groups[k]

        high_level_alias = {
            'incollection': 'book',
            'conference': 'confjourn',
            'journal': 'confjourn',
            'online-journal': 'confjourn',
        }
        sorted_entries = []
        entries = list(bibman.cleaned.values())
        groups = [
            high_level_alias.get(entry['pub_type'], entry['pub_type'])
            for entry in entries
        ]
        entry_groups = freq_group(entries, groups)
        for group in entry_groups:
            subids = [entry['ENTRYTYPE'] for entry in group]
            for subgroup in freq_group(group, subids):
                subsubids = [entry['pub_full'] for entry in subgroup]
                # Group publications, and then sort conferences by max date
                pub_groups = []
                pub_maxdates = []
                for ssg in freq_group(subgroup, subsubids):
                    sssid = [entry['date'] for entry in ssg]
                    ssg2 = list(ub.take(ssg, ub.argsort(sssid)))
                    pub_groups.append(ssg2)
                    pub_maxdates.append(ssg2[-1]['date'])
                subgroup2 = list(
                    ub.flatten(ut.sortedby2(pub_groups, pub_maxdates)))
                sorted_entries.extend(subgroup2)
        new_entries = ub.odict([(e['ID'], e) for e in sorted_entries])
        bibman.cleaned = new_entries
Example #23
def benchmark_hash_file():
    """
    CommandLine:
        python ~/code/ubelt/dev/bench_hash.py --show
    """
    import ubelt as ub
    import random

    # dpath = ub.ensuredir(ub.expandpath('$HOME/raid/data/tmp'))
    dpath = ub.ensuredir(ub.expandpath('$HOME/tmp'))

    rng = random.Random(0)
    # Create a pool of random chunks of data
    chunksize = int(2 ** 20)
    pool_size = 8
    part_pool = [_random_data(rng, chunksize) for _ in range(pool_size)]

    #ITEM = 'JUST A STRING' * 100
    HASHERS = ['sha1', 'sha512', 'xxh32', 'xxh64', 'blake3']

    scales = list(range(5, 10))
    import os

    results = ub.AutoDict()
    # Using json is faster, or at least as fast, in most cases.
    # xxhash is also significantly faster than sha512.
    ti = ub.Timerit(9, bestof=3, verbose=1, unit='ms')
    for s in ub.ProgIter(scales, desc='benchmark', verbose=3):
        N = 2 ** s
        print(' --- s={s}, N={N} --- '.format(s=s, N=N))
        # Write a big file
        size_pool = [N]
        fpath = _write_random_file(dpath, part_pool, size_pool, rng)

        megabytes = os.stat(fpath).st_size / (2 ** 20)
        print('megabytes = {!r}'.format(megabytes))

        for hasher in HASHERS:
            for timer in ti.reset(hasher):
                ub.hash_file(fpath, hasher=hasher)
            results[hasher].update({N: ti.mean()})
        col = {h: results[h][N] for h in HASHERS}
        sortx = ub.argsort(col)
        ranking = ub.dict_subset(col, sortx)
        print('walltime: ' + ub.repr2(ranking, precision=9, nl=0))
        best = next(iter(ranking))
        #pairs = list(ub.iter_window( 2))
        pairs = [(k, best) for k in ranking]
        ratios = [ranking[k1] / ranking[k2] for k1, k2 in pairs]
        nicekeys = ['{}/{}'.format(k1, k2) for k1, k2 in pairs]
        relratios = ub.odict(zip(nicekeys, ratios))
        print('speedup: ' + ub.repr2(relratios, precision=4, nl=0))
    # xdoc +REQUIRES(--show)
    # import pytest
    # pytest.skip()
    import pandas as pd
    df = pd.DataFrame.from_dict(results)
    df.columns.name = 'hasher'
    df.index.name = 'N'
    ratios = df.copy().drop(columns=df.columns)
    for k1, k2 in [('sha512', 'xxh64'), ('sha1', 'xxh64'), ('xxh32', 'xxh64'), ('blake3', 'xxh64')]:
        ratios['{}/{}'.format(k1, k2)] = df[k1] / df[k2]
    print()
    print('Seconds per iteration')
    print(df.to_string(float_format='%.9f'))
    print()
    print('Ratios of seconds')
    print(ratios.to_string(float_format='%.2f'))
    print()
    print('Average Ratio (over all N)')
    print(ratios.mean().sort_values())
    if ub.argflag('--show'):
        import kwplot
        kwplot.autompl()
        xdata = sorted(ub.peek(results.values()).keys())
        ydata = ub.map_vals(lambda d: [d[x] for x in xdata], results)
        kwplot.multi_plot(xdata, ydata, xlabel='N', ylabel='seconds')
        kwplot.show_if_requested()
Example #24
def group_indices(idx_to_groupid, assume_sorted=False):
    """
    Find unique items and the indices at which they appear in an array.

    A common use case of this function is when you have a list of objects
    (often numeric but sometimes not) and an array of "group-ids" corresponding
    to that list of objects.

    Using this function will return a list of indices that can be used in
    conjunction with :func:`apply_grouping` to group the elements.  This is
    most useful when you have many lists (think column-major data)
    corresponding to the group-ids.

    In cases where there is only one list of objects or knowing the indices
    doesn't matter, then consider using :func:`group_items` instead.

    Args:
        idx_to_groupid (ndarray):
            The input array, where each item is interpreted as a group id.
            For the fastest runtime, the input array must be numeric (ideally
            with integer types).  If the type is non-numeric then the less
            efficient :func:`ubelt.group_items` is used.

        assume_sorted (bool, default=False):
            If the input array is sorted, then setting this to True will avoid
            an unnecessary sorting operation and improve efficiency.

    Returns:
        Tuple[ndarray, List[ndarrays]]: (keys, groupxs) -
            keys (ndarray):
                The unique elements of the input array in order
            groupxs (List[ndarray]):
                Corresponding list of indexes.  The i-th item is an array
                indicating the indices where the item ``key[i]`` appeared in
                the input array.

    Example:
        >>> # xdoctest: +IGNORE_WHITESPACE
        >>> import ubelt as ub
        >>> idx_to_groupid = np.array([2, 1, 2, 1, 2, 1, 2, 3, 3, 3, 3])
        >>> (keys, groupxs) = group_indices(idx_to_groupid)
        >>> print(ub.repr2(keys, with_dtype=False))
        >>> print(ub.repr2(groupxs, with_dtype=False))
        np.array([1, 2, 3])
        [
            np.array([1, 3, 5]),
            np.array([0, 2, 4, 6]),
            np.array([ 7,  8,  9, 10]),
        ]

    Example:
        >>> # xdoctest: +IGNORE_WHITESPACE
        >>> import ubelt as ub
        >>> idx_to_groupid = np.array([[  24], [ 129], [ 659], [ 659], [ 24],
        ...       [659], [ 659], [ 822], [ 659], [ 659], [24]])
        >>> # 2d arrays must be flattened before coming into this function so
        >>> # information is on the last axis
        >>> (keys, groupxs) = group_indices(idx_to_groupid.T[0])
        >>> print(ub.repr2(keys, with_dtype=False))
        >>> print(ub.repr2(groupxs, with_dtype=False))
        np.array([ 24, 129, 659, 822])
        [
            np.array([ 0,  4, 10]),
            np.array([1]),
            np.array([2, 3, 5, 6, 8, 9]),
            np.array([7]),
        ]

    Example:
        >>> # xdoctest: +IGNORE_WHITESPACE
        >>> import ubelt as ub
        >>> idx_to_groupid = np.array([True, True, False, True, False, False, True])
        >>> (keys, groupxs) = group_indices(idx_to_groupid)
        >>> print(ub.repr2(keys, with_dtype=False))
        >>> print(ub.repr2(groupxs, with_dtype=False))
        np.array([False,  True])
        [
            np.array([2, 4, 5]),
            np.array([0, 1, 3, 6]),
        ]

    Example:
        >>> # xdoctest: +IGNORE_WHITESPACE
        >>> import ubelt as ub
        >>> idx_to_groupid = [('a', 'b'),  ('d', 'b'), ('a', 'b'), ('a', 'b')]
        >>> (keys, groupxs) = group_indices(idx_to_groupid)
        >>> print(ub.repr2(keys, with_dtype=False))
        >>> print(ub.repr2(groupxs, with_dtype=False))
        [
            ('a', 'b'),
            ('d', 'b'),
        ]
        [
            np.array([0, 2, 3]),
            np.array([1]),
        ]
    """
    _idx_to_groupid_orig = idx_to_groupid
    # np.asarray avoids an unnecessary copy and works across numpy versions
    idx_to_groupid = np.asarray(idx_to_groupid)
    _n_item = idx_to_groupid.size
    _dtype = idx_to_groupid.dtype
    _kind = _dtype.kind
    if _kind == 'U' or _kind == 'O':
        # fallback to slower algorithm for non-numeric data
        group = ub.group_items(range(_n_item), _idx_to_groupid_orig)
        try:
            # attempt to return values in a consistent order
            sortx = ub.argsort(list(group.keys()))
            keys = list(ub.take(list(group.keys()), sortx))
            groupxs = list(ub.take(list(map(np.array, group.values())), sortx))
        except Exception:
            keys = list(group.keys())
            groupxs = list(map(np.array, group.values()))
        return keys, groupxs

    # Sort items and idx_to_groupid by groupid
    if assume_sorted:
        sortx = np.arange(len(idx_to_groupid))
        groupids_sorted = idx_to_groupid
    else:
        sortx = idx_to_groupid.argsort()
        groupids_sorted = idx_to_groupid.take(sortx)

    if _kind == 'b':
        # Ensure bools are internally cast to integers
        # However, be sure that the groups are returned as the original dtype
        _groupids = groupids_sorted.astype(np.int8)
    else:
        _groupids = groupids_sorted

    # Find the boundaries between groups
    diff = np.ones(_n_item + 1, _groupids.dtype)
    np.subtract(_groupids[1:], _groupids[:-1], out=diff[1:_n_item])
    idxs = np.flatnonzero(diff)
    # Groups are between bounding indexes
    groupxs = [sortx[lx:rx] for lx, rx in zip(idxs, idxs[1:])]  # 34.5%
    # Unique group keys
    keys = groupids_sorted[idxs[:-1]]
    return keys, groupxs
Example #25
def 数组_排序索引(indexable, key=None, reverse=False):
    """Chinese-named wrapper around ub.argsort ("array sort indices")."""
    return ub.argsort(indexable, key, reverse)
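
For reference, the wrapped function's basic behavior on lists and dicts:

    import ubelt as ub

    assert ub.argsort([3, 1, 2]) == [1, 2, 0]
    assert ub.argsort([3, 1, 2], reverse=True) == [0, 2, 1]
    assert ub.argsort({'a': 3, 'b': 1}) == ['b', 'a']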
Example #26
def _precompute_class_weights(dset, mode='median-idf'):
    """
    Example:
        >>> # xdoctest: +REQUIRES(--download)
        >>> import sys, ubelt
        >>> sys.path.append(ubelt.expandpath('~/code/netharn/examples'))
        >>> from sseg_camvid import *  # NOQA
        >>> harn = setup_harn(0, workers=0, xpu='cpu').initialize()
        >>> dset = harn.datasets['train']
    """

    assert mode in ['median-idf', 'log-median-idf']

    total_freq = _cached_class_frequency(dset)

    def logb(arr, base):
        if base == 'e':
            return np.log(arr)
        elif base == 2:
            return np.log2(arr)
        elif base == 10:
            return np.log10(arr)
        else:
            out = np.log(arr)
            out /= np.log(base)
            return out

    _min, _max = np.percentile(total_freq, [5, 95])
    is_valid = (_min <= total_freq) & (total_freq <= _max)
    if np.any(is_valid):
        middle_value = np.median(total_freq[is_valid])
    else:
        middle_value = np.median(total_freq)

    # variant of median-inverse-frequency
    nonzero_freq = total_freq[total_freq != 0]
    if len(nonzero_freq):
        total_freq[total_freq == 0] = nonzero_freq.min() / 2

    if mode == 'median-idf':
        weights = (middle_value / total_freq)
        weights[~np.isfinite(weights)] = 1.0
    elif mode == 'log-median-idf':
        weights = (middle_value / total_freq)
        weights[~np.isfinite(weights)] = 1.0
        # base = 2  # alternative log base
        base = np.exp(1)
        weights = logb(weights + (base - 1), base)
        weights = np.maximum(weights, .1)
        weights = np.minimum(weights, 10)
    else:
        raise KeyError('mode = {!r}'.format(mode))

    weights = np.round(weights, 2)
    cname_to_weight = ub.dzip(dset.classes, weights)
    print('weights: ' + ub.repr2(cname_to_weight))

    if False:
        # Inspect the weights
        import kwplot
        kwplot.autoplt()

        cname_to_weight = ub.dzip(dset.classes, weights)
        cname_to_weight = ub.dict_subset(cname_to_weight, ub.argsort(cname_to_weight))
        kwplot.multi_plot(
            ydata=list(cname_to_weight.values()),
            kind='bar',
            xticklabels=list(cname_to_weight.keys()),
            xtick_rotation=90,
            fnum=2, doclf=True)

    return weights
Example #27
def main():
    candidates = None
    mode = 'ultra'
    if mode == 'great':
        candidate_csv_text = ub.codeblock(
            '''
            registeel,LOCK_ON,FLASH_CANNON,FOCUS_BLAST,22,10,14,15
            stunfisk_galarian,MUD_SHOT,ROCK_SLIDE,EARTHQUAKE,25,11,14,14
            # altaria,DRAGON_BREATH,SKY_ATTACK,DRAGON_PULSE,26.5,14,12,13

            skarmory,AIR_SLASH,SKY_ATTACK,FLASH_CANNON,26,11,13,10

            azumarill,BUBBLE,ICE_BEAM,HYDRO_PUMP,38,12,15,13
            dewgong,ICE_SHARD,ICY_WIND,WATER_PULSE,26.5,15,08,15

            # umbreon,SNARL,FOUL_PLAY,LAST_RESORT,24.5,15,10,15
            # farfetchd_galarian,FURY_CUTTER,LEAF_BLADE,BRAVE_BIRD,33.5,12,15,15

            hypno,CONFUSION,SHADOW_BALL,THUNDER_PUNCH,25.5,13,15,14
            # hypno,CONFUSION,SHADOW_BALL,FOCUS_BLAST,25.5,13,15,14

            # machamp-shadow,COUNTER,ROCK_SLIDE,CROSS_CHOP,18,5,11,10
            victreebel_shadow-shadow,RAZOR_LEAF,LEAF_BLADE,FRUSTRATION,22.5,4,14,14
            ''')

        candidate_explicit = [
            Pokemon('medicham', ivs=[7, 15, 14], level=41.5),
            Pokemon('medicham', ivs=[7, 15, 14], level=43.0),
            Pokemon('medicham', ivs=[7, 15, 14]).maximize(1500),
            Pokemon('machamp', [1, 15, 6], cp=1493),
            Pokemon('altaria', [1, 11, 8], cp=1496),
            Pokemon('skarmory', [0, 15, 13], cp=1495),
            Pokemon('umbreon', [1, 8, 8], cp=1495),
            Pokemon('registeel', [10, 14, 15], cp=1487),
            Pokemon('stunfisk', [11, 14, 14], form='Galarian', cp=1498),
            Pokemon('cresselia', [7, 14, 8], cp=1493),
            Pokemon('vigoroth', [0, 10, 9], cp=1495),
            Pokemon('drifblim', [4, 14, 13], cp=1498),
            Pokemon('haunter', [6, 13, 15], cp=1498),
            Pokemon('mantine', [6, 13, 14], cp=1497),
            Pokemon('politoed', [3, 5, 13], cp=1493),
            Pokemon('charizard', [3, 15, 14], cp=1485),
            Pokemon('gengar', [5, 11, 14], cp=1483),
            Pokemon('mew', [15, 12, 11], cp=1470),
            Pokemon('dewgong', [15, 8, 15]).maximize(1500),
            Pokemon('azumarill', [12, 15, 13]).maximize(1500),
            Pokemon('hypno', [13, 15, 14]).maximize(1500),
        ]
        for cand in candidate_explicit:
            cand.populate_cp()
        stat_products = [cand.stat_product for cand in candidate_explicit]
        sortx = ub.argsort(stat_products)
        candidate_explicit = list(ub.take(candidate_explicit, sortx))
        stat_products = list(ub.take(stat_products, sortx))
        print('stat_products = {}'.format(ub.repr2(stat_products, nl=1)))
        print('candidate_explicit = {}'.format(ub.repr2(candidate_explicit, nl=1)))

        for cand in candidate_explicit:
            print('cand.adjusted = {}, {:.2f}, {}'.format(ub.repr2(cand.adjusted, nl=0, precision=2), cand.stat_product, cand))

    if mode == 'ultra':
        candidate_csv_text = ub.codeblock(
            '''
            cresselia,PSYCHO_CUT,MOONBLAST,FUTURE_SIGHT
            togekiss,CHARM,FLAMETHROWER,ANCIENT_POWER
            articuno,ICE_SHARD,ICY_WIND,HURRICANE
            swampert,MUD_SHOT,MUDDY_WATER,EARTHQUAKE
            venusaur,VINE_WHIP,FRENZY_PLANT,SLUDGE_BOMB
            ''')

        candidates = [
            Pokemon('Gengar', (7, 14, 14), cp=2500, moves=['SHADOW_CLAW', 'SHADOW_PUNCH', 'SHADOW_BALL']),
            Pokemon('Togekiss', (15, 15, 14), cp=2469, moves=['CHARM', 'FLAMETHROWER', 'AERIAL_ACE']),
            Pokemon('Venusaur', (15, 13, 13), cp=2482, moves=['VINE_WHIP', 'FRENZY_PLANT', 'SLUDGE_BOMB']),
            Pokemon('Muk', (9, 7, 4), cp=2486, form='Alola', moves=['SNARL', 'DARK_PULSE', 'SLUDGE_WAVE']),
            Pokemon('Swampert', (0, 2, 14), cp=2500, moves=['WATER_GUN', 'HYDRO_CANNON', 'SLUDGE_WAVE']),
            Pokemon('Empoleon', (0, 10, 14), cp=2495, moves=['WATERFALL', 'HYDRO_CANNON', 'DRILL_PECK']),
            Pokemon('sirfetch’d', (4, 11, 12), cp=2485, form='Galarian', moves=['COUNTER', 'CLOSE_COMBAT', 'LEAF_BLADE']),
        ]
    # else:
    #     raise KeyError(mode)

    if candidates is None:
        candidates = []
        for line in candidate_csv_text.split('\n'):
            line = line.strip()
            if line.startswith('#'):
                continue
            if line:
                row = line.split(',')
                cand = Pokemon.from_pvpoke_row(row)
                candidates.append(cand)

    print(ub.repr2(api.learnable))

    if mode == 'ultra':
        # base = 'https://pvpoke.com/team-builder/all/2500'  # open-league alternative
        base = 'https://pvpoke.com/team-builder/premier/2500'
    elif mode == 'great':
        base = 'https://pvpoke.com/team-builder/all/1500'
    sep = '%2C'
    import itertools as it
    print('candidates = {!r}'.format(candidates))
    for team in it.combinations(candidates, 3):
        # if not any('registeel' in p.name for p in team):
        #     continue
        # if not any('victree' in p.name for p in team):
        #     continue
        # if len(set(p.name for p in team)) != 3:
        #     continue
        suffix = sep.join([p.to_pvpoke_url() for p in team])
        url = base + '/' + suffix
        print(url)
Example #28
    def argparse(self, parser=None, special_options=False):
        """
        construct or update an argparse.ArgumentParser CLI parser

        Args:
            parser (None | argparse.ArgumentParser): if specified this
                parser is updated with options from this config.

            special_options (bool, default=False):
                adds special scriptconfig options, namely: --config, --dumps,
                and --dump.

        Returns:
            argparse.ArgumentParser : a new or updated argument parser

        CommandLine:
            xdoctest -m scriptconfig.config Config.argparse:0
            xdoctest -m scriptconfig.config Config.argparse:1

        TODO:
            A good CLI spec for lists might be

            # In the case where ``key`` ends with an ``=``, assume the list is
            # given as a comma separated string with optional square brackets
            # at each end.

            --key=[f]

            # In the case where ``key`` does not end with an equals sign and
            # we know the value is supposed to be a list, then we consume
            # arguments until we hit the next one that starts with '--'
            # (which means that list items cannot start with -- but they can
            # contain commas)

        FIXME:

            * In the case where we have an nargs='+' action, and we specify
              the option with an `=`, and then we give position args after it
              there is no way to modify behavior of the action to just look at
              the data in the string without modifying the ArgumentParser
              itself. The action object has no control over it. For example
              `--foo=bar baz biz` will parse as `[baz, biz]` which is really
              not what we want. We may be able to overload ArgumentParser to
              fix this.

        Example:
            >>> # You can now make instances of this class
            >>> import scriptconfig
            >>> self = scriptconfig.Config.demo()
            >>> parser = self.argparse()
            >>> parser.print_help()
            >>> # xdoctest: +REQUIRES(PY3)
            >>> # Python2 argparse does a hard sys.exit instead of raise
            >>> ns, extra = parser.parse_known_args()

        Example:
            >>> # You can now make instances of this class
            >>> import scriptconfig as scfg
            >>> class MyConfig(scfg.Config):
            >>>     description = 'my CLI description'
            >>>     default = {
            >>>         'path1':  scfg.Value(None, position=1, alias='src'),
            >>>         'path2':  scfg.Value(None, position=2, alias='dst'),
            >>>         'dry':  scfg.Value(False, isflag=True),
            >>>         'approx':  scfg.Value(False, isflag=False, alias=['a1', 'a2']),
            >>>     }
            >>> self = MyConfig()
            >>> special_options = True
            >>> parser = None
            >>> parser = self.argparse(special_options=special_options)
            >>> parser.print_help()
            >>> self._read_argv(argv=['objection', '42', '--path1=overruled!'])
            >>> print('self = {!r}'.format(self))

        Ignore:
            >>> self._read_argv(argv=['hi','--path1=foobar'])
            >>> self._read_argv(argv=['hi', 'hello', '--path1=foobar'])
            >>> self._read_argv(argv=['hi', 'hello', '--path1=foobar', '--help'])
            >>> self._read_argv(argv=['--path1=foobar', '--path1=baz'])
            >>> print('self = {!r}'.format(self))
        """
        import argparse

        if parser is None:
            parserkw = self._parserkw()
            parser = argparse.ArgumentParser(**parserkw)

        # Use custom action used to mark which values were explicitly set on
        # the commandline
        parser._explicitly_given = set()

        parent = self

        class ParseAction(argparse.Action):
            def __init__(self, *args, **kwargs):
                super(ParseAction, self).__init__(*args, **kwargs)
                # With scriptconfig nothing should be required by default;
                # all positional arguments should have keyword arg variants.
                # Setting required=False here will prevent positional args
                # from erroring if they are not specified. I don't think
                # there are other side effects, but we should make sure that
                # is actually the case.
                self.required = False

                if self.type is None:
                    # Is this the right place to put this?
                    def _mytype(value):
                        key = self.dest
                        template = parent.default[key]
                        if not isinstance(template, Value):
                            # smartcast non-valued params from commandline
                            value = smartcast.smartcast(value)
                        else:
                            value = template.cast(value)
                        return value

                    self.type = _mytype

                # print('self.type = {!r}'.format(self.type))

            def __call__(action, parser, namespace, values, option_string=None):
                # print('CALL action = {!r}'.format(action))
                # print('option_string = {!r}'.format(option_string))
                # print('values = {!r}'.format(values))

                if isinstance(values, list) and len(values):
                    # We got a list of lists, which we hack into a flat list
                    if isinstance(values[0], list):
                        import itertools as it
                        values = list(it.chain(*values))

                setattr(namespace, action.dest, values)
                parser._explicitly_given.add(action.dest)

        # IRC: this ensures each key has a real Value class
        _metadata = {
            key: self._data[key]
            for key, value in self._default.items()
            if isinstance(self._data[key], Value)
        }  # :type: Dict[str, Value]
        _positions = {k: v.position for k, v in _metadata.items()
                      if v.position is not None}
        if _positions:
            if ub.find_duplicates(_positions.values()):
                raise Exception('two values have the same position')
            _keyorder = ub.oset(ub.argsort(_positions))
            _keyorder |= (ub.oset(self._default) - _keyorder)
        else:
            _keyorder = list(self._default.keys())

        def _add_arg(parser, name, key, argkw, positional, isflag, isalias):
            _argkw = argkw.copy()

            if isalias:
                _argkw['help'] = 'alias of {}'.format(key)
                _argkw.pop('default', None)
                # flags cannot have flag aliases
                isflag = False

            elif positional:
                parser.add_argument(name, **_argkw)

            if isflag:
                # Can we support both flag and setitem methods of cli
                # parsing?
                if not isinstance(_argkw.get('default', None), bool):
                    raise ValueError('can only use isflag with bools')
                _argkw.pop('type', None)
                _argkw.pop('choices', None)
                _argkw.pop('action', None)
                _argkw.pop('nargs', None)
                _argkw['dest'] = key

                _argkw_true = _argkw.copy()
                _argkw_true['action'] = 'store_true'

                _argkw_false = _argkw.copy()
                _argkw_false['action'] = 'store_false'
                _argkw_false.pop('help', None)

                parser.add_argument('--' + name, **_argkw_true)
                parser.add_argument('--no-' + name, **_argkw_false)
            else:
                parser.add_argument('--' + name, **_argkw)

        mode = 1

        alias_registry = []
        for key, value in self._data.items():
            # key: str
            # value: Any | Value
            argkw = {}
            argkw['help'] = ''
            positional = None
            isflag = False
            if key in _metadata:
                # Use the metadata in the Value class to enhance argparse
                _value = _metadata[key]
                argkw.update(_value.parsekw)
                value = _value.value
                isflag = _value.isflag
                positional = _value.position
            else:
                _value = value if isinstance(value, Value) else None

            if not argkw['help']:
                argkw['help'] = '<undocumented>'

            argkw['default'] = value
            argkw['action'] = ParseAction

            name = key
            _add_arg(parser, name, key, argkw, positional, isflag, isalias=False)

            if _value is not None:
                if _value.alias:
                    alts = _value.alias
                    alts = alts if ub.iterable(alts) else [alts]
                    for alias in alts:
                        tup = (alias, key, argkw)
                        alias_registry.append(tup)
                        if mode == 0:
                            name = alias
                            _add_arg(parser, name, key, argkw, positional, isflag, isalias=True)

        if mode == 1:
            for tup in alias_registry:
                (alias, key, argkw) = tup
                name = alias
                dest = key
                _add_arg(parser, name, dest, argkw, positional, isflag, isalias=True)

        if special_options:
            parser.add_argument('--config', default=None, help=ub.codeblock(
                '''
                special scriptconfig option that accepts the path to an on-disk
                configuration file, and loads that into this {!r} object.
                ''').format(self.__class__.__name__))

            parser.add_argument('--dump', default=None, help=ub.codeblock(
                '''
                If specified, dump this config to disk.
                ''').format(self.__class__.__name__))

            parser.add_argument('--dumps', action='store_true', help=ub.codeblock(
                '''
                If specified, dump this config to stdout
                ''').format(self.__class__.__name__))

        return parser
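
A note on the positional-ordering trick above: ub.argsort(_positions) yields
the key names ordered by their position values, and the ub.oset union appends
the remaining keys in their default order. A toy sketch (hypothetical keys):

    import ubelt as ub

    _positions = {'path2': 2, 'path1': 1}               # key -> CLI position
    _default = {'path1': None, 'path2': None, 'dry': False}
    _keyorder = ub.oset(ub.argsort(_positions))
    _keyorder |= (ub.oset(_default) - _keyorder)
    assert list(_keyorder) == ['path1', 'path2', 'dry']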
Example #29
def benchmark_hash_data():
    """
    CommandLine:
        python ~/code/ubelt/dev/bench_hash.py --convert=True --show
        python ~/code/ubelt/dev/bench_hash.py --convert=False --show
    """
    import ubelt as ub
    #ITEM = 'JUST A STRING' * 100
    ITEM = [0, 1, 'a', 'b', ['JUST A STRING'] * 4]
    HASHERS = ['sha1', 'sha512', 'xxh32', 'xxh64', 'blake3']
    scales = list(range(5, 13))
    results = ub.AutoDict()
    # Using json is faster, or at least as fast, in most cases.
    # xxhash is also significantly faster than sha512.
    # lowercase comparison so both '--convert=True' and '--convert=true' work
    convert = ub.argval('--convert', default='True').lower() == 'true'
    print('convert = {!r}'.format(convert))
    ti = ub.Timerit(9, bestof=3, verbose=1, unit='ms')
    for s in ub.ProgIter(scales, desc='benchmark', verbose=3):
        N = 2**s
        print(' --- s={s}, N={N} --- '.format(s=s, N=N))
        data = [ITEM] * N
        for hasher in HASHERS:
            for timer in ti.reset(hasher):
                ub.hash_data(data, hasher=hasher, convert=convert)
            results[hasher].update({N: ti.mean()})
        col = {h: results[h][N] for h in HASHERS}
        sortx = ub.argsort(col)
        ranking = ub.dict_subset(col, sortx)
        print('walltime: ' + ub.repr2(ranking, precision=9, nl=0))
        best = next(iter(ranking))
        #pairs = list(ub.iter_window( 2))
        pairs = [(k, best) for k in ranking]
        ratios = [ranking[k1] / ranking[k2] for k1, k2 in pairs]
        nicekeys = ['{}/{}'.format(k1, k2) for k1, k2 in pairs]
        relratios = ub.odict(zip(nicekeys, ratios))
        print('speedup: ' + ub.repr2(relratios, precision=4, nl=0))
    # xdoc +REQUIRES(--show)
    # import pytest
    # pytest.skip()
    import pandas as pd
    df = pd.DataFrame.from_dict(results)
    df.columns.name = 'hasher'
    df.index.name = 'N'
    ratios = df.copy().drop(columns=df.columns)
    for k1, k2 in [('sha512', 'xxh32'), ('sha1', 'xxh32'), ('xxh64', 'xxh32')]:
        ratios['{}/{}'.format(k1, k2)] = df[k1] / df[k2]
    print()
    print('Seconds per iteration')
    print(df.to_string(float_format='%.9f'))
    print()
    print('Ratios of seconds')
    print(ratios.to_string(float_format='%.2f'))
    print()
    print('Average Ratio (over all N)')
    print('convert = {!r}'.format(convert))
    print(ratios.mean().sort_values())
    if ub.argflag('--show'):
        import kwplot
        kwplot.autompl()
        xdata = sorted(ub.peek(results.values()).keys())
        ydata = ub.map_vals(lambda d: [d[x] for x in xdata], results)
        kwplot.multi_plot(xdata,
                          ydata,
                          xlabel='N',
                          ylabel='seconds',
                          title='convert = {}'.format(convert))
        kwplot.show_if_requested()
Example #30
def freq_group(items, groupids):
    groups = ub.group_items(items, groupids)
    hist = ub.map_vals(len, groups)
    for k in ub.argsort(hist):
        yield groups[k]
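
A usage sketch of freq_group (made-up items): groups come back smallest-first
because ub.argsort orders the group keys by their counts:

    import ubelt as ub

    items    = ['a1', 'a2', 'b1', 'a3', 'c1', 'b2']
    groupids = ['a',  'a',  'b',  'a',  'c',  'b']
    assert list(freq_group(items, groupids)) == [['c1'], ['b1', 'b2'], ['a1', 'a2', 'a3']]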