Example #1
 def __call__(self, *args, **kw):
     # newbase = base | (kw & options)  # imagine all the syntax
     if self._magic:
         kw = self._magic(kw)
     newbase = ub.dict_union(self._base, ub.dict_isect(kw, self._options))
     new = Element(newbase, self._options, self._magic)
     return new
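
The comment above reads the update as set algebra over dict keys: newbase = base | (kw & options). A minimal sketch of how ub.dict_isect and ub.dict_union realize that idea (the dicts here are made up for illustration):

import ubelt as ub
base = {'color': 'red', 'size': 1}            # hypothetical base settings
options = {'color': None, 'thickness': None}  # hypothetical recognized option keys
kw = {'color': 'blue', 'junk': 42}            # caller kwargs; 'junk' is not an option
overrides = ub.dict_isect(kw, options)        # kw & options -> {'color': 'blue'}
newbase = ub.dict_union(base, overrides)      # base | overrides; later dicts win
assert newbase == {'color': 'blue', 'size': 1}
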
Example #2
    def binarize_peritem(cfsn_vecs, negative_classes=None):
        """
        Creates a binary representation useful for measuring the performance of
        detectors. It is assumed that scores of "positive" classes should be
        high and "negative" classes should be low.

        Args:
            negative_classes (List[str | int]): list of negative class names or
                idxs, by default chooses any class with a true class index of
                -1. These classes should ideally have low scores.

        Example:
            >>> # xdoctest: +REQUIRES(module:ndsampler)
            >>> from netharn.metrics import DetectionMetrics
            >>> dmet = DetectionMetrics.demo(
            >>>     nimgs=10, nboxes=(0, 10), n_fp=(0, 1), nclasses=3)
            >>> cfsn_vecs = dmet.confusion_vectors()
            >>> class_idxs = list(dmet.classes.node_to_idx.values())
            >>> binvecs = cfsn_vecs.binarize_peritem()
        """
        import kwarray
        # import warnings
        # warnings.warn('binarize_peritem DOES NOT PRODUCE CORRECT RESULTS')

        negative_cidxs = {-1}
        if negative_classes is not None:

            @ub.memoize
            def _lower_classes():
                if cfsn_vecs.classes is None:
                    raise Exception(
                        'classes must be known if negative_classes are strings'
                    )
                return [c.lower() for c in cfsn_vecs.classes]

            for c in negative_classes:
                import six
                if isinstance(c, six.string_types):
                    classes = _lower_classes()
                    try:
                        cidx = classes.index(c)
                    except Exception:
                        continue
                else:
                    cidx = int(c)
                negative_cidxs.add(cidx)

        is_false = kwarray.isect_flags(cfsn_vecs.data['true'], negative_cidxs)

        _data = {
            'is_true': ~is_false,
            'pred_score': cfsn_vecs.data['score'],
        }
        # Carry over auxiliary columns from the original confusion vectors
        extra = ub.dict_isect(cfsn_vecs.data._data, ['txs', 'pxs', 'gid', 'weight'])
        _data.update(extra)
        bin_data = kwarray.DataFrameArray(_data)
        binvecs = BinaryConfusionVectors(bin_data)
        return binvecs
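
A small sketch of the masking step above. It assumes kwarray.isect_flags(arr, other) returns a boolean mask marking which entries of arr are members of other; here the same idea is emulated with np.isin for illustration:

import numpy as np
true_cidxs = np.array([0, -1, 2, -1, 1])   # hypothetical true class indexes
negative_cidxs = {-1}                      # classes treated as "negative"
is_false = np.isin(true_cidxs, list(negative_cidxs))
is_true = ~is_false
assert is_true.tolist() == [True, False, True, False, True]
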
Example #3
def bench_dict_isect():
    import ubelt as ub

    def random_dict(n):
        import random
        keys = set(random.randint(0, n) for _ in range(n))
        return {k: k for k in keys}

    d1 = random_dict(1000)
    d2 = random_dict(1000)

    import xdev
    xdev.profile_now(ub.dict_isect)(d1, d2)
    xdev.profile_now(dict_isect_variant0)(d1, d2)
    xdev.profile_now(dict_isect_variant1)(d1, d2)
    xdev.profile_now(dict_isect_variant2)(d1, d2)
    xdev.profile_now(dict_isect_variant3)(d1, d2)

    import timerit
    ti = timerit.Timerit(100, bestof=10, verbose=2)
    for timer in ti.reset('current'):
        with timer:
            ub.dict_isect(d1, d2)

    for timer in ti.reset('inline'):
        with timer:
            {k: v for k, v in d1.items() if k in d2}

    for timer in ti.reset('dict_isect_variant0'):
        with timer:
            dict_isect_variant0(d1, d2)

    for timer in ti.reset('dict_isect_variant1'):
        with timer:
            dict_isect_variant1(d1, d2)

    for timer in ti.reset('dict_isect_variant2'):
        with timer:
            dict_isect_variant2(d1, d2)

    for timer in ti.reset('dict_isect_variant3'):
        with timer:
            dict_isect_variant3(d1, d2)

    print('ti.rankings = {}'.format(ub.repr2(ti.rankings['min'], precision=8, align=':', nl=1, sort=0)))
Example #4
    def coerce(cls, data=None, **kwargs):
        """
        Attempt to coerce the data into an affine object

        Args:
            data : some data we attempt to coerce to an Affine matrix
            **kwargs : some data we attempt to coerce to an Affine matrix,
                mutually exclusive with `data`.

        Returns:
            Affine

        Example:
            >>> import kwimage
            >>> kwimage.Affine.coerce({'type': 'affine', 'matrix': [[1, 0, 0], [0, 1, 0]]})
            >>> kwimage.Affine.coerce({'scale': 2})
            >>> kwimage.Affine.coerce({'offset': 3})
            >>> kwimage.Affine.coerce(np.eye(3))
            >>> kwimage.Affine.coerce(None)
            >>> kwimage.Affine.coerce(skimage.transform.AffineTransform(scale=30))
        """
        if data is None and not kwargs:
            return cls(matrix=None)
        if data is None:
            data = kwargs
        if isinstance(data, np.ndarray):
            self = cls(matrix=data)
        elif isinstance(data, cls):
            self = data
        elif isinstance(data, skimage.transform.AffineTransform):
            self = cls(matrix=data.params)
        elif data.__class__.__name__ == cls.__name__:
            self = data
        elif isinstance(data, dict):
            keys = set(data.keys())
            known_params = {'scale', 'shear', 'offset', 'theta', 'type'}
            params = ub.dict_isect(data, known_params)
            if 'matrix' in keys:
                self = cls(matrix=np.array(data['matrix']))
            elif len(known_params & keys):
                params.pop('type', None)
                self = cls.affine(**params)
            else:
                raise KeyError(', '.join(list(data.keys())))
        else:
            raise TypeError(type(data))
        return self
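
A hedged sketch of the dictionary branch above: ub.dict_isect keeps only the recognized affine parameters, so unrelated keys never reach the constructor (the input dict is invented for illustration):

import ubelt as ub
known_params = {'scale', 'shear', 'offset', 'theta', 'type'}
data = {'type': 'affine', 'scale': 2, 'comment': 'not a transform param'}
params = ub.dict_isect(data, known_params)  # {'type': 'affine', 'scale': 2}
params.pop('type', None)                    # 'type' is metadata, not a kwarg
assert params == {'scale': 2}
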
Example #5
    def coarsen(cfsn_vecs, cxs):
        """
        Creates a coarsened set of vectors
        """
        import ndsampler
        import kwarray
        assert cfsn_vecs.probs is not None, 'need probs'
        if not isinstance(cfsn_vecs.classes, ndsampler.CategoryTree):
            raise TypeError('classes must be a ndsampler.CategoryTree')

        descendent_map = cfsn_vecs.classes.idx_to_descendants_idxs(
            include_self=True)
        valid_descendant_mapping = ub.dict_isect(descendent_map, cxs)
        # mapping from current category indexes to the new coarse ones
        # Anything without an explicit key will be mapped to background

        bg_idx = cfsn_vecs.classes.index('background')
        mapping = {
            v: k
            for k, vs in valid_descendant_mapping.items() for v in vs
        }
        new_true = np.array(
            [mapping.get(x, bg_idx) for x in cfsn_vecs.data['true']])
        new_pred = np.array(
            [mapping.get(x, bg_idx) for x in cfsn_vecs.data['pred']])

        new_score = np.array([p[x] for x, p in zip(new_pred, cfsn_vecs.probs)])

        new_y_df = {
            'true': new_true,
            'pred': new_pred,
            'score': new_score,
            'weight': cfsn_vecs.data['weight'],
            'txs': cfsn_vecs.data['txs'],
            'pxs': cfsn_vecs.data['pxs'],
            'gid': cfsn_vecs.data['gid'],
        }
        new_y_df = kwarray.DataFrameArray(new_y_df)
        coarse_cfsn_vecs = ConfusionVectors(new_y_df, cfsn_vecs.classes,
                                            cfsn_vecs.probs)
        return coarse_cfsn_vecs
Example #6
    def __init__(self, num, dims=2, rng=None, **kwargs):
        self.rng = kwarray.ensure_rng(rng)
        self.num = num
        self.dims = dims

        # self.config = {
        #     'perception_thresh': 0.08,
        #     'max_speed': 0.005,
        #     'max_force': 0.0003,
        # }
        self.config = {
            'perception_thresh': 0.2,
            'max_speed': 0.01,
            'max_force': 0.001,
            'damping': 0.99,
        }
        self.config.update(ub.dict_isect(kwargs, self.config))

        self.pos = None
        self.vel = None
        self.acc = None
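
The self.config.update(ub.dict_isect(kwargs, self.config)) line above is a common pattern: only keyword arguments whose names already exist in the default config are applied, and everything else is silently ignored. A minimal sketch with made-up values:

import ubelt as ub
config = {'max_speed': 0.01, 'damping': 0.99}          # defaults
kwargs = {'max_speed': 0.02, 'unrelated': 'ignored'}   # caller input
config.update(ub.dict_isect(kwargs, config))
assert config == {'max_speed': 0.02, 'damping': 0.99}
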
Example #7
 def populate_from(self, dset):
     from sqlalchemy import inspect
     session = self.session
     inspector = inspect(self.engine)
     for key in self.engine.table_names():
         colinfo = inspector.get_columns(key)
         colnames = {c['name'] for c in colinfo}
         # TODO: is there a better way to grab this information?
         cls = TBLNAME_TO_CLASS[key]
         for item in dset.dataset.get(key, []):
             item_ = ub.dict_isect(item, colnames)
             # Everything else is a foreign key
             item['foreign'] = ub.dict_diff(item, item_)
             if key == 'annotations':
                 # Need custom code to translate list-based properties
                 x, y, w, h = item['bbox']
                 item_['bbox_x'] = x
                 item_['bbox_y'] = y
                 item_['bbox_w'] = w
                 item_['bbox_h'] = h
             row = cls(**item_)
             session.add(row)
     session.commit()
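
The row-building step above splits each item into the columns the SQL table declares and everything else. A hedged sketch of that split using ub.dict_isect and ub.dict_diff (the column names and item are invented for illustration):

import ubelt as ub
colnames = {'id', 'image_id', 'bbox_x', 'bbox_y'}          # hypothetical table columns
item = {'id': 1, 'image_id': 3, 'segmentation': [[0, 0]]}  # hypothetical COCO item
item_ = ub.dict_isect(item, colnames)   # keys that map directly to columns
foreign = ub.dict_diff(item, item_)     # leftover keys, kept separately
assert item_ == {'id': 1, 'image_id': 3}
assert foreign == {'segmentation': [[0, 0]]}
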
Example #8
 def demo(cfsn_vecs):
     """
     Example:
         >>> # xdoctest: +REQUIRES(module:ndsampler)
         >>> cfsn_vecs = ConfusionVectors.demo()
         >>> print('cfsn_vecs = {!r}'.format(cfsn_vecs))
         >>> cx_to_binvecs = cfsn_vecs.binarize_ovr()
         >>> print('cx_to_binvecs = {!r}'.format(cx_to_binvecs))
     """
     from netharn.metrics import DetectionMetrics
     dmet = DetectionMetrics.demo(nimgs=10,
                                  nboxes=(0, 10),
                                  n_fp=(0, 1),
                                  nclasses=3)
     # print('dmet = {!r}'.format(dmet))
     cfsn_vecs = dmet.confusion_vectors()
     cfsn_vecs.data._data = ub.dict_isect(cfsn_vecs.data._data, [
         'true',
         'pred',
         'score',
         'weight',
     ])
     return cfsn_vecs
Example #9
    def coerce(config={}, **kw):
        """
        Accepts keywords:
            optimizer / optim :
                can be sgd, adam, adamw, rmsprop

            learning_rate / lr :
                a float

            weight_decay / decay :
                a float

            momentum:
                a float, only used if the optimizer accepts it

            params:
                This is a SPECIAL keyword that is handled differently.  It is
                interpreted by `netharn.hyper.Hyperparams.make_optimizer`.

                In the simplest case you can pass "params" as a list of torch
                parameter objects or a list of dictionaries containing param
                groups and special group options (just as you would when
                constructing an optimizer from scratch). We don't recommend
                this while using netharn unless you know what you are doing
                (Note that params will correctly change device if the model is
                mounted).

                In the case where you do not want to group parameters with
                different options, it is best practice to simply not specify
                params.

                In the case where you want to group parameters set params to
                either a List[Dict] or a Dict[str, Dict].

                The items / values of this collection should be a dictionary.
                The keys / values of this dictionary should be the per-group
                optimizer options. Additionally, there should be a key "params"
                (note this is a nested per-group params not to be confused with
                the top-level "params").

                Each per-group "params" should be either (1) a list of
                parameter names (preferred), (2) a string that specifies a
                regular expression (matching layer names will be included in
                this group), or (3) a list of parameter objects.

                For example, the top-level params might look like:

                    params={
                        'head': {'lr': 0.003, 'params': '.*head.*'},
                        'backbone': {'lr': 0.001, 'params': '.*backbone.*'},
                        'preproc': {'lr': 0.0, 'params': [
                            'model.conv1', 'model.norm1', 'model.relu1']}
                    }

                Note that head and backbone specify membership via regular
                expression whereas preproc explicitly specifies a list of
                parameter names.

        Notes:
            pip install torch-optimizer

        Returns:
            Tuple[type, dict]: a type and arguments to construct it

        References:
            https://datascience.stackexchange.com/questions/26792/difference-between-rmsprop-with-momentum-and-adam-optimizers
            https://github.com/jettify/pytorch-optimizer

        CommandLine:
            xdoctest -m /home/joncrall/code/netharn/netharn/api.py Optimizer.coerce

        Example:
            >>> config = {'optimizer': 'sgd', 'params': [
            >>>     {'lr': 3e-3, 'params': '.*head.*'},
            >>>     {'lr': 1e-3, 'params': '.*backbone.*'},
            >>> ]}
            >>> optim_ = Optimizer.coerce(config)

            >>> # xdoctest: +REQUIRES(module:torch_optimizer)
            >>> from netharn.api import *  # NOQA
            >>> config = {'optimizer': 'DiffGrad'}
            >>> optim_ = Optimizer.coerce(config, lr=1e-5)
            >>> print('optim_ = {!r}'.format(optim_))
            >>> assert optim_[1]['lr'] == 1e-5

            >>> config = {'optimizer': 'Yogi'}
            >>> optim_ = Optimizer.coerce(config)
            >>> print('optim_ = {!r}'.format(optim_))

            >>> from netharn.api import *  # NOQA
            >>> Optimizer.coerce({'optimizer': 'ASGD'})

        TODO:
            - [ ] https://pytorch.org/blog/stochastic-weight-averaging-in-pytorch/
        """
        import netharn as nh
        config = _update_defaults(config, kw)
        key = config.get('optimizer', config.get('optim', 'sgd')).lower()
        lr = config.get('learning_rate', config.get('lr', 3e-3))
        decay = config.get('weight_decay', config.get('decay', 0))
        momentum = config.get('momentum', 0.9)
        params = config.get('params', None)
        # TODO: allow for "discriminative fine-tuning"
        if key == 'sgd':
            cls = torch.optim.SGD
            kw = {
                'lr': lr,
                'weight_decay': decay,
                'momentum': momentum,
                'nesterov': True,
            }
        elif key == 'adam':
            cls = torch.optim.Adam
            kw = {
                'lr': lr,
                'weight_decay': decay,
                # 'betas': (0.9, 0.999),
                # 'eps': 1e-8,
                # 'amsgrad': False
            }
        elif key == 'adamw':
            if _TORCH_IS_GE_1_2_0:
                from torch.optim import AdamW
                cls = AdamW
            else:
                cls = nh.optimizers.AdamW
            kw = {
                'lr': lr,
                # 'betas': (0.9, 0.999),
                # 'eps': 1e-8,
                # 'amsgrad': False
            }
        elif key == 'rmsprop':
            cls = torch.optim.RMSprop
            kw = {
                'lr': lr,
                'weight_decay': decay,
                'momentum': momentum,
                'alpha': 0.9,
            }
        else:
            from netharn.util import util_inspect
            _lut = {}

            optim_modules = [
                torch.optim,
            ]

            try:
                # Allow coerce to use torch_optimizer package if available
                import torch_optimizer
            except Exception:
                torch_optimizer = None
            else:
                optim_modules.append(torch_optimizer)
                _lut.update({
                    k.lower(): c.__name__
                    for k, c in torch_optimizer._NAME_OPTIM_MAP.items()})

            _lut.update({
                k.lower(): k for k in dir(torch.optim)
                if not k.startswith('_')})

            key = _lut[key]

            cls = None
            for module in optim_modules:
                cls = getattr(module, key, None)
                if cls is not None:
                    defaultkw = util_inspect.default_kwargs(cls)
                    kw = defaultkw.copy()
                    kw.update(ub.dict_isect(config, kw))
                    break

        if cls is None:
            raise KeyError(key)

        kw['params'] = params
        optim_ = (cls, kw)
        return optim_
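
In the fallback branch above, kw.update(ub.dict_isect(config, kw)) lets the user config override only the keyword arguments that the discovered optimizer class actually declares. A small sketch of that idea with invented defaults:

import ubelt as ub
defaultkw = {'lr': 1e-3, 'betas': (0.9, 0.999)}   # hypothetical optimizer defaults
config = {'lr': 1e-5, 'optimizer': 'DiffGrad'}    # user config; extra keys are dropped
kw = defaultkw.copy()
kw.update(ub.dict_isect(config, kw))
assert kw == {'lr': 1e-5, 'betas': (0.9, 0.999)}
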
Example #10
    def binarize_ovr(cfsn_vecs,
                     mode=1,
                     keyby='name',
                     ignore_classes={'ignore'}):
        """
        Transforms cfsn_vecs into one-vs-rest BinaryConfusionVectors for each category.

        Args:
            mode (int, default=1): 0 for hierarchy aware or 1 for VOC-like.
                MODE 0 IS PROBABLY BROKEN
            keyby (int | str) : can be cx or name
            ignore_classes (Set[str]): category names to ignore

        Returns:
            OneVsRestConfusionVectors: which behaves like
                Dict[int, BinaryConfusionVectors]: cx_to_binvecs

        Example:
            >>> # xdoctest: +REQUIRES(module:ndsampler)
            >>> cfsn_vecs = ConfusionVectors.demo()
            >>> print('cfsn_vecs = {!r}'.format(cfsn_vecs))
            >>> catname_to_binvecs = cfsn_vecs.binarize_ovr(keyby='name')
            >>> print('catname_to_binvecs = {!r}'.format(catname_to_binvecs))

        Notes:
            Suppose we want to measure how well we can classify beagles.

            Given a multiclass confusion vector, we need to carefully select a
            subset. We ignore any truth that is coarser than our current label.
            We also ignore any background predictions on irrelevant classes.

            y_true     | y_pred     | score
            -------------------------------
            dog        | dog          <- ignore coarser truths
            dog        | cat          <- ignore coarser truths
            dog        | beagle       <- ignore coarser truths
            cat        | dog
            cat        | cat
            cat        | background   <- ignore failures to predict unrelated classes
            cat        | maine-coon
            beagle     | beagle
            beagle     | dog
            beagle     | background
            beagle     | cat
            Snoopy     | beagle
            Snoopy     | cat
            maine-coon | background    <- ignore failures to predict unrelated classes
            maine-coon | beagle
            maine-coon | cat

            Anything not marked as ignore is counted. We count anything marked
            as beagle or a finer grained class (e.g.  Snoopy) as a positive
            case. All other cases are negative. The scores come from the
            predicted probability of beagle, which must be remembered outside
            the dataframe.
        """
        import kwarray

        classes = cfsn_vecs.classes
        data = cfsn_vecs.data

        if mode == 0:
            if cfsn_vecs.probs is None:
                raise ValueError('cannot binarize in mode=0 without probs')
            pdist = classes.idx_pairwise_distance()

        cx_to_binvecs = {}
        for cx in range(len(classes)):
            if classes[cx] == 'background' or classes[cx] in ignore_classes:
                continue

            if mode == 0:
                import warnings
                warnings.warn(
                    'THIS CALCULATION MIGHT BE WRONG. MANY OTHERS '
                    'IN THIS FILE WERE, AND I HAVE NOT CHECKED THIS ONE YET')

                # Lookup original probability predictions for the class of interest
                new_scores = cfsn_vecs.probs[:, cx]

                # Determine which truth items have compatible classes
                # Note: we ignore any truth-label that is COARSER than the
                # class-of-interest.
                # E.g: how well do we classify Beagle? -> we should ignore any truth
                # label marked as Dog because it may or may not be a Beagle?
                with warnings.catch_warnings():
                    warnings.filterwarnings('ignore', category=RuntimeWarning)
                    dist = pdist[cx]
                    coarser_cxs = np.where(dist < 0)[0]
                    finer_eq_cxs = np.where(dist >= 0)[0]

                is_finer_eq = kwarray.isect_flags(data['true'], finer_eq_cxs)
                is_coarser = kwarray.isect_flags(data['true'], coarser_cxs)

                # Construct a binary data frame to pass to sklearn functions.
                bin_data = {
                    'is_true': is_finer_eq.astype(np.uint8),
                    'pred_score': new_scores,
                    'weight': data['weight'] * (np.float32(1.0) - is_coarser),
                    'txs': cfsn_vecs.data['txs'],
                    'pxs': cfsn_vecs.data['pxs'],
                    'gid': cfsn_vecs.data['gid'],
                }
                bin_data = kwarray.DataFrameArray(bin_data)

                # Ignore cases where we failed to predict an irrelevant class
                flags = (data['pred'] == -1) & (bin_data['is_true'] == 0)
                bin_data['weight'][flags] = 0
                # bin_data = bin_data.compress(~flags)
                bin_cfsn = BinaryConfusionVectors(bin_data, cx, classes)

            elif mode == 1:
                # More VOC-like, not hierarchy friendly

                if cfsn_vecs.probs is not None:
                    # We know the actual score predicted for this category in
                    # this case.
                    is_true = cfsn_vecs.data['true'] == cx
                    pred_score = cfsn_vecs.probs[:, cx]
                else:
                    import warnings
                    warnings.warn(
                        'Binarize ovr is only approximate if not all probabilities are known'
                    )
                    # If we don't know the probabilities for non-predicted
                    # categories then we have to guess.
                    is_true = cfsn_vecs.data['true'] == cx

                    # do we know the actual predicted score for this category?
                    score_is_unknown = data['pred'] != cx
                    pred_score = data['score'].copy()

                    # These scores were for a different class, so assume
                    # other classes were predicted with a uniform prior
                    approx_score = (1 - pred_score[score_is_unknown]) / (
                        len(classes) - 1)

                    # Except in the case where predicted class is -1. In this
                    # case no prediction was actually made (above a threshold)
                    # so the assumed score should be significantly lower; we
                    # conservatively choose zero.
                    unknown_preds = data['pred'][score_is_unknown]
                    approx_score[unknown_preds == -1] = 0

                    pred_score[score_is_unknown] = approx_score

                bin_data = {
                    # is_true denotes if the true class of the item is the
                    # category of interest.
                    'is_true': is_true,
                    'pred_score': pred_score,
                }

                extra = ub.dict_isect(data._data,
                                      ['txs', 'pxs', 'gid', 'weight'])
                bin_data.update(extra)

                bin_data = kwarray.DataFrameArray(bin_data)
                bin_cfsn = BinaryConfusionVectors(bin_data, cx, classes)
            cx_to_binvecs[cx] = bin_cfsn

        if keyby == 'cx':
            cx_to_binvecs = cx_to_binvecs
        elif keyby == 'name':
            cx_to_binvecs = ub.map_keys(cfsn_vecs.classes, cx_to_binvecs)
        else:
            raise KeyError(keyby)

        ovr_cfns = OneVsRestConfusionVectors(cx_to_binvecs, cfsn_vecs.classes)
        return ovr_cfns
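
A worked numeric sketch of the uniform-prior fallback in mode=1 above: when the class of interest was not the predicted class, the leftover probability mass is assumed to be spread evenly over the other classes, and items with no prediction at all (pred == -1) get a score of zero (the numbers are illustrative):

num_classes = 4
pred_score = 0.7  # score of whatever class *was* predicted
approx_score = (1 - pred_score) / (num_classes - 1)  # uniform prior over the rest
assert abs(approx_score - 0.1) < 1e-9
# if no prediction was made at all (pred == -1), conservatively use 0.0
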
Example #11
def benchmark_dict_diff_impl():
    import ubelt as ub
    import pandas as pd
    import timerit
    import random

    def method_diffkeys(*args):
        first_dict = args[0]
        keys = set(first_dict)
        keys.difference_update(*map(set, args[1:]))
        new0 = dict((k, first_dict[k]) for k in keys)
        return new0

    def method_diffkeys_list(*args):
        first_dict = args[0]
        remove_keys = set.union(*map(set, args[1:]))
        keep_keys = [k for k in first_dict.keys() if k not in remove_keys]
        new = dict((k, first_dict[k]) for k in keep_keys)
        return new

    def method_diffkeys_oset(*args):
        first_dict = args[0]
        keys = ub.oset(first_dict)
        keys.difference_update(*map(set, args[1:]))
        new0 = dict((k, first_dict[k]) for k in keys)
        return new0

    def method_ifkeys_setcomp(*args):
        first_dict = args[0]
        remove_keys = {k for ks in args[1:] for k in ks}
        new1 = dict((k, v) for k, v in first_dict.items() if k not in remove_keys)
        return new1

    def method_ifkeys_setunion(*args):
        first_dict = args[0]
        remove_keys = set.union(*map(set, args[1:]))
        new2 = dict((k, v) for k, v in first_dict.items() if k not in remove_keys)
        return new2

    def method_ifkeys_getitem(*args):
        first_dict = args[0]
        remove_keys = set.union(*map(set, args[1:]))
        new3 = dict((k, first_dict[k]) for k in first_dict.keys() if k not in remove_keys)
        return new3

    def method_ifkeys_dictcomp(*args):
        # Cannot use until 3.6 is dropped (it is faster)
        first_dict = args[0]
        remove_keys = set.union(*map(set, args[1:]))
        new4 = {k: v for k, v in first_dict.items() if k not in remove_keys}
        return new4

    def method_ifkeys_dictcomp_getitem(*args):
        # Cannot use until 3.6 is dropped (it is faster)
        first_dict = args[0]
        remove_keys = set.union(*map(set, args[1:]))
        new4 = {k: first_dict[k] for k in first_dict.keys() if k not in remove_keys}
        return new4

    method_lut = locals()  # can populate this some other way

    def make_data(num_items, num_other, remove_fraction, keytype):
        if keytype == 'str':
            keytype = str
        if keytype == 'int':
            keytype = int
        first_keys = [random.randint(0, 1000) for _ in range(num_items)]
        k = int(remove_fraction * len(first_keys))
        remove_sets = [list(ub.unique(random.choices(first_keys, k=k) + [random.randint(0, 1000) for _ in range(num_items)])) for _ in range(num_other)]
        first_dict = {keytype(k): k for k in first_keys}
        args = [first_dict] + [{keytype(k): k for k in ks} for ks in remove_sets]
        return args

    ti = timerit.Timerit(200, bestof=1, verbose=2)

    basis = {
        'method': [
            # Can't use because unordered
            # 'method_diffkeys',

            # Can't use because of Python 3.6
            'method_ifkeys_dictcomp',
            'method_ifkeys_dictcomp_getitem',

            'method_ifkeys_setunion',
            'method_ifkeys_getitem',
            'method_diffkeys_list',

            # Probably not good
            # 'method_ifkeys_setcomp',
            # 'method_diffkeys_oset',
        ],
        'num_items': [10, 100, 1000],
        'num_other': [1, 3, 5],
        # 'num_other': [1],
        'remove_fraction': [0, 0.2, 0.5, 0.7, 1.0],
        # 'remove_fraction': [0.2, 0.8],
        'keytype': ['str', 'int'],
        # 'keytype': ['str'],
        # 'param_name': [param values],
    }
    xlabel = 'num_items'
    kw_labels = ['num_items', 'num_other', 'remove_fraction', 'keytype']
    group_labels = {
        'style': ['num_other', 'keytype'],
        'size': ['remove_fraction'],
    }
    group_labels['hue'] = list(
        (ub.oset(basis) - {xlabel}) - set.union(*map(set, group_labels.values())))
    grid_iter = list(ub.named_product(basis))

    # For each variation of your experiment, create a row.
    rows = []
    for params in grid_iter:
        group_keys = {}
        for gname, labels in group_labels.items():
            group_keys[gname + '_key'] = ub.repr2(
                ub.dict_isect(params, labels), compact=1, si=1)
        key = ub.repr2(params, compact=1, si=1)
        kwargs = ub.dict_isect(params.copy(),  kw_labels)
        args = make_data(**kwargs)
        method = method_lut[params['method']]
        # Timerit will run some user-specified number of loops.
        # and compute time stats with similar methodology to timeit
        for timer in ti.reset(key):
            # Put any setup logic you don't want to time here.
            # ...
            with timer:
                # Put the logic you want to time here
                method(*args)
        row = {
            'mean': ti.mean(),
            'min': ti.min(),
            'key': key,
            **group_keys,
            **params,
        }
        rows.append(row)

    # The rows define a long-form pandas data array.
    # Data in long-form makes it very easy to use seaborn.
    data = pd.DataFrame(rows)
    data = data.sort_values('min')
    print(data)

    # for each parameter setting, group all methods with that used those exact
    # comparable params. Then rank how good each method did.  That will be a
    # preference profile. We will give that preference profile a weight (e.g.
    # based on the fastest method in the bunch) and then aggregate them with
    # some voting method.

    USE_OPENSKILL = 1
    if USE_OPENSKILL:
        # Let's try a real ranking method
        # https://github.com/OpenDebates/openskill.py
        import openskill
        method_ratings = {m: openskill.Rating() for m in basis['method']}

    weighted_rankings = ub.ddict(lambda: ub.ddict(float))
    for params, variants in data.groupby(['num_other', 'keytype', 'remove_fraction', 'num_items']):
        variants = variants.sort_values('mean')
        ranking = variants['method'].reset_index(drop=True)

        if USE_OPENSKILL:
            # The idea is that each setting of parameters is a game, and each
            # "method" is a player. We rank the players by which is fastest,
            # and update their ranking according to the Weng-Lin Bayes ranking
            # model. This does not account for the fact that some "games" (i.e.
            # parameter settings) are more important than others, but it should
            # be fairly robust on average.
            old_ratings = [[r] for r in ub.take(method_ratings, ranking)]
            new_values = openskill.rate(old_ratings)  # Not inplace
            new_ratings = [openskill.Rating(*new[0]) for new in new_values]
            method_ratings.update(ub.dzip(ranking, new_ratings))

        # Choose a ranking weight scheme
        weight = variants['mean'].min()
        # weight = 1
        for rank, method in enumerate(ranking):
            weighted_rankings[method][rank] += weight
            weighted_rankings[method]['total'] += weight

    # Probably a more robust voting method to do this
    weight_rank_rows = []
    for method_name, ranks in weighted_rankings.items():
        weights = ub.dict_diff(ranks, ['total'])
        p_rank = ub.map_vals(lambda w: w / ranks['total'], weights)

        for rank, w in p_rank.items():
            weight_rank_rows.append({'rank': rank, 'weight': w, 'name': method_name})
    weight_rank_df = pd.DataFrame(weight_rank_rows)
    piv = weight_rank_df.pivot(['name'], ['rank'], ['weight'])
    print(piv)

    if USE_OPENSKILL:
        from openskill import predict_win
        win_prob = predict_win([[r] for r in method_ratings.values()])
        skill_agg = pd.Series(ub.dzip(method_ratings.keys(), win_prob)).sort_values(ascending=False)
        print('skill_agg =\n{}'.format(skill_agg))

    aggregated = (piv * piv.columns.levels[1].values).sum(axis=1).sort_values()
    print('weight aggregated =\n{}'.format(aggregated))

    plot = True
    if plot:
        # import seaborn as sns
        # kwplot autosns works well for IPython and script execution.
        # not sure about notebooks.
        import kwplot
        sns = kwplot.autosns()

        plotkw = {}
        for gname, labels in group_labels.items():
            if labels:
                plotkw[gname] = gname + '_key'

        # Your variables may change
        ax = kwplot.figure(fnum=1, doclf=True).gca()
        sns.lineplot(data=data, x=xlabel, y='min', marker='o', ax=ax, **plotkw)
        ax.set_title('Benchmark')
        ax.set_xlabel('A better x-variable description')
        ax.set_ylabel('A better y-variable description')
Example #12
def benchmark_mul_vs_pow():
    import ubelt as ub
    import pandas as pd
    import timerit

    from functools import reduce
    import operator as op
    import itertools as it

    def method_pow_via_mul_raw(n):
        """ Construct a function that does multiplication of a value n times """
        return eval('lambda v: ' + ' * '.join(['v'] * n))

    def method_pow_via_mul_for(v, n):
        ret = v
        for _ in range(1, n):
            ret = ret * v
        return ret

    def method_pow_via_mul_reduce(v, n):
        """ Alternative way to multiply a value n times """
        return reduce(op.mul, it.repeat(v, n))

    def method_pow_via_pow(v, n):
        return v ** n

    method_lut = locals()  # can populate this some other way

    ti = timerit.Timerit(500000, bestof=1000, verbose=2)

    basis = {
        'method': ['method_pow_via_mul_raw', 'method_pow_via_pow'],
        'n': list(range(1, 20)),
        'v': ['random-int', 'random-float'],
        # 'param_name': [param values],
    }
    xlabel = 'n'
    kw_labels = ['v', 'n']
    group_labels = {
        'style': ['v'],
        'size': [],
    }
    group_labels['hue'] = list(
        (ub.oset(basis) - {xlabel}) - set.union(*map(set, group_labels.values())))
    grid_iter = list(ub.named_product(basis))

    # For each variation of your experiment, create a row.
    rows = []
    for params in grid_iter:
        group_keys = {}
        for gname, labels in group_labels.items():
            group_keys[gname + '_key'] = ub.repr2(
                ub.dict_isect(params, labels), compact=1, si=1)
        key = ub.repr2(params, compact=1, si=1)
        kwargs = ub.dict_isect(params.copy(),  kw_labels)
        method = method_lut[params['method']]
        # Timerit will run some user-specified number of loops.
        # and compute time stats with similar methodology to timeit

        if params['method'] == 'method_pow_via_mul_raw':
            method = method(kwargs.pop('n'))

        for timer in ti.reset(key):
            # Put any setup logic you don't want to time here.
            # ...
            import random
            if kwargs['v'] == 'random':
                kwargs['v'] = random.randint(1, 31000) if random.random() > 0.5 else random.random()
            elif kwargs['v'] == 'random-int':
                kwargs['v'] = random.randint(1, 31000)
            elif kwargs['v'] == 'random-float':
                kwargs['v'] = random.random()
            with timer:
                # Put the logic you want to time here
                method(**kwargs)
        for time in map(min, ub.chunks(ti.times, ti.bestof)):
            row = {
                # 'mean': ti.mean(),
                'time': time,
                'key': key,
                **group_keys,
                **params,
            }
            rows.append(row)

    # The rows define a long-form pandas data array.
    # Data in long-form makes it very easy to use seaborn.
    data = pd.DataFrame(rows)
    # data = data.sort_values('time')
    print(data)

    plot = True
    if plot:
        # import seaborn as sns
        # kwplot autosns works well for IPython and script execution.
        # not sure about notebooks.
        import kwplot
        sns = kwplot.autosns()
        plt = kwplot.autoplt()

        plotkw = {}
        for gname, labels in group_labels.items():
            if labels:
                plotkw[gname] = gname + '_key'

        # Your variables may change
        ax = kwplot.figure(fnum=1, doclf=True).gca()
        sns.lineplot(data=data, x=xlabel, y='time', marker='o', ax=ax, **plotkw)
        ax.set_title('Benchmark')
        ax.set_xlabel('N')
        ax.set_ylabel('Time')
        ax.set_yscale('log')

        plt.show()
Example #13
    def __init__(api):
        api.base = 'https://pogoapi.net/api/v1/'
        api.routes = {
            'pokemon_stats': api.base + 'pokemon_stats.json',
            'current_pokemon_moves': api.base + 'current_pokemon_moves.json',
            'pokemon_evolutions': api.base + 'pokemon_evolutions.json',
            'cp_multiplier': api.base + 'cp_multiplier.json',
            'pokemon_types': api.base + 'pokemon_types.json',
            'charged_moves': api.base + 'charged_moves.json',
            'fast_moves': api.base + 'fast_moves.json',
            'type_effectiveness': api.base + 'type_effectiveness.json',
            'pokemon_powerup_requirements':
            api.base + 'pokemon_powerup_requirements.json',
            'pokemon_candy_to_evolve':
            api.base + 'pokemon_candy_to_evolve.json',
            'pokemon_buddy_distances':
            api.base + 'pokemon_buddy_distances.json',
            'shadow_pokemon': api.base + 'shadow_pokemon.json',
            'pokemon_forms': api.base + 'pokemon_forms.json',
            'pvp_exclusive_pokemon': api.base + 'pvp_exclusive_pokemon.json',
            'galarian_pokemon': api.base + 'galarian_pokemon.json',
            'alolan_pokemon': api.base + 'alolan_pokemon.json',
            'shiny_pokemon': api.base + 'shiny_pokemon.json',
            'mega_pokemon': api.base + 'mega_pokemon.json',
            'baby_pokemon': api.base + 'baby_pokemon.json',
            'nesting_pokemon': api.base + 'nesting_pokemon.json',
            'released_pokemon': api.base + 'released_pokemon.json',
            'pokemon_names': api.base + 'pokemon_names.json',
            'api_hashes': api.base + 'api_hashes.json',
            'pvp_fast_moves': api.base + 'pvp_fast_moves.json',
            'pvp_charged_moves': api.base + 'pvp_charged_moves.json',
        }

        # TODO: determine when to redownload

        api.data = {}
        for key, url in api.routes.items():

            redo = 0
            data_fpath = ub.grabdata(url,
                                     verbose=1,
                                     redo=redo,
                                     expires=24 * 60 * 60)

            with open(data_fpath, 'r') as file:
                data = json.load(file)
            api.data[key] = data

        # Make the API global for now
        pokemon_stats = api.data['pokemon_stats']
        _name_to_stats = ub.group_items(
            pokemon_stats, lambda item: item['pokemon_name'].lower())
        _name_to_stats = dict(_name_to_stats)
        api.name_to_stats = _name_to_stats

        _name_to_moves = ub.group_items(
            api.data['current_pokemon_moves'],
            lambda item: item['pokemon_name'].lower())
        _name_to_moves.default_factory = None
        _name_to_moves = dict(_name_to_moves)

        # base = 'http://pokeapi.co/api/v2/pokemon/'
        api.name_to_moves = _name_to_moves

        evolutions = api.data['pokemon_evolutions']
        _name_to_evolutions = ub.group_items(
            evolutions, lambda item: item['pokemon_name'].lower())
        _name_to_evolutions = dict(_name_to_evolutions)

        for key, form_stats in api.name_to_stats.items():
            if key not in _name_to_evolutions:
                noevos = []
                for s in form_stats:
                    empty = ub.dict_isect(
                        s, {'form', 'pokemon_name', 'pokemon_id'})
                    empty['evolutions'] = []
                    noevos.append(empty)
                _name_to_evolutions[key] = noevos

        _name_to_types = ub.group_items(
            api.data['pokemon_types'],
            lambda item: item['pokemon_name'].lower())
        _name_to_types = dict(_name_to_types)
        api.name_to_type = _name_to_types

        evo_graph = nx.DiGraph()
        for name, form_evo_list in _name_to_evolutions.items():
            for form_evo in form_evo_list:
                u = form_evo['pokemon_name'].lower()
                evo_graph.add_node(u)
                for evo in form_evo['evolutions']:
                    v = evo['pokemon_name'].lower()
                    evo_graph.add_edge(u, v)

        api.name_to_family = {}
        api.name_to_base = {}

        evo_graph.remove_edges_from(nx.selfloop_edges(evo_graph))
        api.evo_graph = evo_graph
        for cc in list(nx.connected_components(api.evo_graph.to_undirected())):
            bases = [n for n in cc if len(evo_graph.pred[n]) == 0]
            base = bases[0]
            for n in cc:
                api.name_to_family[n] = cc
                api.name_to_base[n] = base

        api.name_to_evolutions = _name_to_evolutions

        api.pve_fast_moves = ub.group_items(
            api.data['fast_moves'],
            lambda item: normalize(item['name'].lower()))
        api.pve_fast_moves.default_factory = None

        api.pve_charged_moves = ub.group_items(
            api.data['charged_moves'],
            lambda item: normalize(item['name'].lower()))
        api.pve_charged_moves.default_factory = None

        api.pvp_fast_moves = ub.group_items(
            api.data['pvp_fast_moves'],
            lambda item: normalize(item['name'].lower()))
        api.pvp_fast_moves.default_factory = None

        api.pvp_charged_moves = ub.group_items(
            api.data['pvp_charged_moves'],
            lambda item: normalize(item['name'].lower()))
        api.pvp_charged_moves.default_factory = None

        if 0:
            ub.map_vals(len, api.pve_fast_moves)
            ub.map_vals(len, api.pve_charged_moves)

        api.learnable = {
            # TODO: remove
            'stunfisk_galarian': {
                'fast': [
                    'MUD_SHOT',
                    'METAL_CLAW',
                ],
                'charge': [
                    'EARTHQUAKE',
                    'FLASH_CANNON',
                    'MUDDY_WATER',
                    'ROCK_SLIDE',
                ]
            }
        }
Example #14
def benchmark_template():
    import ubelt as ub
    import pandas as pd
    import timerit

    def method1(x, y, z):
        ret = []
        for i in range((x + y) * z):
            ret.append(i)
        return ret

    def method2(x, y, z):
        ret = [i for i in range((x + y) * z)]
        return ret

    method_lut = locals()  # can populate this some other way

    # Change params here to modify number of trials
    ti = timerit.Timerit(100, bestof=10, verbose=1)

    # if True, record every trial run and show variance in seaborn
    # if False, use the standard timerit min/mean measures
    RECORD_ALL = True

    # These are the parameters that we benchmark over
    basis = {
        'method': ['method1', 'method2'],
        'x': list(range(7)),
        'y': [0, 100],
        'z': [2, 3]
        # 'param_name': [param values],
    }
    xlabel = 'x'
    # Set these to param labels that directly transfer to method kwargs
    kw_labels = ['x', 'y', 'z']
    # Set these to empty lists if they are not used
    group_labels = {
        'style': ['y'],
        'size': ['z'],
    }
    group_labels['hue'] = list((ub.oset(basis) - {xlabel}) -
                               set.union(*map(set, group_labels.values())))
    grid_iter = list(ub.named_product(basis))

    # For each variation of your experiment, create a row.
    rows = []
    for params in grid_iter:
        group_keys = {}
        for gname, labels in group_labels.items():
            group_keys[gname + '_key'] = ub.repr2(
                ub.dict_isect(params, labels), compact=1, si=1)
        key = ub.repr2(params, compact=1, si=1)
        # Make any modifications you need to compute input kwargs for each
        # method here.
        kwargs = ub.dict_isect(params.copy(), kw_labels)
        method = method_lut[params['method']]
        # Timerit will run some user-specified number of loops.
        # and compute time stats with similar methodology to timeit
        for timer in ti.reset(key):
            # Put any setup logic you don't want to time here.
            # ...
            with timer:
                # Put the logic you want to time here
                method(**kwargs)

        if RECORD_ALL:
            # Seaborn will show the variance if this is enabled, otherwise
            # use the robust timerit mean / min times
            chunk_iter = ub.chunks(ti.times, ti.bestof)
            times = list(map(min, chunk_iter))  # TODO: timerit method for this
            for time in times:
                row = {
                    # 'mean': ti.mean(),
                    'time': time,
                    'key': key,
                    **group_keys,
                    **params,
                }
                rows.append(row)
        else:
            row = {
                'mean': ti.mean(),
                'min': ti.min(),
                'key': key,
                **group_keys,
                **params,
            }
            rows.append(row)

    time_key = 'time' if RECORD_ALL else 'min'

    # The rows define a long-form pandas data array.
    # Data in long-form makes it very easy to use seaborn.
    data = pd.DataFrame(rows)
    data = data.sort_values(time_key)

    if RECORD_ALL:
        # Show the min / mean if we record all
        min_times = data.groupby('key').min().rename({'time': 'min'}, axis=1)
        mean_times = data.groupby('key')[['time']].mean().rename(
            {'time': 'mean'}, axis=1)
        stats_data = pd.concat([min_times, mean_times], axis=1)
        stats_data = stats_data.sort_values('min')
    else:
        stats_data = data

    USE_OPENSKILL = 1
    if USE_OPENSKILL:
        # Let's try a real ranking method
        # https://github.com/OpenDebates/openskill.py
        import openskill
        method_ratings = {m: openskill.Rating() for m in basis['method']}

    other_keys = sorted(
        set(stats_data.columns) -
        {'key', 'method', 'min', 'mean', 'hue_key', 'size_key', 'style_key'})
    for params, variants in stats_data.groupby(other_keys):
        variants = variants.sort_values('mean')
        ranking = variants['method'].reset_index(drop=True)

        mean_speedup = variants['mean'].max() / variants['mean']
        stats_data.loc[mean_speedup.index, 'mean_speedup'] = mean_speedup
        min_speedup = variants['min'].max() / variants['min']
        stats_data.loc[min_speedup.index, 'min_speedup'] = min_speedup

        if USE_OPENSKILL:
            # The idea is that each setting of parameters is a game, and each
            # "method" is a player. We rank the players by which is fastest,
            # and update their ranking according to the Weng-Lin Bayes ranking
            # model. This does not account for the fact that some "games" (i.e.
            # parameter settings) are more important than others, but it should
            # be fairly robust on average.
            old_ratings = [[r] for r in ub.take(method_ratings, ranking)]
            new_values = openskill.rate(old_ratings)  # Not inplace
            new_ratings = [openskill.Rating(*new[0]) for new in new_values]
            method_ratings.update(ub.dzip(ranking, new_ratings))

    print('Statistics:')
    print(stats_data)

    if USE_OPENSKILL:
        from openskill import predict_win
        win_prob = predict_win([[r] for r in method_ratings.values()])
        skill_agg = pd.Series(ub.dzip(method_ratings.keys(),
                                      win_prob)).sort_values(ascending=False)
        print('Aggregated Rankings =\n{}'.format(skill_agg))

    plot = True
    if plot:
        # import seaborn as sns
        # kwplot autosns works well for IPython and script execution.
        # not sure about notebooks.
        import kwplot
        sns = kwplot.autosns()
        plt = kwplot.autoplt()

        plotkw = {}
        for gname, labels in group_labels.items():
            if labels:
                plotkw[gname] = gname + '_key'

        # Your variables may change
        ax = kwplot.figure(fnum=1, doclf=True).gca()
        sns.lineplot(data=data,
                     x=xlabel,
                     y=time_key,
                     marker='o',
                     ax=ax,
                     **plotkw)
        ax.set_title('Benchmark Name')
        ax.set_xlabel('Size (todo: A better x-variable description)')
        ax.set_ylabel('Time (todo: A better y-variable description)')
        # ax.set_xscale('log')
        # ax.set_yscale('log')

        try:
            __IPYTHON__
        except NameError:
            plt.show()
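
A condensed sketch of the grid machinery the template relies on: ub.named_product expands the basis into one dict per parameter combination, and ub.dict_isect pulls out just the keys each method accepts (assuming those ubelt helpers behave as they are used above):

import ubelt as ub
basis = {'method': ['method1'], 'x': [1, 2], 'y': [0]}
grid = list(ub.named_product(basis))
# grid == [{'method': 'method1', 'x': 1, 'y': 0}, {'method': 'method1', 'x': 2, 'y': 0}]
kwargs = ub.dict_isect(grid[0], ['x', 'y'])
assert kwargs == {'x': 1, 'y': 0}
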
Example #15
def benchmark_pathlib_vs_fspath():
    import ubelt as ub
    import pathlib
    import pandas as pd
    import random
    import timerit
    import os

    def method_pathlib(inputs):
        p = pathlib.Path(*inputs)

    def method_ospath(inputs):
        p = os.path.join(*inputs)

    method_lut = locals()  # can populate this some other way

    ti = timerit.Timerit(10000, bestof=10, verbose=2)

    basis = {
        'method': ['method_pathlib', 'method_ospath'],
        'num_parts': [2, 4, 8, 12, 16],
    }
    xlabel = 'num_parts'
    kw_labels = []
    group_labels = {
        'style': [],
        'size': [],
    }
    group_labels['hue'] = list((ub.oset(basis) - {xlabel}) -
                               set.union(*map(set, group_labels.values())))
    grid_iter = list(ub.named_product(basis))

    # For each variation of your experiment, create a row.
    rows = []
    for params in grid_iter:
        group_keys = {}
        for gname, labels in group_labels.items():
            group_keys[gname + '_key'] = ub.repr2(
                ub.dict_isect(params, labels), compact=1, si=1)
        key = ub.repr2(params, compact=1, si=1)
        kwargs = ub.dict_isect(params.copy(), kw_labels)

        n = params['num_parts']
        inputs = [chr(random.randint(97, 120)) for _ in range(n)]
        kwargs['inputs'] = inputs
        method = method_lut[params['method']]
        # Timerit will run some user-specified number of loops.
        # and compute time stats with similar methodology to timeit
        for timer in ti.reset(key):
            # Put any setup logic you don't want to time here.
            # ...
            with timer:
                # Put the logic you want to time here
                method(**kwargs)
        row = {
            'mean': ti.mean(),
            'min': ti.min(),
            'key': key,
            **group_keys,
            **params,
        }
        rows.append(row)

    # The rows define a long-form pandas data array.
    # Data in long-form makes it very easy to use seaborn.
    data = pd.DataFrame(rows)
    data = data.sort_values('min')
    print(data)

    plot = True
    if plot:
        # import seaborn as sns
        # kwplot autosns works well for IPython and script execution.
        # not sure about notebooks.
        import kwplot
        sns = kwplot.autosns()

        plotkw = {}
        for gname, labels in group_labels.items():
            if labels:
                plotkw[gname] = gname + '_key'

        # Your variables may change
        ax = kwplot.figure(fnum=1, doclf=True).gca()
        sns.lineplot(data=data, x=xlabel, y='min', marker='o', ax=ax, **plotkw)
        ax.set_title('Benchmark')
        ax.set_xlabel('Number of parts')
        ax.set_ylabel('Time')
Example #16
def benchmark_repeat_vs_reduce_mul():
    import ubelt as ub
    import pandas as pd
    import timerit

    def reduce_daq_rec(func, arrs):
        if len(arrs) == 1:
            return arrs[0]
        if len(arrs) == 2:
            return func(arrs[0], arrs[1])
        elif len(arrs) == 3:
            return func(func(arrs[0], arrs[1]), arrs[2])
        else:
            arrs1 = arrs[0::2]
            arrs2 = arrs[1::2]
            res1 = reduce_daq_rec(func, arrs1)
            res2 = reduce_daq_rec(func, arrs2)
            res = func(res1, res2)
        return res

    def reduce_daq_iter(func, arrs):
        """
        https://www.baeldung.com/cs/convert-recursion-to-iteration
        https://stackoverflow.com/questions/159590/way-to-go-from-recursion-to-iteration
        arrs = [2, 3, 5, 7, 11, 13, 17, 21]
        """
        raise NotImplementedError
        # TODO: make the iterative version
        from collections import deque
        empty_result = None
        stack = deque([(arrs, empty_result)])
        idx = 0
        while stack:
            print('----')
            print('stack = {}'.format(ub.repr2(list(stack), nl=1)))
            arrs0, result = stack.pop()
            if len(arrs0) == 0:
                raise Exception
            if result is not None:
                # raise Exception
                results = [result]
                while stack:
                    next_arrs0, next_result = stack.pop()
                    if next_result is None:
                        break
                    else:
                        results.append(next_result)
                if results:
                    if len(results) == 1:
                        stack.append((results, results[0]))
                    else:
                        stack.append((results, None))
                if next_result is None:
                    stack.append((next_arrs0, None))
            elif result is None:
                if len(arrs0) == 1:
                    result = arrs0[0]
                    stack.append((arrs0, result))
                    # return arrs0[0]
                if len(arrs0) == 2:
                    result = func(arrs0[0], arrs0[1])
                    stack.append((arrs0, result))
                elif len(arrs0) == 3:
                    result = func(func(arrs0[0], arrs0[1]), arrs0[2])
                    stack.append((arrs0, result))
                else:
                    arrs01 = arrs0[0::2]
                    arrs02 = arrs0[1::2]
                    stack.append((arrs0, empty_result))
                    stack.append((arrs01, empty_result))
                    stack.append((arrs02, empty_result))
                    # res1 = reduce_daq_rec(func, arrs01)
                    # res2 = reduce_daq_rec(func, arrs2)
                    # res = func(res1, res2)
            idx += 1
            if idx > 10:
                raise Exception
        return res

    def method_daq_rec(arrs):
        return reduce_daq_rec(np.multiply, arrs)

    def method_repeat(arrs):
        """
        helper code:
            arr_names = ['a{:02d}'.format(idx) for idx in range(1, 32 + 1)]
            lhs = ', '.join(arr_names)
            rhs = ' * '.join(arr_names)
            print(f'{lhs} = arrs')
            print(f'ret = {rhs}')
        """
        # Hard coded pure python syntax for multiplying
        if len(arrs) == 4:
            a01, a02, a03, a04 = arrs
            ret = a01 * a02 * a03 * a04
        elif len(arrs) == 8:
            a01, a02, a03, a04, a05, a06, a07, a08 = arrs
            ret = a01 * a02 * a03 * a04 * a05 * a06 * a07 * a08
        elif len(arrs) == 32:
            a01, a02, a03, a04, a05, a06, a07, a08, a09, a10, a11, a12, a13, a14, a15, a16, a17, a18, a19, a20, a21, a22, a23, a24, a25, a26, a27, a28, a29, a30, a31, a32 = arrs
            ret = a01 * a02 * a03 * a04 * a05 * a06 * a07 * a08 * a09 * a10 * a11 * a12 * a13 * a14 * a15 * a16 * a17 * a18 * a19 * a20 * a21 * a22 * a23 * a24 * a25 * a26 * a27 * a28 * a29 * a30 * a31 * a32
        return ret

    def method_reduce(arrs):
        ret = np.multiply.reduce(arrs)
        return ret

    def method_stack(arrs):
        stacked = np.stack(arrs)
        ret = stacked.prod(axis=0)
        return ret

    method_lut = locals()  # can populate this some other way

    ti = timerit.Timerit(10000, bestof=10, verbose=2)

    basis = {
        'method':
        ['method_repeat', 'method_reduce', 'method_stack', 'method_daq_rec'],
        'arr_size': [10, 100, 1000, 10000],
        'num_arrs': [4, 8, 32],
    }
    xlabel = 'arr_size'
    kw_labels = []
    group_labels = {
        'style': ['num_arrs'],
        'size': [],
    }
    group_labels['hue'] = list((ub.oset(basis) - {xlabel}) -
                               set.union(*map(set, group_labels.values())))
    grid_iter = list(ub.named_product(basis))

    # For each variation of your experiment, create a row.
    rows = []
    for params in grid_iter:
        group_keys = {}
        for gname, labels in group_labels.items():
            group_keys[gname + '_key'] = ub.repr2(
                ub.dict_isect(params, labels), compact=1, si=1)
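        # Note: ub.dict_isect subsets the params dict to just the given label
        # keys, e.g. ub.dict_isect({'arr_size': 100, 'num_arrs': 8}, ['num_arrs'])
        # returns {'num_arrs': 8}; repr2 then renders that subset as a compact
        # group-key string.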
        key = ub.repr2(params, compact=1, si=1)
        kwargs = ub.dict_isect(params.copy(), kw_labels)

        arr_size = params['arr_size']
        num_arrs = params['num_arrs']

        arrs = []
        for _ in range(num_arrs):
            arr = np.random.rand(arr_size)
            arrs.append(arr)
        kwargs['arrs'] = arrs
        method = method_lut[params['method']]
        # Timerit will run some user-specified number of loops.
        # and compute time stats with similar methodology to timeit
        for timer in ti.reset(key):
            # Put any setup logic you don't want to time here.
            # ...
            with timer:
                # Put the logic you want to time here
                method(**kwargs)
        row = {
            'mean': ti.mean(),
            'min': ti.min(),
            'key': key,
            **group_keys,
            **params,
        }
        rows.append(row)

    # The rows define a long-form pandas data array.
    # Data in long-form makes it very easy to use seaborn.
    data = pd.DataFrame(rows)
    data = data.sort_values('min')
    print(data)

    plot = True
    if plot:
        # import seaborn as sns
        # kwplot autosns works well for IPython and script execution.
        # not sure about notebooks.
        import kwplot
        sns = kwplot.autosns()

        plotkw = {}
        for gname, labels in group_labels.items():
            if labels:
                plotkw[gname] = gname + '_key'

        # Your variables may change
        ax = kwplot.figure(fnum=1, doclf=True).gca()
        sns.lineplot(data=data, x=xlabel, y='min', marker='o', ax=ax, **plotkw)
        ax.set_title('Benchmark')
        ax.set_xlabel('Array Size')
        ax.set_ylabel('Time')
Example No. 17
    def __init__(api):
        api.base = 'https://pogoapi.net/api/v1/'
        api.routes = {
            'pokemon_stats': api.base + 'pokemon_stats.json',
            'current_pokemon_moves': api.base + 'current_pokemon_moves.json',
            'pokemon_evolutions': api.base + 'pokemon_evolutions.json',
            'cp_multiplier': api.base + 'cp_multiplier.json',
        }
        api.data = {}
        for key, url in api.routes.items():
            data_fpath = ub.grabdata(url, verbose=1)
            with open(data_fpath, 'r') as file:
                data = json.load(file)
            api.data[key] = data

        # Make the API global for now
        pokemon_stats = api.data['pokemon_stats']
        _name_to_stats = ub.group_items(
            pokemon_stats, lambda item: item['pokemon_name'].lower())
        _name_to_stats = dict(_name_to_stats)
        api.name_to_stats = _name_to_stats

        _name_to_items = ub.group_items(
            api.data['current_pokemon_moves'],
            lambda item: item['pokemon_name'].lower())
        _name_to_items.default_factory = None
        _name_to_items = dict(_name_to_items)

        # base = 'http://pokeapi.co/api/v2/pokemon/'
        api.name_to_moves = _name_to_items

        evolutions = api.data['pokemon_evolutions']
        _name_to_evolutions = ub.group_items(
            evolutions, lambda item: item['pokemon_name'].lower())
        _name_to_evolutions = dict(_name_to_evolutions)

        for key, form_stats in api.name_to_stats.items():
            if key not in _name_to_evolutions:
                noevos = []
                for s in form_stats:
                    empty = ub.dict_isect(
                        s, {'form', 'pokemon_name', 'pokemon_id'})
                    empty['evolutions'] = []
                    noevos.append(empty)
                _name_to_evolutions[key] = noevos

        import networkx as nx
        evo_graph = nx.DiGraph()
        for name, form_evo_list in _name_to_evolutions.items():
            for form_evo in form_evo_list:
                u = form_evo['pokemon_name'].lower()
                evo_graph.add_node(u)
                for evo in form_evo['evolutions']:
                    v = evo['pokemon_name'].lower()
                    evo_graph.add_edge(u, v)

        # if 0:
        #     print(forest_str(evo_graph))

        api.name_to_family = {}
        api.name_to_base = {}
        evo_graph.remove_edges_from(nx.selfloop_edges(evo_graph))
        api.evo_graph = evo_graph
        for cc in list(nx.connected_components(api.evo_graph.to_undirected())):
            bases = [n for n in cc if len(evo_graph.pred[n]) == 0]
            base = bases[0]
            for n in cc:
                api.name_to_family[n] = cc
                api.name_to_base[n] = base

        # base_pokmeon = [n for n in evo_graph.nodes if len(evo_graph.pred[n]) == 0]
        api.name_to_evolutions = _name_to_evolutions

        # api.name_to_family = {}
        # for base in base_pokmeon:
        #     family = list(nx.dfs_postorder_nodes(evo_graph, base))
        #     for name in family:
        #         api.name_to_family[name] = family
        #         evos = api.name_to_evolutions[name]
        #         for evo in evos:
        #             evo['base'] = base

        #     for evo in evos['evolutions']:
        #         evo['base']

        api.learnable = {
            'stunfisk_galarian': {
                'fast': [
                    'MUD_SHOT',
                    'METAL_CLAW',
                ],
                'charge': [
                    'EARTHQUAKE',
                    'FLASH_CANNON',
                    'MUDDY_WATER',
                    'ROCK_SLIDE',
                ]
            }
        }
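
        # Hypothetical usage sketch: the enclosing class name is not shown in
        # this snippet, so assume something like `PogoAPI` for illustration:
        #   api = PogoAPI()
        #   api.name_to_stats['bulbasaur']   # list of stat dicts per form
        #   api.name_to_moves['bulbasaur']   # fast / charge move records
        #   api.name_to_base['ivysaur']      # name of the family's base form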
Example No. 18
def benchmark_nested_break():
    """
    There are several ways to do a nested break, but which one is best?

    https://twitter.com/nedbat/status/1515345787563220996
    """
    import ubelt as ub
    import pandas as pd
    import timerit
    import itertools as it

    def method1_itertools(iter1, iter2):
        for i, j in it.product(iter1, iter2):
            if i == 20 and j == 20:
                break

    def method2_except(iter1, iter2):
        class Found(Exception):
            pass
        try:
            for i in iter1:
                for j in iter2:
                    if i == 20 and j == 20:
                        raise Found
        except Found:
            pass

    class FoundPredef(Exception):
        pass

    def method2_5_except_predef(iter1, iter2):
        try:
            for i in iter1:
                for j in iter2:
                    if i == 20 and j == 20:
                        raise FoundPredef
        except FoundPredef:
            pass

    def method3_gendef(iter1, iter2):
        def genfunc():
            for i in iter1:
                for j in iter2:
                    yield i, j

        for i, j in genfunc():
            if i == 20 and j == 20:
                break

    def method4_genexp(iter1, iter2):
        genexpr = ((i, j) for i in iter1 for j in iter2)
        for i, j in genexpr:
            if i == 20 and j == 20:
                break
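
    # An additional variant for comparison (not part of the original benchmark
    # basis below): Python's for/else clause breaks the outer loop only when
    # the inner loop actually broke.
    def method5_for_else(iter1, iter2):
        for i in iter1:
            for j in iter2:
                if i == 20 and j == 20:
                    break
            else:
                continue
            break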

    method_lut = locals()  # can populate this some other way

    # Change params here to modify number of trials
    ti = timerit.Timerit(1000, bestof=10, verbose=1)

    # if True, record every trial run and show variance in seaborn
    # if False, use the standard timerit min/mean measures
    RECORD_ALL = True

    # These are the parameters that we benchmark over
    import numpy as np
    basis = {
        'method': ['method1_itertools', 'method2_except', 'method2_5_except_predef', 'method3_gendef', 'method4_genexp'],
        # 'n1': np.logspace(1, np.log2(100), 30, base=2).astype(int),
        # 'n2': np.logspace(1, np.log2(100), 30, base=2).astype(int),
        'size': np.logspace(1, np.log2(10000), 30, base=2).astype(int),
        'input_style': ['range', 'list', 'customized_iter'],
        # 'param_name': [param values],
    }
    xlabel = 'size'
    xinput_labels = ['n1', 'n2', 'size']

    # Set these to param labels that directly transfer to method kwargs
    kw_labels = []
    # Set these to empty lists if they are not used
    group_labels = {
        'style': ['input_style'],
        'size': [],
    }
    group_labels['hue'] = list(
        (ub.oset(basis) - {xlabel} - xinput_labels) - set.union(*map(set, group_labels.values())))
    grid_iter = list(ub.named_product(basis))

    def make_input(params):
        # Given the parameterization make the benchmark function input
        # n1 = params['n1']
        # n2 = params['n2']
        size = params['size']
        n1 = int(np.sqrt(size))
        n2 = int(np.sqrt(size))
        if params['input_style'] == 'list':
            iter1 = list(range(n1))
            iter2 = list(range(n1))
        elif params['input_style'] == 'range':
            iter1 = range(n1)
            iter2 = range(n2)
        elif params['input_style'] == 'customized_iter':
            import random
            def rando1():
                rng1 = random.Random(0)
                for _ in range(n1):
                    yield rng1.randint(0, n2)

            def rando2():
                rng2 = random.Random(1)
                for _ in range(n1):
                    yield rng2.randint(0, n2)

            iter1 = rando1()
            iter2 = rando2()
        else:
            raise KeyError
        return {'iter1': iter1, 'iter2': iter2}

    # For each variation of your experiment, create a row.
    rows = []
    for params in grid_iter:
        # size = params['n1'] * params['n2']
        # params['size'] = size
        group_keys = {}
        for gname, labels in group_labels.items():
            group_keys[gname + '_key'] = ub.repr2(
                ub.dict_isect(params, labels), compact=1, si=1)
        key = ub.repr2(params, compact=1, si=1)
        # Make any modifications you need to compute input kwargs for each
        # method here.
        kwargs = ub.dict_isect(params.copy(), kw_labels)

        method = method_lut[params['method']]
        # Timerit will run some user-specified number of loops.
        # and compute time stats with similar methodology to timeit
        for timer in ti.reset(key):
            # Put any setup logic you don't want to time here.
            # ...
            kwargs.update(make_input(params))
            with timer:
                # Put the logic you want to time here
                method(**kwargs)

        if RECORD_ALL:
            # Seaborn will show the variance if this is enabled, otherwise
            # use the robust timerit mean / min times
            # chunk_iter = ub.chunks(ti.times, ti.bestof)
            # times = list(map(min, chunk_iter))  # TODO: timerit method for this
            times = ti.robust_times()
            for time in times:
                row = {
                    # 'mean': ti.mean(),
                    'time': time,
                    'key': key,
                    **group_keys,
                    **params,
                }
                rows.append(row)
        else:
            row = {
                'mean': ti.mean(),
                'min': ti.min(),
                'key': key,
                **group_keys,
                **params,
            }
            rows.append(row)

    time_key = 'time' if RECORD_ALL else 'min'

    # The rows define a long-form pandas data array.
    # Data in long-form makes it very easy to use seaborn.
    data = pd.DataFrame(rows)
    data = data.sort_values(time_key)

    if RECORD_ALL:
        # Show the min / mean if we record all
        min_times = data.groupby('key').min().rename({'time': 'min'}, axis=1)
        mean_times = data.groupby('key')[['time']].mean().rename({'time': 'mean'}, axis=1)
        stats_data = pd.concat([min_times, mean_times], axis=1)
        stats_data = stats_data.sort_values('min')
    else:
        stats_data = data

    USE_OPENSKILL = 1
    if USE_OPENSKILL:
        # Lets try a real ranking method
        # https://github.com/OpenDebates/openskill.py
        import openskill
        method_ratings = {m: openskill.Rating() for m in basis['method']}

    other_keys = sorted(set(stats_data.columns) - {'key', 'method', 'min', 'mean', 'hue_key', 'size_key', 'style_key'})
    for params, variants in stats_data.groupby(other_keys):
        variants = variants.sort_values('mean')
        ranking = variants['method'].reset_index(drop=True)

        mean_speedup = variants['mean'].max() / variants['mean']
        stats_data.loc[mean_speedup.index, 'mean_speedup'] = mean_speedup
        min_speedup = variants['min'].max() / variants['min']
        stats_data.loc[min_speedup.index, 'min_speedup'] = min_speedup

        if USE_OPENSKILL:
            # The idea is that each setting of parameters is a game, and each
            # "method" is a player. We rank the players by which is fastest,
            # and update their ranking according to the Weng-Lin Bayes ranking
            # model. This does not account for the fact that some "games" (i.e.
            # parameter settings) are more important than others, but it should
            # be fairly robust on average.
            old_ratings = [[r] for r in ub.take(method_ratings, ranking)]
            new_values = openskill.rate(old_ratings)  # Not inplace
            new_ratings = [openskill.Rating(*new[0]) for new in new_values]
            method_ratings.update(ub.dzip(ranking, new_ratings))

    print('Statistics:')
    print(stats_data)

    if USE_OPENSKILL:
        from openskill import predict_win
        win_prob = predict_win([[r] for r in method_ratings.values()])
        skill_agg = pd.Series(ub.dzip(method_ratings.keys(), win_prob)).sort_values(ascending=False)
        print('method_ratings = {}'.format(ub.repr2(method_ratings, nl=1)))
        print('Aggregated Rankings =\n{}'.format(skill_agg))

    plot = True
    if plot:
        # import seaborn as sns
        # kwplot autosns works well for IPython and script execution.
        # not sure about notebooks.
        import kwplot
        sns = kwplot.autosns()
        plt = kwplot.autoplt()

        plotkw = {}
        for gname, labels in group_labels.items():
            if labels:
                plotkw[gname] = gname + '_key'

        # Your variables may change
        ax = kwplot.figure(fnum=1, doclf=True).gca()
        sns.lineplot(data=data, x=xlabel, y=time_key, marker='o', ax=ax, **plotkw)
        ax.set_title(f'Benchmark Nested Breaks: #Trials {ti.num}, bestof {ti.bestof}')
        ax.set_xlabel(f'{xlabel}')
        ax.set_ylabel('Time')
        ax.set_xscale('log')
        ax.set_yscale('log')

        try:
            __IPYTHON__
        except NameError:
            plt.show()
Example No. 19
def benchamrk_det_nms():
    """
    Benchmarks different implementations of non-max suppression on the CPU, GPU,
    and using cython / numpy / torch.

    CommandLine:
        xdoctest -m ~/code/kwimage/dev/bench_nms.py benchamrk_det_nms --show

    SeeAlso:
        PJR Darknet NonMax suppression
        https://github.com/pjreddie/darknet/blob/master/src/box.c

        Lightnet NMS
        https://gitlab.com/EAVISE/lightnet/blob/master/lightnet/data/transform/_postprocess.py#L116
    """

    # N = 200
    # bestof = 50
    N = 1
    bestof = 1

    # xdata = [10, 20, 40, 80, 100, 200, 300, 400, 500, 600, 700, 1000, 1500, 2000]

    # max number of boxes yolo will spit out at a time
    max_boxes = 19 * 19 * 5

    xdata = [
        10, 20, 40, 80, 100, 200, 300, 400, 500, 600, 700, 1000, 1500,
        max_boxes
    ]
    # xdata = [10, 20, 40, 80, 100, 200, 300, 400, 500]

    # Demo values
    xdata = [0, 1, 2, 3, 10, 100, 200, 300, 500]

    if ub.argflag('--small'):
        xdata = [10, 100, 500, 1000, 1500, 2000, 5000, 10000]

    if ub.argflag('--medium'):
        xdata = [
            1000,
            5000,
            10000,
            20000,
            50000,
        ]

    if ub.argflag('--large'):
        xdata = [
            1000,
            5000,
            10000,
            20000,
            50000,
            100000,
        ]

    if ub.argflag('--extra-large'):
        xdata = [
            1000,
            2000,
            10000,
            20000,
            40000,
            100000,
            200000,
        ]

    title_parts = []

    SMALL_BOXES = ub.argflag('--small-boxes')
    if SMALL_BOXES:
        title_parts.append('small boxes')
    else:
        title_parts.append('large boxes')

    # NOTE: for large images we may have up to 21,850,753 detections!

    thresh = float(ub.argval('--thresh', default=0.4))
    title_parts.append('thresh={:.2f}'.format(thresh))

    from kwimage.algo.algo_nms import available_nms_impls
    valid_impls = available_nms_impls()
    print('valid_impls = {!r}'.format(valid_impls))

    basis = {
        'type': ['ndarray', 'tensor', 'tensor0'],
        # 'daq': [True, False],
        # 'daq': [False],
        # 'device': [None],
        # 'impl': valid_impls,
        'impl': valid_impls + ['auto'],
    }

    if ub.argflag('--daq'):
        basis['daq'] = [True, False]

    # if torch.cuda.is_available():
    #     basis['device'].append(0)

    combos = [
        ub.dzip(basis.keys(), vals) for vals in it.product(*basis.values())
    ]
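    # Each combo is one dict per grid point, e.g. something like
    # {'type': 'ndarray', 'impl': 'auto'}; ub.dzip pairs the basis keys with
    # one value chosen from each basis list.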

    def is_valid_combo(combo):
        # if combo['impl'] in {'py', 'cython_cpu'} and combo['device'] is not None:
        #     return False
        # if combo['type'] == 'ndarray' and combo['impl'] == 'cython_gpu':
        #     if combo['device'] is None:
        #         return False
        # if combo['type'] == 'ndarray' and combo['impl'] != 'cython_gpu':
        #     if combo['device'] is not None:
        #         return False

        # if combo['type'].endswith('0'):
        #     if combo['impl'] in {'numpy', 'cython_gpu', 'cython_cpu'}:
        #         return False

        # if combo['type'] == 'ndarray':
        #     if combo['impl'] in {'torch'}:
        #         return False

        REMOVE_SLOW = True
        if REMOVE_SLOW:
            known_bad = [
                {
                    'impl': 'torch',
                    'type': 'tensor'
                },
                {
                    'impl': 'numpy',
                    'type': 'tensor'
                },
                # {'impl': 'cython_gpu', 'type': 'tensor'},
                {
                    'impl': 'cython_cpu',
                    'type': 'tensor'
                },

                # {'impl': 'torch', 'type': 'tensor0'},
                {
                    'impl': 'numpy',
                    'type': 'tensor0'
                },
                # {'impl': 'cython_gpu', 'type': 'tensor0'},
                # {'impl': 'cython_cpu', 'type': 'tensor0'},
                {
                    'impl': 'torchvision',
                    'type': 'ndarray'
                },
            ]
            for known in known_bad:
                if all(combo[key] == val for key, val in known.items()):
                    return False

        return True

    combos = list(filter(is_valid_combo, combos))

    times = ub.ddict(list)
    for num in xdata:

        if num > 10000:
            N = 1
            bestof = 1
        elif num > 1000:
            N = 3
            bestof = 1
        elif num > 100:
            N = 10
            bestof = 3
        elif num > 10:
            N = 100
            bestof = 10
        else:
            N = 1000
            bestof = 10
        print('\n\n---- number of boxes = {} ----\n'.format(num))

        outputs = {}

        ti = ub.Timerit(N, bestof=bestof, verbose=1)

        # Build random test boxes and scores
        np_dets1 = kwimage.Detections.random(num // 2, scale=1000.0, rng=0)
        np_dets1.data['boxes'] = np_dets1.boxes.to_xywh()

        if SMALL_BOXES:
            max_dim = 100
            np_dets1.boxes.data[..., 2] = np.minimum(np_dets1.boxes.width,
                                                     max_dim).ravel()
            np_dets1.boxes.data[..., 3] = np.minimum(np_dets1.boxes.height,
                                                     max_dim).ravel()

        np_dets2 = copy.deepcopy(np_dets1)
        np_dets2.boxes.translate(10, inplace=True)
        # add boxes that will definitely be removed
        np_dets = kwimage.Detections.concatenate([np_dets1, np_dets2])

        # make all scores unique to ensure comparability
        np_dets.scores[:] = np.linspace(0, 1, np_dets.num_boxes())

        np_dets.data['scores'] = np_dets.scores.astype(np.float32)
        np_dets.boxes.data = np_dets.boxes.data.astype(np.float32)

        typed_data = {}
        # ----------------------------------

        import netharn as nh
        for combo in combos:
            print('combo = {}'.format(ub.repr2(combo, nl=0)))

            label = nh.util.make_idstr(combo)
            mode = combo.copy()

            # if mode['impl'] == 'cython_gpu':
            #     mode['device_id'] = mode['device']

            mode_type = mode.pop('type')

            if mode_type in typed_data:
                dets = typed_data[mode_type]
            else:
                if mode_type == 'ndarray':
                    dets = np_dets.numpy()
                elif mode_type == 'tensor':
                    dets = np_dets.tensor(None)
                elif mode_type == 'tensor0':
                    dets = np_dets.tensor(0)
                else:
                    raise KeyError
                typed_data[mode_type] = dets

            for timer in ti.reset(label):
                with timer:
                    keep = dets.non_max_supression(thresh=thresh, **mode)
                    torch.cuda.synchronize()
            times[ti.label].append(ti.min())
            outputs[ti.label] = ensure_numpy_indices(keep)

        # ----------------------------------

        # Check that all kept boxes do not have more than `threshold` ious
        if 0:
            for key, keep_idxs in outputs.items():
                kept = np_dets.take(keep_idxs).boxes
                ious = kept.ious(kept)
                max_iou = (np.tril(ious) - np.eye(len(ious))).max()
                if max_iou > thresh:
                    print('{} produced a bad result with max_iou={}'.format(
                        key, max_iou))

        # Check result consistency:
        print('\nResult stats:')
        for key in sorted(outputs.keys()):
            print('    * {:<20}: num={}'.format(key, len(outputs[key])))

        print('\nResult overlaps (method1, method2: jaccard):')
        datas = []
        for k1, k2 in it.combinations(sorted(outputs.keys()), 2):
            idxs1 = set(outputs[k1])
            idxs2 = set(outputs[k2])
            jaccard = len(idxs1 & idxs2) / max(len(idxs1 | idxs2), 1)
            datas.append((k1, k2, jaccard))

        datas = sorted(datas, key=lambda x: -x[2])
        for k1, k2, jaccard in datas:
            print('    * {:<20}, {:<20}: {:0.4f}'.format(k1, k2, jaccard))

    if True:
        ydata = {key: 1.0 / np.array(vals) for key, vals in times.items()}
        ylabel = 'Hz'
        reverse = True
        yscale = 'symlog'
    else:
        ydata = {key: np.array(vals) for key, vals in times.items()}
        ylabel = 'seconds'
        reverse = False
        yscale = 'linear'
    scores = {key: vals[-1] for key, vals in ydata.items()}
    ydata = ub.dict_subset(ydata, ub.argsort(scores, reverse=reverse))

    ###
    times_of_interest = [0, 10, 100, 200, 1000]
    times_of_interest = xdata

    lines = []
    record = lines.append
    record('### times_of_interest = {!r}'.format(times_of_interest))
    for x in times_of_interest:

        if times_of_interest[-1] == x:
            record('else:')
        elif times_of_interest[0] == x:
            record('if num <= {}:'.format(x))
        else:
            record('elif num <= {}:'.format(x))

        if x in xdata:
            pos = xdata.index(x)
            score_wrt_x = {}
            for key, vals in ydata.items():
                score_wrt_x[key] = vals[pos]

            typekeys = ['tensor0', 'tensor', 'ndarray']
            type_groups = dict([(b,
                                 ub.group_items(score_wrt_x,
                                                lambda y: y.endswith(b))[True])
                                for b in typekeys])
            # print('\n=========')
            # print('x = {!r}'.format(x))
            record('    if code not in {!r}:'.format(set(typekeys)))
            record('        raise KeyError(code)')
            for typekey, group in type_groups.items():
                # print('-------')
                record('    if code == {!r}:'.format(typekey))
                # print('typekey = {!r}'.format(typekey))
                # print('group = {!r}'.format(group))
                group_x = ub.dict_isect(score_wrt_x, group)
                valid_keys = ub.argsort(group_x, reverse=True)
                valid_x = ub.dict_subset(group_x, valid_keys)
                # parts = [','.split(k) for k in valid_keys]
                ordered_impls = []
                ordered_impls2 = ub.odict()
                for k in valid_keys:
                    vals = valid_x[k]
                    p = k.split(',')
                    d = dict(i.split('=') for i in p)
                    ordered_impls2[d['impl']] = vals
                    ordered_impls.append(d['impl'])

                ordered_impls = list(ub.oset(ordered_impls) - {'auto'})
                ordered_impls2.pop('auto')
                record('        # {}'.format(
                    ub.repr2(ordered_impls2, precision=1, nl=0,
                             explicit=True)))
                record('        preference = {}'.format(
                    ub.repr2(ordered_impls, nl=0)))
    record('### end times of interest ')
    print(ub.indent('\n'.join(lines), ' ' * 8))
    ###

    markers = {
        key: 'o' if 'auto' in key else ''
        for key, score in scores.items()
    }

    if ub.argflag('--daq'):
        markers = {
            key: '+' if 'daq=True' in key else ''
            for key, score in scores.items()
        }

    labels = {
        key: '{:.2f} {} - {}'.format(score, ylabel[0:3], key)
        for key, score in scores.items()
    }

    title = 'NMS-impl speed: ' + ', '.join(title_parts)

    import kwplot
    kwplot.autompl()
    kwplot.multi_plot(
        xdata,
        ydata,
        xlabel='num boxes',
        ylabel=ylabel,
        label=labels,
        yscale=yscale,
        title=title,
        marker=markers,
        # xscale='symlog',
    )

    kwplot.show_if_requested()
Example No. 20
        'size': ['subsize'],
        'style': ['subsize'],
    }
    hue_labels = ub.oset(basis) - {xlabel}
    if group_labels:
        hue_labels = hue_labels - set.union(*map(set, group_labels.values()))
    group_labels['hue'] = list(hue_labels)
    grid_iter = list(ub.named_product(basis))

    # For each variation of your experiment, create a row.
    rows = []
    for params in grid_iter:
        group_keys = {}
        for gname, labels in group_labels.items():
            group_keys[gname + '_key'] = ub.repr2(
                ub.dict_isect(params, labels), compact=1, si=1)
        key = ub.repr2(params, compact=1, si=1)

        data_kwargs = ub.dict_isect(params.copy(), data_kwkeys)
        func_kwargs = generate_data(**data_kwargs)
        method = method_lut[params['method']]
        # Timerit will run some user-specified number of loops.
        # and compute time stats with similar methodology to timeit
        for timer in ti.reset(key):
            with timer:
                # MAIN LOGIC
                method(**func_kwargs)

        for time in ti.times:
            row = {
                # 'mean': ti.mean(),
Example No. 21
    group_labels = {
        'size': ['subsize'],
        'style': ['subsize'],
    }
    hue_labels = ub.oset(basis) - {xlabel}
    if group_labels:
        hue_labels = hue_labels - set.union(*map(set, group_labels.values()))
    group_labels['hue'] = list(hue_labels)
    grid_iter = list(ub.named_product(basis))

    # For each variation of your experiment, create a row.
    rows = []
    for params in grid_iter:
        group_keys = {}
        for gname, labels in group_labels.items():
            group_keys[gname + '_key'] = ub.repr2(
                ub.dict_isect(params, labels), compact=1, si=1)
        key = ub.repr2(params, compact=1, si=1)

        data_kwargs = ub.dict_isect(params.copy(), data_kwkeys)
        func_kwargs = generate_data(**data_kwargs)
        method = method_lut[params['method']]
        # Timerit will run some user-specified number of loops.
        # and compute time stats with similar methodology to timeit
        for timer in ti.reset(key):
            with timer:
                # MAIN LOGIC
                method(**func_kwargs)

        for time in ti.times:
Example No. 22
def ford_circles():
    """
    Draw Ford Circles

    This is a Ford Circle diagram of the Rationals and Float32 numbers.
    Only 163 of the 32608 rationals I generated can be exactly represented by a float32.

    [MF 14]
    [MF 95]

    [MF 14] https://www.youtube.com/watch?v=83ZjYvkdzYI&list=PL5A714C94D40392AB&index=14
    [MF 95] https://www.youtube.com/watch?v=gATEJ3f3FBM&list=PL5A714C94D40392AB&index=95

    Examples:
        import kwplot
        kwplot.autompl()
    """
    import kwplot
    import ubelt as ub
    import matplotlib as mpl
    plt = kwplot.autoplt()
    sns = kwplot.autosns()  # NOQA

    limit = 256 * 256
    print('limit = {!r}'.format(limit))
    rats_to_plot = set()

    maxx = 1
    _iter = Rational.members(limit=limit)
    _genrat = set(ub.ProgIter(_iter, total=limit, desc='gen rats'))
    rats_to_plot |= _genrat
    rats_to_plot2 = sorted({Rational(r % maxx) for r in rats_to_plot} | {maxx})

    floats = sorted(
        ub.unique(map(float, rats_to_plot2),
                  key=lambda f: f.as_integer_ratio()))
    print(f'{len(rats_to_plot)  = }')
    print(f'{len(rats_to_plot2) = }')
    print(f'{len(floats)        = }')
    import numpy as np

    ax = kwplot.figure(fnum=1, doclf=True).gca()
    prog = ub.ProgIter(sorted(rats_to_plot2), verbose=1)
    dtype = np.float32
    patches = ub.ddict(list)
    errors = []
    for rat in prog:
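        # For a rational p/q in lowest terms, its Ford circle is tangent to
        # the x-axis at x = p/q with radius 1 / (2 * q**2); tangency means the
        # center height equals the radius, hence the center point (rat, radius).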
        denominator = rat.denominator
        radius = 1 / (2 * (denominator * denominator))
        point = (rat, radius)
        flt = dtype(rat)
        a, b = flt.as_integer_ratio()
        flt_as_rat = Rational(a, b)
        error = abs(rat - flt_as_rat)
        if error == 0:
            new_circle = plt.Circle(point,
                                    radius,
                                    facecolor='dodgerblue',
                                    edgecolor='none',
                                    linewidth=0,
                                    alpha=0.5)
            patches['good'].append(new_circle)
        else:
            errors.append(error)
            # Plot a line for error
            new_circle = plt.Circle(point,
                                    radius,
                                    facecolor='orangered',
                                    edgecolor='none',
                                    linewidth=0,
                                    alpha=0.5)
            patches['bad'].append(new_circle)
            ax.plot((rat - error, rat + error), (radius, radius),
                    'x-',
                    color='darkgray')

    print(ub.map_vals(len, patches))
    total = float(sum(errors))
    print('total = {!r}'.format(total))
    print(max(errors))
    print(min(errors))

    for v in patches.values():
        first = ub.peek(v)
        prop = ub.dict_isect(first.properties(),
                             ['facecolor', 'linewidth', 'alpha', 'edgecolor'])
        col = mpl.collections.PatchCollection(v, **prop)
        ax.add_collection(col)

    # Lets look for the holes in IEEE float
    # for flt in ub.ProgIter(sorted(floats), verbose=1):

    kwplot.phantom_legend({
        f'rationals without a {dtype}':
        'orangered',
        f'rationals with a {dtype}':
        'dodgerblue',
        f'x-x indicates {dtype} approximation error':
        'darkgray',
    })

    ax.set_title('Holes in IEEE 754 Float32')
    ax.set_xlabel('A rational number')
    ax.set_ylabel('Ford circle radius (1 / (2 * denominator**2))')

    # import numpy as np
    # points = np.array([c.center for c in _circles])
    # maxx, maxy = points.max(axis=0)
    # print('maxx = {!r}'.format(maxx))
    # print('maxy = {!r}'.format(maxy))
    # maxx, maxy = maxx // 2, maxy // 2
    # ax.set_xlim(0, np.sqrt(int(maxx)))
    # ax.set_ylim(0, np.sqrt(int(maxy)))
    # ax.set_aspect('equal')
    # ax.set_xlim(0.2, 0.22)
    ax.set_xlim(0, 1)
    ax.set_ylim(0, 0.1)
Example No. 23
def benchmark_unpack():
    """
    What is faster: unpacking items with slice syntax or with tuple unpacking?

    Slice unpacking seems to be a tad faster.
    """
    import ubelt as ub
    import random
    import pandas as pd
    import timerit
    import string

    def tuple_unpack(items):
        *prefix, key = items
        return prefix, key

    def slice_unpack(items):
        prefix, key = items[:-1], items[-1]
        return prefix, key
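
    # e.g. for items = ['a', 'b', 'c'] both helpers return (['a', 'b'], 'c');
    # tuple_unpack always yields the prefix as a list via the starred target,
    # while slice_unpack preserves the input sequence type.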

    method_lut = locals()  # can populate this some other way

    ti = timerit.Timerit(5000, bestof=3, verbose=2)

    basis = {
        'method': ['tuple_unpack', 'slice_unpack'],
        'size': list(range(1, 64 + 1)),
        'type': ['string', 'float'],
    }
    xlabel = 'size'
    kw_labels = []
    group_labels = {
        'style': ['type'],
        'size': [],
    }
    group_labels['hue'] = list((ub.oset(basis) - {xlabel}) -
                               set.union(*map(set, group_labels.values())))
    grid_iter = list(ub.named_product(basis))

    # For each variation of your experiment, create a row.
    rows = []
    for params in grid_iter:
        group_keys = {}
        for gname, labels in group_labels.items():
            group_keys[gname + '_key'] = ub.repr2(
                ub.dict_isect(params, labels), compact=1, si=1)
        key = ub.repr2(params, compact=1, si=1)
        size = params['size']
        method = method_lut[params['method']]
        # Timerit will run some user-specified number of loops.
        # and compute time stats with similar methodology to timeit
        for timer in ti.reset(key):
            if params['type'] == 'string':
                items = [
                    ''.join(random.choices(string.printable, k=5))
                    for _ in range(size)
                ]
            elif params['type'] == 'float':
                items = [random.random() for _ in range(size)]
            with timer:
                method(items)
        for time in ti.times:
            row = {
                'time': time,
                'key': key,
                **group_keys,
                **params,
            }
            rows.append(row)

    # The rows define a long-form pandas data array.
    # Data in long-form makes it very easy to use seaborn.
    data = pd.DataFrame(rows)
    data = data.sort_values('time')
    summary_rows = []
    for method, group in data.groupby('method'):
        row = {}
        row['method'] = method
        row['mean'] = group['time'].mean()
        row['std'] = group['time'].std()
        row['min'] = group['time'].min()
        row['max'] = group['time'].max()
        summary_rows.append(row)
    print(pd.DataFrame(summary_rows).sort_values('mean'))

    plot = True
    if plot:
        # import seaborn as sns
        # kwplot autosns works well for IPython and script execution.
        # not sure about notebooks.
        import kwplot
        sns = kwplot.autosns()

        plotkw = {}
        for gname, labels in group_labels.items():
            if labels:
                plotkw[gname] = gname + '_key'

        # Your variables may change
        ax = kwplot.figure(fnum=1, doclf=True).gca()
        sns.lineplot(data=data,
                     x=xlabel,
                     y='time',
                     marker='o',
                     ax=ax,
                     **plotkw)
        ax.set_title('Benchmark')
        ax.set_xlabel('Number of items')
        ax.set_ylabel('Execution time')