Example #1
    def _load_all_scene_paths(task):
        """
        Parses scene paths into dictionaries that organize them by scene,
        suitable for cross validation.
        """
        scene_im_paths = ub.AutoDict()
        scene_gt_paths = ub.AutoDict()

        keys = task._preprocessing_keys()

        for scene, key in it.product(task.scene_ids, keys):
            im_dpath = task.datasubdir('im' + key, scene)
            gt_dpath = task.datasubdir('gt' + key, scene)

            im_paths = imutil.load_image_paths(im_dpath, ext='.png')
            gt_paths = imutil.load_image_paths(gt_dpath, ext='.png')

            im_paths = list(map(abspath, im_paths))
            gt_paths = list(map(abspath, gt_paths))

            scene_im_paths[scene][key] = im_paths
            scene_gt_paths[scene][key] = gt_paths

        scene_im_paths = scene_im_paths.to_dict()
        scene_gt_paths = scene_gt_paths.to_dict()
        return scene_im_paths, scene_gt_paths
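Note: the pattern above relies on ub.AutoDict auto-creating intermediate levels on first access, with to_dict() converting the nested structure back to plain dicts at the end. A minimal sketch of that behavior (the keys below are illustrative only):

import ubelt as ub

# AutoDict creates intermediate levels on first access; to_dict() then
# converts the result to plain dicts, so missing keys raise KeyError
# afterwards instead of silently creating new entries.
paths = ub.AutoDict()
paths['scene1']['raw'] = ['a.png', 'b.png']  # no KeyError; levels auto-created
frozen = paths.to_dict()
assert frozen['scene1']['raw'] == ['a.png', 'b.png']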
Example #2
    def _all_scene_dpaths(task):
        """
        Returns the directories that the train/test data will exist in
        """
        scene_im_dpaths = ub.AutoDict()
        scene_gt_dpaths = ub.AutoDict()

        keys = task._preprocessing_keys()

        for scene, key in it.product(task.scene_ids, keys):
            im_dpath = task.datasubdir('im' + key, scene)
            gt_dpath = task.datasubdir('gt' + key, scene)
            scene_im_dpaths[scene][key] = im_dpath
            scene_gt_dpaths[scene][key] = gt_dpath

        return scene_im_dpaths, scene_gt_dpaths
Example #3
def decollate_batch(batch):
    """
    Break up a collated batch of BatchContainers back into ItemContainers

    Example:
        >>> bsize = 5
        >>> batch_items = [
        >>>     {
        >>>         'im': ItemContainer.demo('img'),
        >>>         'label': ItemContainer.demo('labels'),
        >>>         'box': ItemContainer.demo('box'),
        >>>     }
        >>>     for _ in range(bsize)
        >>> ]
        >>> batch = container_collate(batch_items, num_devices=2)
        >>> decollated = decollate_batch(batch)
        >>> assert len(decollated) == len(batch_items)
        >>> assert (decollated[0]['im'].data == batch_items[0]['im'].data).all()
    """
    import ubelt as ub
    from kwcoco.util.util_json import IndexableWalker
    walker = IndexableWalker(batch)
    decollated_dict = ub.AutoDict()
    decollated_walker = IndexableWalker(decollated_dict)
    for path, batch_val in walker:
        if isinstance(batch_val, BatchContainer):
            for bx, item_val in enumerate(ub.flatten(batch_val.data)):
                decollated_walker[[bx] + path] = ItemContainer(item_val)
    decollated = list(decollated_dict.to_dict().values())
    return decollated
Example #4
def bench_isinstance_vs_attr():
    instances = {
        'base1': Base1(),
        'base2': Base2(),
        'derived2': Derived2(),
    }

    import ubelt as ub
    ti = ub.Timerit(100000, bestof=500, verbose=1, unit='us')

    # Do this twice, but keep the second measure
    data = ub.AutoDict()

    for selfname, self in instances.items():

        print(ub.color_text('--- SELF = {} ---'.format(selfname), 'blue'))

        subdata = data[selfname] = {}

        for timer in ti.reset('isinstance(self, Base1)'):
            with timer:
                isinstance(self, Base1)
        subdata[ti.label] = ti.min()

        for timer in ti.reset('isinstance(self, Base2)'):
            with timer:
                isinstance(self, Base2)
        subdata[ti.label] = ti.min()

        for timer in ti.reset('isinstance(self, Derived2)'):
            with timer:
                isinstance(self, Derived2)
        subdata[ti.label] = ti.min()

        for timer in ti.reset('getattr(self, "class_attr1", False)'):
            with timer:
                getattr(self, 'class_attr1', False)
        subdata[ti.label] = ti.min()

        for timer in ti.reset('getattr(self, "attr1", False)'):
            with timer:
                getattr(self, 'attr1', False)
        subdata[ti.label] = ti.min()

    try:
        import pandas as pd
        df = pd.DataFrame(data) * 1e9
        try:
            from kwil.util.util_pandas import _to_string_monkey
            print(_to_string_monkey(df, key='minima'))
        except Exception:
            print(df)
    except ImportError:
        print('no pandas')
        print(ub.repr2(data, nl=2, precision=4))
Example #5
def test_auto_dict():
    auto = ub.AutoDict()
    assert 0 not in auto
    auto[0][10][100] = None
    assert 0 in auto
    assert isinstance(auto[0], ub.AutoDict)
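The test above hinges on a side effect worth calling out: merely reading a missing key mutates an AutoDict, which is why `assert 0 not in auto` must come before any access. A minimal sketch:

import ubelt as ub

auto = ub.AutoDict()
auto[0]            # a read-only access still inserts a new AutoDict
assert 0 in auto   # the key now exists as a side effect of the read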
Example #6
def benchmark_hash_data():
    """
    CommandLine:
        python ~/code/ubelt/dev/bench_hash.py --convert=True --show
        python ~/code/ubelt/dev/bench_hash.py --convert=False --show
    """
    import ubelt as ub
    #ITEM = 'JUST A STRING' * 100
    ITEM = [0, 1, 'a', 'b', ['JUST A STRING'] * 4]
    HASHERS = ['sha1', 'sha512', 'xxh32', 'xxh64', 'blake3']
    scales = list(range(5, 13))
    results = ub.AutoDict()
    # Using json is faster, or at least as fast, in most cases
    # xxhash is also significantly faster than sha512
    convert = ub.argval('--convert', default='True').lower() == 'true'
    print('convert = {!r}'.format(convert))
    ti = ub.Timerit(9, bestof=3, verbose=1, unit='ms')
    for s in ub.ProgIter(scales, desc='benchmark', verbose=3):
        N = 2**s
        print(' --- s={s}, N={N} --- '.format(s=s, N=N))
        data = [ITEM] * N
        for hasher in HASHERS:
            for timer in ti.reset(hasher):
                ub.hash_data(data, hasher=hasher, convert=convert)
            results[hasher].update({N: ti.mean()})
        col = {h: results[h][N] for h in HASHERS}
        sortx = ub.argsort(col)
        ranking = ub.dict_subset(col, sortx)
        print('walltime: ' + ub.repr2(ranking, precision=9, nl=0))
        best = next(iter(ranking))
        #pairs = list(ub.iter_window( 2))
        pairs = [(k, best) for k in ranking]
        ratios = [ranking[k1] / ranking[k2] for k1, k2 in pairs]
        nicekeys = ['{}/{}'.format(k1, k2) for k1, k2 in pairs]
        relratios = ub.odict(zip(nicekeys, ratios))
        print('speedup: ' + ub.repr2(relratios, precision=4, nl=0))
    # xdoc +REQUIRES(--show)
    # import pytest
    # pytest.skip()
    import pandas as pd
    df = pd.DataFrame.from_dict(results)
    df.columns.name = 'hasher'
    df.index.name = 'N'
    ratios = df.copy().drop(columns=df.columns)
    for k1, k2 in [('sha512', 'xxh32'), ('sha1', 'xxh32'), ('xxh64', 'xxh32')]:
        ratios['{}/{}'.format(k1, k2)] = df[k1] / df[k2]
    print()
    print('Seconds per iteration')
    print(df.to_string(float_format='%.9f'))
    print()
    print('Ratios of seconds')
    print(ratios.to_string(float_format='%.2f'))
    print()
    print('Average Ratio (over all N)')
    print('convert = {!r}'.format(convert))
    print(ratios.mean().sort_values())
    if ub.argflag('--show'):
        import kwplot
        kwplot.autompl()
        xdata = sorted(ub.peek(results.values()).keys())
        ydata = ub.map_vals(lambda d: [d[x] for x in xdata], results)
        kwplot.multi_plot(xdata,
                          ydata,
                          xlabel='N',
                          ylabel='seconds',
                          title='convert = {}'.format(convert))
        kwplot.show_if_requested()
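One detail that makes the timing loop above work: `results[hasher].update(...)` never raises KeyError, because the first access to `results[hasher]` auto-creates an empty AutoDict that supports the usual dict methods. A minimal sketch (hasher names and timings are illustrative):

import ubelt as ub

results = ub.AutoDict()
results['xxh64'].update({32: 0.001})   # works on the very first access
results['xxh64'].update({64: 0.002})   # accumulates per-N timings
assert results['xxh64'] == {32: 0.001, 64: 0.002}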
Example #7
def benchmark_attribute_access():
    """
    How fast are different methods of accessing attributes? Let's find out!
    """

    instances = {
        'simple': Simple(),
        'complex': Complex(),
        'slot_simple': SimpleWithSlots(),
        'slot_complex': ComplexWithSlots(),
    }

    import ubelt as ub

    ti = ub.Timerit(100000, bestof=500, verbose=1, unit='us')

    # Do this twice, but keep the second measure
    data = ub.AutoDict()

    for selfname, self in instances.items():

        print(ub.color_text('--- SELF = {} ---'.format(selfname), 'blue'))

        subdata = data[selfname] = {}

        for timer in ti.reset('self.attr1'):
            with timer:
                self.attr1
        subdata[ti.label] = ti.min()

        for timer in ti.reset('getattr(self, attr1)'):
            with timer:
                getattr(self, 'attr1')
        subdata[ti.label] = ti.min()

        attrs = ['attr1', 'attr2']

        for attrname in attrs:
            for timer in ti.reset('hasattr(self, {})'.format(attrname)):
                with timer:
                    hasattr(self, attrname)
            subdata[ti.label] = ti.min()

            for timer in ti.reset('getattr(self, {}, None)'.format(attrname)):
                with timer:
                    getattr(self, attrname, None)
            subdata[ti.label] = ti.min()

            if 'slot' not in selfname.lower():
                for timer in ti.reset(
                        'self.__dict__.get({}, None)'.format(attrname)):
                    with timer:
                        self.__dict__.get(attrname, None)
                subdata[ti.label] = ti.min()

        for timer in ti.reset('try/except: self.attr2'):
            with timer:
                try:
                    x = self.attr2
                except AttributeError:
                    x = None
        subdata[ti.label] = ti.min()

        for timer in ti.reset('try/except: self.attr1'):
            with timer:
                try:
                    x = self.attr1
                except AttributeError:
                    x = None
        subdata[ti.label] = ti.min()

        del x

    try:
        import pandas as pd
        df = pd.DataFrame(data) * 1e9
        try:
            from kwil.util.util_pandas import _to_string_monkey
            print(_to_string_monkey(df, key='minima'))
        except Exception:
            print(df)
    except ImportError:
        print('no pandas')
        print(ub.repr2(data, nl=2, precision=4))
Example #8
def benchmark_hash_file():
    """
    CommandLine:
        python ~/code/ubelt/dev/bench_hash.py --show
    """
    import ubelt as ub
    import random

    # dpath = ub.ensuredir(ub.expandpath('$HOME/raid/data/tmp'))
    dpath = ub.ensuredir(ub.expandpath('$HOME/tmp'))

    rng = random.Random(0)
    # Create a pool of random chunks of data
    chunksize = int(2 ** 20)
    pool_size = 8
    part_pool = [_random_data(rng, chunksize) for _ in range(pool_size)]

    #ITEM = 'JUST A STRING' * 100
    HASHERS = ['sha1', 'sha512', 'xxh32', 'xxh64', 'blake3']

    scales = list(range(5, 10))
    import os

    results = ub.AutoDict()
    # Using json is faster, or at least as fast, in most cases
    # xxhash is also significantly faster than sha512
    ti = ub.Timerit(9, bestof=3, verbose=1, unit='ms')
    for s in ub.ProgIter(scales, desc='benchmark', verbose=3):
        N = 2 ** s
        print(' --- s={s}, N={N} --- '.format(s=s, N=N))
        # Write a big file
        size_pool = [N]
        fpath = _write_random_file(dpath, part_pool, size_pool, rng)

        megabytes = os.stat(fpath).st_size / (2 ** 20)
        print('megabytes = {!r}'.format(megabytes))

        for hasher in HASHERS:
            for timer in ti.reset(hasher):
                ub.hash_file(fpath, hasher=hasher)
            results[hasher].update({N: ti.mean()})
        col = {h: results[h][N] for h in HASHERS}
        sortx = ub.argsort(col)
        ranking = ub.dict_subset(col, sortx)
        print('walltime: ' + ub.repr2(ranking, precision=9, nl=0))
        best = next(iter(ranking))
        #pairs = list(ub.iter_window( 2))
        pairs = [(k, best) for k in ranking]
        ratios = [ranking[k1] / ranking[k2] for k1, k2 in pairs]
        nicekeys = ['{}/{}'.format(k1, k2) for k1, k2 in pairs]
        relratios = ub.odict(zip(nicekeys, ratios))
        print('speedup: ' + ub.repr2(relratios, precision=4, nl=0))
    # xdoc +REQUIRES(--show)
    # import pytest
    # pytest.skip()
    import pandas as pd
    df = pd.DataFrame.from_dict(results)
    df.columns.name = 'hasher'
    df.index.name = 'N'
    ratios = df.copy().drop(columns=df.columns)
    for k1, k2 in [('sha512', 'xxh64'), ('sha1', 'xxh64'), ('xxh32', 'xxh64'), ('blake3', 'xxh64')]:
        ratios['{}/{}'.format(k1, k2)] = df[k1] / df[k2]
    print()
    print('Seconds per iteration')
    print(df.to_string(float_format='%.9f'))
    print()
    print('Ratios of seconds')
    print(ratios.to_string(float_format='%.2f'))
    print()
    print('Average Ratio (over all N)')
    print(ratios.mean().sort_values())
    if ub.argflag('--show'):
        import kwplot
        kwplot.autompl()
        xdata = sorted(ub.peek(results.values()).keys())
        ydata = ub.map_vals(lambda d: [d[x] for x in xdata], results)
        kwplot.multi_plot(xdata, ydata, xlabel='N', ylabel='seconds')
        kwplot.show_if_requested()
Example #9
def 灵活字典():  # the name means "flexible dictionary" in Chinese
    return ub.AutoDict()
Example #10
    def build_pyproject(self):
        # data = toml.loads((self.template_dpath / 'pyproject.toml').read_text())
        # print('data = {}'.format(ub.repr2(data, nl=5)))
        pyproj_config = ub.AutoDict()
        # {'tool': {}}
        if 'binpy' in self.config['tags']:
            pyproj_config['build-system']['requires'] = [
                "setuptools>=41.0.1",
                # setuptools_scm[toml]
                "wheel",
                "scikit-build>=0.9.0",
                "numpy",
                "ninja"
            ]
            pyproj_config['tool']['cibuildwheel'].update({
                'build': "cp37-* cp38-* cp39-* cp310-*",
                'build-frontend': "build",
                'skip': "pp* cp27-* cp34-* cp35-* cp36-* *-musllinux_*",
                'build-verbosity': 1,
                'test-requires': ["-r requirements/tests.txt"],
                'test-command': "python {project}/run_tests.py",
            })

            if True:
                cibw = pyproj_config['tool']['cibuildwheel']
                req_commands = {
                    'linux': [
                        'yum install epel-release lz4 lz4-devel -y',
                    ],
                    'windows': [
                        'choco install lz4 -y',
                    ],
                    'macos': [
                        'brew install lz4',
                    ]
                }
                for plat in req_commands.keys():
                    cmd = ' && '.join(req_commands[plat])
                    cibw[plat]['before-all'] = cmd

        WITH_PYTEST_INI = 1
        if WITH_PYTEST_INI:
            pytest_ini_opts = pyproj_config['tool']['pytest']['ini_options']
            pytest_ini_opts['addopts'] = (
                "-p no:doctest --xdoctest --xdoctest-style=google --ignore-glob=setup.py")
            pytest_ini_opts['norecursedirs'] = ".git ignore build __pycache__ dev _skbuild"
            pytest_ini_opts['filterwarnings'] = [
                "default",
                "ignore:.*No cfgstr given in Cacher constructor or call.*:Warning",
                "ignore:.*Define the __nice__ method for.*:Warning",
                "ignore:.*private pytest class or function.*:Warning",
            ]

        WITH_COVERAGE = 1
        if WITH_COVERAGE:
            pyproj_config['tool']['coverage'].update(
                toml.loads(
                    ub.codeblock('''
                [run]
                branch = true

                [report]
                exclude_lines =[
                    "pragma: no cover",
                    ".*  # pragma: no cover",
                    ".*  # nocover",
                    "def __repr__",
                    "raise AssertionError",
                    "raise NotImplementedError",
                    "if 0:",
                    "if trace is not None",
                    "verbose = .*",
                    "^ *raise",
                    "^ *pass *$",
                    "if _debug:",
                    "if __name__ == .__main__.:",
                    ".*if six.PY2:"
                ]

                omit=[
                    "{REPO_NAME}/__main__.py",
                    "*/setup.py"
                ]
                ''').format(REPO_NAME=self.repo_name)))

        text = toml.dumps(pyproj_config)
        return text
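The method above illustrates a convenient pattern: assemble a deeply nested configuration through AutoDict's auto-created levels, then serialize it in one call. A minimal sketch of the same idea (the keys are illustrative, not the real project config):

import ubelt as ub
import toml

# Build a nested config without pre-creating intermediate tables,
# then serialize it; toml.dumps accepts the dict-subclass AutoDict.
cfg = ub.AutoDict()
cfg['tool']['pytest']['ini_options']['addopts'] = '--xdoctest'
print(toml.dumps(cfg))
# [tool.pytest.ini_options]
# addopts = "--xdoctest"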