def check_relationships(branches):

    ancestors = {b: set() for b in branches}
    length = len(branches) * (len(branches) - 1) // 2
    for b1, b2 in ub.ProgIter(it.combinations(branches, 2), length=length):
        ret = ub.cmd('git merge-base --is-ancestor {} {}'.format(b1, b2))['ret']
        if ret == 0:
            ancestors[b1].add(b2)
        ret = ub.cmd('git merge-base --is-ancestor {} {}'.format(b2, b1))['ret']
        if ret == 0:
            ancestors[b2].add(b1)
    print('<key> is an ancestor of <value>')
    print(ub.repr2(ancestors))

    descendants = {b: set() for b in branches}
    for key, others in ancestors.items():
        for o in others:
            descendants[o].add(key)
    print('<key> descends from <value>')
    print(ub.repr2(descendants))

    import plottool as pt
    import networkx as nx
    G = nx.DiGraph()
    G.add_nodes_from(branches)
    for key, others in ancestors.items():
        for o in others:
            # G.add_edge(key, o)
            G.add_edge(o, key)

    from networkx.algorithms.connectivity.edge_augmentation import collapse
    flag = True
    G2 = G
    while flag:
        flag = False
        for u, v in list(G2.edges()):
            if G2.has_edge(v, u):
                G2 = collapse(G2, [[u, v]])

                node_relabel = ub.ddict(list)
                for old, new in G2.graph['mapping'].items():
                    node_relabel[new].append(old)
                G2 = nx.relabel_nodes(G2, {k: '\n'.join(v) for k, v in node_relabel.items()})
                flag = True
                break

    G3 = nx.transitive_reduction(G2)
    pt.show_nx(G3, arrow_width=1.5, prog='dot', layoutkw=dict(prog='dot'))
    pt.zoom_factory()
    pt.pan_factory()
    pt.plt.show()
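The mutual-ancestor collapse loop above can be done more directly with networkx's built-in condensation, which merges each strongly connected component (branches that point at the same commit) into a single node. A minimal sketch on a toy graph, assuming only networkx:

import networkx as nx

# Toy graph: b1 and b2 point at the same commit (each is an "ancestor" of the
# other), and b3 is related to both.
G = nx.DiGraph([('b1', 'b2'), ('b2', 'b1'), ('b1', 'b3'), ('b2', 'b3')])

# Condense mutually reachable nodes into one node per strongly connected
# component, then relabel each component with its member branch names.
C = nx.condensation(G)
labels = {n: '\n'.join(sorted(C.nodes[n]['members'])) for n in C.nodes}
C2 = nx.relabel_nodes(C, labels)

# A transitive reduction then gives the minimal relationship diagram.
print(sorted(nx.transitive_reduction(C2).edges()))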
Example #2
    def _configure(self):
        logger.debug(' ----- ' + self.__class__.__name__ + ' configure')
        config = tmp_smart_cast_config(self)

        logger.info('triangulator config = {}'.format(ub.repr2(config, nl=2)))
        output_fpath = config.pop('output_fpath')
        cal_fpath = config.pop('cal_fpath')
        self.triangulator = ctalgo.FishStereoMeasurments(**config)

        # Camera loading process is not working correctly.
        # Load camera calibration data here for now.
        #
        if not os.path.exists(cal_fpath):
            raise KeyError('must specify a valid camera calibration path')
        self.cal = ctalgo.StereoCalibration.from_file(cal_fpath)
        logger.info('self.cal = {!r}'.format(self.cal))

        self.headers = ['current_frame', 'fishlen', 'range', 'error', 'dz',
                        'box_pts1', 'box_pts2']
        self.output_file = open(output_fpath, 'w')
        self.output_file.write(','.join(self.headers) + '\n')
        self.output_file.close()

        self.output_file = open(output_fpath, 'a')
        self._base_configure()

        self.prog = ub.ProgIter(verbose=3)
        self.prog.begin()
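The trailing ub.ProgIter(verbose=3) is used in manual mode (no iterable), which is driven with begin/step/end; a small standalone sketch of that pattern:

import ubelt as ub

prog = ub.ProgIter(desc='processing frames', verbose=3)
prog.begin()
for _ in range(5):
    prog.step()  # advance the progress counter by one unit of work
prog.end()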
Example #3
    def _recurse(d):
        import torch
        import numpy as np
        if isinstance(d, dict):
            return ub.odict(sorted([(k, _recurse(v)) for k, v in d.items()]))

        clsname = type(d).__name__
        if 'Container' in clsname:
            meta = ub.odict(sorted([
                ('stack', d.stack),
                # ('padding_value', d.padding_value),
                # ('pad_dims', d.pad_dims),
                # ('datatype', d.datatype),
                ('cpu_only', d.cpu_only),
            ]))
            meta = ub.repr2(meta, nl=0)
            return {type(d).__name__ + meta: _recurse(d.data)}
        elif isinstance(d, list):
            return [_recurse(v) for v in d]
        elif isinstance(d, tuple):
            return tuple([_recurse(v) for v in d])
        elif isinstance(d, torch.Tensor):
            return d.shape
        elif isinstance(d, np.ndarray):
            return d.shape
        elif isinstance(d, (str, bytes)):
            return d
        elif isinstance(d, (int, float)):
            return d
        elif isinstance(d, slice):
            return d
        elif 'PolygonMasks' == clsname:
            # hack for mmdet
            return repr(d)
        elif 'BitmapMasks' == clsname:
            # hack for mmdet
            return repr(d)
        elif hasattr(d, 'shape'):
            return d.shape
        elif hasattr(d, 'items'):
            # hack for dict-like objects
            return ub.odict(sorted([(k, _recurse(v)) for k, v in d.items()]))
        else:
            raise TypeError(type(d))
Example #4
def _update_hashes():
    """
    for dev use to update hashes of the demo images

    CommandLine:
        xdoctest -m kwimage.im_demodata _update_hashes
        xdoctest -m kwimage.im_demodata _update_hashes --require-hashes
    """
    TEST_IMAGES = _TEST_IMAGES.copy()

    for key in TEST_IMAGES.keys():
        item = TEST_IMAGES[key]

        grabkw = {
            'appname': 'kwimage/demodata',
        }
        # item['sha512'] = 'not correct'

        # Wait until ubelt 9.1 is released to change hasher due to
        # issue in ub.grabdata
        # hasher_priority = ['sha512', 'sha1']
        hasher_priority = ['sha1']

        REQUIRE_EXISTING_HASH = ub.argflag('--require-hashes')
        if REQUIRE_EXISTING_HASH:
            for hasher in hasher_priority:
                if hasher in item:
                    grabkw.update({
                        'hash_prefix': item[hasher],
                        'hasher': hasher,
                    })
                    break

        if 'fname' in item:
            grabkw['fname'] = item['fname']

        item.pop('sha512', None)
        fpath = ub.grabdata(item['url'], **grabkw)
        if 'hasher' not in item:
            hasher = hasher_priority[0]
            prefix = ub.hash_file(fpath, hasher=hasher)
            item[hasher] = prefix[0:64]

        print('_TEST_IMAGES = ' + ub.repr2(TEST_IMAGES, nl=2))
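The recorded prefix above comes from ub.hash_file; a minimal, self-contained sketch of that step using a throwaway file (the file name and contents here are placeholders, not a real demo asset):

import ubelt as ub
from os.path import join

# Write a small dummy file and hash it the same way _update_hashes does.
dpath = ub.ensure_app_cache_dir('kwimage/demodata')
fpath = join(dpath, 'dummy.txt')
with open(fpath, 'w') as file:
    file.write('not a real demo image')

prefix = ub.hash_file(fpath, hasher='sha1')[0:64]
print('sha1 prefix = {}'.format(prefix))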
Example #5
def sed(regexpr,
        repl,
        dpath=None,
        include=None,
        exclude=None,
        recursive=True,
        dry=False,
        verbose=1):
    r"""
    Execute a sed on multiple files.

    Example:
        >>> from xdev.search_replace import *  # NOQA
        >>> from xdev.search_replace import _create_test_filesystem
        >>> dpath = _create_test_filesystem()['root']
        >>> sed('a', 'x', dpath=dpath, dry=True)
    """
    num_changed = 0
    num_files_checked = 0
    num_skipped = 0
    fpaths_changed = []

    fpath_generator = find(dpath=dpath,
                           type='f',
                           include=include,
                           exclude=exclude,
                           recursive=recursive)
    for fpath in fpath_generator:
        try:
            changed_lines = sedfile(fpath, regexpr, repl, dry=dry)
        except UnicodeDecodeError:
            num_skipped += 1
        else:
            num_files_checked += 1
            if len(changed_lines) > 0:
                fpaths_changed.append(fpath)
                num_changed += len(changed_lines)

    if verbose:
        print('num_files_checked = {}'.format(num_files_checked))
        print('num probable binary files skipped = {}'.format(num_skipped))
        print('fpaths_changed = {}'.format(ub.repr2(sorted(fpaths_changed))))
        print('total lines changed = {!r}'.format(num_changed))
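The per-file substitution that sedfile performs can be approximated with re.subn; a self-contained sketch of the dry-run bookkeeping on a hypothetical in-memory file:

import re

text = 'alpha\nbanana\ncherry\n'
new_text, num_hits = re.subn('a', 'x', text)
changed_lines = [
    (old, new)
    for old, new in zip(text.splitlines(), new_text.splitlines())
    if old != new
]
print('num_hits = {}'.format(num_hits))
print('changed_lines = {}'.format(changed_lines))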
Example #6
    def setup_dpath(self, train_dpath, short=True, hashed=True):
        train_info = self.train_info(train_dpath, short, hashed)

        train_dpath = ub.ensuredir(train_info['train_dpath'])

        # backwards compatibility code,
        # can eventually remove after a major version change
        if True:
            # backwards compatibility code
            if os.path.exists(
                    train_info['old_train_dpath']) and not os.path.islink(
                        train_info['old_train_dpath']):
                ub.delete(train_info['train_dpath'])
                ub.symlink(train_info['old_train_dpath'],
                           train_info['train_dpath'],
                           overwrite=True,
                           verbose=3)

        # setup symlinks
        # ub.ensuredir(dirname(train_info['link_dpath']))
        # ub.symlink(train_info['train_dpath'], train_info['link_dpath'],
        #            overwrite=True, verbose=3)

        if train_info['nice_dpath']:
            ub.ensuredir(dirname(train_info['nice_dpath']))
            ub.symlink(train_info['train_dpath'],
                       train_info['nice_dpath'],
                       overwrite=True,
                       verbose=3)

        verbose = 0
        if verbose:
            print('+=========')
            # print('hyper_strid = {!r}'.format(params.hyper_id()))
            # print('train_init_id = {!r}'.format(train_info['input_id']))
            # print('arch = {!r}'.format(train_info['arch_id']))
            # print('train_hyper_hashid = {!r}'.format(train_info['train_hyper_hashid']))
            print('hyper = {}'.format(ub.repr2(train_info['hyper'], nl=3)))
            print('train_hyper_id_brief = {!r}'.format(
                train_info['train_hyper_id_brief']))
            print('train_id = {!r}'.format(train_info['train_id']))
            print('+=========')
        return train_info
Example #7
 def text_between_lines(lnum1, lnum2, col1=0, col2=sys.maxsize - 1):
     import vim
     # lines = vim.eval('getline({}, {})'.format(lnum1, lnum2))
     lines = vim.current.buffer[lnum1 - 1:lnum2]
     lines = [ub.ensure_unicode(line) for line in lines]
     try:
         if len(lines) == 0:
             pass
         elif len(lines) == 1:
             lines[0] = lines[0][col1:col2 + 1]
         else:
             # lines[0] = lines[0][col1:]
             # lines[-1] = lines[-1][:col2 + 1]
             for i in range(len(lines)):
                 lines[i] = lines[i][col1:col2 + 1]
         text = '\n'.join(lines)
     except Exception:
         print(ub.repr2(lines))
         raise
     return text
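The vim module is only importable inside a running Vim process, so the slicing logic is easiest to sanity-check against a plain list of strings; a minimal sketch:

import ubelt as ub

buffer_lines = ['line one', 'line two', 'line three']
lnum1, lnum2, col1, col2 = 1, 3, 2, 6

lines = [ub.ensure_unicode(line) for line in buffer_lines[lnum1 - 1:lnum2]]
lines = [line[col1:col2 + 1] for line in lines]
print('\n'.join(lines))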
Example #8
    def _load_sized_image(self, index, inp_size):
        # load the raw data from VOC

        cacher = ub.Cacher('voc_img', cfgstr=ub.repr2([index, inp_size]),
                           appname='clab')
        data = cacher.tryload()
        if data is None:
            image = self._load_image(index)
            orig_size = np.array(image.shape[0:2][::-1])
            factor = inp_size / orig_size
            # squish the image into network input coordinates
            interpolation = (cv2.INTER_AREA if factor.sum() <= 2 else
                             cv2.INTER_CUBIC)
            hwc255 = cv2.resize(image, tuple(inp_size),
                                interpolation=interpolation)
            data = hwc255, orig_size, factor
            cacher.save(data)

        hwc255, orig_size, factor = data
        return hwc255, orig_size, factor
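A standalone sketch of the ub.Cacher tryload/save pattern used above, with a dummy computation standing in for the image resize:

import ubelt as ub

cacher = ub.Cacher('demo_cache', cfgstr=ub.repr2([640, 480]), appname='clab')
data = cacher.tryload()
if data is None:
    # The expensive step goes here; it is skipped on later runs with the
    # same cfgstr because the result is loaded from the cache instead.
    data = sum(range(1000))
    cacher.save(data)
print('data = {!r}'.format(data))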
Example #9
def main():
    coarse = Coarse()
    fine1 = Fine_V1()
    fine2 = Fine_V2()
    print('coarse = {!r}'.format(coarse))
    print('fine1 = {!r}'.format(fine1))
    print('fine2 = {!r}'.format(fine2))

    cls_list = [Coarse, Fine_V1, Fine_V2]

    for data_cls in cls_list:
        data = data_cls()
        print('data_cls = {!r}'.format(data_cls))

        for coerce_cls in cls_list:
            res = coerce_cls.coerce(data)
            print('    coerce_cls = {}, {}'.format(ub.repr2(coerce_cls, nl=1), res))

    Coarse.coerce(fine1)
    Coarse.coerce(fine2)
Example #10
def run_checks():
    cfg = viame_wrangler.config.WrangleConfig({
        'annots':
        ub.truepath('~/data/viame-challenge-2018/phase1-annotations/*/*.json')
    })
    fpaths = list(glob.glob(cfg.annots))
    print('fpaths = {}'.format(ub.repr2(fpaths)))

    for fpath in fpaths:
        dset_name = os.path.basename(fpath).split('-')[0].split('.')[0]
        dset = CocoDataset(fpath, img_root=cfg.img_root, tag=dset_name)

        assert not dset.missing_images()
        assert not dset._find_bad_annotations()
        assert all([
            img['has_annots'] in [True, False, None]
            for img in dset.imgs.values()
        ])
        if 'original' not in dset_name:
            assert len(dset.cats) in [106, 21]
Example #11
    def _cmd(repo, command, cwd=ub.NoParam, verbose=ub.NoParam):
        if verbose is ub.NoParam:
            verbose = repo.verbose
        if cwd is ub.NoParam:
            cwd = repo.dpath

        repo._logged_cmds.append((command, cwd))
        repo.debug('Run {!r} in {!r}'.format(command, cwd))

        info = ub.cmd(command, cwd=cwd, verbose=verbose)

        if verbose:
            if info['out'].strip():
                repo.info(info['out'])

            if info['err'].strip():
                repo.debug(info['err'])

        if info['ret'] != 0:
            raise ShellException(ub.repr2(info))
        return info
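ub.cmd returns a dictionary with 'out', 'err', and 'ret' keys, which is what the error handling above relies on; a minimal sketch (assuming a POSIX echo is on the PATH):

import ubelt as ub

info = ub.cmd('echo hello', verbose=0)
print('ret = {!r}'.format(info['ret']))
print('out = {!r}'.format(info['out']))
if info['ret'] != 0:
    raise RuntimeError(ub.repr2(info))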
Example #12
def format_quotes_in_file(fpath, diff=True, write=False, verbose=3):
    """
    Autoformat quotation marks in Python files

    Args:
        fpath (str): The file to format
        diff (bool): if True write the diff between old and new to stdout
        write (bool): if True write the modifications to disk
        verbose (int): verbosity level
    """
    if verbose > 1:
        print('reading fpath = {!r}'.format(fpath))

    with open(fpath, 'r') as file:
        text = file.read()

    new_text = format_quotes_in_text(text)

    difftext = xdev.difftext(text, new_text, context_lines=3, colored=True)
    did_anything = bool(difftext.strip())

    if verbose > 1:
        if not did_anything:
            print('No difference!')

    if diff:
        print(difftext)

    if write:
        # Write the file
        if did_anything:
            if verbose > 1:
                print('writing to fpath = {}'.format(ub.repr2(fpath, nl=1)))
            with open(fpath, 'w') as file:
                file.write(new_text)
    else:
        if not diff:
            if verbose > 1:
                print('dump formatted text to stdout')
            print(new_text)
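xdev.difftext is a small unified-diff helper, so the did_anything check can be exercised on two strings directly; a quick sketch:

import xdev

old = "x = 'one'\ny = 'two'\n"
new = 'x = "one"\ny = "two"\n'
difftext = xdev.difftext(old, new, context_lines=3, colored=False)
print(difftext)
print('did_anything = {!r}'.format(bool(difftext.strip())))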
Example #13
 def _print_previous_loop_statistics(infr, count):
     # Print stats about what happened in this loop
     history = infr.metrics_list[-count:]
     recover_blocks = ut.group_items([
         (k, sum(1 for i in g))
         for k, g in it.groupby(ut.take_column(history, 'recovering'))
     ]).get(True, [])
     infr.print(
         ('Recovery mode entered {} times, '
          'made {} recovery decisions.').format(len(recover_blocks),
                                                sum(recover_blocks)),
         color='green',
     )
     testaction_hist = ut.dict_hist(ut.take_column(history, 'test_action'))
     infr.print(
         'Test Action Histogram: {}'.format(
             ut.repr4(testaction_hist, si=True)),
         color='yellow',
     )
     if infr.params['inference.enabled']:
         action_hist = ut.dict_hist(
             ut.emap(frozenset, ut.take_column(history, 'action')))
         infr.print(
             'Inference Action Histogram: {}'.format(
                 ub.repr2(action_hist, si=True)),
             color='yellow',
         )
     infr.print(
         'Decision Histogram: {}'.format(
             ut.repr2(ut.dict_hist(ut.take_column(history,
                                                  'pred_decision')),
                      si=True)),
         color='yellow',
     )
     infr.print(
         'User Histogram: {}'.format(
             ut.repr2(ut.dict_hist(ut.take_column(history, 'user_id')),
                      si=True)),
         color='yellow',
     )
Example #14
def test_incomp_inference():
    infr = demo.demodata_infr(num_pccs=0)
    # Make 2 consistent and 2 inconsistent CCs
    infr.add_feedback((1, 2), POSTV)
    infr.add_feedback((2, 3), POSTV)
    infr.add_feedback((3, 4), POSTV)
    infr.add_feedback((4, 1), POSTV)
    # -----
    infr.add_feedback((11, 12), POSTV)
    infr.add_feedback((12, 13), POSTV)
    infr.add_feedback((13, 14), POSTV)
    infr.add_feedback((14, 11), POSTV)
    infr.add_feedback((12, 14), NEGTV)
    # -----
    infr.add_feedback((21, 22), POSTV)
    infr.add_feedback((22, 23), POSTV)
    infr.add_feedback((23, 21), NEGTV)
    # -----
    infr.add_feedback((31, 32), POSTV)
    infr.add_feedback((32, 33), POSTV)
    infr.add_feedback((33, 31), POSTV)
    infr.add_feedback((2, 32), NEGTV)
    infr.add_feedback((3, 33), NEGTV)
    infr.add_feedback((12, 21), NEGTV)
    # -----
    # Incomparable within CCs
    print('==========================')
    infr.add_feedback((1, 3), INCMP)
    infr.add_feedback((1, 4), INCMP)
    infr.add_feedback((1, 2), INCMP)
    infr.add_feedback((11, 13), INCMP)
    infr.add_feedback((11, 14), INCMP)
    infr.add_feedback((11, 12), INCMP)
    infr.add_feedback((1, 31), INCMP)
    infr.add_feedback((2, 32), INCMP)
    infr.add_feedback((12, 21), INCMP)
    infr.add_feedback((23, 21), INCMP)
    infr.add_feedback((12, 14), INCMP)
    print('Final state:')
    print(ub.repr2(sorted(infr.gen_edge_attrs('decision'))))
Example #15
    def fit2(harn, prevstate_fpath=None, dry=False):
        from pysseg.backend.find_segnet_caffe import import_segnet_caffe
        from pysseg.backend import iface_caffe as iface
        caffe = import_segnet_caffe(gpu_num=harn.gpu_num)

        harn.prepare_solver()

        solver_info = iface.parse_solver_info(harn.solver_fpath)
        snapshot_iters = solver_info['snapshot']

        # Assuming that the solver .prototxt has already been configured including
        # the corresponding training and testing network definitions (as .prototxt).
        solver = caffe.SGDSolver(harn.solver_fpath)

        pretrained = harn.init_pretrained_fpath

        prev_iter = 0
        if prevstate_fpath is not None:
            print('Restoring State from {}'.format(prevstate_fpath))
            solver.restore(prevstate_fpath)
            prev_iter = iface.snapshot_iterno(prevstate_fpath)
        elif pretrained is not None:
            # https://github.com/BVLC/caffe/issues/3336
            print(
                'Loading pretrained model weights from {}'.format(pretrained))
            solver.net.copy_from(pretrained)

        # net = self.solver.net
        # Do iterations over batches
        # prev = None
        n_steps = solver_info['display']
        bx = prev_iter
        while bx < solver_info['max_iter']:
            # Run until we can produce a snapshot
            info = solver.step(n_steps)
            print('bx = {!r}'.format(bx))
            print('step info = {}'.format(ub.repr2(info)))
            bx += n_steps
            yield bx
Example #16
def main(cmdline=True, **kw):
    config = ConvertConfig(default=kw, cmdline=cmdline)
    print('config = {}'.format(ub.repr2(dict(config), nl=1)))
    # TODO: ability to map image ids to agree with another coco file
    csv_fpaths = config['src']
    new_root = config['new_root']
    old_root = config['old_root']
    images = config['images']
    dst_fpath = config['dst']

    dst_root = dirname(dst_fpath)
    dset = coco_from_viame_csv(csv_fpaths, images)
    dset.fpath = dst_fpath
    dset.img_root = dst_root
    try:
        dset.reroot(new_root=new_root, old_root=old_root, check=1)
    except Exception as ex:
        print('Reroot failed')
        print('ex = {!r}'.format(ex))

    print('dset.fpath = {!r}'.format(dset.fpath))
    dset.dump(dset.fpath, newlines=True)
Example #17
def extract_ggr_pccs(coco_dset):
    import graphid
    graph = graphid.api.GraphID()
    graph.add_annots_from(coco_dset.annots().aids)
    infr = graph.infr
    infr.params['inference.enabled'] = False
    all_aids = list(coco_dset.annots().aids)
    aids_set = set(all_aids)

    for aid1 in ub.ProgIter(all_aids, desc='construct graph'):
        annot = coco_dset.anns[aid1]

        # resolve duplicate reviews (take the last one)
        aid2_to_decision = {}
        for aid2, decision in annot['review_ids']:
            aid2_to_decision[aid2] = decision

        for aid2, decision in aid2_to_decision.items():
            if aid2 not in aids_set:
                # hack because data is setup wrong
                continue
            edge = (aid1, aid2)
            if decision == 'positive':
                infr.add_feedback(edge, evidence_decision=graphid.core.POSTV)
            elif decision == 'negative':
                infr.add_feedback(edge, evidence_decision=graphid.core.NEGTV)
            elif decision == 'incomparable':
                infr.add_feedback(edge, evidence_decision=graphid.core.INCMP)
            else:
                raise KeyError(decision)
    infr.params['inference.enabled'] = True
    infr.apply_nondynamic_update()
    print('status = {}'.format(ub.repr2(infr.status(True))))
    pccs = list(map(frozenset, infr.positive_components()))
    for pcc in pccs:
        for aid in pcc:
            print('aid = {!r}'.format(aid))
            assert aid in coco_dset.anns
    return pccs
Example #18
def test_negative_newlines():
    import ubelt as ub
    dict_ = {
        'k1': [[1, 2, 3], [4, 5, 6]],
        'k2': [[[1, 2, [1, 2, 3]], [1, 2, 3], 3], [4, 5, 6]],
        'k3': [1, 2, 3],
        'k4': [[[1, 2, 3], 2, 3], [4, 5, 6]],
    }
    text = ub.repr2(dict_, nl=-1)
    print(text)
    assert text == ub.codeblock('''
        {
            'k1': [
                [1, 2, 3],
                [4, 5, 6]
            ],
            'k2': [
                [
                    [
                        1,
                        2,
                        [1, 2, 3]
                    ],
                    [1, 2, 3],
                    3
                ],
                [4, 5, 6]
            ],
            'k3': [1, 2, 3],
            'k4': [
                [
                    [1, 2, 3],
                    2,
                    3
                ],
                [4, 5, 6]
            ]
        }
        ''')
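For contrast, non-negative nl values insert newlines counting from the top of the nesting rather than from the leaves; a quick sketch with a smaller dictionary:

import ubelt as ub

dict_ = {
    'k1': [[1, 2, 3], [4, 5, 6]],
    'k3': [1, 2, 3],
}
print(ub.repr2(dict_, nl=0))  # everything on a single line
print(ub.repr2(dict_, nl=1))  # one line per top-level key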
Example #19
    def refresh_candidate_edges(infr):
        """
        Search for candidate edges.
        Assign each edge a priority and add to queue.
        """
        infr.print('refresh_candidate_edges', 1)
        infr.assert_consistency_invariant()

        if infr.ibs is not None:
            candidate_edges = infr.find_lnbnn_candidate_edges()
        elif hasattr(infr, 'dummy_verif'):
            infr.print('Searching for dummy candidates')
            infr.print('dummy vsone params =' +
                       ub.repr2(infr.dummy_verif.dummy_params, nl=1, si=True))
            ranks_top = infr.params['ranking.ntop']
            candidate_edges = infr.dummy_verif.find_candidate_edges(
                K=ranks_top)
        else:
            raise Exception(
                'No method available to search for candidate edges')
        infr.add_candidate_edges(candidate_edges)
        infr.assert_consistency_invariant()
Example #20
    def main(cls, cmdline=True, **kw):
        """
        Example:
            >>> kw = {'src': ['special:shapes8', 'special:shapes1']}
            >>> cmdline = False
            >>> cls = CocoUnionCLI
            >>> cls.main(cmdline, **kw)
        """
        import kwcoco
        config = cls.CLIConfig(kw, cmdline=cmdline)
        print('config = {}'.format(ub.repr2(dict(config), nl=1)))

        if config['src'] is None:
            raise Exception('must specify sources: {}'.format(config['src']))

        if len(config['src']) == 0:
            raise ValueError('Must provide at least one input dataset')

        datasets = []
        for fpath in ub.ProgIter(config['src'], desc='reading datasets',
                                 verbose=1):
            print('reading fpath = {!r}'.format(fpath))
            dset = kwcoco.CocoDataset.coerce(fpath)

            if config['absolute']:
                dset.reroot(absolute=True)

            datasets.append(dset)

        combo = kwcoco.CocoDataset.union(*datasets)

        out_fpath = config['dst']
        out_dpath = dirname(out_fpath)
        if out_dpath:
            ub.ensuredir(out_dpath)
        print('Writing to out_fpath = {!r}'.format(out_fpath))
        combo.fpath = out_fpath
        combo.dump(combo.fpath, newlines=True)
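Under the hood the CLI reduces to kwcoco.CocoDataset.union; a minimal sketch using the toy datasets mentioned in the docstring, assuming kwcoco is installed:

import kwcoco

dset1 = kwcoco.CocoDataset.demo('shapes8')
dset2 = kwcoco.CocoDataset.demo('shapes1')
combo = kwcoco.CocoDataset.union(dset1, dset2)
print('combined images = {}'.format(len(combo.imgs)))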
Example #21
    def fit(self, prevstate_fpath):
        from pysseg.backend.find_segnet_caffe import import_segnet_caffe
        from pysseg.backend import iface_caffe as iface
        harn = self.harn
        caffe = import_segnet_caffe(gpu_num=harn.gpu_num)

        harn.prepare_solver()

        solver_info = iface.parse_solver_info(harn.solver_fpath)

        model_fpath = solver_info['train_model_path']
        model_info = iface.parse_model_info(model_fpath)

        self.solver = caffe.SGDSolver(harn.solver_fpath)

        pretrained = harn.init_pretrained_fpath

        if prevstate_fpath is not None:
            print('Restoring State from {}'.format(prevstate_fpath))
            self.solver.restore(prevstate_fpath)
        elif pretrained is not None:
            print(
                'Loading pretrained model weights from {}'.format(pretrained))
            self.solver.net.copy_from(pretrained)

        layers = model_info['layer']
        start_layer = layers[1]['name']

        # Do iterations over batches
        for bx in range(solver_info['max_iter']):
            self.load_batch_data(bx)
            outputs = self.solver.net.forward(start=start_layer)
            import ubelt as ub
            print(ub.repr2(outputs))
            self.solver.net.backward()
            # need to manually update weights. bleh...
            self.update(bx)
Example #22
def read_raw_categories():
    cfg = viame_wrangler.config.WrangleConfig()
    img_root = cfg.img_root
    annot_dir = cfg.annot_dir
    fpaths = list(glob.glob(join(annot_dir, '*.json')))

    print('Reading')
    dsets = [
        coco_wrangler.CocoDataset(fpath, autobuild=True) for fpath in fpaths
    ]

    if 0:
        for dset in dsets:
            print(dset.img_root)
            # print(ub.repr2([d['name'] for d in dset.cats.values()]))
            # print(ub.repr2(dset.basic_stats()))
            print(ub.repr2(dset.category_annotation_frequency()))

    print('Merging')
    merged = coco_wrangler.CocoDataset.union(*dsets)
    merged.img_root = img_root
    # merged._run_fixes()
    # print(ub.repr2(merged.category_annotation_frequency()))

    tree0 = viame_wrangler.lifetree.LifeCatalog(autoparse=True)
    mapper = viame_wrangler.cats_2018.make_raw_category_mapping(merged, tree0)
    merged.rename_categories(mapper)

    print('Building')
    node_to_freq = merged.category_annotation_frequency()
    for node in tree0.G.nodes():
        tree0.G.nodes[node]['freq'] = node_to_freq.get(node, 0)
    tree0.accumulate_frequencies()
    tree0.remove_unsupported_nodes()
    if DRAW:
        tree0.draw('c0-fine-classes-raw.png')
    return tree0, merged, mapper
Example #23
def description():
    import bs4
    import requests
    resp = requests.get(
        'https://gwg.nga.mil/ntb/baseline/software/testfile/Nitfv2_1/scen_2_1.html',
        verify=False)
    soup = bs4.BeautifulSoup(resp.text, 'html.parser')
    tables = soup.findAll('table')

    names_noext = [n.split('.')[0] for n in NITF_TEST_NAMES]

    name = None
    name_to_desc = {}

    for tab in tables:
        for td in tab.findAll('td'):
            if name is not None:
                desc = td.text.strip()
                name_to_desc[name] = desc.replace('\r', '').replace(
                    '\n', '').replace('\t', '').replace('\xa0', '')
                name = None
            elif td.text.strip() in names_noext:
                name = td.text.strip()
    print(ub.repr2(name_to_desc, nl=1))
Example #24
def main():
    import timerit
    import ubelt as ub
    import random
    import string

    # expected = "58178059833426840615453390153965"
    length = 20
    expected = ''.join(random.choices(string.printable, k=length))

    def flip_char(text, pos):
        old = text[pos]
        new = random.choice(string.printable)
        while new == old:
            new = random.choice(string.printable)
        before = text[:pos]
        after = text[pos + 1:]
        return before + new + after

    variants = dict(
        ne_first=flip_char(expected, 0),
        ne_mid=flip_char(expected, length // 2),
        ne_last=flip_char(expected, length - 1),
        too_long='F' * len(expected) * 10,
        too_short='F',
        correct=expected,
    )

    ti = timerit.Timerit(10000000, bestof=10, verbose=2)

    for key, value in variants.items():
        for _ in ti.reset(key):
            value == expected

    print('ti.rankings = {}'.format(
        ub.repr2(ti.rankings['min'], nl=2, align=':')))
Example #25
    def closure_(obj, name):
        # TODO: handle assignments
        if name in visitor.import_lines:
            # Check and see if the name was imported from elsewhere
            return 'import', visitor.import_lines[name]
        elif name in visitor.assignments:
            type_, value = visitor.assignments[name]
            if type_ == 'node':
                # TODO, need to handle non-simple expressions
                return type_, '{} = {}'.format(name, value.value.id)
            else:
                # when value is a dict we need to be sure it is
                # extracted in the same order as we see it
                return type_, '{} = {}'.format(name, ub.repr2(value))
        elif isinstance(obj, types.FunctionType):
            if obj.__module__ == module_name:
                sourcecode = inspect.getsource(obj)
                return 'code', sourcecode
        elif isinstance(obj, type):
            if obj.__module__ == module_name:
                sourcecode = inspect.getsource(obj)
                return 'code', sourcecode

        raise NotImplementedError(str(obj) + ' ' + str(name))
Example #26
def test_hash_data():
    counter = [0]
    failed = []
    def check_hash(want, input_):
        count = counter[0] = counter[0] + 1
        got = ub.hash_data(input_)
        # assert got.startswith(want), 'want={}, got={}'.format(want, got)
        print('check_hash({!r}, {!r})'.format(got, input_))
        if want is not None and not got.startswith(want):
            item = (got, input_, count, want)
            failed.append(item)

    check_hash('egexcbwgdtmjrzafljtjwqpgfhmfetjs', '1')
    check_hash('hjvebphzylxgtxncyphclsjglvmstsbq', ['1'])
    check_hash('hjvebphzylxgtxncyphclsjglvmstsbq', tuple(['1']))
    check_hash('ftzqivzayzivmobwymodjnnzzxzrvvjz', b'12')
    check_hash('jiwjkgkffldfoysfqblsemzkailyridf', [b'1', b'2'])
    check_hash('foevisahdffoxfasicvyklrmuuwqnfcc', ['1', '2', '3'])
    check_hash('rkcnfxkjwkrfejhbpcpopmyubhbvonkt', ['1', np.array([1, 2, 3], dtype=np.int64), '3'])
    check_hash('lxssoxdkstvccsyqaybaokehclyctgmn', '123')
    check_hash('fpvptydigvgjimbzadztgpvjpqrevwcq', zip([1, 2, 3], [4, 5, 6]))

    print(ub.repr2(failed, nl=1))
    assert len(failed) == 0
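The digests above depend on ubelt's default hasher and alphabet, so pinning those arguments explicitly keeps a check like this reproducible across configurations; a small sketch:

import ubelt as ub

got = ub.hash_data(['1', '2', '3'], hasher='sha512', base='abc')
print('got = {!r}'.format(got))
assert got == ub.hash_data(['1', '2', '3'], hasher='sha512', base='abc')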
Example #27
def benchmark_hash_data():
    """
    CommandLine:
        python ~/code/ubelt/dev/bench_hash.py --convert=True --show
        python ~/code/ubelt/dev/bench_hash.py --convert=False --show
    """
    import ubelt as ub
    #ITEM = 'JUST A STRING' * 100
    ITEM = [0, 1, 'a', 'b', ['JUST A STRING'] * 4]
    HASHERS = ['sha1', 'sha512', 'xxh32', 'xxh64', 'blake3']
    scales = list(range(5, 13))
    results = ub.AutoDict()
    # Using json conversion is faster, or at least as fast, in most cases
    # xxhash is also significantly faster than sha512
    convert = ub.argval('--convert', default='True').lower() == 'true'
    print('convert = {!r}'.format(convert))
    ti = ub.Timerit(9, bestof=3, verbose=1, unit='ms')
    for s in ub.ProgIter(scales, desc='benchmark', verbose=3):
        N = 2**s
        print(' --- s={s}, N={N} --- '.format(s=s, N=N))
        data = [ITEM] * N
        for hasher in HASHERS:
            for timer in ti.reset(hasher):
                ub.hash_data(data, hasher=hasher, convert=convert)
            results[hasher].update({N: ti.mean()})
        col = {h: results[h][N] for h in HASHERS}
        sortx = ub.argsort(col)
        ranking = ub.dict_subset(col, sortx)
        print('walltime: ' + ub.repr2(ranking, precision=9, nl=0))
        best = next(iter(ranking))
        #pairs = list(ub.iter_window( 2))
        pairs = [(k, best) for k in ranking]
        ratios = [ranking[k1] / ranking[k2] for k1, k2 in pairs]
        nicekeys = ['{}/{}'.format(k1, k2) for k1, k2 in pairs]
        relratios = ub.odict(zip(nicekeys, ratios))
        print('speedup: ' + ub.repr2(relratios, precision=4, nl=0))
    # xdoc +REQUIRES(--show)
    # import pytest
    # pytest.skip()
    import pandas as pd
    df = pd.DataFrame.from_dict(results)
    df.columns.name = 'hasher'
    df.index.name = 'N'
    ratios = df.copy().drop(columns=df.columns)
    for k1, k2 in [('sha512', 'xxh32'), ('sha1', 'xxh32'), ('xxh64', 'xxh32')]:
        ratios['{}/{}'.format(k1, k2)] = df[k1] / df[k2]
    print()
    print('Seconds per iteration')
    print(df.to_string(float_format='%.9f'))
    print()
    print('Ratios of seconds')
    print(ratios.to_string(float_format='%.2f'))
    print()
    print('Average Ratio (over all N)')
    print('convert = {!r}'.format(convert))
    print(ratios.mean().sort_values())
    if ub.argflag('--show'):
        import kwplot
        kwplot.autompl()
        xdata = sorted(ub.peek(results.values()).keys())
        ydata = ub.map_vals(lambda d: [d[x] for x in xdata], results)
        kwplot.multi_plot(xdata,
                          ydata,
                          xlabel='N',
                          ylabel='seconds',
                          title='convert = {}'.format(convert))
        kwplot.show_if_requested()
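ub.Timerit on its own follows the same reset/loop pattern as the benchmark above; a tiny standalone sketch:

import ubelt as ub

ti = ub.Timerit(100, bestof=10, verbose=1, unit='us')
for timer in ti.reset('list-comprehension'):
    with timer:
        [i ** 2 for i in range(100)]
print('mean seconds = {:.9f}'.format(ti.mean()))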
Example #28
    def compute_likely_overlaps(pfiles1, pfiles2):
        step_idx1 = ProgressiveFile.compatible_step_idx(pfiles1)
        step_idx2 = ProgressiveFile.compatible_step_idx(pfiles2)
        step_idx = min(step_idx1, step_idx2)
        grouped1 = ProgressiveFile.group_pfiles(pfiles1, step_idx=step_idx)
        grouped2 = ProgressiveFile.group_pfiles(pfiles2, step_idx=step_idx)

        thresh = 0.2
        verbose = 1

        # TODO: it would be nice if we didn't have to care about internal
        # deduplication when we attempt to find cross-set overlaps
        dups1 = ProgressiveFile.likely_duplicates(inv1.pfiles,
                                                  thresh=thresh,
                                                  verbose=verbose)
        dups2 = ProgressiveFile.likely_duplicates(inv2.pfiles,
                                                  thresh=thresh,
                                                  verbose=verbose)

        pfiles = inv1.pfiles + inv2.pfiles
        dups3 = ProgressiveFile.likely_duplicates(pfiles,
                                                  thresh=thresh,
                                                  verbose=verbose)

        only_on_inv2 = {}
        for key, group in dups3.items():
            if not any(
                    item.fpath.startswith(inv1.root_fpath) for item in group):
                only_on_inv2[key] = group

        for p1 in inv1.pfiles:
            if 'Chase HQ 2 (JUE) [!].zip' in p1.fpath:
                break

        for p2 in inv2.pfiles:
            if 'Chase HQ 2 (JUE) [!].zip' in p2.fpath:
                break

        look = list(ub.flatten(only_on_inv2.values()))
        takealook = sorted([p.fpath for p in look])
        print('takealook = {}'.format(ub.repr2(takealook, nl=1)))

        keys1 = set(grouped1)
        keys2 = set(grouped2)

        missing_keys2 = keys2 - keys1
        missing_groups2 = ub.dict_subset(grouped2, missing_keys2)

        missing_fpaths2 = []
        for key, values in missing_groups2.items():
            print('key = {!r}'.format(key))
            print('values = {}'.format(ub.repr2(values, nl=1)))
            missing_fpaths2.extend(values)

        missing_fpaths2 = sorted([p.fpath for p in missing_fpaths2])
        print('missing_fpaths2 = {}'.format(ub.repr2(missing_fpaths2, nl=1)))
        # pass

        import xdev
        set_overlaps = xdev.set_overlaps(keys1, keys2)
        print('set_overlaps = {}'.format(ub.repr2(set_overlaps, nl=1)))
Example #29
def main():
    from sqlalchemy.orm import sessionmaker
    import ubelt as ub
    from sqlalchemy import create_engine

    engine = create_engine('sqlite:///:memory:')
    Base.metadata.create_all(engine)
    DBSession = sessionmaker(bind=engine)
    session = DBSession()

    session.add(Annotation(id=1, image_id=1, bbox=[13, 13, 28, 15]))
    session.add(Annotation(id=2, image_id=2, bbox=[13, 13, 28, 15]))
    session.add(Annotation(id=3, image_id=2, bbox=[18, 10, 25, 17]))
    session.add(Annotation(id=4, image_id=4, bbox=[13, 10, 25, 17]))

    session.add(Image(id=1, file_name='img1.jpg'))
    session.add(Image(id=2, file_name='img2.jpg'))
    session.add(Image(id=3, file_name='img3.jpg'))
    session.add(Image(id=4, file_name='img4.jpg'))
    session.add(Image(id=5, file_name='img5.jpg'))
    session.commit()

    import pandas as pd
    print(pd.read_sql_table('annotations', con=engine))
    print(pd.read_sql_table('images', con=engine))

    # Args:
    parent_keyattr = Image.id
    keyattr = Annotation.image_id
    valattr = Annotation.id
    """

    -----------

    ## A Correct Solution With Raw SQL ##

    """

    ###
    # Raw SQLite: Does exactly what I want
    ###
    parent_table = parent_keyattr.class_.__tablename__
    table = keyattr.class_.__tablename__
    parent_keycol = parent_table + '.' + parent_keyattr.name
    keycol = table + '.' + keyattr.name
    valcol = table + '.' + valattr.name
    expr = ('SELECT {parent_keycol}, json_group_array({valcol}) '
            'FROM {parent_table} '
            'LEFT OUTER JOIN {table} ON {keycol} = {parent_keycol} '
            'GROUP BY {parent_keycol} ORDER BY {parent_keycol}').format(
                parent_table=parent_table,
                table=table,
                parent_keycol=parent_keycol,
                keycol=keycol,
                valcol=valcol,
            )
    print(expr)
    import json
    result = session.execute(expr)
    final = []
    for row in result.fetchall():
        key = row[0]
        group = json.loads(row[1])
        if group[0] is None:
            group = set()
        else:
            group = set(group)
        tup = (key, group)
        final.append(tup)

    print('final = {}'.format(ub.repr2(final, nl=1)))
    """
    This expands out to:

        SELECT images.id, json_group_array(annotations.id)
        FROM images
        LEFT OUTER JOIN annotations
        ON annotations.image_id = images.id
        GROUP BY images.id ORDER BY images.id

    and with some post-processing on row[1] returns:

    ```
    final = [
        (1, {1}),
        (2, {2, 3}),
        (3, {}),
        (4, {4}),
        (5, {}),
    ]
    ```

    The images 3 and 5 without annotations are correctly accounted for.

    But I'm having a very hard time figuring out how to do the equivalent
    behavior with SQLAlchemy. I've tried several variations:


    -----------

    ## An Almost Correct Solution With SQLAlchemy ##

    """

    # SQLite Alchemy
    ###
    # VERSION 1: Does not correctly return null for images without annotations
    ###

    grouped_vals = sqlalchemy.func.json_group_array(valattr, type_=JSON)
    parent_table = parent_keyattr.class_.__table__
    table = keyattr.class_.__table__
    # TODO: This might have to be different for PostgreSQL
    grouped_vals = sqlalchemy.func.json_group_array(valattr, type_=JSON)
    query = (session.query(keyattr, grouped_vals).outerjoin(
        parent_table, parent_keyattr == keyattr).group_by(
            parent_keyattr).order_by(parent_keyattr))
    print(query.statement)

    final = []
    for row in query.all():
        key = row[0]
        group = row[1]
        if group[0] is None:
            group = set()
        else:
            group = set(group)
        tup = (key, group)
        final.append(tup)
    print('final = {}'.format(ub.repr2(final, nl=1)))
    """
    This expands to:
        SELECT annotations.image_id, json_group_array(annotations.id) AS json_group_array_1
        FROM annotations LEFT OUTER JOIN images
        ON images.id = annotations.image_id
        GROUP BY images.id ORDER BY images.id

    And returns:

    ```
    final = [
        (1, {1}),
        (2, {2, 3}),
        (4, {4}),
    ]
    ```

    which is missing the values for images 3 and 5. This is because I queried
    on `keyattr` (annotations.image_id) instead of `parent_keyattr` (images.id).

    -----------

    ## An Attempt To Fix The Issue ##

    But if I try to use parent_keyattr I get an error when I try the outer join
    """

    query = (session.query(parent_keyattr,
                           grouped_vals).outerjoin(parent_table,
                                                   parent_keyattr == keyattr))
    """

    Looking at:

    `print(session.query(parent_keyattr, grouped_vals))`

    this makes sense because I get:

    ```
    SELECT images.id AS images_id, json_group_array(annotations.id) AS json_group_array_1
    FROM images, annotations
    ```

    The issue is that both images and annotations are in the FROM statement.

    I'm not sure if there is a way to force `grouped_vals` to think its FROM
    statement targets the annotations table. I've tried several variants but have
    had little luck so far.

    -----------

    ## A Better But Not Perfect Fix ##


    The best luck I've had was by wrapping `grouped_vals` in a `str`, which does
    let me get exactly what I want, but I lose the nice `type_=JSON` that
    automatically took care of converting the result to json for me.

    """

    query = (session.query(parent_keyattr, str(grouped_vals)).outerjoin(
        table, parent_keyattr == keyattr).group_by(parent_keyattr).order_by(
            parent_keyattr))
    print(query.statement)

    final = []
    for row in query.all():
        key = row[0]
        group = json.loads(row[1])
        if group[0] is None:
            group = set()
        else:
            group = set(group)
        tup = (key, group)
        final.append(tup)
    print('final = {}'.format(ub.repr2(final, nl=1)))
    """

    I would like to know if there is a way to force `grouped_vals` to target the
    "images" table instead of "annotations", so I don't have to wrap it in a
    string, and I don't have to manually convert to JSON.

    """

    print(
        session.query(parent_keyattr.expression,
                      grouped_vals).select_from(parent_table))

    subq = session.query(parent_keyattr.expression, grouped_vals).subquery()
    y = subq.outerjoin(table, parent_keyattr == keyattr).select()
    z = y.group_by(parent_keyattr).order_by(parent_keyattr)
    print(z)

    z.all()
    print(subq)
    print(subq.outerjoin(table, parent_keyattr == keyattr))

    x = session.query(
        parent_keyattr.expression,
        grouped_vals.select().select_from(parent_table)).subquery()
    x.outerjoin(table, parent_keyattr == keyattr)
    # .group_by(parent_keyattr).order_by(parent_keyattr)
    print(x)

    z = session.query(parent_keyattr).outerjoin(table,
                                                parent_keyattr == keyattr)
    z = session.query(parent_keyattr).outerjoin(table,
                                                parent_keyattr == keyattr)
    z.all()

    query = (session.query(parent_keyattr, str(grouped_vals)).outerjoin(
        table, parent_keyattr == keyattr).group_by(parent_keyattr).order_by(
            parent_keyattr))
    print(query.statement)

    ojoin = parent_table.outerjoin(table, parent_keyattr == keyattr)
    z = ojoin.select()
    sub = session.query(z).subquery()
    print(sub)

    print(session.query(z))
    # .all()

    z = ojoin.select()
    session.execute(z).fetchall()

    sel = sqlalchemy.select([parent_keyattr, grouped_vals]).select_from()
    print(sel)
    session.execute(sel)
    """
            jon/viame/master jon/viame/next master dev/tracking-framework
            viame/master viame/query-wip viame/tracking-work
            viame/master-no-pybind viame/master-w-pytorch
            "
    """

    # branches = [x.strip() for x in '''
    #             jon/viame/master
    #             jon/viame/next
    #             master
    #             dev/tracking-framework
    #             viame/master
    #             viame/query-wip
    #             viame/tracking-work
    #             viame/master-no-pybind
    #             viame/master-w-pytorch
    #             '''.splitlines() if x.strip()]

    import sys
    argv = sys.argv[1:]

    branches = []
    for item in argv:
        for sub in item.split():
            sub = sub.strip()
            if sub:
                branches.append(sub)

    print('branches = {}'.format(ub.repr2(branches)))
    check_relationships(branches)
Example #31
def main():
    # TODO: progressive hashing data structure
    inv1 = Inventory('/media/joncrall/raid/', blocklist)
    inv2 = Inventory('/media/joncrall/media', blocklist)

    # inv1 = Inventory('/media/joncrall/raid/Applications/NotGames', blocklist)
    # inv2 = Inventory('/media/joncrall/media/Applications/NotGames', blocklist)
    # inv1 = Inventory('/media/joncrall/raid/Applications', blocklist)
    # inv2 = Inventory('/media/joncrall/media/Applications', blocklist)

    self = inv1  # NOQA

    inv1.build()
    inv2.build()

    thresh = {
        'frac': 0.5,
        'byte':
        100 * int(2**20)  # only use the first few mb to determine overlap
    }
    verbose = 1
    pfiles1 = inv1.pfiles
    pfiles2 = inv2.pfiles
    overlap, only1, only2 = ProgressiveFile.likely_overlaps(pfiles1,
                                                            pfiles2,
                                                            thresh=thresh,
                                                            verbose=verbose)

    stats = {
        'overlap': len(overlap),
        'only1': len(only1),
        'only2': len(only2),
    }
    print('stats = {}'.format(ub.repr2(stats, nl=1)))
    only2_list = sorted([p.fpath for group in only2.values() for p in group])
    print('only2_list = {}'.format(ub.repr2(only2_list, nl=1)))
    print('stats = {}'.format(ub.repr2(stats, nl=1)))

    # for pfile in inv1.pfiles:
    #     pfile._check_integrity()

    import numpy as np
    mb_read = np.array([
        pfile._parts[-1][1] / int(2**20) for pfile in ub.ProgIter(inv2.pfiles)
    ])
    mb_read.max()
    mb_read.min()

    # Build all hashes up to a reasonable degree
    inv1.build_hashes(max_workers=0)

    maybe_dups = inv1.likely_duplicates(thresh=0.2)
    len(maybe_dups)

    maybe_dups = ub.sorted_keys(maybe_dups, key=lambda x: x[2])

    import networkx as nx
    import itertools as it
    # Check which directories are most likely to be duplicates
    graph = nx.Graph()

    for key, group in ub.ProgIter(maybe_dups.items(),
                                  total=len(maybe_dups),
                                  desc='build dup dir graph'):
        if key[0] == '':
            continue
        dpaths = [dirname(pfile.fpath) for pfile in group]
        for d1, d2 in it.combinations(dpaths, 2):
            graph.add_edge(d1, d2)
            edge = graph.edges[(d1, d2)]
            if 'dups' not in edge:
                edge['dups'] = 0
            edge['dups'] += 1

    edge_data = list(graph.edges(data=True))

    for dpath in ub.ProgIter(graph.nodes, desc='find lens'):
        num_children = len(os.listdir(dpath))
        graph.nodes[dpath]['num_children'] = num_children

    for d1, d2, dat in edge_data:
        nc1 = graph.nodes[d1]['num_children']
        nc2 = graph.nodes[d2]['num_children']
        ndups = dat['dups']
        dup_score = (dat['dups'] / min(nc1, nc2))
        dat['dup_score'] = dup_score
        if dup_score > 0.9:
            print('dup_score = {!r}'.format(dup_score))
            print('d1 = {!r}'.format(d1))
            print('d2 = {!r}'.format(d2))
            print('nc1 = {!r}'.format(nc1))
            print('nc2 = {!r}'.format(nc2))
            print('ndups = {!r}'.format(ndups))

    print('edge_data = {}'.format(ub.repr2(edge_data, nl=2)))

    print('maybe_dups = {}'.format(ub.repr2(maybe_dups.keys(), nl=3)))
    for key, group in maybe_dups.items():
        if key[0] == '':
            continue
        print('key = {!r}'.format(key))
        print('group = {}'.format(ub.repr2(group, nl=1)))
        for pfile in group:
            pfile.refined_to(float('inf'))

        print('key = {!r}'.format(key))

    inv2.build_hashes(max_workers=6, mode='thread')

    inv1.pfiles = [
        p for p in ub.ProgIter(inv1.pfiles, desc='exist check')
        if exists(p.fpath)
    ]
    inv2.pfiles = [
        p for p in ub.ProgIter(inv2.pfiles, desc='exist check')
        if exists(p.fpath)
    ]

    pfiles1 = inv1.pfiles
    pfiles2 = inv2.pfiles

    def compute_likely_overlaps(pfiles1, pfiles2):
        step_idx1 = ProgressiveFile.compatible_step_idx(pfiles1)
        step_idx2 = ProgressiveFile.compatible_step_idx(pfiles2)
        step_idx = min(step_idx1, step_idx2)
        grouped1 = ProgressiveFile.group_pfiles(pfiles1, step_idx=step_idx)
        grouped2 = ProgressiveFile.group_pfiles(pfiles2, step_idx=step_idx)

        thresh = 0.2
        verbose = 1

        # TODO: it would be nice if we didn't have to care about internal
        # deduplication when we attempt to find cross-set overlaps
        dups1 = ProgressiveFile.likely_duplicates(inv1.pfiles,
                                                  thresh=thresh,
                                                  verbose=verbose)
        dups2 = ProgressiveFile.likely_duplicates(inv2.pfiles,
                                                  thresh=thresh,
                                                  verbose=verbose)

        pfiles = inv1.pfiles + inv2.pfiles
        dups3 = ProgressiveFile.likely_duplicates(pfiles,
                                                  thresh=thresh,
                                                  verbose=verbose)

        only_on_inv2 = {}
        for key, group in dups3.items():
            if not any(
                    item.fpath.startswith(inv1.root_fpath) for item in group):
                only_on_inv2[key] = group

        for p1 in inv1.pfiles:
            if 'Chase HQ 2 (JUE) [!].zip' in p1.fpath:
                break

        for p2 in inv2.pfiles:
            if 'Chase HQ 2 (JUE) [!].zip' in p2.fpath:
                break

        look = list(ub.flatten(only_on_inv2.values()))
        takealook = sorted([p.fpath for p in look])
        print('takealook = {}'.format(ub.repr2(takealook, nl=1)))

        keys1 = set(grouped1)
        keys2 = set(grouped2)

        missing_keys2 = keys2 - keys1
        missing_groups2 = ub.dict_subset(grouped2, missing_keys2)

        missing_fpaths2 = []
        for key, values in missing_groups2.items():
            print('key = {!r}'.format(key))
            print('values = {}'.format(ub.repr2(values, nl=1)))
            missing_fpaths2.extend(values)

        missing_fpaths2 = sorted([p.fpath for p in missing_fpaths2])
        print('missing_fpaths2 = {}'.format(ub.repr2(missing_fpaths2, nl=1)))
        # pass

        import xdev
        set_overlaps = xdev.set_overlaps(keys1, keys2)
        print('set_overlaps = {}'.format(ub.repr2(set_overlaps, nl=1)))
        # We want to know what files in set2 do not exist in set1

    if 0:
        fpath = inv1.all_fpaths[0]
        pfile = ProgressiveFile(fpath)

        fpath1 = '/media/joncrall/raid/unsorted/yet-another-backup/card-usb-drive/Transfer/Zebras/DownloadedLibraries/lightspeed/solve_triu.m'
        fpath2 = '/media/joncrall/raid/unsorted/yet-another-backup/card-usb-drive/Zebras/downloaded_libraries/lightspeed/solve_triu.m'

        fpath1 = '/media/joncrall/raid/Applications/Wii/WiiHacksAndStuff/CurrentHacks/Falco/DarkFalco02.pcs'
        fpath2 = '/media/joncrall/raid/Applications/Wii/WiiHacksAndStuff/CurrentHacks/Ivysaur/Kraid-v2-Ivy.pcs'

        pfile = pfile1 = ProgressiveFile(fpath1)
        pfile2 = ProgressiveFile(fpath2)

        pfile.maybe_equal(pfile2, thresh=0.1)

        fpath_demodata = inv1.all_fpaths[::len(inv1.all_fpaths) // 500]
        # fpaths = hash_groups1_dup['ef46db3751d8e999']
        pfiles_demodata = [ProgressiveFile(f) for f in fpath_demodata]

        def progressive_duplicates(pfiles, idx=1):
            step_ids = [pfile.refined_to(idx) for pfile in ub.ProgIter(pfiles)]
            final_groups = {}
            grouped = ub.group_items(pfiles, step_ids)
            for key, group in grouped.items():
                if len(group) > 1:
                    if all(not g.can_refine for g in group):
                        # Group is ~100% a real duplicate
                        final_groups[key] = group
                    else:
                        pfiles = group
                        deduped = progressive_duplicates(pfiles, idx=idx + 1)
                        final_groups.update(deduped)
                else:
                    final_groups[key] = group
            return final_groups

        pfiles = pfiles_demodata
        final_groups = progressive_duplicates(pfiles)

        for key, group in final_groups.items():
            if len(group) > 1:
                print('key = {!r}'.format(key))
                print('group = {}'.format(ub.repr2(group, nl=1)))

        inv1.build_hashes()
        inv2.build_hashes()

        hash_groups1 = ub.group_items(inv1.all_fpaths, inv1.all_hashes)
        hash_groups2 = ub.group_items(inv2.all_fpaths, inv2.all_hashes)

        hash_groups1_dup = {
            k: v
            for k, v in hash_groups1.items() if len(v) > 1
        }
        hash_groups2_dup = {
            k: v
            for k, v in hash_groups2.items() if len(v) > 1
        }
        len(hash_groups1_dup)
        len(hash_groups2_dup)

        # common = set(hash_groups1) & set(hash_groups2)
        # xdev.set_overlaps(hash_groups1, hash_groups2)

        fnames1 = ub.group_items(inv1.all_fpaths, key=basename)
        fnames2 = ub.group_items(inv2.all_fpaths, key=basename)

        missing = ub.dict_diff(fnames2, fnames1)
        sorted(ub.flatten(missing.values()))
        len(missing)

        fpath_demodata = inv1.all_fpaths[::len(inv1.all_fpaths) // 500]

        def internal_deduplicate(self):
            hash_groups = ub.group_items(self.all_fpaths, self.all_hashes)
            hash_groups_dup = {
                k: v
                for k, v in hash_groups.items() if len(v) > 1
            }

            from os.path import dirname

            hash_groups_dup['ef46db3751d8e999']

            for key, values in hash_groups_dup.items():
                for v in values:
                    if v.endswith('.avi'):
                        break

                [basename(v) for v in values]
                [dirname(v) for v in values]
Example #32
def git_squash_streaks():
    """
    git-squash-streaks

    Usage:
        See argparse
    """
    import argparse
    try:
        import argcomplete
    except ImportError:
        argcomplete = None
    description, help_dict = _autoparse_desc(squash_streaks)

    parser = argparse.ArgumentParser(description=description)
    parser.add_argument(*('--timedelta',), type=str,
                        help=help_dict['timedelta'])

    parser.add_argument(*('--custom_streak',), nargs=2,
                        help='hack to specify one custom streak')

    parser.add_argument(*('--pattern',), type=str,
                        help=help_dict['pattern'])

    parser.add_argument(*('--tags',), action='store_true', help='experimental')

    parser.add_argument(*('--no-preserve-tags',), dest='preserve_tags',
                        action='store_false', help=help_dict['preserve_tags'])

    parser.add_argument(*('--oldest-commit',), dest='oldest_commit',
                        help=help_dict['oldest_commit'])

    parser.add_argument(*('--inplace',), action='store_true',
                        help=help_dict['inplace'])

    parser.add_argument(*('--auto-rollback',), action='store_true',
                        dest='auto_rollback', help=help_dict['auto_rollback'])

    parser.add_argument('--authors', type=str,
                        help=(help_dict['authors'] +
                              ' Defaults to your git config user.name'))

    group = parser.add_mutually_exclusive_group()
    group.add_argument(*('-n', '--dry'), dest='dry', action='store_true',
                        help=help_dict['dry'])
    group.add_argument(*('-f', '--force'), dest='dry', action='store_false',
                        help='opposite of --dry')

    group = parser.add_mutually_exclusive_group()
    group.add_argument(*('-v', '--verbose'), dest='verbose', action='store_const',
                       const=1, help='verbosity flag')
    group.add_argument(*('-q', '--quiet'), dest='verbose', action='store_const',
                       const=0, help='suppress output')

    parser.set_defaults(
        tags=False,
        inplace=False,
        preserve_tags=True,
        auto_rollback=False,
        authors=None,
        pattern=None,
        timedelta='sameday',
        dry=True,
        verbose=True,
    )
    if argcomplete:
        argcomplete.autocomplete(parser)
    args = parser.parse_args()

    # Postprocess args
    ns = args.__dict__.copy()

    if ns.pop('tags'):
        do_tags()
        return

    try:
        ns['timedelta'] = float(ns['timedelta'])
    except ValueError:
        valid_timedelta_categories = ['sameday', 'alltime']
        if ns['timedelta'] not in valid_timedelta_categories:
            raise ValueError('invalid timedelta {!r}, expected a float or one of {}'.format(
                ns['timedelta'], valid_timedelta_categories))

    if ns['authors'] is None:
        ns['authors'] = {git.Git().config('user.name')}
        # HACK: personal author aliases. TODO: support user-defined aliases.
        # SEE: a .mailmap file could be used to extract these automatically:
        # https://git-scm.com/docs/git-shortlog#_mapping_authors
        """
        # .mailmap
        # Proper Name <*****@*****.**> Commit Name <*****@*****.**>
        Jon Crall <*****@*****.**> joncrall <*****@*****.**>
        Jon Crall <*****@*****.**> jon.crall <*****@*****.**>
        Jon Crall <*****@*****.**> Jon Crall <*****@*****.**>
        Jon Crall <*****@*****.**> joncrall <*****@*****.**>
        Jon Crall <*****@*****.**> joncrall <*****@*****.**>
        Jon Crall <*****@*****.**> Jon Crall <*****@*****.**>
        """
        if {'joncrall', 'Jon Crall', 'jon.crall'}.intersection(ns['authors']):
            ns['authors'].update({'joncrall', 'Jon Crall'})
    else:
        ns['authors'] = {a.strip() for a in ns['authors'].split(',')}

    print(ub.repr2(ns, nl=1))

    squash_streaks(**ns)

    if ns['dry']:
        if ns['verbose']:
            print('Finished the dry run. Use -f to force')
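
    # Example invocations (a sketch; assumes this function is installed as the
    # ``git-squash-streaks`` entry point named in the docstring, and the flag
    # values below are illustrative):
    #
    #   git-squash-streaks --dry                    # preview what would be squashed
    #   git-squash-streaks --timedelta=sameday -f   # force-squash same-day streaks
    #   git-squash-streaks --pattern='.*wip.*' --inplace -f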
Beispiel #33
0
def compare_results():
    print('Comparing results')
    import pandas as pd
    from tabulate import tabulate

    # Read in output of demo script
    measure_fpath = 'measurements_haul83.csv'
    py_df = pd.read_csv(measure_fpath, index_col=None)
    # Convert python length output from mm into cm for consistency
    py_df['fishlen'] = py_df['fishlen'] / 10
    py_df['current_frame'] = py_df['current_frame'].astype(int)

    # janky CSV parsing
    py_df['box_pts1'] = py_df['box_pts1'].map(lambda p: eval(p.replace(';', ','), np.__dict__))
    py_df['box_pts2'] = py_df['box_pts2'].map(lambda p: eval(p.replace(';', ','), np.__dict__))

    py_df['obox1'] = [ctalgo.OrientedBBox(*cv2.minAreaRect(pts[:, None, :].astype(int)))
                      for pts in py_df['box_pts1']]
    py_df['obox2'] = [ctalgo.OrientedBBox(*cv2.minAreaRect(pts[:, None, :].astype(int)))
                      for pts in py_df['box_pts2']]
    py_df.drop(['box_pts1', 'box_pts2'], axis=1, inplace=True)

    # Remap to matlab names
    py_df = py_df.rename(columns={
        'error': 'Err',
        'fishlen': 'fishLength',
        'range': 'fishRange',
    })

    # Load matlab results
    mat_df = _read_kresimir_results()

    FORCE_COMPARABLE_RANGE = True
    # FORCE_COMPARABLE_RANGE = False
    if FORCE_COMPARABLE_RANGE:
        # Be absolutely certain we are in comparable regions (may slightly bias
        # results, against python and in favor of matlab)
        min_frame = max(mat_df.current_frame.min(), py_df.current_frame.min())
        max_frame = min(mat_df.current_frame.max(), py_df.current_frame.max())
        print('min_frame = {!r}'.format(min_frame))
        print('max_frame = {!r}'.format(max_frame))

        mat_df = mat_df[(mat_df.current_frame >= min_frame) &
                        (mat_df.current_frame <= max_frame)]
        py_df = py_df[(py_df.current_frame >= min_frame) &
                      (py_df.current_frame <= max_frame)]

    intersect_frames = np.intersect1d(mat_df.current_frame, py_df.current_frame)
    print('intersecting frames = {} / {} (matlab)'.format(
        len(intersect_frames), len(set(mat_df.current_frame))))
    print('intersecting frames = {} / {} (python)'.format(
        len(intersect_frames), len(set(py_df.current_frame))))

    # Reuse the Hungarian algorithm implementation from ctalgo
    min_assign = ctalgo.FishStereoMeasurments.minimum_weight_assignment

    correspond = []
    for f in intersect_frames:
        pidxs = np.where(py_df.current_frame == f)[0]
        midxs = np.where(mat_df.current_frame == f)[0]

        pdf = py_df.iloc[pidxs]
        mdf = mat_df.iloc[midxs]

        ppts1 = np.array([o.center for o in pdf['obox1']])
        mpts1 = np.array([o.center for o in mdf['obox1']])

        ppts2 = np.array([o.center for o in pdf['obox2']])
        mpts2 = np.array([o.center for o in mdf['obox2']])

        dists1 = sklearn.metrics.pairwise.pairwise_distances(ppts1, mpts1)
        dists2 = sklearn.metrics.pairwise.pairwise_distances(ppts2, mpts2)

        # arbitrarily chosen threshold (see the standalone sketch after this function)
        thresh = 100
        for i, j in min_assign(dists1):
            d1 = dists1[i, j]
            d2 = dists2[i, j]
            if d1 < thresh and d2 < thresh and abs(d1 - d2) < thresh / 4:
                correspond.append((pidxs[i], midxs[j]))
    correspond = np.array(correspond)

    # pflags = np.array(ub.boolmask(correspond.T[0], len(py_df)))
    mflags = np.array(ub.boolmask(correspond.T[1], len(mat_df)))
    # print('there are {} detections that seem to be in common'.format(len(correspond)))
    # print('The QC flags of the common detections are:       {}'.format(
    #     ub.dict_hist(mat_df[mflags]['QC'].values)))
    # print('The QC flags of the other matlab detections are: {}'.format(
    #     ub.dict_hist(mat_df[~mflags]['QC'].values)))

    print('\n\n----\n## All stats\n')
    print(ub.codeblock(
        '''
        Overall, the matlab script made {nmat} length measurements and the
        python script made {npy} length measurements.  Here is a table
        summarizing the average lengths / ranges / errors of each script:
        ''').format(npy=len(py_df), nmat=len(mat_df)))
    stats = pd.DataFrame(columns=['python', 'matlab'])
    for key in ['fishLength', 'fishRange', 'Err']:
        stats.loc[key, 'python'] = '{:6.2f} ± {:6.2f}'.format(py_df[key].mean(), py_df[key].std())
        stats.loc[key, 'matlab'] = '{:6.2f} ± {:6.2f}'.format(mat_df[key].mean(), mat_df[key].std())

    stats.loc['nTotal', 'python'] = '{}'.format(len(py_df))
    stats.loc['nTotal', 'matlab'] = '{}'.format(len(mat_df))
    print(tabulate(stats, headers='keys', tablefmt='psql', stralign='right'))

    print('\n\n----\n## Only COMMON detections\n')
    py_df_c = py_df.iloc[correspond.T[0]]
    mat_df_c = mat_df.iloc[correspond.T[1]]
    stats = pd.DataFrame(columns=['python', 'matlab'])
    for key in ['fishLength', 'fishRange', 'Err']:
        stats.loc[key, 'python'] = '{:6.2f} ± {:6.2f}'.format(py_df_c[key].mean(), py_df_c[key].std())
        stats.loc[key, 'matlab'] = '{:6.2f} ± {:6.2f}'.format(mat_df_c[key].mean(), mat_df_c[key].std())

    stats.loc['nTotal', 'python'] = '{}'.format(len(py_df_c))
    stats.loc['nTotal', 'matlab'] = '{}'.format(len(mat_df_c))

    print(ub.codeblock(
        '''
        Now, we investigate how many detections matlab and python made in common.
        (Note: choosing which detections in one version correspond to which in
         another is done using a heuristic based on distances between bbox
         centers and a thresholded minimum assignment problem).

        Python made {npy_c}/{nmat} = {percent:.2f}% of the detections matlab made

        ''').format(npy_c=len(py_df_c), nmat=len(mat_df),
                    percent=100 * len(py_df_c) / len(mat_df)))
    print(tabulate(stats, headers='keys', tablefmt='psql', stralign='right'))

    print('\n\n----\n## Evaluation using the QC code\n')
    hist_hit = ub.dict_hist(mat_df[mflags]['QC'].values)
    hist_miss = ub.dict_hist(mat_df[~mflags]['QC'].values)
    print(ub.codeblock(
        '''
        However, not all of those matlab detections were good. Because we have
        put the detections in correspondence with each other, we can assign the
        python detections QC codes.

        Here is a histogram of the QC codes for these python detections:
        {}
        (Note: read histogram as <QC-code>: <frequency>)

        Here is a histogram of the other matlab detections that python did not
        find:
        {}

        To summarize:
            python correctly rejected {:.2f}% of the matlab QC=0 detections
            python correctly accepted {:.2f}% of the matlab QC=1 detections
            python correctly accepted {:.2f}% of the matlab QC=2 detections

            Note that because python made detections that matlab did not make,
            the remaining {} detections may be right or wrong, but there is
            no way to tell from this analysis.

        Lastly, here are the statistics for the common detections that had a
        non-zero QC code.
        ''').format(
            ub.repr2(hist_hit, nl=1),
            ub.repr2(hist_miss, nl=1),
            100 * hist_miss[0] / (hist_hit[0] + hist_miss[0]),
            100 * hist_hit[1] / (hist_hit[1] + hist_miss[1]),
            100 * hist_hit[2] / (hist_hit[2] + hist_miss[2]),
            len(py_df) - len(py_df_c)
                   )
    )

    is_qc = (mat_df_c['QC'] > 0).values
    mat_df_c = mat_df_c[is_qc]
    py_df_c = py_df_c[is_qc]
    stats = pd.DataFrame(columns=['python', 'matlab'])
    for key in ['fishLength', 'fishRange', 'Err']:
        stats.loc[key, 'python'] = '{:6.2f} ± {:6.2f}'.format(py_df_c[key].mean(), py_df_c[key].std())
        stats.loc[key, 'matlab'] = '{:6.2f} ± {:6.2f}'.format(mat_df_c[key].mean(), mat_df_c[key].std())

    stats.loc['nTotal', 'python'] = '{}'.format(len(py_df_c))
    stats.loc['nTotal', 'matlab'] = '{}'.format(len(mat_df_c))
    print(tabulate(stats, headers='keys', tablefmt='psql', stralign='right'))
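

# A minimal standalone sketch (not the ctalgo implementation) of the matching
# heuristic used above: detections from the two scripts are paired by solving
# a minimum-weight assignment on pairwise center distances, and pairs farther
# apart than a threshold are discarded. It simplifies the real logic to a
# single set of points per script; the function name, the scipy-based solver,
# and the default threshold are illustrative assumptions.
def sketch_threshold_assignment(pts_a, pts_b, thresh=100):
    import numpy as np
    from scipy.optimize import linear_sum_assignment
    from scipy.spatial.distance import cdist
    dists = cdist(np.asarray(pts_a, dtype=float), np.asarray(pts_b, dtype=float))
    rows, cols = linear_sum_assignment(dists)
    # Keep only pairs close enough to plausibly be the same detection
    return [(i, j) for i, j in zip(rows, cols) if dists[i, j] < thresh]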
Beispiel #34
0
    def _configure(self):
        logger.debug(' ----- configure ' + self.__class__.__name__)
        config = tmp_smart_cast_config(self)
        print('detector config = {}'.format(ub.repr2(config, nl=2)))
        self.detector = ctalgo.GMMForegroundObjectDetector(**config)
        self._base_configure()
Beispiel #35
0
def demo(config=None):
    """
    Runs the algorithm end-to-end.
    """
    # dataset = 'test'
    # dataset = 'haul83'

    if config is None:
        import argparse
        parser = argparse.ArgumentParser(description='Standalone camtrawl demo')

        parser.add_argument('--cal', help='path to matlab or numpy stereo calibration file', default='cal.npz')
        parser.add_argument('--left', help='path to directory containing left images', default='left')
        parser.add_argument('--right', help='path to directory containing right images', default='right')
        parser.add_argument('--out', help='output directory', default='./out')
        parser.add_argument('-f', '--overwrite', action='store_true', help='will delete any existing output')
        parser.add_argument('--draw', action='store_true', help='draw visualization of algorithm steps')

        parser.add_argument('--dataset', default=None,
                            help='Developer convenience that assumes the demo '
                                 'data is downloaded and available. If given, '
                                 'it overrides --left, --right, and --cal.')

        args = parser.parse_args()
        config = args.__dict__.copy()
        config = FrozenKeyDict(config)

    if config['dataset'] is not None:
        img_path1, img_path2, cal_fpath = demodata_input(dataset=config['dataset'])
        config['left'] = img_path1
        config['right'] = img_path2
        config['cal'] = cal_fpath

    img_path1, img_path2, cal_fpath = ub.take(config, [
        'left', 'right', 'cal'])
    out_dpath = config['out']
    logging.info('Demo Config = {!r}'.format(config))

    ub.ensuredir(out_dpath)

    # ----
    # Choose parameter configurations
    # ----

    # Use GMM based model
    gmm_params = {
    }
    triangulate_params = {
    }

    DRAWING = config['draw']

    # ----
    # Initialize algorithms
    # ----

    detector1 = ctalgo.GMMForegroundObjectDetector(**gmm_params)
    detector2 = ctalgo.GMMForegroundObjectDetector(**gmm_params)
    triangulator = ctalgo.FishStereoMeasurments(**triangulate_params)

    try:
        import pyfiglet
        print(pyfiglet.figlet_format('CAMTRAWL', font='cybermedium'))
    except ImportError:
        logging.debug('pyfiglet is not installed')
        print('========')
        print('CAMTRAWL')
        print('========')
    logging.info('Detector1 Config: ' + ub.repr2(detector1.config, nl=1))
    logging.info('Detector2 Config: ' + ub.repr2(detector2.config, nl=1))
    logging.info('Triangulate Config: ' + ub.repr2(triangulator.config, nl=1))
    logging.info('DRAWING = {!r}'.format(DRAWING))

    cal = ctalgo.StereoCalibration.from_file(cal_fpath)

    stream = StereoFrameStream(img_path1, img_path2)
    stream.preload()

    # HACK IN A BEGIN FRAME
    if len(stream) > 2200:
        stream.seek(2200)

    # ----
    # Run the algorithm
    # ----

    # n_frames = 2000
    # stream.aligned_frameids = stream.aligned_frameids[:stream.index]

    measure_fpath = join(out_dpath, 'measurements.csv')
    if exists(measure_fpath):
        if config['overwrite']:
            ub.delete(measure_fpath)
        else:
            raise IOError('Measurement path already exists')
    output_file = open(measure_fpath, 'a')

    if DRAWING:
        drawing_dpath = join(out_dpath, 'visual')
        if exists(drawing_dpath):
            if config['overwrite']:
                ub.delete(drawing_dpath)
            else:
                raise IOError('Output path already exists')
        ub.ensuredir(drawing_dpath)

    headers = ['current_frame', 'fishlen', 'range', 'error', 'dz', 'box_pts1',
               'box_pts2']
    output_file.write(','.join(headers) + '\n')
    output_file.flush()

    measurements = []

    logger.info('begin camtrawl iteration')

    import tqdm
    # prog = ub.ProgIter(iter(stream), total=len(stream), desc='camtrawl demo',
    #                    clearline=False, freq=1, adjust=False)
    prog = tqdm.tqdm(iter(stream), total=len(stream), desc='camtrawl demo',
                     leave=True)

    def csv_repr(d):
        if isinstance(d, np.ndarray):
            d = d.tolist()
        s = repr(d)
        return s.replace('\n', '').replace(',', ';').replace(' ', '')

    for frame_num, (frame_id, img1, img2) in enumerate(prog):
        logger.debug('frame_num = {!r}'.format(frame_num))

        detections1 = list(detector1.detect(img1))
        detections2 = list(detector2.detect(img2))
        masks1 = detector1._masks
        masks2 = detector2._masks

        any_detected = len(detections1) > 0 or len(detections2) > 0

        if any_detected:
            assignment, assign_data, cand_errors = triangulator.find_matches(
                cal, detections1, detections2)
            # Append assignments to the measurements
            for data in assign_data:
                data['current_frame'] = int(frame_id)
                measurements.append(data)
                line = ','.join([csv_repr(d) for d in ub.take(data, headers)])
                output_file.write(line + '\n')
                output_file.flush()
        else:
            cand_errors = None
            assignment, assign_data = None, None

        if DRAWING >= 2 or (DRAWING and any_detected):
            # Once anything has been detected, keep drawing every later frame
            DRAWING = 3
            stacked = DrawHelper.draw_stereo_detections(img1, detections1, masks1,
                                                        img2, detections2, masks2,
                                                        assignment, assign_data,
                                                        cand_errors)
            if cv2.__version__.startswith('2'):
                cv2.putText(stacked,
                            text='frame #{}, id={}'.format(frame_num,
                                                           frame_id),
                            org=(10, 50),
                            fontFace=cv2.FONT_HERSHEY_SIMPLEX,
                            fontScale=1, color=(255, 0, 0),
                            thickness=2, lineType=cv2.cv.CV_AA)
            else:
                stacked = cv2.putText(stacked,
                                      text='frame #{}, id={}'.format(frame_num,
                                                                     frame_id),
                                      org=(10, 50),
                                      fontFace=cv2.FONT_HERSHEY_SIMPLEX,
                                      fontScale=1, color=(255, 0, 0),
                                      thickness=2, lineType=cv2.LINE_AA)
            cv2.imwrite(drawing_dpath + '/mask{}_draw.png'.format(frame_id), stacked)
    output_file.close()

    n_total = len(measurements)
    logger.info('n_total = {!r}'.format(n_total))
    if n_total:
        all_errors = np.array([d['error'] for d in measurements])
        all_lengths = np.array([d['fishlen'] for d in measurements])
        logger.info('ave_error = {:.2f} +- {:.2f}'.format(all_errors.mean(), all_errors.std()))
        logger.info('ave_lengths = {:.2f} +- {:.2f} '.format(all_lengths.mean(), all_lengths.std()))
    return measurements
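

# A minimal sketch of reading back the measurements CSV written by demo().
# csv_repr() replaces commas inside array fields with ';' so each measurement
# stays on one CSV row; those fields need the inverse substitution when parsed
# (this mirrors the ad-hoc parsing in compare_results). The helper name is an
# assumption; numeric columns are left as strings for brevity.
def sketch_read_measurements(measure_fpath):
    import ast
    import csv
    rows = []
    with open(measure_fpath, 'r') as file:
        for row in csv.DictReader(file):
            for key in ['box_pts1', 'box_pts2']:
                # Restore the commas and parse the nested list literal
                row[key] = ast.literal_eval(row[key].replace(';', ','))
            rows.append(row)
    return rows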
Beispiel #36
0
def _coerce_datasets(config):
    import netharn as nh
    import ndsampler
    import numpy as np
    from torchvision import transforms
    coco_datasets = nh.api.Datasets.coerce(config)
    print('coco_datasets = {}'.format(ub.repr2(coco_datasets, nl=1)))
    for tag, dset in coco_datasets.items():
        dset._build_hashid(hash_pixels=False)

    workdir = ub.ensuredir(ub.expandpath(config['workdir']))
    samplers = {
        tag: ndsampler.CocoSampler(dset, workdir=workdir, backend=config['sampler_backend'])
        for tag, dset in coco_datasets.items()
    }

    for tag, sampler in ub.ProgIter(list(samplers.items()), desc='prepare frames'):
        sampler.frames.prepare(workers=config['workers'])

    # TODO: basic ndsampler torch dataset, likely has to support the transforms
    # API, bleh.

    transform = transforms.Compose([
        transforms.Resize(config['input_dims']),
        transforms.CenterCrop(config['input_dims']),
        transforms.ToTensor(),
        transforms.Lambda(lambda x: x.mul(255))
    ])

    torch_datasets = {
        key: SamplerDataset(
            sampler, transform=transform,
            # input_dims=config['input_dims'],
            # augmenter=config['augmenter'] if key == 'train' else None,
        )
        for key, sampler in samplers.items()
    }
    # self = torch_dset = torch_datasets['train']

    if config['normalize_inputs']:
        # Get stats on the dataset (todo: turn off augmentation for this)
        import kwarray
        _dset = torch_datasets['train']
        stats_idxs = kwarray.shuffle(np.arange(len(_dset)), rng=0)[0:min(1000, len(_dset))]
        stats_subset = torch.utils.data.Subset(_dset, stats_idxs)

        cacher = ub.Cacher('dset_mean', cfgstr=_dset.input_id + 'v3')
        input_stats = cacher.tryload()

        from netharn.data.channel_spec import ChannelSpec
        channels = ChannelSpec.coerce(config['channels'])

        if input_stats is None:
            # Use parallel workers to load data faster
            from netharn.data.data_containers import container_collate
            from functools import partial
            collate_fn = partial(container_collate, num_devices=1)

            loader = torch.utils.data.DataLoader(
                stats_subset,
                collate_fn=collate_fn,
                num_workers=config['workers'],
                shuffle=True,
                batch_size=config['batch_size'])

            # Track moving average of each fused channel stream
            channel_stats = {key: nh.util.RunningStats()
                             for key in channels.keys()}
            assert len(channel_stats) == 1, (
                'only support one fused stream for now')
            for batch in ub.ProgIter(loader, desc='estimate mean/std'):
                if isinstance(batch, (tuple, list)):
                    inputs = {'rgb': batch[0]}  # make assumption
                else:
                    inputs = batch['inputs']

                for key, val in inputs.items():
                    try:
                        for part in val.numpy():
                            channel_stats[key].update(part)
                    except ValueError:  # final batch broadcast error
                        pass

            perchan_input_stats = {}
            for key, running in channel_stats.items():
                perchan_stats = running.simple(axis=(1, 2))
                perchan_input_stats[key] = {
                    'mean': perchan_stats['mean'].round(3),
                    'std': perchan_stats['std'].round(3),
                }

            input_stats = ub.peek(perchan_input_stats.values())
            cacher.save(input_stats)
    else:
        input_stats = {}

    torch_loaders = {
        tag: dset.make_loader(
            batch_size=config['batch_size'],
            num_batches=config['num_batches'],
            num_workers=config['workers'],
            shuffle=(tag == 'train'),
            balance=(config['balance'] if tag == 'train' else None),
            pin_memory=True)
        for tag, dset in torch_datasets.items()
    }

    dataset_info = {
        'torch_datasets': torch_datasets,
        'torch_loaders': torch_loaders,
        'input_stats': input_stats
    }
    return dataset_info
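

# A minimal standalone sketch of the per-channel input statistics that the
# RunningStats accumulation above produces: the mean and std of every pixel,
# per channel, over a loader of (B, C, H, W) tensors. It uses a simple
# sum / sum-of-squares accumulation instead of netharn's RunningStats, and it
# assumes the loader yields either raw tensors or (image, ...) tuples; the
# helper name is illustrative.
def sketch_channel_stats(loader):
    total = None
    total_sq = None
    count = 0
    for batch in loader:
        imgs = batch[0] if isinstance(batch, (tuple, list)) else batch
        # Flatten everything except the channel dimension: (B, C, H, W) -> (C, N)
        flat = imgs.transpose(0, 1).reshape(imgs.shape[1], -1).double()
        if total is None:
            total = flat.sum(dim=1)
            total_sq = (flat ** 2).sum(dim=1)
        else:
            total += flat.sum(dim=1)
            total_sq += (flat ** 2).sum(dim=1)
        count += flat.shape[1]
    mean = total / count
    std = (total_sq / count - mean ** 2).clamp(min=0).sqrt()
    return {'mean': mean, 'std': std}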
Beispiel #37
0
def detect_cli(config={}):
    """
    CommandLine:
        python -m bioharn.detect_predict --help

    CommandLine:
        python -m bioharn.detect_predict \
            --dataset=~/data/noaa/Habcam_2015_g027250_a00102917_c0001_v2_test.mscoco.json \
            --deployed=/home/joncrall/work/bioharn/fit/runs/bioharn-det-v11-test-cascade/myovdqvi/deploy_MM_CascadeRCNN_myovdqvi_035_MVKVVR.zip \
            --out_dpath=~/work/bioharn/habcam_test_out \
            --draw=100 \
            --input_dims=512,512 \
            --xpu=0 --batch_size=1

    Ignore:
        >>> config = {}
        >>> config['dataset'] = '~/data/noaa/Habcam_2015_g027250_a00102917_c0001_v2_vali.mscoco.json'
        >>> config['deployed'] = '/home/joncrall/work/bioharn/fit/runs/bioharn-det-v11-test-cascade/myovdqvi/deploy_MM_CascadeRCNN_myovdqvi_035_MVKVVR.zip'
        >>> config['out_dpath'] = 'out'
    """
    import kwarray
    import ndsampler
    from os.path import basename, join, exists, isfile, isdir  # NOQA

    config = DetectPredictCLIConfig(config, cmdline=True)
    print('config = {}'.format(ub.repr2(config.asdict())))

    out_dpath = ub.expandpath(config.get('out_dpath'))

    import six
    if isinstance(config['dataset'], six.string_types):
        if config['dataset'].endswith('.json'):
            dataset_fpath = ub.expandpath(config['dataset'])
            coco_dset = ndsampler.CocoDataset(dataset_fpath)
            # Running prediction is much faster if you can build a sampler.
            sampler_backend = {
                'type': 'cog',
                'config': {
                    'compress': 'JPEG',
                },
                '_hack_old_names': False,  # flip to true to use legacy caches
            }
            # NOTE: the cog backend configured above is disabled by this override
            sampler_backend = None
            print('coco hashid = {}'.format(coco_dset._build_hashid()))
        else:
            sampler_backend = None
            if exists(config['dataset']) and isfile(config['dataset']):
                # Single image case
                image_fpath = ub.expandpath(config['dataset'])
                coco_dset = ndsampler.CocoDataset()
                coco_dset.add_image(image_fpath)
    elif isinstance(config['dataset'], list):
        # Multiple image case
        gpaths = config['dataset']
        gpaths = [ub.expandpath(g) for g in gpaths]
        coco_dset = ndsampler.CocoDataset()
        for gpath in gpaths:
            coco_dset.add_image(gpath)
    else:
        raise TypeError(config['dataset'])

    draw = config.get('draw')
    workdir = ub.expandpath(config.get('workdir'))

    det_outdir = ub.ensuredir((out_dpath, 'pred'))

    pred_config = ub.dict_subset(config, DetectPredictConfig.default)

    print('Create sampler')
    sampler = ndsampler.CocoSampler(coco_dset, workdir=workdir,
                                    backend=sampler_backend)
    print('prepare frames')
    sampler.frames.prepare(workers=config['workers'])

    print('Create predictor')
    predictor = DetectPredictor(pred_config)
    print('Ensure model')
    predictor._ensure_model()

    pred_dataset = coco_dset.dataset.copy()
    pred_dataset['annotations'] = []
    pred_dset = ndsampler.CocoDataset(pred_dataset)

    # self = predictor
    predictor.config['verbose'] = 1
    pred_gen = predictor.predict_sampler(sampler)
    buffered_gen = AsyncBufferedGenerator(pred_gen, size=coco_dset.n_images)

    gid_to_pred = {}
    prog = ub.ProgIter(buffered_gen, total=coco_dset.n_images,
                       desc='buffered detect')
    for img_idx, (gid, dets) in enumerate(prog):
        gid_to_pred[gid] = dets

        for ann in dets.to_coco():
            ann['image_id'] = gid
            try:
                catname = ann['category_name']
                ann['category_id'] = pred_dset._resolve_to_cid(catname)
            except KeyError:
                # The predicted category is not registered in pred_dset yet; add it
                if 'category_id' not in ann:
                    cid = pred_dset.add_category(catname)
                    ann['category_id'] = cid
            pred_dset.add_annotation(**ann)

        single_img_coco = pred_dset.subset([gid])
        single_pred_dpath = ub.ensuredir((det_outdir, 'single_image'))
        single_pred_fpath = join(single_pred_dpath, 'detections_gid_{:08d}.mscoco.json'.format(gid))
        single_img_coco.dump(single_pred_fpath, newlines=True)

        if draw is True or (draw and img_idx < draw):
            draw_outdir = ub.ensuredir((out_dpath, 'draw'))
            img_fpath = coco_dset.load_image_fpath(gid)
            gname = basename(img_fpath)
            viz_fname = ub.augpath(gname, prefix='detect_', ext='.jpg')
            viz_fpath = join(draw_outdir, viz_fname)

            image = kwimage.imread(img_fpath)

            flags = dets.scores > .2
            flags[kwarray.argmaxima(dets.scores, num=10)] = True
            top_dets = dets.compress(flags)
            toshow = top_dets.draw_on(image, alpha=None)
            # kwplot.imshow(toshow)
            kwimage.imwrite(viz_fpath, toshow, space='rgb')

    pred_fpath = join(det_outdir, 'detections.mscoco.json')
    print('Dump detections to pred_fpath = {!r}'.format(pred_fpath))
    pred_dset.dump(pred_fpath, newlines=True)
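

# A minimal sketch of the drawing-time selection used in the loop above: keep
# every detection scoring above a threshold, but always keep the k highest
# scoring ones so an image with only weak detections still shows something.
# It reuses kwarray.argmaxima and Detections.compress exactly as above; the
# helper name and default values are illustrative.
def sketch_select_top_dets(dets, thresh=0.2, k=10):
    import kwarray
    flags = dets.scores > thresh
    flags[kwarray.argmaxima(dets.scores, num=k)] = True
    return dets.compress(flags)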
Beispiel #38
0
                        dpath = expandvars(expanduser(dpath))

                        # Note: os.makedirs(dpath, exist_ok=True) or
                        # ub.ensuredir(dpath) would also work here
                        if not os.path.exists(dpath):
                            os.makedirs(dpath)

                    dpath_to_url[dpath].append(url)
    return dpath_to_url


def update_urls():
    global PROJECT_URLS
    global PROJECT_REPOS
    for dpath, urls in _parse_custom_urls().items():
        print('urls = {!r}'.format(urls))
        repos_urls, repos = repo_list(urls, dpath)

        PROJECT_URLS += repos_urls
        PROJECT_REPOS += repos


update_urls()
# print('PROJECT_URLS = {!r}'.format(PROJECT_URLS))
try:
    print('PROJECT_REPOS = {}'.format(ub.repr2(PROJECT_REPOS)))
except NameError:
    # ub may not be defined in every context this module is imported from
    pass