Example #1
def motherboard_info():
    """
    REQUIRES SUDO

    xdoctest -m ~/misc/notes/buildapc.py motherboard_info
    """
    import re
    import ubelt as ub
    info = ub.cmd('sudo dmidecode -t 9')
    pcie_slots = []
    chunks = info['out'].split('\n\n')
    for chunk in chunks:
        item = {}
        for line in chunk.split('\n'):
            # Note: this doesn't capture all fields correctly (e.g. the
            # multi-line "Characteristics" entries)
            parts = re.split(r'\t*:', line, maxsplit=1)
            if len(parts) == 2:
                key, val = parts
                key = key.strip()
                val = val.strip()
                if key in item:
                    raise KeyError(f'key={key} already exists')
                item[key] = val
        if item:
            item = ub.map_keys(slugify_key, item)
            pcie_slots.append(item)

    pcie_usage = ub.dict_hist(item['current_usage'] for item in pcie_slots)

    _varied = varied_values(pcie_slots, min_variations=0)
    _varied = ub.map_keys(slugify_key, _varied)
    unvaried = {k: ub.peek(v) for k, v in _varied.items() if len(v) == 1}
    varied = {k: v for k, v in _varied.items() if len(v) > 1}

    print(info['out'])
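
The snippet relies on a `slugify_key` helper that is not shown here. A
minimal sketch of the key-normalization step, assuming `slugify_key` just
lowercases field names and collapses non-word runs to underscores:

    import re
    import ubelt as ub

    def slugify_key(text):
        # Assumed helper: lowercase, join words with underscores.
        return re.sub(r'\W+', '_', text.strip().lower()).strip('_')

    raw = {'Current Usage': 'In Use', 'Slot Designation': 'PCIEX16_1'}
    print(ub.map_keys(slugify_key, raw))
    # {'current_usage': 'In Use', 'slot_designation': 'PCIEX16_1'}
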
Example #2
    def _fix_keys(model_state_dict):
        """
        Hack around the DataParallel wrapper. If there is nothing in common
        between the two models, check whether prepending 'module.' to the
        other keys fixes it.
        """
        other_keys = set(model_state_dict)
        self_keys = set(self_state)

        if not other_keys.intersection(self_keys):
            prefix = 'module.'

            def smap(f, ss):
                return set(map(f, ss))

            def fix1(k):
                return prefix + k

            def fix2(k):
                # Note: returns None for keys without the prefix; the hack
                # assumes all keys share it.
                if k.startswith(prefix):
                    return k[len(prefix):]

            if smap(fix1, other_keys).intersection(self_keys):
                model_state_dict = ub.map_keys(fix1, model_state_dict)
            elif smap(fix2, other_keys).intersection(self_keys):
                model_state_dict = ub.map_keys(fix2, model_state_dict)

        return model_state_dict
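
A toy run of the hack, assuming checkpoint keys carry a DataParallel-style
'module.' prefix that the target model lacks:

    import ubelt as ub

    model_state_dict = {'module.conv.weight': 1, 'module.conv.bias': 2}
    self_state = {'conv.weight': 0, 'conv.bias': 0}
    prefix = 'module.'
    fixed = ub.map_keys(
        lambda k: k[len(prefix):] if k.startswith(prefix) else k,
        model_state_dict)
    assert set(fixed) == set(self_state)
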
Example #3
class QUAL(object):
    EXCELLENT = 5
    GOOD = 4
    OK = 3
    POOR = 2
    JUNK = 1
    UNKNOWN = None

    INT_TO_CODE = ub.odict([
        (EXCELLENT, 'excellent'),
        (GOOD, 'good'),
        (OK, 'ok'),
        (POOR, 'poor'),
        (JUNK, 'junk'),
        (UNKNOWN, 'unspecified'),
    ])

    INT_TO_NICE = ub.odict([
        (EXCELLENT, 'Excellent'),
        (GOOD, 'Good'),
        (OK, 'OK'),
        (POOR, 'Poor'),
        (JUNK, 'Junk'),
        (UNKNOWN, 'Unspecified'),
    ])

    CODE_TO_NICE = ub.map_keys(INT_TO_CODE, INT_TO_NICE)
    CODE_TO_INT = ub.invert_dict(INT_TO_CODE)
    NICE_TO_CODE = ub.invert_dict(CODE_TO_NICE)
    NICE_TO_INT = ub.invert_dict(INT_TO_NICE)
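
Note that `INT_TO_CODE` (a dict, not a callable) is passed as the first
argument: `ub.map_keys` accepts an indexable and translates each key
through it, which is what builds `CODE_TO_NICE` above. A tiny sketch:

    import ubelt as ub

    INT_TO_CODE = {5: 'excellent', 4: 'good'}
    INT_TO_NICE = {5: 'Excellent', 4: 'Good'}
    CODE_TO_NICE = ub.map_keys(INT_TO_CODE, INT_TO_NICE)
    assert CODE_TO_NICE == {'excellent': 'Excellent', 'good': 'Good'}
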
Example #4
class CONFIDENCE(object):
    UNKNOWN = None
    GUESSING = 1
    NOT_SURE = 2
    PRETTY_SURE = 3
    ABSOLUTELY_SURE = 4

    INT_TO_CODE = ub.odict([
        (ABSOLUTELY_SURE, 'absolutely_sure'),
        (PRETTY_SURE, 'pretty_sure'),
        (NOT_SURE, 'not_sure'),
        (GUESSING, 'guessing'),
        (UNKNOWN, 'unspecified'),
    ])

    INT_TO_NICE = ub.odict([
        (ABSOLUTELY_SURE, 'Doubtless'),
        (PRETTY_SURE, 'Sure'),
        (NOT_SURE, 'Unsure'),
        (GUESSING, 'Guessing'),
        (UNKNOWN, 'Unspecified'),
    ])

    CODE_TO_NICE = ub.map_keys(INT_TO_CODE, INT_TO_NICE)
    CODE_TO_INT = ub.invert_dict(INT_TO_CODE)
    NICE_TO_CODE = ub.invert_dict(CODE_TO_NICE)
    NICE_TO_INT = ub.invert_dict(INT_TO_NICE)
Example #5
class EVIDENCE_DECISION(object):
    """
    TODO: change to EVIDENCE_DECISION / VISUAL_DECISION
    Enumerated types of review codes and texts

    Notes:
        unreviewed: Not compared yet.
        nomatch: Visually comparable and different.
        match: Visually comparable and the same.
        notcomp: Not comparable; it is actually impossible to determine.
        unknown: Reviewed, but we just can't figure it out.
    """
    UNREVIEWED = None
    NEGATIVE = 0
    POSITIVE = 1
    INCOMPARABLE = 2
    UNKNOWN = 3

    INT_TO_CODE = ub.odict([
        # (POSITIVE       , 'match'),
        # (NEGATIVE       , 'nomatch'),
        # (INCOMPARABLE   , 'notcomp'),
        # (POSITIVE       , 'positive'),
        # (NEGATIVE       , 'negative'),
        # (INCOMPARABLE   , 'incomparable'),
        # (UNKNOWN        , 'unknown'),
        # (UNREVIEWED     , 'unreviewed'),
        (POSITIVE, 'POSTV'),
        (NEGATIVE, 'NEGTV'),
        (INCOMPARABLE, 'INCMP'),
        (UNKNOWN, 'UNKWN'),
        (UNREVIEWED, 'UNREV'),
    ])

    INT_TO_NICE = ub.odict([
        (POSITIVE, 'Positive'),
        (NEGATIVE, 'Negative'),
        (INCOMPARABLE, 'Incomparable'),
        (UNKNOWN, 'Unknown'),
        (UNREVIEWED, 'Unreviewed'),
    ])

    CODE_TO_NICE = ub.map_keys(INT_TO_CODE, INT_TO_NICE)
    CODE_TO_INT = ub.invert_dict(INT_TO_CODE)
    NICE_TO_CODE = ub.invert_dict(CODE_TO_NICE)
    NICE_TO_INT = ub.invert_dict(INT_TO_NICE)

    MATCH_CODE = CODE_TO_INT
Example #6
def parse_cpu_info(percore=False):
    """
    Get a nice summary of CPU information

    Requirements:
        pip install python-slugify

    Ignore:
        cpu_info = parse_cpu_info()
        print(cpu_info['varied']['cpu_mhz'])
        print('cpu_info = {}'.format(ub.repr2(cpu_info, nl=3)))

    Notes:
        * lscpu
    """
    # Alternative source: the `cpuinfo` package reports similar data.
    # (Its result is recomputed below from /proc/cpuinfo.)
    import cpuinfo
    cpu_info = cpuinfo.get_cpu_info()  # NOQA

    import re
    import ubelt as ub
    info = ub.cmd('cat /proc/cpuinfo')
    cpu_lines = info['out'].split('\n\n')
    cores = []
    for lines in cpu_lines:
        core = {}
        for line in lines.split('\n'):
            parts = re.split(r'\t*:', line, maxsplit=1)
            if len(parts) == 2:
                key, val = parts
                key = key.strip()
                val = val.strip()
                if key in core:
                    raise KeyError(f'key={key} already exists')
                core[key] = val
        if len(core):
            core = ub.map_keys(slugify_key, core)
            cores.append(core)
    _varied = varied_values(cores, min_variations=0)
    unvaried = {k: ub.peek(v) for k, v in _varied.items() if len(v) == 1}
    varied = {k: v for k, v in _varied.items() if len(v) > 1}

    cpu_info = {
        'varied': varied,
        'unvaried': unvaried,
    }
    if percore:
        cpu_info['cores'] = cores
    return cpu_info
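
`varied_values` is not defined in this snippet. A minimal sketch consistent
with how it is used here (collect the distinct values each key takes across
the per-core dicts):

    def varied_values(dict_list, min_variations=0):
        # Hypothetical reimplementation, inferred from usage.
        all_keys = {k for d in dict_list for k in d}
        out = {}
        for key in all_keys:
            values = {d.get(key) for d in dict_list}
            if len(values) > min_variations:
                out[key] = values
        return out

    cores = [{'cpu_mhz': '800.0', 'model_name': 'X'},
             {'cpu_mhz': '3400.0', 'model_name': 'X'}]
    print(varied_values(cores))
    # e.g. {'cpu_mhz': {'800.0', '3400.0'}, 'model_name': {'X'}}
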
Example #7
def generate_phase1_data_tables():
    cfg = viame_wrangler.config.WrangleConfig({
        'annots':
        ub.truepath(
            '~/data/viame-challenge-2018/phase1-annotations/*/*coarse*bbox-keypoint*.json'
        )
    })

    all_stats = {}

    annots = cfg.annots
    fpaths = list(glob.glob(annots))
    print('fpaths = {}'.format(ub.repr2(fpaths)))
    for fpath in fpaths:
        dset_name = os.path.basename(fpath).split('-')[0]
        dset = CocoDataset(fpath, img_root=cfg.img_root, tag=dset_name)

        assert not dset.missing_images()
        assert not dset._find_bad_annotations()
        assert all([
            img['has_annots'] in [True, False, None]
            for img in dset.imgs.values()
        ])

        print(ub.dict_hist([g['has_annots'] for g in dset.imgs.values()]))

        stats = {}
        stats.update(ub.dict_subset(dset.basic_stats(), ['n_anns', 'n_imgs']))

        roi_shapes_hist = dict()
        populated_cats = dict()
        for name, item in dset.category_annotation_type_frequency().items():
            if item:
                populated_cats[name] = sum(item.values())
                for k, v in item.items():
                    roi_shapes_hist[k] = roi_shapes_hist.get(k, 0) + v

        stats['n_cats'] = populated_cats
        stats['n_roi_shapes'] = roi_shapes_hist
        stats['n_imgs_with_annots'] = ub.map_keys(
            {
                None: 'unsure',
                True: 'has_objects',
                False: 'no_objects'
            }, ub.dict_hist([g['has_annots'] for g in dset.imgs.values()]))
        all_stats[dset_name] = stats

    print(ub.repr2(all_stats, nl=3, sk=1))
Example #8
    def category_annotation_type_frequency(self):
        """
        Reports the number of annotations of each type for each category

        Example:
            >>> dataset = demo_coco_data()
            >>> self = CocoDataset(dataset, tag='demo')
            >>> hist = self.category_annotation_type_frequency()
            >>> print(ub.repr2(hist))
        """
        catname_to_nannot_types = {}
        for cid, aids in self.cid_to_aids.items():
            name = self.cats[cid]['name']
            hist = ub.dict_hist(map(annot_type, ub.take(self.anns, aids)))
            catname_to_nannot_types[name] = ub.map_keys(
                lambda k: k[0] if len(k) == 1 else k, hist)
        return catname_to_nannot_types
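
The lambda unwraps singleton tuples so a lone annotation type reads as
'bbox' rather than ('bbox',). For instance:

    import ubelt as ub

    hist = {('bbox',): 3, ('bbox', 'keypoints'): 1}
    hist = ub.map_keys(lambda k: k[0] if len(k) == 1 else k, hist)
    assert hist == {'bbox': 3, ('bbox', 'keypoints'): 1}
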
Example #9
    def category_annotation_frequency(self):
        """
        Reports the number of annotations of each category

        Example:
            >>> dataset = demo_coco_data()
            >>> self = CocoDataset(dataset, tag='demo')
            >>> hist = self.category_annotation_frequency()
            >>> print(ub.repr2(hist))
            {
                'astroturf': 0,
                'astronaut': 1,
                'astronomer': 1,
                'helmet': 1,
                'rocket': 1,
                'mouth': 2,
                'star': 5,
            }
        """
        catname_to_nannots = ub.map_keys(lambda x: self.cats[x]['name'],
                                         ub.map_vals(len, self.cid_to_aids))
        catname_to_nannots = ub.odict(
            sorted(catname_to_nannots.items(), key=lambda kv: (kv[1], kv[0])))
        return catname_to_nannots
Example #10
class META_DECISION(object):
    """
    Enumerated types of review codes and texts

    Notes:
        unreviewed: we don't have a meta decision
        same: we know this is the same animal through non-visual means
        diff: we know this is a different animal through non-visual means

    Example:
        >>> assert hasattr(META_DECISION, 'CODE')
        >>> assert hasattr(META_DECISION, 'NICE')
        >>> code1 = META_DECISION.INT_TO_CODE[META_DECISION.NULL]
        >>> code2 = META_DECISION.CODE.NULL
        >>> assert code1 == code2
        >>> nice1 = META_DECISION.INT_TO_NICE[META_DECISION.NULL]
        >>> nice2 = META_DECISION.NICE.NULL
        >>> assert nice1 == nice2
    """
    NULL = None
    DIFF = 0
    SAME = 1
    INT_TO_CODE = ub.odict([
        (NULL, 'null'),
        (DIFF, 'diff'),
        (SAME, 'same'),
    ])
    INT_TO_NICE = ub.odict([
        (NULL, 'NULL'),
        (DIFF, 'Different'),
        (SAME, 'Same'),
    ])
    CODE_TO_NICE = ub.map_keys(INT_TO_CODE, INT_TO_NICE)
    CODE_TO_INT = ub.invert_dict(INT_TO_CODE)
    NICE_TO_CODE = ub.invert_dict(CODE_TO_NICE)
    NICE_TO_INT = ub.invert_dict(INT_TO_NICE)
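
The doctest references `META_DECISION.CODE` and `META_DECISION.NICE`
namespaces that this snippet does not define. One plausible construction
(an assumption, not the original implementation) derives them from the
tables so that `CODE.NULL == INT_TO_CODE[NULL]`:

    class _Namespace(object):
        def __init__(self, **attrs):
            self.__dict__.update(attrs)

    _names = {None: 'NULL', 0: 'DIFF', 1: 'SAME'}
    CODE = _Namespace(**{_names[k]: v
                         for k, v in META_DECISION.INT_TO_CODE.items()})
    NICE = _Namespace(**{_names[k]: v
                         for k, v in META_DECISION.INT_TO_NICE.items()})
    assert CODE.NULL == 'null' and NICE.NULL == 'NULL'
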
Example #11
def symbolic_confusion(**known):
    """
    Example:
        >>> known = {'tp': 10, 'fp': 100, 'total': 1000}
        >>> solution = symbolic_confusion(**known)
        >>> print(ub.repr2(solution))
        >>> known = {'tp': 10, 'fp': 100, 'fn': 1}
        >>> solution = symbolic_confusion(**known)
        >>> print(ub.repr2(solution))
        >>> print(ub.repr2(ub.dict_subset(solution, ['fpr']), nl=2))
        >>> known = {'tp': 10, 'fp': 100, 'fn': 1, 'total': 1000}
        >>> solution = symbolic_confusion(**known)
        >>> print(ub.repr2(solution))
        >>> print(ub.repr2(ub.dict_subset(solution, ['fpr']), nl=2))


        >>> known = {'tp': 10, 'pp': 12, 'rp': 15, 'total': 500}
        >>> solution = symbolic_confusion(**known)
        >>> print(ub.repr2(solution))
    """
    import sympy as sym
    all_vars = {}

    def symbols(names, mode):
        if mode == 'nonneg-int':
            flags = {'is_nonnegative': True, 'is_integer': True}
        elif mode == 'nonneg-real':
            flags = {'is_nonnegative': True, 'is_real': True}
        elif mode == 'real':
            flags = {'is_real': True}
        else:
            flags = {}
        out = sym.symbols(names, **flags)
        if isinstance(out, tuple):
            variables = out
        else:
            variables = [out]
        for v in variables:
            all_vars[v.name] = v
        return out

    # Core variables
    (tp, fp, fn, tn) = symbols('tp, fp, fn, tn', 'nonneg-int')
    (rp, pp, rn, pn) = symbols('rp, pp, rn, pn', 'nonneg-int')
    total = symbols('total', 'nonneg-int')

    # Derived variables
    tpr, tnr, fnr, fpr = symbols('tpr, tnr, fnr, fpr', 'nonneg-real')
    acc, fdr, for_ = symbols('acc, fdr, for', 'nonneg-real')
    ppv, npv = symbols('ppv, npv', 'nonneg-real')
    mcc, bm, mk, f1 = symbols('mcc, bm, mk, f1', 'real')

    # Aliases
    all_vars['precision'] = ppv
    all_vars['recall'] = tpr

    core_constraints = [
        sym.Eq(total, tp + fp + fn + tn),
        sym.Eq(total, pp + pn),
        sym.Eq(total, rp + rn),
        sym.Eq(rp, tp + fn),
        sym.Eq(rn, tn + fp),
        sym.Eq(pp, tp + fp),
        sym.Eq(pn, tn + fn),
    ]

    #
    # TODO: can we get the functional form of how to compute the other base
    # constants if we specify what we have? (we need 4 dof I think)
    # if False:
    #     sym.solve(core_constraints, [tp, pp, rp, total])
    #     implicit=1)

    derived_constraints = [
        sym.Eq(tpr, tp / (tp + fn)),
        sym.Eq(tnr, tn / (tn + fp)),
        sym.Eq(fnr, fn / (fn + tp)),
        sym.Eq(fpr, fp / (fp + tn)),
        sym.Eq(ppv, tp / (tp + fp)),
        sym.Eq(npv, tn / (tn + fn)),
        sym.Eq(for_, fn / (fn + tn)),
        sym.Eq(fdr, fp / (fp + tp)),
        sym.Eq(acc, (tp + tn) / (rp + rn)),
        sym.Eq(f1, 2 * tp / (2 * tp + fp + fn)),
        # using mcc messes up linear equations
        # sym.Eq(mcc, tp * tn - fp * fn / sym.sqrt((tp + fp) * (tp - fn) * (tn + fp) * (tn + fn))),
        sym.Eq(bm, tpr + tnr - 1),
        sym.Eq(mk, ppv + npv - 1),
    ]
    import ubelt as ub

    # Enter as much information as you know
    known_dict = ub.map_keys(all_vars.__getitem__, known)
    known_info = [sym.Eq(k, v) for k, v in known_dict.items()]

    # Set up a system of equations representing all constraints
    system = core_constraints + derived_constraints + known_info

    try:
        soln1_ = sym.solve(system)
        soln1 = soln1_[0]
    except Exception:
        soln1 = sym.solve(system, implicit=1)

    solution = {k.name: v for k, v in soln1.items()}

    try:
        solution = {k: v.evalf() for k, v in solution.items()}
    except Exception:
        # Some values may remain symbolic; leave them as-is.
        pass

    return solution
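
A minimal standalone version of the same idea, pinning down a few counts
and letting sympy infer the rest (numbers are hypothetical):

    import sympy as sym

    tp, fp, fn, tn, total = sym.symbols(
        'tp fp fn tn total', nonnegative=True, integer=True)
    system = [
        sym.Eq(total, tp + fp + fn + tn),
        sym.Eq(tp, 10), sym.Eq(fp, 100), sym.Eq(fn, 1), sym.Eq(total, 1000),
    ]
    print(sym.solve(system))  # forces tn == 889
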
Example #12
def make_baseline_truthfiles():
    work_dir = ub.truepath('~/work')
    data_dir = ub.truepath('~/data')

    challenge_data_dir = join(data_dir, 'viame-challenge-2018')
    challenge_work_dir = join(work_dir, 'viame-challenge-2018')

    ub.ensuredir(challenge_work_dir)

    img_root = join(challenge_data_dir, 'phase0-imagery')
    annot_dir = join(challenge_data_dir, 'phase0-annotations')
    fpaths = list(glob.glob(join(annot_dir, '*.json')))
    # ignore the non-bounding box nwfsc and afsc datasets for now

    # exclude = ('nwfsc', 'afsc', 'mouss', 'habcam')
    # exclude = ('nwfsc', 'afsc', 'mouss',)
    # fpaths = [p for p in fpaths if not basename(p).startswith(exclude)]

    import json
    dsets = ub.odict()
    for fpath in fpaths:
        key = basename(fpath).split('.')[0]
        with open(fpath, 'r') as file:
            dsets[key] = json.load(file)

    print('Merging')
    merged = coco_union(dsets)

    merged_fpath = join(challenge_work_dir, 'phase0-merged.mscoco.json')
    with open(merged_fpath, 'w') as fp:
        json.dump(merged, fp, indent=4)

    import copy
    self = CocoDataset(copy.deepcopy(merged), img_root=img_root, autobuild=False)
    self._build_index()
    self.run_fixes()

    if True:
        # remove all point annotations
        print('Remove point annotations')
        to_remove = []
        for ann in self.dataset['annotations']:
            if ann['roi_shape'] == 'point':
                to_remove.append(ann)
        for ann in to_remove:
            self.dataset['annotations'].remove(ann)
        self._build_index()

        # remove empty images
        print('Remove empty images')
        to_remove = []
        for gid in self.imgs.keys():
            aids = self.gid_to_aids.get(gid, [])
            if not aids:
                to_remove.append(self.imgs[gid])
        for img in to_remove:
            self.dataset['images'].remove(img)
        self._build_index()

    print('# self.anns = {!r}'.format(len(self.anns)))
    print('# self.imgs = {!r}'.format(len(self.imgs)))
    print('# self.cats = {!r}'.format(len(self.cats)))

    catname_to_nannots = ub.map_keys(lambda x: self.cats[x]['name'],
                                     ub.map_vals(len, self.cid_to_aids))
    catname_to_nannots = ub.odict(sorted(catname_to_nannots.items(),
                                         key=lambda kv: kv[1]))
    print(ub.repr2(catname_to_nannots))

    if False:
        # aid = list(self.anns.values())[0]['id']
        # self.show_annotation(aid)
        gids = sorted([gid for gid, aids in self.gid_to_aids.items() if aids])
        # import utool as ut
        # for gid in ut.InteractiveIter(gids):
        for gid in gids:
            from matplotlib import pyplot as plt
            fig = plt.figure(1)
            fig.clf()
            self.show_annotation(gid=gid)
            fig.canvas.draw()

        for ann in self.anns.values():
            primary_aid = ann['id']
            print('primary_aid = {!r}'.format(primary_aid))
            print(len(self.gid_to_aids[ann['image_id']]))

            if 'roi_shape' not in ann:
                ann['roi_shape'] = 'bounding_box'

            if ann['roi_shape'] == 'boundingBox':
                pass

            if ann['roi_shape'] == 'point':
                primary_aid = ann['id']
                print('primary_aid = {!r}'.format(primary_aid))
                print(len(self.gid_to_aids[ann['image_id']]))
                break

    # Split into train / test  set
    print('Splitting')
    skf = StratifiedGroupKFold(n_splits=2)
    groups = [ann['image_id'] for ann in self.anns.values()]
    y = [ann['category_id'] for ann in self.anns.values()]
    X = [ann['id'] for ann in self.anns.values()]
    split = list(skf.split(X=X, y=y, groups=groups))[0]
    train_idx, test_idx = split

    print('Taking subsets')
    aid_to_gid = {aid: ann['image_id'] for aid, ann in self.anns.items()}
    train_aids = list(ub.take(X, train_idx))
    test_aids = list(ub.take(X, test_idx))
    train_gids = sorted(set(ub.take(aid_to_gid, train_aids)))
    test_gids = sorted(set(ub.take(aid_to_gid, test_aids)))

    train_dset = self.subset(train_gids)
    test_dset = self.subset(test_gids)

    print('---------')
    print('# train_dset.anns = {!r}'.format(len(train_dset.anns)))
    print('# train_dset.imgs = {!r}'.format(len(train_dset.imgs)))
    print('# train_dset.cats = {!r}'.format(len(train_dset.cats)))
    print('---------')
    print('# test_dset.anns = {!r}'.format(len(test_dset.anns)))
    print('# test_dset.imgs = {!r}'.format(len(test_dset.imgs)))
    print('# test_dset.cats = {!r}'.format(len(test_dset.cats)))

    train_dset._ensure_imgsize()
    test_dset._ensure_imgsize()

    print('Writing')
    with open(join(challenge_work_dir, 'phase0-merged-train.mscoco.json'), 'w') as fp:
        json.dump(train_dset.dataset, fp, indent=4)

    with open(join(challenge_work_dir, 'phase0-merged-test.mscoco.json'), 'w') as fp:
        json.dump(test_dset.dataset, fp, indent=4)

    # Make a detectron yaml file
    config_text = ub.codeblock(
        """
        MODEL:
          TYPE: generalized_rcnn
          CONV_BODY: ResNet.add_ResNet50_conv4_body
          NUM_CLASSES: {num_classes}
          FASTER_RCNN: True
        NUM_GPUS: 1
        SOLVER:
          WEIGHT_DECAY: 0.0001
          LR_POLICY: steps_with_decay
          BASE_LR: 0.01
          GAMMA: 0.1
          # 1x schedule (note TRAIN.IMS_PER_BATCH: 1)
          MAX_ITER: 180000
          STEPS: [0, 120000, 160000]
        RPN:
          SIZES: (32, 64, 128, 256, 512)
        FAST_RCNN:
          ROI_BOX_HEAD: ResNet.add_ResNet_roi_conv5_head
          ROI_XFORM_METHOD: RoIAlign
        TRAIN:
          WEIGHTS: https://s3-us-west-2.amazonaws.com/detectron/ImageNetPretrained/MSRA/R-50.pkl
          DATASETS: ('/work/viame-challenge-2018/phase0-merged-train.mscoco.json',)
          IM_DIR: '/data/viame-challenge-2018/phase0-imagery'
          SCALES: (800,)
          MAX_SIZE: 1333
          IMS_PER_BATCH: 1
          BATCH_SIZE_PER_IM: 512
        TEST:
          DATASETS: ('/work/viame-challenge-2018/phase0-merged-test.mscoco.json',)
          IM_DIR: '/data/viame-challenge-2018/phase0-imagery'
          SCALES: (800,)
          MAX_SIZE: 1333
          NMS: 0.5
          RPN_PRE_NMS_TOP_N: 6000
          RPN_POST_NMS_TOP_N: 1000
        OUTPUT_DIR: /work/viame-challenge-2018/output
        """)
    config_text = config_text.format(
        num_classes=len(self.cats),
    )
    ub.writeto(join(challenge_work_dir, 'phase0-faster-rcnn.yaml'), config_text)

    docker_cmd = ('nvidia-docker run '
                  '-v {work_dir}:/work -v {data_dir}:/data '
                  '-it detectron:c2-cuda9-cudnn7 bash').format(
                      work_dir=work_dir, data_dir=data_dir)

    train_cmd = ('python2 tools/train_net.py '
                 '--cfg /work/viame-challenge-2018/phase0-faster-rcnn.yaml '
                 'OUTPUT_DIR /work/viame-challenge-2018/output')

    hacks = ub.codeblock(
        """
        git remote add Erotemic https://github.com/Erotemic/Detectron.git
        git fetch --all
        git checkout general_dataset

        # curl https://github.com/Erotemic/Detectron/blob/42d44b2d155c775dc509b6a44518d0c582f8cdf5/tools/train_net.py
        # wget https://github.com/Erotemic/Detectron/blob/42d44b2d155c775dc509b6a44518d0c582f8cdf5/lib/core/config.py
        """)

    print(docker_cmd)
    print(train_cmd)
Example #13
def 字典_根据健重建(func, dict_):
    """Rebuild a dict by remapping its keys with ``func`` (the Chinese name
    means roughly "rebuild dict by key")."""
    data = ub.map_keys(func, dict_)
    return data
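
The wrapper simply forwards to `ub.map_keys`; for example:

    assert 字典_根据健重建(str.upper, {'a': 1, 'b': 2}) == {'A': 1, 'B': 2}
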
Example #14
    def _fix_keys(model_state_dict):
        """
        Hack around the DataParallel wrapper. If there is nothing in common
        between the two models, check whether prepending 'module.' to the
        other keys fixes it.
        """
        other_keys = set(model_state_dict)
        self_keys = set(self_state)
        common_keys = other_keys.intersection(self_keys)
        if not common_keys:
            if association == 'strict':
                pass
            elif association == 'module-hack':
                # If there are no common keys try a hack
                prefix = 'module.'

                def smap(f, ss):
                    return set(map(f, ss))

                def fix1(k):
                    return prefix + k

                def fix2(k):
                    if k.startswith(prefix):
                        return k[len(prefix):]

                if smap(fix1, other_keys).intersection(self_keys):
                    model_state_dict = ub.map_keys(fix1, model_state_dict)
                elif smap(fix2, other_keys).intersection(self_keys):
                    model_state_dict = ub.map_keys(fix2, model_state_dict)
            elif association == 'prefix-hack':
                import functools

                def add_prefix(k, prefix):
                    return prefix + k

                def remove_prefix(k, prefix):
                    if k.startswith(prefix):
                        return k[len(prefix):]

                # set1 = other_keys
                # target_set2 = self_keys
                found = _best_prefix_transform(other_keys, self_keys)
                if found is not None:
                    for action, prefix in found['transform']:
                        if action == 'add':
                            func = functools.partial(add_prefix, prefix=prefix)
                        elif action == 'remove':
                            func = functools.partial(remove_prefix,
                                                     prefix=prefix)
                        else:
                            raise AssertionError
                        model_state_dict = ub.map_keys(func, model_state_dict)
            elif association == 'embedding':
                if verbose > 1:
                    print(
                        'Using subpath embedding association, may take some time'
                    )
                # I believe this is the correct way to solve the problem
                paths1 = sorted(other_keys)
                paths2 = sorted(self_state)
                subpaths1, subpaths2 = maximum_common_ordered_subpaths(
                    paths1, paths2)
                mapping = ub.dzip(subpaths1, subpaths2)
                if verbose > 1:
                    print('mapping = {}'.format(ub.repr2(mapping, nl=1)))
                model_state_dict = ub.map_keys(lambda k: mapping.get(k, k),
                                               model_state_dict)
            else:
                raise KeyError(association)
        return model_state_dict
Example #15
class VIEW(object):
    """
    Categorical viewpoint using the faces of a rhombicuboctahedron.

    References:
        https://en.wikipedia.org/wiki/Rhombicuboctahedron
    """
    UNKNOWN = None
    R = 1
    FR = 2
    F = 3
    FL = 4
    L = 5
    BL = 6
    B = 7
    BR = 8

    U = 9
    UF = 10
    UB = 11
    UL = 12
    UR = 13
    UFL = 14
    UFR = 15
    UBL = 16
    UBR = 17

    D = 18
    DF = 19
    DB = 20
    DL = 21
    DR = 22
    DFL = 23
    DFR = 24
    DBL = 25
    DBR = 26

    INT_TO_CODE = ub.odict([
        (UNKNOWN, 'unknown'),
        (R, 'right'),
        (FR, 'frontright'),
        (F, 'front'),
        (FL, 'frontleft'),
        (L, 'left'),
        (BL, 'backleft'),
        (B, 'back'),
        (BR, 'backright'),
        (U, 'up'),
        (UF, 'upfront'),
        (UB, 'upback'),
        (UL, 'upleft'),
        (UR, 'upright'),
        (UFL, 'upfrontleft'),
        (UFR, 'upfrontright'),
        (UBL, 'upbackleft'),
        (UBR, 'upbackright'),
        (D, 'down'),
        (DF, 'downfront'),
        (DB, 'downback'),
        (DL, 'downleft'),
        (DR, 'downright'),
        (DFL, 'downfrontleft'),
        (DFR, 'downfrontright'),
        (DBL, 'downbackleft'),
        (DBR, 'downbackright'),
    ])

    INT_TO_NICE = ub.odict([
        (UNKNOWN, 'Unknown'),
        (R, 'Right'),
        (FR, 'Front-Right'),
        (F, 'Front'),
        (FL, 'Front-Left'),
        (L, 'Left'),
        (BL, 'Back-Left'),
        (B, 'Back'),
        (BR, 'Back-Right'),
        (U, 'Up'),
        (UF, 'Up-Front'),
        (UB, 'Up-Back'),
        (UL, 'Up-Left'),
        (UR, 'Up-Right'),
        (UFL, 'Up-Front-Left'),
        (UFR, 'Up-Front-Right'),
        (UBL, 'Up-Back-Left'),
        (UBR, 'Up-Back-Right'),
        (D, 'Down'),
        (DF, 'Down-Front'),
        (DB, 'Down-Back'),
        (DL, 'Down-Left'),
        (DR, 'Down-Right'),
        (DFL, 'Down-Front-Left'),
        (DFR, 'Down-Front-Right'),
        (DBL, 'Down-Back-Left'),
        (DBR, 'Down-Back-Right'),
    ])

    CODE_TO_NICE = ub.map_keys(INT_TO_CODE, INT_TO_NICE)
    CODE_TO_INT = ub.invert_dict(INT_TO_CODE)
    NICE_TO_CODE = ub.invert_dict(CODE_TO_NICE)
    NICE_TO_INT = ub.invert_dict(INT_TO_NICE)

    DIST = {
        # DIST 0 PAIRS
        (B, B): 0,
        (BL, BL): 0,
        (BR, BR): 0,
        (D, D): 0,
        (DB, DB): 0,
        (DBL, DBL): 0,
        (DBR, DBR): 0,
        (DF, DF): 0,
        (DFL, DFL): 0,
        (DFR, DFR): 0,
        (DL, DL): 0,
        (DR, DR): 0,
        (F, F): 0,
        (FL, FL): 0,
        (FR, FR): 0,
        (L, L): 0,
        (R, R): 0,
        (U, U): 0,
        (UB, UB): 0,
        (UBL, UBL): 0,
        (UBR, UBR): 0,
        (UF, UF): 0,
        (UFL, UFL): 0,
        (UFR, UFR): 0,
        (UL, UL): 0,
        (UR, UR): 0,

        # DIST 1 PAIRS
        (B, BL): 1,
        (B, BR): 1,
        (B, DB): 1,
        (B, DBL): 1,
        (B, DBR): 1,
        (B, UB): 1,
        (B, UBL): 1,
        (B, UBR): 1,
        (BL, DBL): 1,
        (BL, L): 1,
        (BL, UBL): 1,
        (BR, DBR): 1,
        (BR, R): 1,
        (BR, UBR): 1,
        (D, DB): 1,
        (D, DBL): 1,
        (D, DBR): 1,
        (D, DF): 1,
        (D, DFL): 1,
        (D, DFR): 1,
        (D, DL): 1,
        (D, DR): 1,
        (DB, DBL): 1,
        (DB, DBR): 1,
        (DBL, DL): 1,
        (DBL, L): 1,
        (DBR, DR): 1,
        (DBR, R): 1,
        (DF, DFL): 1,
        (DF, DFR): 1,
        (DF, F): 1,
        (DFL, DL): 1,
        (DFL, F): 1,
        (DFL, FL): 1,
        (DFL, L): 1,
        (DFR, DR): 1,
        (DFR, F): 1,
        (DFR, FR): 1,
        (DFR, R): 1,
        (DL, L): 1,
        (DR, R): 1,
        (F, FL): 1,
        (F, FR): 1,
        (F, UF): 1,
        (F, UFL): 1,
        (F, UFR): 1,
        (FL, L): 1,
        (FL, UFL): 1,
        (FR, R): 1,
        (FR, UFR): 1,
        (L, UBL): 1,
        (L, UFL): 1,
        (L, UL): 1,
        (R, UBR): 1,
        (R, UFR): 1,
        (R, UR): 1,
        (U, UB): 1,
        (U, UBL): 1,
        (U, UBR): 1,
        (U, UF): 1,
        (U, UFL): 1,
        (U, UFR): 1,
        (U, UL): 1,
        (U, UR): 1,
        (UB, UBL): 1,
        (UB, UBR): 1,
        (UBL, UL): 1,
        (UBR, UR): 1,
        (UF, UFL): 1,
        (UF, UFR): 1,
        (UFL, UL): 1,
        (UFR, UR): 1,

        # DIST 2 PAIRS
        (B, D): 2,
        (B, DL): 2,
        (B, DR): 2,
        (B, L): 2,
        (B, R): 2,
        (B, U): 2,
        (B, UL): 2,
        (B, UR): 2,
        (BL, BR): 2,
        (BL, D): 2,
        (BL, DB): 2,
        (BL, DBR): 2,
        (BL, DFL): 2,
        (BL, DL): 2,
        (BL, FL): 2,
        (BL, U): 2,
        (BL, UB): 2,
        (BL, UBR): 2,
        (BL, UFL): 2,
        (BL, UL): 2,
        (BR, D): 2,
        (BR, DB): 2,
        (BR, DBL): 2,
        (BR, DFR): 2,
        (BR, DR): 2,
        (BR, FR): 2,
        (BR, U): 2,
        (BR, UB): 2,
        (BR, UBL): 2,
        (BR, UFR): 2,
        (BR, UR): 2,
        (D, F): 2,
        (D, FL): 2,
        (D, FR): 2,
        (D, L): 2,
        (D, R): 2,
        (DB, DF): 2,
        (DB, DFL): 2,
        (DB, DFR): 2,
        (DB, DL): 2,
        (DB, DR): 2,
        (DB, L): 2,
        (DB, R): 2,
        (DB, UB): 2,
        (DB, UBL): 2,
        (DB, UBR): 2,
        (DBL, DBR): 2,
        (DBL, DF): 2,
        (DBL, DFL): 2,
        (DBL, DFR): 2,
        (DBL, DR): 2,
        (DBL, FL): 2,
        (DBL, UB): 2,
        (DBL, UBL): 2,
        (DBL, UBR): 2,
        (DBL, UFL): 2,
        (DBL, UL): 2,
        (DBR, DF): 2,
        (DBR, DFL): 2,
        (DBR, DFR): 2,
        (DBR, DL): 2,
        (DBR, FR): 2,
        (DBR, UB): 2,
        (DBR, UBL): 2,
        (DBR, UBR): 2,
        (DBR, UFR): 2,
        (DBR, UR): 2,
        (DF, DL): 2,
        (DF, DR): 2,
        (DF, FL): 2,
        (DF, FR): 2,
        (DF, L): 2,
        (DF, R): 2,
        (DF, UF): 2,
        (DF, UFL): 2,
        (DF, UFR): 2,
        (DFL, DFR): 2,
        (DFL, DR): 2,
        (DFL, FR): 2,
        (DFL, UBL): 2,
        (DFL, UF): 2,
        (DFL, UFL): 2,
        (DFL, UFR): 2,
        (DFL, UL): 2,
        (DFR, DL): 2,
        (DFR, FL): 2,
        (DFR, UBR): 2,
        (DFR, UF): 2,
        (DFR, UFL): 2,
        (DFR, UFR): 2,
        (DFR, UR): 2,
        (DL, DR): 2,
        (DL, F): 2,
        (DL, FL): 2,
        (DL, UBL): 2,
        (DL, UFL): 2,
        (DL, UL): 2,
        (DR, F): 2,
        (DR, FR): 2,
        (DR, UBR): 2,
        (DR, UFR): 2,
        (DR, UR): 2,
        (F, L): 2,
        (F, R): 2,
        (F, U): 2,
        (F, UL): 2,
        (F, UR): 2,
        (FL, FR): 2,
        (FL, U): 2,
        (FL, UBL): 2,
        (FL, UF): 2,
        (FL, UFR): 2,
        (FL, UL): 2,
        (FR, U): 2,
        (FR, UBR): 2,
        (FR, UF): 2,
        (FR, UFL): 2,
        (FR, UR): 2,
        (L, U): 2,
        (L, UB): 2,
        (L, UF): 2,
        (R, U): 2,
        (R, UB): 2,
        (R, UF): 2,
        (UB, UF): 2,
        (UB, UFL): 2,
        (UB, UFR): 2,
        (UB, UL): 2,
        (UB, UR): 2,
        (UBL, UBR): 2,
        (UBL, UF): 2,
        (UBL, UFL): 2,
        (UBL, UFR): 2,
        (UBL, UR): 2,
        (UBR, UF): 2,
        (UBR, UFL): 2,
        (UBR, UFR): 2,
        (UBR, UL): 2,
        (UF, UL): 2,
        (UF, UR): 2,
        (UFL, UFR): 2,
        (UFL, UR): 2,
        (UFR, UL): 2,
        (UL, UR): 2,

        # DIST 3 PAIRS
        (B, DF): 3,
        (B, DFL): 3,
        (B, DFR): 3,
        (B, FL): 3,
        (B, FR): 3,
        (B, UF): 3,
        (B, UFL): 3,
        (B, UFR): 3,
        (BL, DF): 3,
        (BL, DFR): 3,
        (BL, DR): 3,
        (BL, F): 3,
        (BL, R): 3,
        (BL, UF): 3,
        (BL, UFR): 3,
        (BL, UR): 3,
        (BR, DF): 3,
        (BR, DFL): 3,
        (BR, DL): 3,
        (BR, F): 3,
        (BR, L): 3,
        (BR, UF): 3,
        (BR, UFL): 3,
        (BR, UL): 3,
        (D, UB): 3,
        (D, UBL): 3,
        (D, UBR): 3,
        (D, UF): 3,
        (D, UFL): 3,
        (D, UFR): 3,
        (D, UL): 3,
        (D, UR): 3,
        (DB, F): 3,
        (DB, FL): 3,
        (DB, FR): 3,
        (DB, U): 3,
        (DB, UFL): 3,
        (DB, UFR): 3,
        (DB, UL): 3,
        (DB, UR): 3,
        (DBL, F): 3,
        (DBL, FR): 3,
        (DBL, R): 3,
        (DBL, U): 3,
        (DBL, UF): 3,
        (DBL, UR): 3,
        (DBR, F): 3,
        (DBR, FL): 3,
        (DBR, L): 3,
        (DBR, U): 3,
        (DBR, UF): 3,
        (DBR, UL): 3,
        (DF, U): 3,
        (DF, UBL): 3,
        (DF, UBR): 3,
        (DF, UL): 3,
        (DF, UR): 3,
        (DFL, R): 3,
        (DFL, U): 3,
        (DFL, UB): 3,
        (DFL, UR): 3,
        (DFR, L): 3,
        (DFR, U): 3,
        (DFR, UB): 3,
        (DFR, UL): 3,
        (DL, FR): 3,
        (DL, R): 3,
        (DL, U): 3,
        (DL, UB): 3,
        (DL, UBR): 3,
        (DL, UF): 3,
        (DL, UFR): 3,
        (DR, FL): 3,
        (DR, L): 3,
        (DR, U): 3,
        (DR, UB): 3,
        (DR, UBL): 3,
        (DR, UF): 3,
        (DR, UFL): 3,
        (F, UB): 3,
        (F, UBL): 3,
        (F, UBR): 3,
        (FL, R): 3,
        (FL, UB): 3,
        (FL, UBR): 3,
        (FL, UR): 3,
        (FR, L): 3,
        (FR, UB): 3,
        (FR, UBL): 3,
        (FR, UL): 3,
        (L, UBR): 3,
        (L, UFR): 3,
        (L, UR): 3,
        (R, UBL): 3,
        (R, UFL): 3,
        (R, UL): 3,

        # DIST 4 PAIRS
        (B, F): 4,
        (BL, FR): 4,
        (BR, FL): 4,
        (D, U): 4,
        (DB, UF): 4,
        (DBL, UFR): 4,
        (DBR, UFL): 4,
        (DF, UB): 4,
        (DFL, UBR): 4,
        (DFR, UBL): 4,
        (DL, UR): 4,
        (DR, UL): 4,
        (L, R): 4,

        # UNDEFINED DIST PAIRS
        (B, UNKNOWN): None,
        (BL, UNKNOWN): None,
        (BR, UNKNOWN): None,
        (D, UNKNOWN): None,
        (DB, UNKNOWN): None,
        (DBL, UNKNOWN): None,
        (DBR, UNKNOWN): None,
        (DF, UNKNOWN): None,
        (DFL, UNKNOWN): None,
        (DFR, UNKNOWN): None,
        (DL, UNKNOWN): None,
        (DR, UNKNOWN): None,
        (F, UNKNOWN): None,
        (FL, UNKNOWN): None,
        (FR, UNKNOWN): None,
        (L, UNKNOWN): None,
        (R, UNKNOWN): None,
        (U, UNKNOWN): None,
        (UB, UNKNOWN): None,
        (UBL, UNKNOWN): None,
        (UBR, UNKNOWN): None,
        (UF, UNKNOWN): None,
        (UFL, UNKNOWN): None,
        (UFR, UNKNOWN): None,
        (UL, UNKNOWN): None,
        (UNKNOWN, B): None,
        (UNKNOWN, BL): None,
        (UNKNOWN, BR): None,
        (UNKNOWN, D): None,
        (UNKNOWN, DB): None,
        (UNKNOWN, DBL): None,
        (UNKNOWN, DBR): None,
        (UNKNOWN, DF): None,
        (UNKNOWN, DFL): None,
        (UNKNOWN, DFR): None,
        (UNKNOWN, DL): None,
        (UNKNOWN, DR): None,
        (UNKNOWN, F): None,
        (UNKNOWN, FL): None,
        (UNKNOWN, FR): None,
        (UNKNOWN, L): None,
        (UNKNOWN, R): None,
        (UNKNOWN, U): None,
        (UNKNOWN, UB): None,
        (UNKNOWN, UBL): None,
        (UNKNOWN, UBR): None,
        (UNKNOWN, UF): None,
        (UNKNOWN, UFL): None,
        (UNKNOWN, UFR): None,
        (UNKNOWN, UL): None,
        (UNKNOWN, UR): None,
        (UR, UNKNOWN): None,
        (UNKNOWN, UNKNOWN): None,
    }
    # make distance symmetric
    for (f1, f2), d in list(DIST.items()):
        DIST[(f2, f1)] = d
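
After the symmetrization loop, lookups work in either argument order, and
pairs involving UNKNOWN stay None:

    assert VIEW.DIST[(VIEW.F, VIEW.B)] == VIEW.DIST[(VIEW.B, VIEW.F)] == 4
    assert VIEW.DIST[(VIEW.UNKNOWN, VIEW.F)] is None
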
Example #16
    def binarize_ovr(cfsn_vecs,
                     mode=1,
                     keyby='name',
                     ignore_classes={'ignore'}):
        """
        Transforms cfsn_vecs into one-vs-rest BinaryConfusionVectors for each category.

        Args:
            mode (int, default=1): 0 for hierarchy-aware or 1 for VOC-like.
                MODE 0 IS PROBABLY BROKEN
            keyby (int | str) : can be cx or name
            ignore_classes (Set[str]): category names to ignore

        Returns:
            OneVsRestConfusionVectors: which behaves like
                Dict[int, BinaryConfusionVectors]: cx_to_binvecs

        Example:
            >>> # xdoctest: +REQUIRES(module:ndsampler)
            >>> cfsn_vecs = ConfusionVectors.demo()
            >>> print('cfsn_vecs = {!r}'.format(cfsn_vecs))
            >>> catname_to_binvecs = cfsn_vecs.binarize_ovr(keyby='name')
            >>> print('catname_to_binvecs = {!r}'.format(catname_to_binvecs))

        Notes:
            Consider we want to measure how well we can classify beagles.

            Given a multiclass confusion vector, we need to carefully select a
            subset. We ignore any truth that is coarser than our current label.
            We also ignore any background predictions on irrelevant classes.

            y_true     | y_pred     | score
            -------------------------------
            dog        | dog          <- ignore coarser truths
            dog        | cat          <- ignore coarser truths
            dog        | beagle       <- ignore coarser truths
            cat        | dog
            cat        | cat
            cat        | background   <- ignore failures to predict unrelated classes
            cat        | maine-coon
            beagle     | beagle
            beagle     | dog
            beagle     | background
            beagle     | cat
            Snoopy     | beagle
            Snoopy     | cat
            maine-coon | background    <- ignore failures to predict unrelated classes
            maine-coon | beagle
            maine-coon | cat

            Anything not marked as ignore is counted. We count anything marked
            as beagle or a finer grained class (e.g.  Snoopy) as a positive
            case. All other cases are negative. The scores come from the
            predicted probability of beagle, which must be remembered outside
            the dataframe.
        """
        import kwarray

        classes = cfsn_vecs.classes
        data = cfsn_vecs.data

        if mode == 0:
            if cfsn_vecs.probs is None:
                raise ValueError('cannot binarize in mode=0 without probs')
            pdist = classes.idx_pairwise_distance()

        cx_to_binvecs = {}
        for cx in range(len(classes)):
            if classes[cx] == 'background' or classes[cx] in ignore_classes:
                continue

            if mode == 0:
                import warnings
                warnings.warn(
                    'THIS CALCULATION MIGHT BE WRONG. MANY OTHERS '
                    'IN THIS FILE WERE, AND I HAVE NOT CHECKED THIS ONE YET')

                # Lookup original probability predictions for the class of interest
                new_scores = cfsn_vecs.probs[:, cx]

                # Determine which truth items have compatible classes
                # Note: we ignore any truth-label that is COARSER than the
                # class-of-interest.
                # E.g: how well do we classify Beagle? -> we should ignore any truth
                # label marked as Dog because it may or may not be a Beagle?
                with warnings.catch_warnings():
                    warnings.filterwarnings('ignore', category=RuntimeWarning)
                    dist = pdist[cx]
                    coarser_cxs = np.where(dist < 0)[0]
                    finer_eq_cxs = np.where(dist >= 0)[0]

                is_finer_eq = kwarray.isect_flags(data['true'], finer_eq_cxs)
                is_coarser = kwarray.isect_flags(data['true'], coarser_cxs)

                # Construct a binary data frame to pass to sklearn functions.
                bin_data = {
                    'is_true': is_finer_eq.astype(np.uint8),
                    'pred_score': new_scores,
                    'weight': data['weight'] * (np.float32(1.0) - is_coarser),
                    'txs': cfsn_vecs.data['txs'],
                    'pxs': cfsn_vecs.data['pxs'],
                    'gid': cfsn_vecs.data['gid'],
                }
                bin_data = kwarray.DataFrameArray(bin_data)

                # Ignore cases where we failed to predict an irrelevant class
                flags = (data['pred'] == -1) & (bin_data['is_true'] == 0)
                bin_data['weight'][flags] = 0
                # bin_data = bin_data.compress(~flags)
                bin_cfsn = BinaryConfusionVectors(bin_data, cx, classes)

            elif mode == 1:
                # More VOC-like, not hierarchy friendly

                if cfsn_vecs.probs is not None:
                    # We know the actual score predicted for this category in
                    # this case.
                    is_true = cfsn_vecs.data['true'] == cx
                    pred_score = cfsn_vecs.probs[:, cx]
                else:
                    import warnings
                    warnings.warn(
                        'Binarize ovr is only approximate if not all probabilities are known'
                    )
                    # If we don't know the probabilities for non-predicted
                    # categories then we have to guess.
                    is_true = cfsn_vecs.data['true'] == cx

                    # do we know the actual predicted score for this category?
                    score_is_unknown = data['pred'] != cx
                    pred_score = data['score'].copy()

                    # These scores were for a different class, so assume
                    # other classes were predicted with a uniform prior
                    approx_score = (1 - pred_score[score_is_unknown]) / (
                        len(classes) - 1)

                    # Except in the case where predicted class is -1. In this
                    # case no prediction was actually made (above a threshold)
                    # so the assumed score should be significantly lower, we
                    # conservatively choose zero.
                    unknown_preds = data['pred'][score_is_unknown]
                    approx_score[unknown_preds == -1] = 0

                    pred_score[score_is_unknown] = approx_score

                bin_data = {
                    # is_true denotes if the true class of the item is the
                    # category of interest.
                    'is_true': is_true,
                    'pred_score': pred_score,
                }

                extra = ub.dict_isect(data._data,
                                      ['txs', 'pxs', 'gid', 'weight'])
                bin_data.update(extra)

                bin_data = kwarray.DataFrameArray(bin_data)
                bin_cfsn = BinaryConfusionVectors(bin_data, cx, classes)
            cx_to_binvecs[cx] = bin_cfsn

        if keyby == 'cx':
            pass  # already keyed by class index
        elif keyby == 'name':
            cx_to_binvecs = ub.map_keys(cfsn_vecs.classes, cx_to_binvecs)
        else:
            raise KeyError(keyby)

        ovr_cfns = OneVsRestConfusionVectors(cx_to_binvecs, cfsn_vecs.classes)
        return ovr_cfns
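
A sketch of the `keyby='name'` branch in isolation: `cfsn_vecs.classes`
acts as an indexable from class index to class name, so `ub.map_keys` can
relabel the result dict (names and values here are hypothetical):

    import ubelt as ub

    classes = ['background', 'cat', 'dog']
    cx_to_binvecs = {1: 'binvecs-for-cat', 2: 'binvecs-for-dog'}
    assert ub.map_keys(classes, cx_to_binvecs) == {
        'cat': 'binvecs-for-cat', 'dog': 'binvecs-for-dog'}
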
Example #17
    def _fix_keys(model_state_dict):
        """
        Hack around the DataParallel wrapper. If there is nothing in common
        between the two models, check whether prepending 'module.' to the
        other keys fixes it.
        """
        other_keys = set(model_state_dict)
        self_keys = set(self_state)

        if 0:
            # Automatic way to reduce nodes in the trees?
            # If node b always follows node a, can we contract it?
            nodes1 = [n for p in other_keys for n in p.split('.')]
            nodes2 = [n for p in self_keys for n in p.split('.')]
            tups1 = list(tup for key in other_keys
                         for tup in ub.iter_window(key.split('.'), 2))
            tups2 = list(tup for key in self_keys
                         for tup in ub.iter_window(key.split('.'), 2))
            x = ub.ddict(list)
            for a, b in tups1:
                x[a].append(b)
            for a, b in tups2:
                x[a].append(b)

            nodehist = ub.dict_hist(nodes1 + nodes2)

            for k, v in x.items():
                print('----')
                print(k)
                print(nodehist[k])
                follow_hist = ub.dict_hist(v)
                print(follow_hist)
                total = sum(follow_hist.values())
                if ub.allsame(follow_hist.values()) and total == nodehist[k]:
                    print('CONTRACT')

            # pair_freq = ub.dict_hist(ub.flatten([tups1, tups2]))
            # print(forest_str(paths_to_otree(other_keys, '.')))

        # common_keys = other_keys.intersection(self_keys)
        # if not common_keys:
        if not other_keys.issubset(self_keys):
            if association == 'strict':
                pass
            elif association == 'module-hack':
                # If there are no common keys try a hack
                prefix = 'module.'

                def smap(f, ss):
                    return set(map(f, ss))

                def fix1(k):
                    return prefix + k

                def fix2(k):
                    if k.startswith(prefix):
                        return k[len(prefix):]

                if smap(fix1, other_keys).intersection(self_keys):
                    model_state_dict = ub.map_keys(fix1, model_state_dict)
                elif smap(fix2, other_keys).intersection(self_keys):
                    model_state_dict = ub.map_keys(fix2, model_state_dict)
            elif association == 'prefix-hack':
                import functools

                def add_prefix(k, prefix):
                    return prefix + k

                def remove_prefix(k, prefix):
                    if k.startswith(prefix):
                        return k[len(prefix):]

                # set1 = other_keys
                # target_set2 = self_keys
                found = _best_prefix_transform(other_keys, self_keys)
                if found is not None:
                    for action, prefix in found['transform']:
                        if action == 'add':
                            func = functools.partial(add_prefix, prefix=prefix)
                        elif action == 'remove':
                            func = functools.partial(remove_prefix,
                                                     prefix=prefix)
                        else:
                            raise AssertionError
                        model_state_dict = ub.map_keys(func, model_state_dict)
            elif association in {'embedding', 'isomorphism'}:
                if verbose > 1:
                    print('Using subpath {} association, may take some time'.
                          format(association))
                # I believe this is the correct way to solve the problem
                paths1 = sorted(other_keys)
                paths2 = sorted(self_state)

                if 1:
                    # hack to filter to reduce tree size in embedding problem
                    def shrink_paths(paths):
                        new_paths = []
                        for p in paths:
                            p = p.replace('.0', ':0')
                            p = p.replace('.1', ':1')
                            p = p.replace('.2', ':2')
                            p = p.replace('.3', ':3')
                            p = p.replace('.4', ':4')
                            p = p.replace('.5', ':5')
                            p = p.replace('.6', ':6')
                            p = p.replace('.7', ':7')
                            p = p.replace('.8', ':8')
                            p = p.replace('.9', ':9')
                            p = p.replace('.weight', ':weight')
                            p = p.replace('.bias', ':bias')
                            p = p.replace('.num_batches_tracked',
                                          ':num_batches_tracked')
                            p = p.replace('.running_mean', ':running_mean')
                            p = p.replace('.running_var', ':running_var')
                            # p = p.replace('.conv1', ':conv1')
                            # p = p.replace('.conv2', ':conv2')
                            # p = p.replace('.conv3', ':conv3')
                            # p = p.replace('.bn1', ':bn1')
                            # p = p.replace('.bn2', ':bn2')
                            # p = p.replace('.bn3', ':bn3')
                            new_paths.append(p)
                        return new_paths

                    # Reducing the depth saves a lot of time
                    paths1_ = shrink_paths(paths1)
                    paths2_ = shrink_paths(paths2)

                subpaths1, subpaths2 = maximum_common_ordered_subpaths(
                    paths1_, paths2_, sep='.', mode=association)
                subpaths1 = [p.replace(':', '.') for p in subpaths1]
                subpaths2 = [p.replace(':', '.') for p in subpaths2]
                mapping = ub.dzip(subpaths1, subpaths2)
                if verbose > 1:
                    other_unmapped = sorted(other_keys - set(mapping.keys()))
                    self_unmapped = sorted(self_keys - set(mapping.values()))
                    print('-- embed association (other -> self) --')
                    print('mapping = {}'.format(ub.repr2(mapping, nl=1)))
                    print('self_unmapped = {}'.format(
                        ub.repr2(self_unmapped, nl=1)))
                    print('other_unmapped = {}'.format(
                        ub.repr2(other_unmapped, nl=1)))
                    print('len(mapping) = {}'.format(
                        ub.repr2(len(mapping), nl=1)))
                    print('len(self_unmapped) = {}'.format(
                        ub.repr2(len(self_unmapped), nl=1)))
                    print('len(other_unmapped) = {}'.format(
                        ub.repr2(len(other_unmapped), nl=1)))
                    print('-- end embed association --')

                # HACK: something might be wrong, there was an instance with
                # HRNet_w32 where multiple keys mapped to the same key
                # bad keys were incre_modules.3.0.conv1.weight and conv1.weight
                #
                # This will not error, but may produce bad output
                try:
                    model_state_dict = ub.map_keys(lambda k: mapping.get(k, k),
                                                   model_state_dict)
                except Exception as ex:
                    HACK = 1
                    if HACK:
                        new_state_dict_ = {}
                        for k, v in model_state_dict.items():
                            new_state_dict_[mapping.get(k, k)] = v
                        model_state_dict = new_state_dict_
                        warnings.warn('ex = {!r}'.format(ex))
                    else:
                        raise
            else:
                raise KeyError(association)
        return model_state_dict
Example #18
def demo_refresh():
    r"""
    CommandLine:
        python -m graphid.core.refresh demo_refresh \
                --num_pccs=40 --size=2 --show

    Example:
        >>> # ENABLE_DOCTEST
        >>> from graphid.core.refresh import *  # NOQA
        >>> demo_refresh()
        >>> util.show_if_requested()
    """
    from graphid import demo
    # import utool as ut
    # demokw = ut.argparse_dict({'num_pccs': 50, 'size': 4})
    # refreshkw = ut.argparse_funckw(RefreshCriteria)
    demokw = {'num_pccs': 50, 'size': 4}
    refreshkw = dict(window=20, patience=72, thresh=.1,
                     method='binomial')
    # make an inference object
    infr = demo.demodata_infr(size_std=0, **demokw)
    edges = list(infr.ranker.predict_candidate_edges(infr.aids, K=100))
    scores = np.array(infr.verifier.predict_edges(edges))
    sortx = scores.argsort()[::-1]
    edges = list(ub.take(edges, sortx))
    scores = scores[sortx]
    ys = infr.match_state_df(edges)[POSTV].values
    y_remainsum = ys[::-1].cumsum()[::-1]
    # Do oracle reviews and wait to converge
    refresh = RefreshCriteria(**refreshkw)
    xdata = []
    pprob_any = []
    rfrac_any = []
    for count, (edge, y) in enumerate(zip(edges, ys)):
        refresh.add(y, user_id='user:oracle')
        rfrac_any.append(y_remainsum[count] / y_remainsum[0])
        pprob_any.append(refresh.prob_any_remain())
        xdata.append(count + 1)
        if refresh.check():
            break
    ydatas = ub.odict([
        ('Est. probability any remain', pprob_any),
        ('Fraction remaining', rfrac_any),
    ])

    # xdoctest: +REQUIRES(--show)
    from graphid import util
    util.qtensure()
    # from ibeis.scripts.thesis import TMP_RC
    # import matplotlib as mpl
    import matplotlib.pyplot as plt
    # mpl.rcParams.update(TMP_RC)
    util.multi_plot(
        xdata, ydatas, xlabel='# manual reviews',
        # rcParams=TMP_RC,
        marker='',
        ylim=(0, 1), use_legend=False,
    )
    demokw = ub.map_keys({'num_pccs': '#PCC', 'size': 'PCC size'},
                         demokw)
    thresh = refreshkw.pop('thresh')
    refreshkw['span'] = refreshkw.pop('window')
    util.relative_text((.02, .58 + .0), ub.repr2(demokw, sep=' ', nl=0)[1:],
                       valign='bottom')
    util.relative_text((.02, .68 + .0), ub.repr2(refreshkw, sep=' ', nl=0)[1:],
                       valign='bottom')
    legend = plt.gca().legend()
    legend.get_frame().set_alpha(1.0)
    plt.plot([xdata[0], xdata[-1]], [thresh, thresh], 'g--', label='thresh')