Example #1
def draw_instance_contours(img, gti, gtl=None, thickness=2, alpha=1, color=None):
    """

    img = util.imread('/home/joncrall/remote/aretha/data/UrbanMapper3D/training/TAM_Tile_003_RGB.tif')
    gti = util.imread(ub.truepath('~/remote/aretha/data/UrbanMapper3D/training/TAM_Tile_003_GTI.tif'))
    gtl = util.imread('/home/joncrall/remote/aretha/data/UrbanMapper3D/training/TAM_Tile_003_GTL.tif')
    thickness = 2
    alpha = 1

    """
    import cv2

    grouped_contours = instance_contours(gti)

    if gtl is not None:
        unknown_labels = set(np.unique(gti[gtl == 65]))
    else:
        unknown_labels = set()

    known_labels = set(grouped_contours.keys()) - unknown_labels

    BGR_GREEN = (0, 255, 0)
    BGR_BLUE = (255, 0, 0)
    img = util.ensure_float01(img)
    base = np.ascontiguousarray((255 * img[:, :, 0:3]).astype(np.uint8))

    # Draw an image to overlay first
    draw_img = np.zeros(base.shape, dtype=np.uint8)

    if color is None:
        color = BGR_GREEN

    known_contours = np.array(list(ub.flatten(list(ub.take(grouped_contours, known_labels)))))
    draw_img = cv2.drawContours(
        image=draw_img, contours=known_contours,
        contourIdx=-1, color=color, thickness=thickness)

    if unknown_labels:
        unknown_contours = np.array(list(ub.flatten(ub.take(grouped_contours, unknown_labels))))
        draw_img = cv2.drawContours(
            image=draw_img, contours=unknown_contours,
            contourIdx=-1, color=BGR_BLUE, thickness=thickness)

    contour_overlay = util.ensure_alpha_channel(draw_img, alpha=0)
    contour_overlay.T[3].T[draw_img.sum(axis=2) > 0] = alpha

    # zero out the edges to avoid visualization errors
    contour_overlay[0:thickness, :, :] = 0
    contour_overlay[-thickness:, :, :] = 0
    contour_overlay[:, 0:thickness, :] = 0
    contour_overlay[:, -thickness:, :] = 0

    # img1 = contour_overlay
    # img2 = base
    # from clab import profiler
    # _ = profiler.profile_onthefly(util.overlay_alpha_images)(contour_overlay, base, keepalpha=False)

    draw_img = util.overlay_alpha_images(contour_overlay, base, keepalpha=False)
    draw_img = np.ascontiguousarray((255 * draw_img[:, :, 0:3]).astype(np.uint8))
    return draw_img
Example #2
 def unique(self, normalize=False):
     """
     Returns the unique channels that will need to be given or loaded
     """
     if normalize:
         return set(ub.flatten(self.parse().values()))
     else:
         return set(ub.flatten(self.normalize().values()))
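
A minimal sketch of the flattening step above, assuming a parse() result shaped like kwcoco's ChannelSpec output (each comma-separated stream mapped to its list of band codes; the spec string here is hypothetical):

import ubelt as ub

# hypothetical parse() output for the spec 'rgb|disparity,flowx|flowy'
parsed = {
    'rgb|disparity': ['rgb', 'disparity'],
    'flowx|flowy': ['flowx', 'flowy'],
}
unique_channels = set(ub.flatten(parsed.values()))
print(unique_channels)  # {'rgb', 'disparity', 'flowx', 'flowy'}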
Example #3
def demodata_infr2(defaultdb='PZ_MTEST'):
    import ibeis
    from graphid.core.annot_inference import AnnotInference
    ibs = ibeis.opendb(defaultdb=defaultdb)
    annots = ibs.annots()
    names = list(annots.group_items(annots.nids).values())[0:20]

    def dummy_phi(c, n):
        x = np.arange(n)
        phi = c * x / (c * x + 1)
        phi = phi / phi.sum()
        phi = np.diff(phi)
        return phi

    phis = {c: dummy_phi(c, 30) for c in range(1, 4)}
    aids = list(ub.flatten(names))
    infr = AnnotInference(ibs, aids, autoinit=True)
    infr.init_termination_criteria(phis)
    infr.init_refresh_criteria()

    # Partially review
    n1, n2, n3, n4 = names[0:4]
    for name in names[4:]:
        for a, b in ub.iter_window(name.aids, 2):
            infr.add_feedback((a, b), POSTV)

    for name1, name2 in it.combinations(names[4:], 2):
        infr.add_feedback((name1.aids[0], name2.aids[0]), NEGTV)
    return infr
Example #4
    def pack(self):
        """
        Pack all of the data in this container into a single tensor.

        Returns:
            Tensor: packed data, padded with ``self.padding_value`` if
            ``self.stack`` is False.

        Example:
            >>> self = BatchContainer.demo('img')
            >>> print(self.pack())
            >>> self = BatchContainer.demo('box')
            >>> print(self.pack())
            >>> self = BatchContainer.demo('labels')
            >>> print(self.pack())
        """
        if self.stack:
            # Should be a straightforward concatenation
            packed = torch.cat(self.data, dim=0)
        else:
            # Need to account for padding values
            from netharn.data.collate import padded_collate
            inbatch = list(ub.flatten(self.data))
            packed = padded_collate(inbatch, fill_value=self.padding_value)
        return packed
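
A quick sketch of the stack=True branch: torch.cat joins the per-device chunks along the batch dimension (the padded branch depends on netharn's padded_collate and is not reproduced here):

import torch

data = [torch.zeros(2, 3), torch.ones(3, 3)]  # e.g. chunks from two devices
packed = torch.cat(data, dim=0)
print(packed.shape)  # torch.Size([5, 3])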
Example #5
def decollate_batch(batch):
    """
    Breakup a collated batch of BatchContainers back into ItemContainers

    Example:
        >>> bsize = 5
        >>> batch_items = [
        >>>     {
        >>>         'im': ItemContainer.demo('img'),
        >>>         'label': ItemContainer.demo('labels'),
        >>>         'box': ItemContainer.demo('box'),
        >>>     }
        >>>     for _ in range(bsize)
        >>> ]
        >>> batch = container_collate(batch_items, num_devices=2)
        >>> decollated = decollate_batch(batch)
        >>> assert len(decollated) == len(batch_items)
        >>> assert (decollated[0]['im'].data == batch_items[0]['im'].data).all()
    """
    import ubelt as ub
    from kwcoco.util.util_json import IndexableWalker
    walker = IndexableWalker(batch)
    decollated_dict = ub.AutoDict()
    decollated_walker = IndexableWalker(decollated_dict)
    for path, batch_val in walker:
        if isinstance(batch_val, BatchContainer):
            for bx, item_val in enumerate(ub.flatten(batch_val.data)):
                decollated_walker[[bx] + path] = ItemContainer(item_val)
    decollated = list(decollated_dict.to_dict().values())
    return decollated
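
The decollation above relies on ub.AutoDict, which auto-creates intermediate dictionaries on assignment; a small sketch of that behavior:

import ubelt as ub

decollated_dict = ub.AutoDict()
decollated_dict[0]['im'] = 'item0-image'   # intermediate dict created on the fly
decollated_dict[1]['im'] = 'item1-image'
decollated = list(decollated_dict.to_dict().values())
print(decollated)  # [{'im': 'item0-image'}, {'im': 'item1-image'}]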
Example #6
def graph_info(graph, ignore=None, stats=False, verbose=False):
    from graphid import util
    import pandas as pd

    node_dict = graph.nodes
    node_attrs = list(node_dict.values())
    edge_attrs = list(take_column(graph.edges(data=True), 2))

    if stats:
        node_df = pd.DataFrame(node_attrs)
        edge_df = pd.DataFrame(edge_attrs)
        if ignore is not None:
            util.delete_dict_keys(node_df, ignore)
            util.delete_dict_keys(edge_df, ignore)
        # Not really histograms anymore
        try:
            node_attr_hist = node_df.describe().to_dict()
        except ValueError:
            node_attr_hist = {}
        try:
            edge_attr_hist = edge_df.describe().to_dict()
        except ValueError:
            edge_attr_hist = {}
        key_order = ['count', 'mean', 'std', 'min', '25%', '50%', '75%', 'max']
        node_attr_hist = ub.map_dict_vals(lambda x: util.order_dict_by(x, key_order), node_attr_hist)
        edge_attr_hist = ub.map_dict_vals(lambda x: util.order_dict_by(x, key_order), edge_attr_hist)
    else:
        node_attr_hist = ub.dict_hist(ub.flatten([attr.keys() for attr in node_attrs]))
        edge_attr_hist = ub.dict_hist(ub.flatten([attr.keys() for attr in edge_attrs]))
        if ignore is not None:
            util.delete_dict_keys(edge_attr_hist, ignore)
            util.delete_dict_keys(node_attr_hist, ignore)
    node_type_hist = ub.dict_hist(list(map(type, graph.nodes())))
    info_dict = ub.odict([
        ('directed', graph.is_directed()),
        ('multi', graph.is_multigraph()),
        ('num_nodes', len(graph)),
        ('num_edges', len(list(graph.edges()))),
        ('edge_attr_hist', util.sort_dict(edge_attr_hist)),
        ('node_attr_hist', util.sort_dict(node_attr_hist)),
        ('node_type_hist', util.sort_dict(node_type_hist)),
        ('graph_attrs', graph.graph),
        ('graph_name', graph.name),
    ])
    if verbose:
        print(ub.repr2(info_dict))
    return info_dict
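
A sketch of the non-stats branch: ub.dict_hist over the flattened attribute keys counts how many nodes (or edges) carry each attribute:

import ubelt as ub

node_attrs = [{'name': 'a', 'color': 'red'}, {'name': 'b'}, {'name': 'c', 'color': 'blue'}]
node_attr_hist = ub.dict_hist(ub.flatten([attr.keys() for attr in node_attrs]))
print(node_attr_hist)  # {'name': 3, 'color': 2}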
Example #7
 def paths(self, cwd=None, recursive=False):
     groups = (p.paths(cwd=cwd, recursive=recursive) for p in self.patterns)
     if self.predicate in {any}:
         yield from ub.unique(ub.flatten(groups))
     elif self.predicate in {all}:
         yield from set.intersection(*map(set, groups))
     else:
         raise NotImplementedError
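
A standalone sketch of the two predicates: any yields the order-preserving union of the per-pattern path groups, while all keeps only paths matched by every pattern:

import ubelt as ub

groups = [['a.py', 'b.py', 'c.txt'], ['b.py', 'c.txt', 'd.rst']]
print(list(ub.unique(ub.flatten(groups))))    # ['a.py', 'b.py', 'c.txt', 'd.rst']
print(set.intersection(*map(set, groups)))    # {'b.py', 'c.txt'}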
Example #8
    def __init__(self, pblm, pccs, dim=224, augment=True):
        chip_config = {
            # preserve aspect ratio, use letterbox to fit into network
            'resize_dim': 'maxwh',
            'dim_size': dim,

            # 'resize_dim': 'wh',
            # 'dim_size': (dim, dim)
        }
        self.pccs = pccs
        all_aids = list(ub.flatten(pccs))
        all_fpaths = pblm.infr.ibs.depc_annot.get('chips',
                                                  all_aids,
                                                  read_extern=False,
                                                  colnames='img',
                                                  config=chip_config)

        self.aid_to_fpath = dict(zip(all_aids, all_fpaths))

        # self.multitons_pccs = [pcc for pcc in pccs if len(pcc) > 1]
        self.pos_pairs = []

        # SAMPLE ALL POSSIBLE POS COMBINATIONS AND IGNORE INCOMPARABLE
        self.infr = pblm.infr
        # TODO: each sample should be weighted depending on n_aids in its pcc
        for pcc in pccs:
            if len(pcc) >= 2:
                # ut.random_combinations
                edges = np.array(
                    list(it.starmap(self.infr.e_, it.combinations(pcc, 2))))
                is_comparable = self.is_comparable(edges)
                pos_edges = edges[is_comparable]
                self.pos_pairs.extend(list(pos_edges))
        rng = nh.util.ensure_rng(self.SEED, 'numpy')
        self.pyrng = nh.util.ensure_rng(self.SEED + 1, 'python')
        self.rng = rng

        # Be good data citizens, construct a dataset identifier
        depends = [
            sorted(map(sorted, self.pccs)),
        ]
        hashid = ub.hash_data(depends)[:12]
        self.input_id = '{}-{}'.format(len(self), hashid)

        if augment:
            import imgaug.augmenters as iaa
            # NOTE: we are only using `self.augmenter` to make a hyper hashid
            # in __getitem__ we invoke transform explicitly for fine control
            self.hue = nh.data.transforms.HSVShift(hue=0.1, sat=1.5, val=1.5)
            self.crop = iaa.Crop(percent=(0, .2))
            self.flip = iaa.Fliplr(p=.5)
            self.augmenter = iaa.Sequential([self.hue, self.crop, self.flip])
        else:
            self.augmenter = None
        self.letterbox = nh.data.transforms.Resize(target_size=(dim, dim),
                                                   mode='letterbox')
Example #9
def demodata_mtest_infr(state='empty'):
    import ibeis
    from graphid.core.annot_inference import AnnotInference
    ibs = ibeis.opendb(db='PZ_MTEST')
    annots = ibs.annots()
    names = list(annots.group_items(annots.nids).values())
    util.shuffle(names, rng=321)
    test_aids = list(ub.flatten(names[1::2]))
    infr = AnnotInference(ibs, test_aids, autoinit=True)
    infr.reset(state=state)
    return infr
Example #10
def rank_inventory(inventory):
    candidates = list(ub.flatten(list(pkmn.family(ancestors=False, node=True))
                                 for pkmn in inventory))

    groups = ub.group_items(candidates, key=lambda p: p.name)

    leages = {
        'master': {'max_cp': float('inf')},
        'ultra': {'max_cp': 2500},
        'great': {'max_cp': 1500},
        'little': {'max_cp': 500},
    }

    max_level = 45  # for XL candy
    # max_level = 40  # normal

    all_dfs = []

    for name, group in groups.items():
        print('\n\n------------\n\n')
        print('name = {!r}'.format(name))
        for leage_name, leage_filters in leages.items():
            max_cp = leage_filters['max_cp']
            print('')
            print(' ========== ')
            print(' --- {} in {} --- '.format(name, leage_name))
            not_eligible = [p for p in group if p.cp is not None and p.cp > max_cp]
            eligible = [p for p in group if p.cp is None or p.cp <= max_cp]
            print('not_eligible = {!r}'.format(not_eligible))
            if len(eligible) > 0:
                first = ub.peek(eligible)
                have_ivs = eligible
                df = first.leage_rankings_for(have_ivs, max_cp=max_cp,
                                              max_level=max_level)
                all_dfs.append(df)
            else:
                print('none eligible')

    # Print out the best ranks for each set of IVs over all possible forms
    # (lets you know which ones can be transferred safely)

    iv_to_rank = ub.ddict(list)
    for df in all_dfs:
        if df is not None:
            df = df.set_index(['iva', 'ivd', 'ivs'])
            for iv, rank in zip(df.index, df['rank']):
                iv_to_rank[iv].append(rank)

    iv_to_best_rank = ub.map_vals(sorted, iv_to_rank)
    iv_to_best_rank = ub.sorted_vals(iv_to_best_rank)
    print('iv_to_best_rank = {}'.format(ub.repr2(iv_to_best_rank, nl=1, align=':')))
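
A sketch of the grouping step above, with simple stand-in objects (a hypothetical Candidate class) in place of real inventory entries:

import ubelt as ub

class Candidate:
    def __init__(self, name, cp):
        self.name = name
        self.cp = cp

candidates = [Candidate('eevee', 400), Candidate('eevee', None), Candidate('dratini', 900)]
groups = ub.group_items(candidates, key=lambda p: p.name)
print({name: len(group) for name, group in groups.items()})  # {'eevee': 2, 'dratini': 1}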
Example #11
def demodata_tarjan_bridge():
    """
    Example:
        >>> from graphid import util
        >>> G = demodata_tarjan_bridge()
        >>> # xdoc: +REQUIRES(--show)
        >>> util.show_nx(G)
        >>> util.show_if_requested()
    """
    # define 2-connected components and bridges
    cc2 = [(1, 2, 4, 3, 1, 4), (5, 6, 7, 5), (8, 9, 10, 8),
             (17, 18, 16, 15, 17), (11, 12, 14, 13, 11, 14)]
    bridges = [(4, 8), (3, 5), (3, 17)]
    G = nx.Graph(ub.flatten(ub.iter_window(path, 2) for path in cc2 + bridges))
    return G
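
A sketch of how the edge list is built: ub.iter_window yields consecutive node pairs along each path and ub.flatten chains them into one stream of edges for nx.Graph:

import ubelt as ub
import networkx as nx

paths = [(1, 2, 4, 3, 1, 4), (4, 8)]
edges = list(ub.flatten(ub.iter_window(path, 2) for path in paths))
print(edges)  # [(1, 2), (2, 4), (4, 3), (3, 1), (1, 4), (4, 8)]
G = nx.Graph(edges)
print(sorted(G.nodes()))  # [1, 2, 3, 4, 8]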
Example #12
    def normalize(self):
        """
        Replace aliases with explicit single-band-per-code specs

        Example:
            >>> self = ChannelSpec('b1|b2|b3|rgb')
            >>> self.normalize()
            >>> list(self.keys())
        """
        new_parsed = {}
        for k1, v1 in self.parse().items():
            norm_vals = list(
                ub.flatten(self._alias_lut.get(v, v).split('|') for v in v1))
            norm_key = '|'.join(norm_vals)
            new_parsed[norm_key] = norm_vals
        new_spec = ','.join(list(new_parsed.keys()))
        normed = ChannelSpec(new_spec, parsed=new_parsed)
        return normed
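
A sketch of the alias expansion, using a toy _alias_lut as a stand-in for the real lookup table (its contents here are an assumption):

import ubelt as ub

_alias_lut = {'rgb': 'r|g|b'}          # hypothetical alias table
parsed = {'b1|b2|b3|rgb': ['b1', 'b2', 'b3', 'rgb']}

new_parsed = {}
for k1, v1 in parsed.items():
    norm_vals = list(ub.flatten(_alias_lut.get(v, v).split('|') for v in v1))
    new_parsed['|'.join(norm_vals)] = norm_vals
print(new_parsed)  # {'b1|b2|b3|r|g|b': ['b1', 'b2', 'b3', 'r', 'g', 'b']}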
Example #13
    def __getitem__(self, index):
        # Choose a label for each item in the batch
        if not hasattr(self.rng, 'choices'):
            # python 3.5 support
            chosen_labels = [
                self.rng.choice(self.labels) for _ in range(self.batch_size)
            ]
        else:
            chosen_labels = self.rng.choices(self.labels, k=self.batch_size)
        # Count the number of items we need for each label
        label_freq = ub.dict_hist(chosen_labels)

        # Sample those indices
        batch_idxs = list(
            ub.flatten([
                self.label_to_subsampler[label].sample(num)
                for label, num in label_freq.items()
            ]))
        return batch_idxs
Example #14
 def _balance_report(self, limit=None):
     # Print the epoch / item label frequency per epoch
     label_sequence = []
     index_sequence = []
     if limit is None:
         limit = self.num_batches
     for item_indices, _ in zip(self, range(limit)):
         item_indices = np.array(item_indices)
         item_labels = list(
             ub.flatten(ub.take(self.index_to_labels, item_indices)))
         index_sequence.extend(item_indices)
         label_sequence.extend(ub.unique(item_labels))
     label_hist = ub.dict_hist(label_sequence)
     index_hist = ub.dict_hist(index_sequence)
     label_hist = ub.sorted_vals(label_hist, reverse=True)
     index_hist = ub.sorted_vals(index_hist, reverse=True)
     index_hist = ub.dict_subset(index_hist, list(index_hist.keys())[0:5])
     print('label_hist = {}'.format(ub.repr2(label_hist, nl=1)))
     print('index_hist = {}'.format(ub.repr2(index_hist, nl=1)))
Example #15
def take_percentile_parts(arr, front=None, mid=None, back=None):
    """
    Take parts from front, back, or middle of a list

    Example:
        >>> arr = list(range(20))
        >>> front = 3
        >>> mid = 3
        >>> back = 3
        >>> result = take_percentile_parts(arr, front, mid, back)
        >>> print(result)
        [0, 1, 2, 9, 10, 11, 17, 18, 19]
    """
    slices = []
    if front:
        slices += [snapped_slice(len(arr), 0.0, front)]
    if mid:
        slices += [snapped_slice(len(arr), 0.5, mid)]
    if back:
        slices += [snapped_slice(len(arr), 1.0, back)]
    parts = list(ub.flatten([arr[sl] for sl in slices]))
    return parts
Example #16
    def sort_entries(bibman):
        def freq_group(items, groupids):
            groups = ub.group_items(items, groupids)
            hist = ub.map_vals(len, groups)
            for k in ub.argsort(hist):
                yield groups[k]

        high_level_alias = {
            'incollection': 'book',
            'conference': 'confjourn',
            'journal': 'confjourn',
            'online-journal': 'confjourn',
        }
        sorted_entries = []
        entries = list(bibman.cleaned.values())
        groups = [
            high_level_alias.get(entry['pub_type'], entry['pub_type'])
            for entry in entries
        ]
        entry_groups = freq_group(entries, groups)
        for group in entry_groups:
            subids = [entry['ENTRYTYPE'] for entry in group]
            for subgroup in freq_group(group, subids):
                subsubids = [entry['pub_full'] for entry in subgroup]
                # Group publications, and then sort conferences by max date
                pub_groups = []
                pub_maxdates = []
                for ssg in freq_group(subgroup, subsubids):
                    sssid = [(entry['date']) for entry in ssg]
                    ssg2 = list(ub.take(ssg, ub.argsort(sssid)))
                    pub_groups.append(ssg2)
                    pub_maxdates.append(ssg2[-1]['date'])
                subgroup2 = list(
                    ub.flatten(ut.sortedby2(pub_groups, pub_maxdates)))
                sorted_entries.extend(subgroup2)
        new_entries = ub.odict([(e['ID'], e) for e in sorted_entries])
        [e['pub_type'] for e in sorted_entries]
        bibman.cleaned = new_entries
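
A sketch of the freq_group helper in isolation: ub.group_items buckets entries by group id, ub.map_vals(len, ...) measures each bucket, and ub.argsort over that histogram yields the group keys from least to most frequent:

import ubelt as ub

items = ['a1', 'a2', 'b1', 'a3', 'c1', 'c2']
groupids = ['article', 'article', 'book', 'article', 'conf', 'conf']
groups = ub.group_items(items, groupids)
hist = ub.map_vals(len, groups)              # {'article': 3, 'book': 1, 'conf': 2}
ordered = [groups[k] for k in ub.argsort(hist)]
print(ordered)  # [['b1'], ['c1', 'c2'], ['a1', 'a2', 'a3']]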
Example #17
    def _simple_sample(self):
        # Simple strategy for creating examples
        infr = self.infr

        self._triple_pool = []

        for aid1, aid2 in self.infr.pos_graph.edges():
            cc = infr.pos_graph.connected_to(aid1)
            neg_edges = graphid.util.edges_outgoing(self.infr.neg_graph, [aid1, aid2])
            neg_aids = []
            for edge in neg_edges:
                neg_aids.append(set(edge) - {aid1, aid2})
            neg_aids = list(ub.flatten(neg_aids))

            if neg_aids:
                aid3 = random.choice(neg_aids)
            else:
                cc2 = next(infr.find_non_neg_redun_pccs(cc=cc, k=1))[1]
                aid3 = random.choice(list(cc2))

            # Check that we actually have the data
            if aid1 in self.coco_dset.anns and aid2 in self.coco_dset.anns and aid3 in self.coco_dset.anns:
                self._triple_pool.append((aid1, aid2, aid3))
Example #18
    def __getitem__(self, index):
        if not self.shuffle:
            import kwarray
            self.rng = kwarray.ensure_rng(index, api='python')

        sub_pccs = self.rng.sample(self.multitons, self.p)

        groups = []
        for sub_pcc in sub_pccs:
            aids = self.rng.sample(sub_pcc, min(self.k, len(sub_pcc)))
            groups.append(aids)

        nhave = sum(map(len, groups))
        while nhave < self.batch_size:
            sub_pcc = self.rng.choice(self.pccs)
            aids = self.rng.sample(sub_pcc, min(self.k, len(sub_pcc)))
            groups.append(aids)
            nhave = sum(map(len, groups))
            overshoot = nhave - self.batch_size
            if overshoot:
                groups[-1] = groups[-1][:-overshoot]

        indices = sorted(ub.flatten(groups))
        return indices
Example #19
def _assign_confusion_vectors(true_dets,
                              pred_dets,
                              bg_weight=1.0,
                              iou_thresh=0.5,
                              bg_cidx=-1,
                              bias=0.0,
                              classes=None,
                              compat='all',
                              prioritize='iou',
                              ignore_classes='ignore',
                              max_dets=None):
    """
    Create confusion vectors for detections by assigning them to ground truth boxes

    Given predictions and truth for an image return (y_pred, y_true,
    y_score), which is suitable for sklearn classification metrics

    Args:
        true_dets (Detections):
            groundtruth with boxes, classes, and weights

        pred_dets (Detections):
            predictions with boxes, classes, and scores

        iou_thresh (float, default=0.5):
            bounding box overlap iou threshold required for assignment

        bias (float, default=0.0):
            for computing bounding box overlap, either 1 or 0

        gids (List[int], default=None):
            which subset of images ids to compute confusion metrics on. If
            not specified all images are used.

        compat (str, default='all'):
            can be ('ancestors' | 'mutex' | 'all').  determines which pred
            boxes are allowed to match which true boxes. If 'mutex', then
            pred boxes can only match true boxes of the same class. If
            'ancestors', then pred boxes can match true boxes that match or
            have a coarser label. If 'all', then any pred can match any
            true, regardless of its category label.

        prioritize (str, default='iou'):
            can be ('iou' | 'class' | 'correct') determines which box to
            assign to if multiple true boxes overlap a predicted box. If
            prioritize is 'iou', then the true box with maximum iou (above
            iou_thresh) will be chosen. If prioritize is 'class', then it
            will prefer matching a compatible class over a higher iou. If
            prioritize is 'correct', then ancestors of the true class are
            preferred over descendants of the true class, which are
            preferred over unrelated classes.

        bg_cidx (int, default=-1):
            The index of the background class.  The index used in the truth
            column when a predicted bounding box does not match any true
            bounding box.

        classes (List[str] | kwcoco.CategoryTree):
            mapping from class indices to class names. Can also contain class
            hierarchy information.

        ignore_classes (str | List[str]):
            class name(s) indicating ignore regions

        max_dets (int): maximum number of detections to consider

    TODO:
        - [ ] This is a bottleneck function. An implementation in C / C++ /
        Cython would likely improve the overall system.

        - [ ] Implement crowd truth. Allow multiple predictions to match any
              truth object marked as "iscrowd".

    Returns:
        dict: with relevant confusion vectors. The keys of this dict can be
            interpreted as columns of a data frame. The `txs` / `pxs` columns
            represent the indexes of the true / predicted annotations that
            were assigned as matching. Additionally each row also contains
            the true and predicted class index, the predicted score, the true
            weight, and the iou of the true and predicted boxes. A `txs`
            value of -1 means that the predicted box was not assigned to a
            true annotation and a `pxs` value of -1 means that the true
            annotation was not assigned to any predicted annotation.

    Example:
        >>> # xdoctest: +REQUIRES(module:pandas)
        >>> import pandas as pd
        >>> import kwimage
        >>> # Given a raw numpy representation construct Detection wrappers
        >>> true_dets = kwimage.Detections(
        >>>     boxes=kwimage.Boxes(np.array([
        >>>         [ 0,  0, 10, 10], [10,  0, 20, 10],
        >>>         [10,  0, 20, 10], [20,  0, 30, 10]]), 'tlbr'),
        >>>     weights=np.array([1, 0, .9, 1]),
        >>>     class_idxs=np.array([0, 0, 1, 2]))
        >>> pred_dets = kwimage.Detections(
        >>>     boxes=kwimage.Boxes(np.array([
        >>>         [6, 2, 20, 10], [3,  2, 9, 7],
        >>>         [3,  9, 9, 7],  [3,  2, 9, 7],
        >>>         [2,  6, 7, 7],  [20,  0, 30, 10]]), 'tlbr'),
        >>>     scores=np.array([.5, .5, .5, .5, .5, .5]),
        >>>     class_idxs=np.array([0, 0, 1, 2, 0, 1]))
        >>> bg_weight = 1.0
        >>> compat = 'all'
        >>> iou_thresh = 0.5
        >>> bias = 0.0
        >>> import kwcoco
        >>> classes = kwcoco.CategoryTree.from_mutex(list(range(3)))
        >>> bg_cidx = -1
        >>> y = _assign_confusion_vectors(true_dets, pred_dets, bias=bias,
        >>>                               bg_weight=bg_weight, iou_thresh=iou_thresh,
        >>>                               compat=compat)
        >>> y = pd.DataFrame(y)
        >>> print(y)  # xdoc: +IGNORE_WANT
           pred  true  score  weight     iou  txs  pxs
        0     1     2 0.5000  1.0000  1.0000    3    5
        1     0    -1 0.5000  1.0000 -1.0000   -1    4
        2     2    -1 0.5000  1.0000 -1.0000   -1    3
        3     1    -1 0.5000  1.0000 -1.0000   -1    2
        4     0    -1 0.5000  1.0000 -1.0000   -1    1
        5     0     0 0.5000  0.0000  0.6061    1    0
        6    -1     0 0.0000  1.0000 -1.0000    0   -1
        7    -1     1 0.0000  0.9000 -1.0000    2   -1

    Ignore:
        from xinspect.dynamic_kwargs import get_func_kwargs
        globals().update(get_func_kwargs(_assign_confusion_vectors))

    Example:
        >>> # xdoctest: +REQUIRES(module:pandas)
        >>> import pandas as pd
        >>> from kwcoco.metrics import DetectionMetrics
        >>> dmet = DetectionMetrics.demo(nimgs=1, nclasses=8,
        >>>                              nboxes=(0, 20), n_fp=20,
        >>>                              box_noise=.2, cls_noise=.3)
        >>> classes = dmet.classes
        >>> gid = 0
        >>> true_dets = dmet.true_detections(gid)
        >>> pred_dets = dmet.pred_detections(gid)
        >>> y = _assign_confusion_vectors(true_dets, pred_dets,
        >>>                               classes=dmet.classes,
        >>>                               compat='all', prioritize='class')
        >>> y = pd.DataFrame(y)
        >>> print(y)  # xdoc: +IGNORE_WANT
        >>> y = _assign_confusion_vectors(true_dets, pred_dets,
        >>>                               classes=dmet.classes,
        >>>                               compat='ancestors', iou_thresh=.5)
        >>> y = pd.DataFrame(y)
        >>> print(y)  # xdoc: +IGNORE_WANT
    """
    import kwarray
    valid_compat_keys = {'ancestors', 'mutex', 'all'}
    if compat not in valid_compat_keys:
        raise KeyError(compat)
    if classes is None and compat == 'ancestors':
        compat = 'mutex'

    if compat == 'mutex':
        prioritize = 'iou'

    # Group true boxes by class
    # Keep track which true boxes are unused / not assigned
    unique_tcxs, tgroupxs = kwarray.group_indices(true_dets.class_idxs)
    cx_to_txs = dict(zip(unique_tcxs, tgroupxs))

    unique_pcxs = np.array(sorted(set(pred_dets.class_idxs)))

    if classes is None:
        import kwcoco
        # Build mutually exclusive category tree
        all_cxs = sorted(
            set(map(int, unique_pcxs)) | set(map(int, unique_tcxs)))
        all_cxs = list(range(max(all_cxs) + 1))
        classes = kwcoco.CategoryTree.from_mutex(all_cxs)

    cx_to_ancestors = classes.idx_to_ancestor_idxs()

    if prioritize == 'iou':
        pdist_priority = None  # TODO: cleanup
    else:
        pdist_priority = _fast_pdist_priority(classes, prioritize)

    if compat == 'mutex':
        # assume classes are mutually exclusive if hierarchy is not given
        cx_to_matchable_cxs = {cx: [cx] for cx in unique_pcxs}
    elif compat == 'ancestors':
        cx_to_matchable_cxs = {
            cx: sorted([cx] + sorted(
                ub.take(classes.node_to_idx,
                        nx.ancestors(classes.graph, classes.idx_to_node[cx]))))
            for cx in unique_pcxs
        }
    elif compat == 'all':
        cx_to_matchable_cxs = {cx: unique_tcxs for cx in unique_pcxs}
    else:
        raise KeyError(compat)

    if compat == 'all':
        # In this case simply run the full pairwise iou
        common_true_idxs = np.arange(len(true_dets))
        cx_to_matchable_txs = {cx: common_true_idxs for cx in unique_pcxs}
        common_ious = pred_dets.boxes.ious(true_dets.boxes, bias=bias)
        # common_ious = pred_dets.boxes.ious(true_dets.boxes, impl='c', bias=bias)
        iou_lookup = dict(enumerate(common_ious))
    else:
        # For each pred-category find matchable true-indices
        cx_to_matchable_txs = {}
        for cx, compat_cx in cx_to_matchable_cxs.items():
            matchable_cxs = cx_to_matchable_cxs[cx]
            compat_txs = ub.dict_take(cx_to_txs, matchable_cxs, default=[])
            compat_txs = np.array(sorted(ub.flatten(compat_txs)), dtype=int)
            cx_to_matchable_txs[cx] = compat_txs

        # Batch up the IOU pre-computation between compatible truths / preds
        iou_lookup = {}
        unique_pred_cxs, pgroupxs = kwarray.group_indices(pred_dets.class_idxs)
        for cx, pred_idxs in zip(unique_pred_cxs, pgroupxs):
            true_idxs = cx_to_matchable_txs[cx]
            ious = pred_dets.boxes[pred_idxs].ious(true_dets.boxes[true_idxs],
                                                   bias=bias)
            _px_to_iou = dict(zip(pred_idxs, ious))
            iou_lookup.update(_px_to_iou)

    iou_thresh_list = ([iou_thresh]
                       if not ub.iterable(iou_thresh) else iou_thresh)

    iou_thresh_to_y = {}
    for iou_thresh_ in iou_thresh_list:
        isvalid_lookup = {
            px: ious > iou_thresh_
            for px, ious in iou_lookup.items()
        }

        y = _critical_loop(true_dets,
                           pred_dets,
                           iou_lookup,
                           isvalid_lookup,
                           cx_to_matchable_txs,
                           bg_weight,
                           prioritize,
                           iou_thresh_,
                           pdist_priority,
                           cx_to_ancestors,
                           bg_cidx,
                           ignore_classes=ignore_classes,
                           max_dets=max_dets)
        iou_thresh_to_y[iou_thresh_] = y

    if ub.iterable(iou_thresh):
        return iou_thresh_to_y
    else:
        return y
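
The iou_thresh handling at the end hinges on ub.iterable, which distinguishes a scalar threshold from a list of thresholds; a tiny sketch:

import ubelt as ub

for iou_thresh in [0.5, [0.5, 0.75]]:
    iou_thresh_list = ([iou_thresh] if not ub.iterable(iou_thresh) else iou_thresh)
    print(iou_thresh, '->', iou_thresh_list)
# 0.5 -> [0.5]
# [0.5, 0.75] -> [0.5, 0.75]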
Example #20
    def compute_likely_overlaps(pfiles1, pfiles2):
        step_idx1 = ProgressiveFile.compatible_step_idx(pfiles1)
        step_idx2 = ProgressiveFile.compatible_step_idx(pfiles2)
        step_idx = min(step_idx1, step_idx2)
        grouped1 = ProgressiveFile.group_pfiles(pfiles1, step_idx=step_idx)
        grouped2 = ProgressiveFile.group_pfiles(pfiles2, step_idx=step_idx)

        thresh = 0.2
        verbose = 1

        # TODO: it would be nice if we didn't have to care about internal
        # deduplication when we attempt to find cross-set overlaps
        dups1 = ProgressiveFile.likely_duplicates(inv1.pfiles,
                                                  thresh=thresh,
                                                  verbose=verbose)
        dups2 = ProgressiveFile.likely_duplicates(inv2.pfiles,
                                                  thresh=thresh,
                                                  verbose=verbose)

        pfiles = inv1.pfiles + inv2.pfiles
        dups3 = ProgressiveFile.likely_duplicates(pfiles,
                                                  thresh=thresh,
                                                  verbose=verbose)

        only_on_inv2 = {}
        for key, group in dups3.items():
            if not any(
                    item.fpath.startswith(inv1.root_fpath) for item in group):
                only_on_inv2[key] = group

        for p1 in inv1.pfiles:
            if 'Chase HQ 2 (JUE) [!].zip' in p1.fpath:
                break

        for p2 in inv2.pfiles:
            if 'Chase HQ 2 (JUE) [!].zip' in p2.fpath:
                break

        look = list(ub.flatten(only_on_inv2.values()))
        takealook = sorted([p.fpath for p in look])
        print('takealook = {}'.format(ub.repr2(takealook, nl=1)))

        keys1 = set(grouped1)
        keys2 = set(grouped2)

        missing_keys2 = keys2 - keys1
        missing_groups2 = ub.dict_subset(grouped2, missing_keys2)

        missing_fpaths2 = []
        for key, values in missing_groups2.items():
            print('key = {!r}'.format(key))
            print('values = {}'.format(ub.repr2(values, nl=1)))
            missing_fpaths2.extend(values)

        missing_fpaths2 = sorted([p.fpath for p in missing_fpaths2])
        print('missing_fpaths2 = {}'.format(ub.repr2(missing_fpaths2, nl=1)))
        # pass

        import xdev
        set_overlaps = xdev.set_overlaps(keys1, keys2)
        print('set_overlaps = {}'.format(ub.repr2(set_overlaps, nl=1)))
Example #21
def main():
    # TODO: progressive hashing data structure
    inv1 = Inventory('/media/joncrall/raid/', blocklist)
    inv2 = Inventory('/media/joncrall/media', blocklist)

    # inv1 = Inventory('/media/joncrall/raid/Applications/NotGames', blocklist)
    # inv2 = Inventory('/media/joncrall/media/Applications/NotGames', blocklist)
    # inv1 = Inventory('/media/joncrall/raid/Applications', blocklist)
    # inv2 = Inventory('/media/joncrall/media/Applications', blocklist)

    self = inv1  # NOQA

    inv1.build()
    inv2.build()

    thresh = {
        'frac': 0.5,
        'byte':
        100 * int(2**20)  # only use the first few mb to determine overlap
    }
    verbose = 1
    pfiles1 = inv1.pfiles
    pfiles2 = inv2.pfiles
    overlap, only1, only2 = ProgressiveFile.likely_overlaps(pfiles1,
                                                            pfiles2,
                                                            thresh=thresh,
                                                            verbose=verbose)

    stats = {
        'overlap': len(overlap),
        'only1': len(only1),
        'only2': len(only2),
    }
    print('stats = {}'.format(ub.repr2(stats, nl=1)))
    only2_list = sorted([p.fpath for group in only2.values() for p in group])
    print('only2_list = {}'.format(ub.repr2(only2_list, nl=1)))
    print('stats = {}'.format(ub.repr2(stats, nl=1)))

    # for pfile in inv1.pfiles:
    #     pfile._check_integrity()

    import numpy as np
    mb_read = np.array([
        pfile._parts[-1][1] / int(2**20) for pfile in ub.ProgIter(inv2.pfiles)
    ])
    mb_read.max()
    mb_read.min()

    # Build all hashes up to a reasonable degree
    inv1.build_hashes(max_workers=0)

    maybe_dups = inv1.likely_duplicates(thresh=0.2)
    len(maybe_dups)

    maybe_dups = ub.sorted_keys(maybe_dups, key=lambda x: x[2])

    import networkx as nx
    import itertools as it
    # Check which directories are most likely to be duplicates
    graph = nx.Graph()

    for key, group in ub.ProgIter(maybe_dups.items(),
                                  total=len(maybe_dups),
                                  desc='build dup dir graph'):
        if key[0] == '':
            continue
        dpaths = [dirname(pfile.fpath) for pfile in group]
        for d1, d2 in it.combinations(dpaths, 2):
            graph.add_edge(d1, d2)
            edge = graph.edges[(d1, d2)]
            if 'dups' not in edge:
                edge['dups'] = 0
            edge['dups'] += 1

    edge_data = list(graph.edges(data=True))

    for dpath in ub.ProgIter(graph.nodes, desc='find lens'):
        num_children = len(os.listdir(dpath))
        graph.nodes[dpath]['num_children'] = num_children

    for d1, d2, dat in edge_data:
        nc1 = graph.nodes[d1]['num_children']
        nc2 = graph.nodes[d2]['num_children']
        ndups = dat['dups']
        dup_score = (dat['dups'] / min(nc1, nc2))
        dat['dup_score'] = dup_score
        if dup_score > 0.9:
            print('dup_score = {!r}'.format(dup_score))
            print('d1 = {!r}'.format(d1))
            print('d2 = {!r}'.format(d2))
            print('nc1 = {!r}'.format(nc1))
            print('nc2 = {!r}'.format(nc2))
            print('ndups = {!r}'.format(ndups))

    print('edge_data = {}'.format(ub.repr2(edge_data, nl=2)))

    print('maybe_dups = {}'.format(ub.repr2(maybe_dups.keys(), nl=3)))
    for key, group in maybe_dups.items():
        if key[0] == '':
            continue
        print('key = {!r}'.format(key))
        print('group = {}'.format(ub.repr2(group, nl=1)))
        for pfile in group:
            pfile.refined_to(float('inf'))

        print('key = {!r}'.format(key))

    inv2.build_hashes(max_workers=6, mode='thread')

    inv1.pfiles = [
        p for p in ub.ProgIter(inv1.pfiles, desc='exist check')
        if exists(p.fpath)
    ]
    inv2.pfiles = [
        p for p in ub.ProgIter(inv2.pfiles, desc='exist check')
        if exists(p.fpath)
    ]

    pfiles1 = inv1.pfiles
    pfiles2 = inv2.pfiles

    def compute_likely_overlaps(pfiles1, pfiles2):
        step_idx1 = ProgressiveFile.compatible_step_idx(pfiles1)
        step_idx2 = ProgressiveFile.compatible_step_idx(pfiles2)
        step_idx = min(step_idx1, step_idx2)
        grouped1 = ProgressiveFile.group_pfiles(pfiles1, step_idx=step_idx)
        grouped2 = ProgressiveFile.group_pfiles(pfiles2, step_idx=step_idx)

        thresh = 0.2
        verbose = 1

        # TODO: it would be nice if we didn't have to care about internal
        # deduplication when we attempt to find cross-set overlaps
        dups1 = ProgressiveFile.likely_duplicates(inv1.pfiles,
                                                  thresh=thresh,
                                                  verbose=verbose)
        dups2 = ProgressiveFile.likely_duplicates(inv2.pfiles,
                                                  thresh=thresh,
                                                  verbose=verbose)

        pfiles = inv1.pfiles + inv2.pfiles
        dups3 = ProgressiveFile.likely_duplicates(pfiles,
                                                  thresh=thresh,
                                                  verbose=verbose)

        only_on_inv2 = {}
        for key, group in dups3.items():
            if not any(
                    item.fpath.startswith(inv1.root_fpath) for item in group):
                only_on_inv2[key] = group

        for p1 in inv1.pfiles:
            if 'Chase HQ 2 (JUE) [!].zip' in p1.fpath:
                break

        for p2 in inv2.pfiles:
            if 'Chase HQ 2 (JUE) [!].zip' in p2.fpath:
                break

        look = list(ub.flatten(only_on_inv2.values()))
        takealook = sorted([p.fpath for p in look])
        print('takealook = {}'.format(ub.repr2(takealook, nl=1)))

        keys1 = set(grouped1)
        keys2 = set(grouped2)

        missing_keys2 = keys2 - keys1
        missing_groups2 = ub.dict_subset(grouped2, missing_keys2)

        missing_fpaths2 = []
        for key, values in missing_groups2.items():
            print('key = {!r}'.format(key))
            print('values = {}'.format(ub.repr2(values, nl=1)))
            missing_fpaths2.extend(values)

        missing_fpaths2 = sorted([p.fpath for p in missing_fpaths2])
        print('missing_fpaths2 = {}'.format(ub.repr2(missing_fpaths2, nl=1)))
        # pass

        import xdev
        set_overlaps = xdev.set_overlaps(keys1, keys2)
        print('set_overlaps = {}'.format(ub.repr2(set_overlaps, nl=1)))
        # We want to know what files in set2 do not exist in set1

    if 0:
        fpath = inv1.all_fpaths[0]
        pfile = ProgressiveFile(fpath)

        fpath1 = '/media/joncrall/raid/unsorted/yet-another-backup/card-usb-drive/Transfer/Zebras/DownloadedLibraries/lightspeed/solve_triu.m'
        fpath2 = '/media/joncrall/raid/unsorted/yet-another-backup/card-usb-drive/Zebras/downloaded_libraries/lightspeed/solve_triu.m'

        fpath1 = '/media/joncrall/raid/Applications/Wii/WiiHacksAndStuff/CurrentHacks/Falco/DarkFalco02.pcs'
        fpath2 = '/media/joncrall/raid/Applications/Wii/WiiHacksAndStuff/CurrentHacks/Ivysaur/Kraid-v2-Ivy.pcs'

        pfile = pfile1 = ProgressiveFile(fpath1)
        pfile2 = ProgressiveFile(fpath2)

        pfile.maybe_equal(pfile2, thresh=0.1)

        fpath_demodata = inv1.all_fpaths[::len(inv1.all_fpaths) // 500]
        # fpaths = hash_groups1_dup['ef46db3751d8e999']
        pfiles_demodata = [ProgressiveFile(f) for f in fpath_demodata]

        def progressive_duplicates(pfiles, idx=1):
            step_ids = [pfile.refined_to(idx) for pfile in ub.ProgIter(pfiles)]
            final_groups = {}
            grouped = ub.group_items(pfiles, step_ids)
            for key, group in grouped.items():
                if len(group) > 1:
                    if all(not g.can_refine for g in group):
                        # Group is ~100% a real duplicate
                        final_groups[key] = group
                    else:
                        pfiles = group
                        deduped = progressive_duplicates(pfiles, idx=idx + 1)
                        final_groups.update(deduped)
                else:
                    final_groups[key] = group
            return final_groups

        pfiles = pfiles_demodata
        final_groups = progressive_duplicates(pfiles)

        for key, group in final_groups.items():
            if len(group) > 1:
                print('key = {!r}'.format(key))
                print('group = {}'.format(ub.repr2(group, nl=1)))

        inv1.build_hashes()
        inv2.build_hashes()

        hash_groups1 = ub.group_items(inv1.all_fpaths, inv1.all_hashes)
        hash_groups2 = ub.group_items(inv2.all_fpaths, inv2.all_hashes)

        hash_groups1_dup = {
            k: v
            for k, v in hash_groups1.items() if len(v) > 1
        }
        hash_groups2_dup = {
            k: v
            for k, v in hash_groups2.items() if len(v) > 1
        }
        len(hash_groups1_dup)
        len(hash_groups2_dup)

        # common = set(hash_groups1) & set(hash_groups2)
        # xdev.set_overlaps(hash_groups1, hash_groups2)

        fnames1 = ub.group_items(inv1.all_fpaths, key=basename)
        fnames2 = ub.group_items(inv2.all_fpaths, key=basename)

        missing = ub.dict_diff(fnames2, fnames1)
        sorted(ub.flatten(missing.values()))
        len(missing)

        fpath_demodata = inv1.all_fpaths[::len(inv1.all_fpaths) // 500]

        def internal_deduplicate(self):
            hash_groups = ub.group_items(self.all_fpaths, self.all_hashes)
            hash_groups_dup = {
                k: v
                for k, v in hash_groups.items() if len(v) > 1
            }

            from os.path import dirname

            hash_groups_dup['ef46db3751d8e999']

            for key, values in hash_groups_dup.items():
                for v in values:
                    if v.endswith('.avi'):
                        break

                [basename(v) for v in values]
                [dirname(v) for v in values]
Example #22
 def unique(self):
     """
     Returns the unique channels that will need to be given or loaded
     """
     return set(ub.flatten(self.parse().values()))
Example #23
aug_graph = graph.copy()

# remove cut edges from augmented graph
edge_to_iscut = nx.get_edge_attributes(aug_graph, 'is_cut')
cut_edges = [
    (u, v) for (u, v, d) in aug_graph.edges(data=True)
    if not (d.get('is_cut') or d.get('decision', 'unreviewed') in ['nomatch'])
]
cut_edges = [edge for edge, flag in edge_to_iscut.items() if flag]
aug_graph.remove_edges_from(cut_edges)

# Enumerate cliques inside labels
unflat_edges = [
    list(ub.iter_window(nodes, 2)) for nodes in label_to_nodes.values()
]
node_pairs = [tup for tup in ub.flatten(unflat_edges) if tup[0] != tup[1]]

# Remove candidate MST edges that exist in the original graph
orig_edges = list(aug_graph.edges())
candidate_mst_edges = [
    edge for edge in node_pairs if not aug_graph.has_edge(*edge)
]
# randomness prevents chains and visually looks better
rng = np.random.RandomState(42)


def _randint():
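    # randomness is currently short-circuited to 0; the unreachable return below would restore it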
    return 0
    return rng.randint(0, 100)

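A sketch of the candidate-edge enumeration above, assuming a small label_to_nodes mapping (the real one comes from the surrounding graph code):

import ubelt as ub

label_to_nodes = {0: [1, 2, 3], 1: [4, 5]}      # hypothetical name-label groups
unflat_edges = [list(ub.iter_window(nodes, 2)) for nodes in label_to_nodes.values()]
node_pairs = [tup for tup in ub.flatten(unflat_edges) if tup[0] != tup[1]]
print(node_pairs)  # [(1, 2), (2, 3), (4, 5)]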
Example #24
def demodata_bridge():
    # define 2-connected components and bridges
    cc2 = [(1, 2, 4, 3, 1, 4), (8, 9, 10, 8), (11, 12, 13, 11)]
    bridges = [(4, 8), (3, 5), (20, 21), (22, 23, 24)]
    G = nx.Graph(ub.flatten(ub.iter_window(path, 2) for path in cc2 + bridges))
    return G
Example #25
def main(bib_fpath=None):
    r"""
    Entry point for the fixbib script

    CommandLine:
        fixbib
        python -m fixtex bib
        python -m fixtex bib --dryrun
        python -m fixtex bib --dryrun --debug
    """

    if bib_fpath is None:
        bib_fpath = 'My Library.bib'

    # DEBUG = ub.argflag('--debug')
    # Read in text and ensure ascii format
    dirty_text = ut.readfrom(bib_fpath)

    from fixtex.fix_tex import find_used_citations, testdata_fpaths

    if exists('custom_extra.bib'):
        extra_parser = bparser.BibTexParser(ignore_nonstandard_types=False)
        parser = bparser.BibTexParser()
        ut.delete_keys(parser.alt_dict, ['url', 'urls'])
        print('Parsing extra bibtex file')
        extra_text = ut.readfrom('custom_extra.bib')
        extra_database = extra_parser.parse(extra_text, partial=False)
        print('Finished parsing extra')
        extra_dict = extra_database.get_entry_dict()
    else:
        extra_dict = None

    #udata = dirty_text.decode("utf-8")
    #dirty_text = udata.encode("ascii", "ignore")
    #dirty_text = udata

    # parser = bparser.BibTexParser()
    # bib_database = parser.parse(dirty_text)
    # d = bib_database.get_entry_dict()

    print('BIBTEXPARSER LOAD')
    parser = bparser.BibTexParser(ignore_nonstandard_types=False,
                                  common_strings=True)
    ut.delete_keys(parser.alt_dict, ['url', 'urls'])
    print('Parsing bibtex file')
    bib_database = parser.parse(dirty_text, partial=False)
    print('Finished parsing')

    bibtex_dict = bib_database.get_entry_dict()
    old_keys = list(bibtex_dict.keys())
    new_keys = []
    for key in ub.ProgIter(old_keys, label='fixing keys'):
        new_key = key
        new_key = new_key.replace(':', '')
        new_key = new_key.replace('-', '_')
        new_key = re.sub('__*', '_', new_key)
        new_keys.append(new_key)

    # assert len(ut.find_duplicate_items(new_keys)) == 0, 'new keys created conflict'
    assert len(ub.find_duplicates(new_keys)) == 0, 'new keys created conflict'

    for key, new_key in zip(old_keys, new_keys):
        if key != new_key:
            entry = bibtex_dict[key]
            entry['ID'] = new_key
            bibtex_dict[new_key] = entry
            del bibtex_dict[key]

    # The bibtex is now clean. Print it to stdout
    #print(clean_text)
    verbose = None
    if verbose is None:
        verbose = 1

    # Find citations from the tex documents
    key_list = None
    if key_list is None:
        cacher = ub.Cacher('texcite1', enabled=0)
        data = cacher.tryload()
        if data is None:
            fpaths = testdata_fpaths()
            key_list, inverse = find_used_citations(fpaths,
                                                    return_inverse=True)
            # ignore = ['JP', '?', 'hendrick']
            # for item in ignore:
            #     try:
            #         key_list.remove(item)
            #     except ValueError:
            #         pass
            if verbose:
                print('Found %d citations used in the document' %
                      (len(key_list), ))
            data = key_list, inverse
            cacher.save(data)
        key_list, inverse = data

    # else:
    #     key_list = None

    unknown_pubkeys = []
    debug_author = ub.argval('--debug-author', default=None)
    # ./fix_bib.py --debug_author=Kappes

    if verbose:
        print('Fixing %d/%d bibtex entries' %
              (len(key_list), len(bibtex_dict)))

    # debug = True
    debug = False
    if debug_author is not None:
        debug = False

    known_keys = list(bibtex_dict.keys())
    missing_keys = set(key_list) - set(known_keys)
    if extra_dict is not None:
        missing_keys.difference_update(set(extra_dict.keys()))

    if missing_keys:
        print('The library is missing keys found in tex files %s' %
              (ub.repr2(missing_keys), ))

    # Search for possible typos:
    candidate_typos = {}
    sedlines = []
    for key in missing_keys:
        candidates = ut.closet_words(key, known_keys, num=3, subset=True)
        if len(candidates) > 1:
            top = candidates[0]
            if ut.edit_distance(key, top) == 1:
                # "sed -i -e 's/{}/{}/g' *.tex".format(key, top)
                import os
                replpaths = ' '.join(
                    [relpath(p, os.getcwd()) for p in inverse[key]])
                sedlines.append("sed -i -e 's/{}/{}/g' {}".format(
                    key, top, replpaths))
        candidate_typos[key] = candidates
        print('Cannot find key = %r' % (key, ))
        print('Did you mean? %r' % (candidates, ))

    print('Quick fixes')
    print('\n'.join(sedlines))

    # group by file
    just = max([0] + list(map(len, missing_keys)))
    missing_fpaths = [inverse[key] for key in missing_keys]
    for fpath in sorted(set(ub.flatten(missing_fpaths))):
        # ut.fix_embed_globals()
        subkeys = [k for k in missing_keys if fpath in inverse[k]]
        print('')
        ut.cprint('--- Missing Keys ---', 'blue')
        ut.cprint('fpath = %r' % (fpath, ), 'blue')
        ut.cprint('{} | {}'.format('Missing'.ljust(just), 'Did you mean?'),
                  'blue')
        for key in subkeys:
            print('{} | {}'.format(ut.highlight_text(key.ljust(just), 'red'),
                                   ' '.join(candidate_typos[key])))

    # for key in list(bibtex_dict.keys()):

    if extra_dict is not None:
        # Extra database takes precedence over regular
        key_list = list(ut.unique(key_list + list(extra_dict.keys())))
        for k, v in extra_dict.items():
            bibtex_dict[k] = v

    full = ub.argflag('--full')

    for key in key_list:
        try:
            entry = bibtex_dict[key]
        except KeyError:
            continue
        self = BibTexCleaner(key, entry, full=full)

        if debug_author is not None:
            debug = debug_author in entry.get('author', '')

        if debug:
            ut.cprint(' --- ENTRY ---', 'yellow')
            print(ub.repr2(entry, nl=1))

        entry = self.fix()
        # self.clip_abstract()
        # self.shorten_keys()
        # self.fix_authors()
        # self.fix_year()
        # old_pubval = self.fix_pubkey()
        # if old_pubval:
        #     unknown_pubkeys.append(old_pubval)
        # self.fix_arxiv()
        # self.fix_general()
        # self.fix_paper_types()

        if debug:
            print(ub.repr2(entry, nl=1))
            ut.cprint(' --- END ENTRY ---', 'yellow')
        bibtex_dict[key] = entry

    unwanted_keys = set(bibtex_dict.keys()) - set(key_list)
    if verbose:
        print('Removing unwanted %d entries' % (len(unwanted_keys)))
    ut.delete_dict_keys(bibtex_dict, unwanted_keys)

    if 0:
        d1 = bibtex_dict.copy()
        full = True
        for key, entry in d1.items():
            self = BibTexCleaner(key, entry, full=full)
            pub = self.publication()
            if pub is None:
                print(self.entry['ENTRYTYPE'])

            old = self.fix_pubkey()
            x1 = self._pubval()
            x2 = self.standard_pubval(full=full)
            # if x2 is not None and len(x2) > 5:
            #     print(ub.repr2(self.entry))

            if x1 != x2:
                print('x2 = %r' % (x2, ))
                print('x1 = %r' % (x1, ))
                print(ub.repr2(self.entry))

            # if 'CVPR' in self.entry.get('booktitle', ''):
            #     if 'CVPR' != self.entry.get('booktitle', ''):
            #         break
            if old:
                print('old = %r' % (old, ))
            d1[key] = self.entry

    if full:
        d1 = bibtex_dict.copy()

        import numpy as np
        import pandas as pd
        df = pd.DataFrame.from_dict(d1, orient='index')

        paged_items = df[~pd.isnull(df['pub_accro'])]
        has_pages = ~pd.isnull(paged_items['pages'])
        print('have pages {} / {}'.format(has_pages.sum(), len(has_pages)))
        print(ub.repr2(paged_items[~has_pages]['title'].values.tolist()))

        entrytypes = dict(list(df.groupby('pub_type')))
        if False:
            # entrytypes['misc']
            g = entrytypes['online']
            g = g[g.columns[~np.all(pd.isnull(g), axis=0)]]

            entrytypes['book']
            entrytypes['thesis']
            g = entrytypes['article']
            g = entrytypes['incollection']
            g = entrytypes['conference']

        def lookup_pub(e):
            if e == 'article':
                return 'journal', 'journal'
            elif e == 'incollection':
                return 'booksection', 'booktitle'
            elif e == 'conference':
                return 'conference', 'booktitle'
            return None, None

        for e, g in entrytypes.items():
            print('e = %r' % (e, ))
            g = g[g.columns[~np.all(pd.isnull(g), axis=0)]]
            if 'pub_full' in g.columns:
                place_title = g['pub_full'].tolist()
                print(ub.repr2(ub.dict_hist(place_title)))
            else:
                print('Unknown publications')

        if 'report' in entrytypes:
            g = entrytypes['report']
            missing = g[pd.isnull(g['title'])]
            if len(missing):
                print('Missing Title')
                print(ub.repr2(missing[['title', 'author']].values.tolist()))

        if 'journal' in entrytypes:
            g = entrytypes['journal']
            g = g[g.columns[~np.all(pd.isnull(g), axis=0)]]

            missing = g[pd.isnull(g['journal'])]
            if len(missing):
                print('Missing Journal')
                print(ub.repr2(missing[['title', 'author']].values.tolist()))

        if 'conference' in entrytypes:
            g = entrytypes['conference']
            g = g[g.columns[~np.all(pd.isnull(g), axis=0)]]

            missing = g[pd.isnull(g['booktitle'])]
            if len(missing):
                print('Missing Booktitle')
                print(ub.repr2(missing[['title', 'author']].values.tolist()))

        if 'incollection' in entrytypes:
            g = entrytypes['incollection']
            g = g[g.columns[~np.all(pd.isnull(g), axis=0)]]

            missing = g[pd.isnull(g['booktitle'])]
            if len(missing):
                print('Missing Booktitle')
                print(ub.repr2(missing[['title', 'author']].values.tolist()))

        if 'thesis' in entrytypes:
            g = entrytypes['thesis']
            g = g[g.columns[~np.all(pd.isnull(g), axis=0)]]
            missing = g[pd.isnull(g['institution'])]
            if len(missing):
                print('Missing Institution')
                print(ub.repr2(missing[['title', 'author']].values.tolist()))

        # import utool
        # utool.embed()

    # Overwrite BibDatabase structure
    bib_database._entries_dict = bibtex_dict
    bib_database.entries = list(bibtex_dict.values())

    #conftitle_to_types_set_hist = {key: set(val) for key, val in conftitle_to_types_hist.items()}
    #print(ub.repr2(conftitle_to_types_set_hist))

    print('Unknown conference keys:')
    print(ub.repr2(sorted(unknown_pubkeys)))
    print('len(unknown_pubkeys) = %r' % (len(unknown_pubkeys), ))

    writer = BibTexWriter()
    writer.contents = ['comments', 'entries']
    writer.indent = '  '
    writer.order_entries_by = ('type', 'author', 'year')

    new_bibtex_str = bibtexparser.dumps(bib_database, writer)

    # Need to check
    #jegou_aggregating_2012

    # Fix the Journal Abbreviations
    # References:
    # https://www.ieee.org/documents/trans_journal_names.pdf

    # Write out clean bibfile in ascii format
    clean_bib_fpath = ub.augpath(bib_fpath.replace(' ', '_'), suffix='_clean')

    if not ub.argflag('--dryrun'):
        ut.writeto(clean_bib_fpath, new_bibtex_str)
Example #26
    def __init__(self,
                 categories=None,
                 fg_scale=0.5,
                 fg_intensity=0.9,
                 rng=None):
        """
        Args:
            categories (List[Dict]): List of coco category dictionaries
        """
        self.rng = kwarray.ensure_rng(rng)
        self.fg_scale = fg_scale
        self.fg_intensity = fg_intensity

        self._category_to_elemfunc = {
            'superstar': lambda x: Rasters.superstar(),
            'eff': lambda x: Rasters.eff(),
            'box': lambda x: (skimage.morphology.square(x), None),
            'star': lambda x: (star(x), None),
            'circle': lambda x: (skimage.morphology.disk(x), None),
            'octagon': lambda x: (skimage.morphology.octagon(
                x // 2, int(x / (2 * np.sqrt(2)))), None),
            'diamond': lambda x: (skimage.morphology.diamond(x), None),
        }
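        # Each element function maps a size ``x`` to a structuring element: the
        # skimage-based entries return a (binary array, None) pair (e.g.
        # skimage.morphology.disk(3) is a 7x7 disk), while 'superstar' and 'eff'
        # presumably return an analogous (raster, keypoints) pair from Rasters.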
        # Make generation of shapes a bit faster?
        # Maybe there are too many input combinations for this?
        # If we only allow certain size generations it should be ok

        # for key in self._category_to_elemfunc.keys():
        #     self._category_to_elemfunc[key] = ub.memoize(self._category_to_elemfunc[key])

        # keep track of which keypoints belong to which categories
        self.categories = categories
        self.cname_to_kp = {
            c['name']: c.get('keypoints', [])
            for c in self.categories
        }

        self.obj_catnames = sorted([c['name'] for c in self.categories])
        self.kp_catnames = sorted(ub.flatten(self.cname_to_kp.values()))

        kpname_to_cat = {
            c['name']: c
            for c in CategoryPatterns._default_keypoint_categories
        }
        self.keypoint_categories = list(
            ub.take(kpname_to_cat, self.kp_catnames))

        # flatten list of all keypoint categories
        # self.kp_catnames = list(
        #     ub.flatten([self.cname_to_kp.get(cname, [])
        #                 for cname in self.obj_catnames])
        # )
        self.cname_to_cid = {cat['name']: cat['id'] for cat in self.categories}
        self.cname_to_cx = {
            cat['name']: cx
            for cx, cat in enumerate(self.categories)
        }
Exemple #27
0
 def assert_recovery_invariant(infr, msg=''):
     if not DEBUG_INCON:
         return
     # infr.print('assert_recovery_invariant', 200)
     inconsistent_ccs = list(infr.inconsistent_components())
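     # Union of all nodes that appear in any inconsistent component.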
     incon_cc = set(ub.flatten(inconsistent_ccs))  # NOQA
Exemple #28
0
    def measure_metrics(infr):
        real_pos_edges = []

        n_true_merges = infr.test_state['n_true_merges']
        confusion = infr.test_state['confusion']

        n_tp = confusion[POSTV][POSTV]
        columns = set(confusion.keys())
        reviewed_cols = columns - {UNREV}
        non_postv = reviewed_cols - {POSTV}
        non_negtv = reviewed_cols - {NEGTV}

        n_fn = sum(ub.take(confusion[POSTV], non_postv))
        n_fp = sum(ub.take(confusion[NEGTV], non_negtv))

        n_error_edges = sum(confusion[r][c] + confusion[c][r]
                            for r, c in it.combinations(reviewed_cols, 2))
        # assert n_fn + n_fp == n_error_edges

        pred_n_pcc_mst_edges = n_true_merges

        # Find all annotations involved in a mistake
        assert n_error_edges == len(infr.mistake_edges)
        direct_mistake_aids = {a for edge in infr.mistake_edges for a in edge}
        mistake_nids = set(infr.node_labels(*direct_mistake_aids))
        mistake_aids = set(
            ub.flatten([infr.pos_graph.component(nid)
                        for nid in mistake_nids]))

        pos_acc = pred_n_pcc_mst_edges / infr.real_n_pcc_mst_edges
        metrics = {
            'n_decision': infr.test_state['n_decision'],
            'n_manual': infr.test_state['n_manual'],
            'n_algo': infr.test_state['n_algo'],
            'phase': infr.loop_phase,
            'pos_acc': pos_acc,
            'n_merge_total': infr.real_n_pcc_mst_edges,
            'n_merge_remain': infr.real_n_pcc_mst_edges - n_true_merges,
            'n_true_merges': n_true_merges,
            'recovering': infr.is_recovering(),
            # 'recovering2': infr.test_state['recovering'],
            'merge_remain': 1 - pos_acc,
            'n_mistake_aids': len(mistake_aids),
            'frac_mistake_aids': len(mistake_aids) / len(infr.aids),
            'n_mistake_nids': len(mistake_nids),
            'n_errors': n_error_edges,
            'n_fn': n_fn,
            'n_fp': n_fp,
            'refresh_support': len(infr.refresh.manual_decisions),
            'pprob_any': infr.refresh.prob_any_remain(),
            'mu': infr.refresh._ewma,
            'test_action': infr.test_state['test_action'],
            'action': infr.test_state.get('action', None),
            'user_id': infr.test_state['user_id'],
            'pred_decision': infr.test_state['pred_decision'],
            'true_decision': infr.test_state['true_decision'],
            'n_neg_redun': infr.neg_redun_metagraph.number_of_edges(),
            'n_neg_redun1': (infr.neg_metagraph.number_of_edges() -
                             infr.neg_metagraph.number_of_selfloops()),
        }

        return metrics
Exemple #29
0
def new_video_sample_grid(dset, window_dims, window_overlap=0.0,
                          classes_of_interest=None, ignore_coverage_thresh=0.6,
                          negative_classes={'ignore', 'background'}):
    """
    Create a space time-grid to sample with

    Example:
        >>> from ndsampler.coco_regions import *  # NOQA
        >>> import kwcoco
        >>> dset = kwcoco.CocoDataset.demo('vidshapes8-multispectral', num_frames=5)
        >>> dset.conform()
        >>> window_dims = (2, 224, 224)
        >>> sample_grid = new_video_sample_grid(dset, window_dims)
        >>> print('sample_grid = {}'.format(ub.repr2(sample_grid, nl=2)))
        >>> # Now try to load a sample
        >>> tr = sample_grid['positives'][0]
        >>> import ndsampler
        >>> sampler = ndsampler.CocoSampler(dset)
        >>> tr_ = sampler._infer_target_attributes(tr)
        >>> print('tr_ = {}'.format(ub.repr2(tr_, nl=1)))
        >>> sample = sampler.load_sample(tr)
        >>> assert sample['im'].shape == (2, 224, 224, 5)

    Ignore:
        import xdev
        globals().update(xdev.get_func_kwargs(new_video_sample_grid))
    """
    import kwarray
    import kwimage
    import numpy as np
    import ubelt as ub
    from ndsampler import isect_indexer
    keepbound = True

    if classes_of_interest:
        raise NotImplementedError

    # Create a sliding window object for each specific image (because they may
    # have different sizes, technically we could memoize this)
    vidid_to_slider = {}
    for vidid, video in dset.index.videos.items():
        gids = dset.index.vidid_to_gids[vidid]
        num_frames = len(gids)
        full_dims = [num_frames, video['height'], video['width']]
        window_dims_ = full_dims if window_dims == 'full' else window_dims
        slider = kwarray.SlidingWindow(full_dims, window_dims_,
                                       overlap=window_overlap,
                                       keepbound=keepbound,
                                       allow_overshoot=True)

        vidid_to_slider[vidid] = slider
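        # Iterating over `slider` yields tuples of slices, one per entry of
        # `full_dims`, i.e. a (time, y, x) window into this video.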

    _isect_index = isect_indexer.FrameIntersectionIndex.from_coco(dset)

    positives = []
    negatives = []
    for vidid, slider in vidid_to_slider.items():
        regions = list(slider)
        gids = dset.index.vidid_to_gids[vidid]
        boxes = []
        box_gids = []
        for region in regions:
            t_sl, y_sl, x_sl = region
            region_gids = gids[t_sl]
            box_gids.append(region_gids)
            boxes.append([x_sl.start, y_sl.start, x_sl.stop, y_sl.stop])
        boxes = kwimage.Boxes(np.array(boxes), 'ltrb')
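        # Each row is the spatial footprint of one window in 'ltrb'
        # (left, top, right, bottom) pixel coordinates.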

        for region, region_gids, box in zip(regions, box_gids, boxes):
            # Check to see what annotations this window-box overlaps with
            region_aids = []
            for gid in region_gids:
                # TODO: memoize to prevent dup queries (box is not hashable)
                aids = _isect_index.overlapping_aids(gid, box)
                region_aids.append(aids)

            pos_aids = sorted(ub.flatten(region_aids))
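            # pos_aids is the flattened, sorted union of the per-frame
            # annotation ids that overlap this spacetime window.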
            space_slice = region[1:3]
            time_slice = region[0]

            tr = {
                'vidid': vidid,
                'time_slice': time_slice,
                'space_slice': space_slice,
                # 'slices': region,
                'gids': region_gids,
                'aids': pos_aids,
            }
            if len(pos_aids):
                positives.append(tr)
            else:
                negatives.append(tr)

    print('Found {} positives'.format(len(positives)))
    print('Found {} negatives'.format(len(negatives)))
    sample_grid = {
        'positives': positives,
        'negatives': negatives,
    }
    return sample_grid
Exemple #30
0
def pvp_inventory():
    """
    The idea is that you put info about your candidates here and we find good
    mons to power up.
    """
    inventory = [
        Pokemon('Magnezone', (14, 14, 14), cp=1815, form='Normal'),
        Pokemon('Magnemite', (7, 14, 9), cp=792),
        Pokemon('Magnemite', (10, 14, 13), cp=747),
        Pokemon('Magnemite', (13, 9, 15), cp=602),
        Pokemon('Magneton', (13, 14, 13), cp=550, form='Shadow'),
        Pokemon('Magnemite', (15, 13, 7), cp=293, form='Shadow'),
        Pokemon('Magnemite', (2, 14, 15), cp=283, form='Shadow'),
    ]

    inventory = [
        Pokemon('sirfetch’d', (4, 11, 12), cp=1924, form='Galarian'),
        Pokemon('farfetch’d', (12, 15, 15), cp=1495, form='Galarian'),
        Pokemon('farfetch’d', (14, 14, 15), cp=948, form='Galarian'),
    ]

    inventory = [
        Pokemon('bulbasaur', (7, 13, 12), cp=382, form='Shadow'),
        Pokemon('bulbasaur', (4, 8, 13), cp=366, form='Shadow'),
        Pokemon('bulbasaur', (7, 12, 8), cp=227, form='Shadow'),
    ]

    inventory = [
        Pokemon('Clefable', (12, 13, 12), cp=1828),
        Pokemon('Clefairy', (4, 2, 7), cp=389),
    ]

    inventory = [
        Pokemon('Jigglypuff', (10, 14, 15), cp=631),
        Pokemon('Jigglypuff', (10, 12, 15), cp=286),
    ]

    inventory = [
        Pokemon('poliwag', (10, 13, 14), cp=335),
        Pokemon('poliwag', (10, 14, 13), cp=335),
    ]

    inventory = [
        Pokemon('drifloon', (15, 15, 1), cp=695),
        Pokemon('drifloon', (0, 9, 14), cp=527),
        Pokemon('drifloon', (15, 15, 12), cp=509),
        Pokemon('drifloon', (14, 15, 14), cp=508),
        Pokemon('drifloon', (14, 11, 14), cp=497),
        Pokemon('drifloon', (11, 13, 12), cp=489, shiny=True),
        Pokemon('drifloon', (0, 4, 8), cp=336),
        Pokemon('drifloon', (12, 10, 12), cp=118),
    ]

    inventory = [
        Pokemon('shelmet', (10, 15, 8), cp=351),
        Pokemon('shelmet', (0, 13, 0), cp=166),
        Pokemon('shelmet', (15, 10, 12), cp=158),
    ]

    inventory = [
        Pokemon('Karrablast', (10, 4, 12), cp=824),
        Pokemon('Karrablast', (13, 13, 13), cp=655),
        Pokemon('Karrablast', (13, 14, 15), cp=16),
    ]

    inventory = [
        Pokemon('Ralts', (14, 14, 13)),
        Pokemon('Ralts', (14, 11, 12)),
        Pokemon('Ralts', (0, 11, 0), shadow=True),
        Pokemon('Ralts', (1, 14, 2), shadow=True),
        Pokemon('Ralts', (12, 12, 6), shadow=True),
        Pokemon('Ralts', (5, 14, 14)),
        Pokemon('Ralts', (7, 11, 11)),
    ]

    inventory = [
        Pokemon('Toxicroak', (11, 13, 14)),
        Pokemon('Croagunk', (9, 11, 13), cp=794),
        Pokemon('Croagunk', (8, 6, 8), cp=429),
    ]

    inventory = [
        Pokemon('Snorlax', (7, 6, 13), shadow=True),
        Pokemon('Snorlax', (0, 0, 13), shadow=0),
        Pokemon('Snorlax', (8, 15, 14), shadow=0, cp=1155),
        Pokemon('Snorlax', (8, 12, 11), shadow=0, cp=2106),
        Pokemon('Snorlax', (9, 15, 10), shadow=0, cp=2487),
        Pokemon('Snorlax', (1, 15, 14), shadow=0, cp=1372),
        Pokemon('Snorlax', (7, 11, 15), shadow=0, cp=3044),
        Pokemon('Snorlax', (2, 15, 1), shadow=1),
        Pokemon('Munchlax', (14, 11, 14), shadow=0, cp=1056),
    ]

    inventory = [
        Pokemon('Obstagoon', (11, 15, 13), cp=1478, form='Galarian'),
        Pokemon('zigzagoon', (10, 14, 14), cp=268, form='Galarian'),
        Pokemon('zigzagoon', (11, 12, 13), cp=268, form='Galarian'),
        Pokemon('zigzagoon', (11, 12, 15), cp=270, form='Galarian'),
        Pokemon('zigzagoon', (12, 11, 15), cp=272, form='Galarian'),
    ]

    inventory = [
        Pokemon('Meditite', (5, 12, 4), cp=25),
        Pokemon('Medicham', (14, 12, 12), cp=1116),
        Pokemon('Medicham', (15, 15, 10), cp=966),
    ]

    for self in inventory:
        list(self.family())

    candidates = list(
        ub.flatten(list(pkmn.family(ancestors=False))
                   for pkmn in inventory)) + inventory

    groups = ub.group_items(candidates, key=lambda p: p.name)
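    # ub.group_items buckets items by a key function, e.g.
    # ub.group_items(['a1', 'b1', 'a2'], key=lambda s: s[0]) -> {'a': ['a1', 'a2'], 'b': ['b1']}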

    leages = {
        'master': {'max_cp': float('inf')},
        'ultra': {'max_cp': 2500},
        'great': {'max_cp': 1500},
        'little': {'max_cp': 500},
    }

    max_level = 45  # for XL candy
    max_level = 40  # normal

    for name, group in groups.items():
        print('\n\n------------\n\n')
        print('name = {!r}'.format(name))
        for leage_name, leage_filters in leages.items():
            max_cp = leage_filters['max_cp']
            print('')
            print(' ========== ')
            print(' --- {} in {} --- '.format(name, leage_name))
            not_eligible = [
                p for p in group if p.cp is not None and p.cp > max_cp
            ]
            print('not_eligible = {!r}'.format(not_eligible))
            have_ivs = [p.ivs for p in group if p.cp is None or p.cp <= max_cp]
            if len(have_ivs) > 0:
                first = ub.peek(group)
                first.leage_rankings_for(have_ivs,
                                         max_cp=max_cp,
                                         max_level=max_level)
            else:
                print('none eligible')