def update_neighbors(self):
    # TODO: this should be done with a fast spatial index, but
    # unfortunately I don't see any existing implementations that make it
    # easy to support moving points.
    utriu_dists = pdist(self.pos)
    utriu_flags = utriu_dists < self.config['perception_thresh']
    utriu_rx, utriu_cx = np.triu_indices(len(self.pos), k=1)
    utriu_neighb_rxs = utriu_rx[utriu_flags]
    utriu_neighb_cxs = utriu_cx[utriu_flags]
    neighb_rxs = np.r_[utriu_neighb_rxs, utriu_neighb_cxs]
    neighb_cxs = np.r_[utriu_neighb_cxs, utriu_neighb_rxs]

    group_rxs, groupxs = kwarray.group_indices(neighb_rxs)
    group_cxs = kwarray.apply_grouping(neighb_cxs, groupxs)
    rx_to_neighb_cxs = ub.dzip(group_rxs, group_cxs)

    # n = len(self.pos)
    # rx_to_neighb_utriu_idxs = {}
    # for rx, cxs in rx_to_neighb_cxs.items():
    #     rxs = np.full_like(cxs, fill_value=rx)
    #     multi_index = (rxs, cxs)
    #     utriu_idxs = triu_condense_multi_index(
    #         multi_index, dims=(n, n), symetric=True)
    #     rx_to_neighb_utriu_idxs[rx] = utriu_idxs

    # self.utriu_dists = utriu_dists
    self.rx_to_neighb_cxs = rx_to_neighb_cxs
    # self.rx_to_neighb_utriu_idxs = rx_to_neighb_utriu_idxs

    # Compute speed and direction of every boid
    self.speeds = np.linalg.norm(self.vel, axis=1)
    self.dirs = self.vel / self.speeds[:, None]

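# Illustrative aside (not part of the original function): a minimal, hedged
# sketch of the grouping idiom used above. kwarray.group_indices returns the
# unique keys and, for each key, the positions where it occurs;
# kwarray.apply_grouping gathers a parallel array by those positions; and
# ub.dzip zips the two into the row -> neighbor-columns mapping.
# The neighb_rxs / neighb_cxs values here are made up for illustration.
import numpy as np
import kwarray
import ubelt as ub

neighb_rxs = np.array([0, 0, 2, 2, 1])   # hypothetical row indices
neighb_cxs = np.array([1, 2, 0, 1, 0])   # hypothetical column indices

group_rxs, groupxs = kwarray.group_indices(neighb_rxs)
group_cxs = kwarray.apply_grouping(neighb_cxs, groupxs)
rx_to_neighb_cxs = ub.dzip(group_rxs, group_cxs)
# -> {0: array([1, 2]), 1: array([0]), 2: array([0, 1])}
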
def setup_datasets(workdir=None):
    if workdir is None:
        workdir = ub.expandpath('~/data/mnist/')

    # Define your dataset
    transform = torchvision.transforms.Compose([
        torchvision.transforms.ToTensor(),
        # torchvision.transforms.Normalize((0.1307,), (0.3081,))
    ])

    learn_dset = nh.data.MNIST(workdir, transform=transform, train=True,
                               download=True)

    test_dset = nh.data.MNIST(workdir, transform=transform, train=False,
                              download=True)

    # split the learning dataset into training and validation
    # take a subset of data
    factor = .15
    n_vali = int(len(learn_dset) * factor)
    learn_idx = np.arange(len(learn_dset))

    rng = np.random.RandomState(0)
    rng.shuffle(learn_idx)

    reduction = int(ub.argval('--reduction', default=1))
    vali_idx = torch.LongTensor(learn_idx[:n_vali][::reduction])
    train_idx = torch.LongTensor(learn_idx[n_vali:][::reduction])

    train_dset = torch.utils.data.Subset(learn_dset, train_idx)
    vali_dset = torch.utils.data.Subset(learn_dset, vali_idx)

    datasets = {
        'train': train_dset,
        'vali': vali_dset,
        'test': test_dset,
    }
    if not ub.argflag('--test'):
        del datasets['test']

    for tag, dset in datasets.items():
        # Construct the PCCs (positive connected components)
        # These are groups of item indices which are positive matches
        if isinstance(dset, torch.utils.data.Subset):
            labels = dset.dataset.train_labels[dset.indices]
        else:
            labels = dset.labels
        unique_labels, groupxs = kwarray.group_indices(labels.numpy())
        dset.pccs = [xs.tolist() for xs in groupxs]

    # Give the training dataset an input_id
    datasets['train'].input_id = 'mnist_' + ub.hash_data(train_idx.numpy())[0:8]
    return datasets, workdir

def select_positive_regions(targets, window_dims=(300, 300), thresh=0.0,
                            rng=None, verbose=0):
    """
    Reduce positive example redundancy by selecting disparate positive samples

    Example:
        >>> from ndsampler.coco_regions import *
        >>> import kwcoco
        >>> dset = kwcoco.CocoDataset.demo('shapes8')
        >>> targets = tabular_coco_targets(dset)
        >>> window_dims = (300, 300)
        >>> selected = select_positive_regions(targets, window_dims)
        >>> print(len(selected))
        >>> print(len(dset.anns))
    """
    unique_gids, groupxs = kwarray.group_indices(targets['gid'])
    gid_to_groupx = dict(zip(unique_gids, groupxs))
    wh, ww = window_dims
    rng = kwarray.ensure_rng(rng)
    selection = []

    # Get all the bounding boxes
    cxs, cys = ub.take(targets, ['cx', 'cy'])
    n = len(targets)
    cxs = cxs.astype(np.float32)
    cys = cys.astype(np.float32)
    wws = np.full(n, ww, dtype=np.float32)
    whs = np.full(n, wh, dtype=np.float32)
    cxywh = np.hstack([a[:, None] for a in [cxs, cys, wws, whs]])
    boxes = kwimage.Boxes(cxywh, 'cxywh').to_tlbr()

    iter_ = ub.ProgIter(gid_to_groupx.items(), enabled=verbose,
                        label='select positive regions',
                        total=len(gid_to_groupx), adjust=0, freq=32)
    for gid, groupx in iter_:
        # Select all candidate windows in this image
        cand_windows = boxes.take(groupx, axis=0)
        # Randomize which candidate windows have the highest scores so the
        # selection can vary each epoch.
        cand_scores = rng.rand(len(cand_windows))
        cand_dets = kwimage.Detections(boxes=cand_windows, scores=cand_scores)
        # Non-max suppression is really similar to set-cover
        keep = cand_dets.non_max_supression(thresh=thresh)
        selection.extend(groupx[keep])

    selection = np.array(sorted(selection))
    return selection

def _descend(depth, nodes, jdxs):
    """
    Recursively descend the class tree starting at the coarsest level.

    At each level we decide if the items will take a category at this
    level of granularity or try to take a more fine-grained label.

    Args:
        depth (int): current depth in the tree
        nodes (list): set of sibling nodes at this level
        jdxs (ArrayLike): item indices that made it to this level
            (note: idxs are used for class indices)
    """
    # Look at the probabilities of each node at this level
    idxs = sorted(self.node_to_idx[node] for node in nodes)
    probs = flat_class_probs[jdxs][:, idxs]
    pred_conf, pred_cx = impl.max_argmax(probs, axis=1)
    pred_idxs = np.array(idxs)[pred_cx]

    # Keep descending on items above the threshold
    # TODO: is there a more intelligent way to do this?
    check_children = pred_conf > thresh
    if impl.any(check_children):
        # Check the children of these nodes
        check_jdxs = jdxs[check_children]
        check_idxs = pred_idxs[check_children]
        group_idxs, groupxs = kwarray.group_indices(check_idxs)
        for idx, groupx in zip(group_idxs, groupxs):
            node = self.idx_to_node[idx]
            children = list(self.graph.successors(node))
            if children:
                sub_jdxs = check_jdxs[groupx]
                # See if any fine-grained categories also have high
                # thresholds.
                sub_idxs, sub_conf = _descend(depth + 1, children, sub_jdxs)
                sub_flags = sub_conf > thresh
                # Overwrite coarse decisions with confident
                # fine-grained ones.
                fine_groupx = groupx[sub_flags]
                fine_idxs = sub_idxs[sub_flags]
                fine_conf = sub_conf[sub_flags]
                pred_conf[fine_groupx] = fine_conf
                pred_idxs[fine_groupx] = fine_idxs
    return pred_idxs, pred_conf

def labels_to_adjacency_matrix(labels, symmetric=True, diagonal=True):
    """
    Construct an adjacency matrix of matching instances where `labels[i]` is
    the "name" or "identity" of the i-th item.

    The resulting matrix will have values adjm[i, j] == 1 if the i-th and j-th
    item have the same label and 0 otherwise.

    Args:
        labels (ndarray): array of labels

        symmetric (bool, default=True): if False only the upper triangle of
            the matrix is populated.

        diagonal (bool, default=True): if False the diagonal is set to zero.

    Returns:
        ndarray: adjm : adjacency matrix

    Example:
        >>> labels = np.array([0, 0, 1, 1])
        >>> labels_to_adjacency_matrix(labels)
        array([[1, 1, 0, 0],
               [1, 1, 0, 0],
               [0, 0, 1, 1],
               [0, 0, 1, 1]], dtype=uint8)
        >>> labels_to_adjacency_matrix(labels, symmetric=False, diagonal=False)
        array([[0, 1, 0, 0],
               [0, 0, 0, 0],
               [0, 0, 0, 1],
               [0, 0, 0, 0]], dtype=uint8)
    """
    import kwarray
    n = len(labels)
    adjm = np.zeros((n, n), dtype=np.uint8)
    unique_labels, groupxs = kwarray.group_indices(labels)
    pos_idxs = [(i, j) for g in groupxs
                for (i, j) in it.combinations(sorted(g), 2)]
    pos_multi_idxs = tuple(zip(*pos_idxs))
    adjm[pos_multi_idxs] = 1
    if symmetric:
        adjm += adjm.T
    if diagonal:
        np.fill_diagonal(adjm, 1)
    return adjm

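# Illustrative aside (not part of the original function): for the default
# symmetric case with a filled diagonal, a dense broadcasting comparison
# produces the same matrix. This is a sketch for small n; the grouped
# construction above only writes the matching pairs rather than comparing
# every pair of labels.
import numpy as np

labels = np.array([0, 0, 1, 1])
adjm_dense = (labels[:, None] == labels[None, :]).astype(np.uint8)
# adjm_dense matches labels_to_adjacency_matrix(labels)
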
def groupby(self, by=None, *args, **kwargs):
    """
    Group rows by the value of a column. Unlike pandas this simply returns
    a zip object. To ensure compatibility call list on the result of groupby.

    Args:
        by (str): column name to group by
        *args: if specified, the dataframe is coerced to pandas
        **kwargs: if specified, the dataframe is coerced to pandas

    Example:
        >>> df_light = DataFrameLight._demodata(num=7)
        >>> res1 = list(df_light.groupby('bar'))
        >>> # xdoctest: +REQUIRES(module:pandas)
        >>> df_heavy = df_light.pandas()
        >>> res2 = list(df_heavy.groupby('bar'))
        >>> assert len(res1) == len(res2)
        >>> assert all([np.all(a[1] == b[1]) for a, b in zip(res1, res2)])

    Ignore:
        >>> self = DataFrameLight._demodata(num=1000)
        >>> args = ['cx']
        >>> self['cx'] = (np.random.rand(len(self)) * 10).astype(int)
        >>> # As expected, our custom restricted implementation is faster
        >>> # than pandas
        >>> ub.Timerit(100).call(lambda: dict(list(self.pandas().groupby('cx')))).print()
        >>> ub.Timerit(100).call(lambda: dict(self.groupby('cx'))).print()
    """
    if len(args) == 0 and len(kwargs) == 0:
        # In this special case we can be fast
        import kwarray
        unique, groupxs = kwarray.group_indices(self[by])
        groups = [self.take(idxs) for idxs in groupxs]
        return zip(unique, groups)
    else:
        # otherwise we need to use the slow method
        return self.pandas().groupby(by=by)

def _assign_confusion_vectors(true_dets, pred_dets, bg_weight=1.0,
                              iou_thresh=0.5, bg_cidx=-1, bias=0.0,
                              classes=None, compat='all', prioritize='iou',
                              ignore_classes='ignore', max_dets=None):
    """
    Create confusion vectors for detections by assigning to ground truth boxes

    Given predictions and truth for an image return (y_pred, y_true, y_score),
    which is suitable for sklearn classification metrics

    Args:
        true_dets (Detections): groundtruth with boxes, classes, and weights

        pred_dets (Detections): predictions with boxes, classes, and scores

        iou_thresh (float, default=0.5):
            bounding box overlap iou threshold required for assignment

        bias (float, default=0.0):
            for computing bounding box overlap, either 1 or 0

        gids (List[int], default=None):
            which subset of image ids to compute confusion metrics on. If
            not specified all images are used.

        compat (str, default='all'):
            can be ('ancestors' | 'mutex' | 'all'). determines which pred
            boxes are allowed to match which true boxes. If 'mutex', then
            pred boxes can only match true boxes of the same class. If
            'ancestors', then pred boxes can match true boxes that match or
            have a coarser label. If 'all', then any pred can match any
            true, regardless of its category label.

        prioritize (str, default='iou'):
            can be ('iou' | 'class' | 'correct') determines which box to
            assign to if multiple true boxes overlap a predicted box. If
            prioritize is iou, then the true box with maximum iou (above
            iou_thresh) will be chosen. If prioritize is class, then it
            will prefer matching a compatible class above a higher iou. If
            prioritize is correct, then ancestors of the true class are
            preferred over descendants of the true class, over unrelated
            classes.

        bg_cidx (int, default=-1):
            The index of the background class. The index used in the truth
            column when a predicted bounding box does not match any true
            bounding box.

        classes (List[str] | kwcoco.CategoryTree):
            mapping from class indices to class names. Can also contain
            class hierarchy information.

        ignore_classes (str | List[str]):
            class name(s) indicating ignore regions

        max_dets (int): maximum number of detections to consider

    TODO:
        - [ ] This is a bottleneck function. An implementation in
              C / C++ / Cython would likely improve the overall system.

        - [ ] Implement crowd truth. Allow multiple predictions to match any
              truth object marked as "iscrowd".

    Returns:
        dict: with relevant confusion vectors. The keys of this dict can be
            interpreted as columns of a data frame. The `txs` / `pxs` columns
            represent the indexes of the true / predicted annotations that
            were assigned as matching. Additionally each row also contains
            the true and predicted class index, the predicted score, the true
            weight and the iou of the true and predicted boxes. A `txs` value
            of -1 means that the predicted box was not assigned to a true
            annotation and a `pxs` value of -1 means that the true annotation
            was not assigned to any predicted annotation.

    Example:
        >>> # xdoctest: +REQUIRES(module:pandas)
        >>> import pandas as pd
        >>> import kwimage
        >>> # Given a raw numpy representation construct Detection wrappers
        >>> true_dets = kwimage.Detections(
        >>>     boxes=kwimage.Boxes(np.array([
        >>>         [ 0,  0, 10, 10], [10,  0, 20, 10],
        >>>         [10,  0, 20, 10], [20,  0, 30, 10]]), 'tlbr'),
        >>>     weights=np.array([1, 0, .9, 1]),
        >>>     class_idxs=np.array([0, 0, 1, 2]))
        >>> pred_dets = kwimage.Detections(
        >>>     boxes=kwimage.Boxes(np.array([
        >>>         [6, 2, 20, 10], [3,  2, 9, 7],
        >>>         [3,  9, 9, 7],  [3,  2, 9, 7],
        >>>         [2,  6, 7, 7],  [20,  0, 30, 10]]), 'tlbr'),
        >>>     scores=np.array([.5, .5, .5, .5, .5, .5]),
        >>>     class_idxs=np.array([0, 0, 1, 2, 0, 1]))
        >>> bg_weight = 1.0
        >>> compat = 'all'
        >>> iou_thresh = 0.5
        >>> bias = 0.0
        >>> import kwcoco
        >>> classes = kwcoco.CategoryTree.from_mutex(list(range(3)))
        >>> bg_cidx = -1
        >>> y = _assign_confusion_vectors(true_dets, pred_dets, bias=bias,
        >>>                               bg_weight=bg_weight,
        >>>                               iou_thresh=iou_thresh,
        >>>                               compat=compat)
        >>> y = pd.DataFrame(y)
        >>> print(y)  # xdoc: +IGNORE_WANT
           pred  true   score  weight     iou  txs  pxs
        0     1     2  0.5000  1.0000  1.0000    3    5
        1     0    -1  0.5000  1.0000 -1.0000   -1    4
        2     2    -1  0.5000  1.0000 -1.0000   -1    3
        3     1    -1  0.5000  1.0000 -1.0000   -1    2
        4     0    -1  0.5000  1.0000 -1.0000   -1    1
        5     0     0  0.5000  0.0000  0.6061    1    0
        6    -1     0  0.0000  1.0000 -1.0000    0   -1
        7    -1     1  0.0000  0.9000 -1.0000    2   -1

    Ignore:
        from xinspect.dynamic_kwargs import get_func_kwargs
        globals().update(get_func_kwargs(_assign_confusion_vectors))

    Example:
        >>> # xdoctest: +REQUIRES(module:pandas)
        >>> import pandas as pd
        >>> from kwcoco.metrics import DetectionMetrics
        >>> dmet = DetectionMetrics.demo(nimgs=1, nclasses=8,
        >>>                              nboxes=(0, 20), n_fp=20,
        >>>                              box_noise=.2, cls_noise=.3)
        >>> classes = dmet.classes
        >>> gid = 0
        >>> true_dets = dmet.true_detections(gid)
        >>> pred_dets = dmet.pred_detections(gid)
        >>> y = _assign_confusion_vectors(true_dets, pred_dets,
        >>>                               classes=dmet.classes,
        >>>                               compat='all', prioritize='class')
        >>> y = pd.DataFrame(y)
        >>> print(y)  # xdoc: +IGNORE_WANT
        >>> y = _assign_confusion_vectors(true_dets, pred_dets,
        >>>                               classes=dmet.classes,
        >>>                               compat='ancestors', iou_thresh=.5)
        >>> y = pd.DataFrame(y)
        >>> print(y)  # xdoc: +IGNORE_WANT
    """
    import kwarray
    valid_compat_keys = {'ancestors', 'mutex', 'all'}
    if compat not in valid_compat_keys:
        raise KeyError(compat)
    if classes is None and compat == 'ancestors':
        compat = 'mutex'

    if compat == 'mutex':
        prioritize = 'iou'

    # Group true boxes by class
    # Keep track of which true boxes are unused / not assigned
    unique_tcxs, tgroupxs = kwarray.group_indices(true_dets.class_idxs)
    cx_to_txs = dict(zip(unique_tcxs, tgroupxs))

    unique_pcxs = np.array(sorted(set(pred_dets.class_idxs)))

    if classes is None:
        import kwcoco
        # Build a mutually exclusive category tree
        all_cxs = sorted(set(map(int, unique_pcxs)) |
                         set(map(int, unique_tcxs)))
        all_cxs = list(range(max(all_cxs) + 1))
        classes = kwcoco.CategoryTree.from_mutex(all_cxs)

    cx_to_ancestors = classes.idx_to_ancestor_idxs()

    if prioritize == 'iou':
        pdist_priority = None  # TODO: cleanup
    else:
        pdist_priority = _fast_pdist_priority(classes, prioritize)

    if compat == 'mutex':
        # assume classes are mutually exclusive if hierarchy is not given
        cx_to_matchable_cxs = {cx: [cx] for cx in unique_pcxs}
    elif compat == 'ancestors':
        cx_to_matchable_cxs = {
            cx: sorted([cx] + sorted(
                ub.take(classes.node_to_idx,
                        nx.ancestors(classes.graph, classes.idx_to_node[cx]))))
            for cx in unique_pcxs
        }
    elif compat == 'all':
        cx_to_matchable_cxs = {cx: unique_tcxs for cx in unique_pcxs}
    else:
        raise KeyError(compat)

    if compat == 'all':
        # In this case simply run the full pairwise iou
        common_true_idxs = np.arange(len(true_dets))
        cx_to_matchable_txs = {cx: common_true_idxs for cx in unique_pcxs}
        common_ious = pred_dets.boxes.ious(true_dets.boxes, bias=bias)
        # common_ious = pred_dets.boxes.ious(true_dets.boxes, impl='c', bias=bias)
        iou_lookup = dict(enumerate(common_ious))
    else:
        # For each pred-category find matchable true-indices
        cx_to_matchable_txs = {}
        for cx, matchable_cxs in cx_to_matchable_cxs.items():
            compat_txs = ub.dict_take(cx_to_txs, matchable_cxs, default=[])
            compat_txs = np.array(sorted(ub.flatten(compat_txs)), dtype=int)
            cx_to_matchable_txs[cx] = compat_txs

        # Batch up the IOU pre-computation between compatible truths / preds
        iou_lookup = {}
        unique_pred_cxs, pgroupxs = kwarray.group_indices(pred_dets.class_idxs)
        for cx, pred_idxs in zip(unique_pred_cxs, pgroupxs):
            true_idxs = cx_to_matchable_txs[cx]
            ious = pred_dets.boxes[pred_idxs].ious(
                true_dets.boxes[true_idxs], bias=bias)
            _px_to_iou = dict(zip(pred_idxs, ious))
            iou_lookup.update(_px_to_iou)

    iou_thresh_list = ([iou_thresh] if not ub.iterable(iou_thresh)
                       else iou_thresh)

    iou_thresh_to_y = {}
    for iou_thresh_ in iou_thresh_list:
        isvalid_lookup = {px: ious > iou_thresh_
                          for px, ious in iou_lookup.items()}

        y = _critical_loop(true_dets, pred_dets, iou_lookup, isvalid_lookup,
                           cx_to_matchable_txs, bg_weight, prioritize,
                           iou_thresh_, pdist_priority, cx_to_ancestors,
                           bg_cidx, ignore_classes=ignore_classes,
                           max_dets=max_dets)
        iou_thresh_to_y[iou_thresh_] = y

    if ub.iterable(iou_thresh):
        return iou_thresh_to_y
    else:
        return y

def _make_test_folds(self, X, y=None, groups=None):
    """
    Args:
        X (ndarray): data
        y (ndarray): labels
        groups (ndarray): groupids for items. Items with the same groupid
            must be placed in the same group.

    Returns:
        list: test_folds

    Example:
        >>> import kwarray
        >>> rng = kwarray.ensure_rng(0)
        >>> groups = [1, 1, 3, 4, 2, 2, 7, 8, 8]
        >>> y      = [1, 1, 1, 1, 2, 2, 2, 3, 3]
        >>> X = np.empty((len(y), 0))
        >>> self = StratifiedGroupKFold(random_state=rng, shuffle=True)
        >>> skf_list = list(self.split(X=X, y=y, groups=groups))
        ...
        >>> import ubelt as ub
        >>> print(ub.repr2(skf_list, nl=1, with_dtype=False))
        [
            (np.array([2, 3, 4, 5, 6]), np.array([0, 1, 7, 8])),
            (np.array([0, 1, 2, 7, 8]), np.array([3, 4, 5, 6])),
            (np.array([0, 1, 3, 4, 5, 6, 7, 8]), np.array([2])),
        ]
    """
    import kwarray
    with warnings.catch_warnings():
        warnings.filterwarnings('ignore', 'invalid value')
        n_splits = self.n_splits
        y = np.asarray(y)
        n_samples = y.shape[0]

        unique_y, y_inversed = np.unique(y, return_inverse=True)
        n_classes = max(unique_y) + 1
        unique_groups, group_idxs = kwarray.group_indices(groups)
        grouped_y = kwarray.apply_grouping(y, group_idxs)
        grouped_y_counts = np.array([
            np.bincount(y_, minlength=n_classes) for y_ in grouped_y])
        target_freq = grouped_y_counts.sum(axis=0)
        target_freq = target_freq.astype(float)
        target_ratio = target_freq / float(target_freq.sum())

        # Greedily choose the split assignment that minimizes the local
        # * squared differences in target from actual frequencies
        # * and best equalizes the number of items per fold
        # Distribute groups with most members first
        split_freq = np.zeros((n_splits, n_classes))
        # split_ratios = split_freq / split_freq.sum(axis=1)
        split_ratios = np.ones(split_freq.shape) / split_freq.shape[1]
        split_diffs = ((split_freq - target_ratio) ** 2).sum(axis=1)
        sortx = np.argsort(grouped_y_counts.sum(axis=1))[::-1]
        grouped_splitx = []

        # import ubelt as ub
        # print(ub.repr2(grouped_y_counts, nl=-1))
        # print('target_ratio = {!r}'.format(target_ratio))

        for count, group_idx in enumerate(sortx):
            # print('---------\n')
            group_freq = grouped_y_counts[group_idx]
            cand_freq = (split_freq + group_freq)
            cand_freq = cand_freq.astype(float)
            cand_ratio = cand_freq / cand_freq.sum(axis=1)[:, None]
            cand_diffs = ((cand_ratio - target_ratio) ** 2).sum(axis=1)
            # Compute loss
            losses = []
            # others = np.nan_to_num(split_diffs)
            other_diffs = np.array([
                sum(split_diffs[x + 1:]) + sum(split_diffs[:x])
                for x in range(n_splits)
            ])
            # penalize unbalanced splits
            ratio_loss = other_diffs + cand_diffs
            # penalize heavy splits
            freq_loss = split_freq.sum(axis=1)
            freq_loss = freq_loss.astype(float)
            freq_loss = freq_loss / freq_loss.sum()
            losses = ratio_loss + freq_loss
            # -------
            splitx = np.argmin(losses)
            # print('losses = %r, splitx=%r' % (losses, splitx))
            split_freq[splitx] = cand_freq[splitx]
            split_ratios[splitx] = cand_ratio[splitx]
            split_diffs[splitx] = cand_diffs[splitx]
            grouped_splitx.append(splitx)

        test_folds = np.empty(n_samples, dtype=int)
        for group_idx, splitx in zip(sortx, grouped_splitx):
            idxs = group_idxs[group_idx]
            test_folds[idxs] = splitx

    return test_folds

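# Illustrative aside (not part of the original method): a hedged sketch of
# the two kwarray primitives it relies on. group_indices maps each unique
# group id to the item positions with that id, and apply_grouping gathers the
# labels for each of those groups, which is what grouped_y_counts is built
# from. The data here mirrors the docstring example above.
import numpy as np
import kwarray

groups = np.array([1, 1, 3, 4, 2, 2, 7, 8, 8])
y = np.array([1, 1, 1, 1, 2, 2, 2, 3, 3])

unique_groups, group_idxs = kwarray.group_indices(groups)
grouped_y = kwarray.apply_grouping(y, group_idxs)
# unique_groups -> array([1, 2, 3, 4, 7, 8])
# grouped_y     -> [array([1, 1]), array([2, 2]), array([1]), array([1]),
#                   array([2]), array([3, 3])]
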
def draw_points(xy, color='blue', class_idxs=None, classes=None, ax=None,
                alpha=None, radius=1, **kwargs):
    """
    Args:
        xy (ndarray): of points.

    Example:
        >>> from kwplot.mpl_draw import *  # NOQA
        >>> import kwimage
        >>> xy = kwimage.Points.random(10).xy
        >>> draw_points(xy, radius=0.01)
        >>> draw_points(xy, class_idxs=np.random.randint(0, 3, 10),
        >>>             radius=0.01, classes=['a', 'b', 'c'], color='classes')

    Ignore:
        >>> import kwplot
        >>> kwplot.autompl()
    """
    import kwimage
    import matplotlib as mpl
    from matplotlib import pyplot as plt
    if ax is None:
        ax = plt.gca()

    xy = xy.reshape(-1, 2)

    # More grouped patches == more efficient runtime
    if alpha is None:
        alpha = [1.0] * len(xy)
    elif not ub.iterable(alpha):
        alpha = [alpha] * len(xy)

    if color == 'distinct':
        colors = kwimage.Color.distinct(len(alpha))
    elif color == 'classes':
        # TODO: read colors from categories if they exist
        if class_idxs is None or classes is None:
            raise Exception(
                'cannot draw class colors without class_idxs and classes')
        try:
            cls_colors = kwimage.Color.distinct(len(classes))
        except KeyError:
            raise Exception(
                'cannot draw class colors without class_idxs and classes')
        import kwarray
        _keys, _vals = kwarray.group_indices(class_idxs)
        colors = list(ub.take(cls_colors, class_idxs))
    else:
        colors = [color] * len(alpha)

    ptcolors = [
        kwimage.Color(c, alpha=a).as01('rgba')
        for c, a in zip(colors, alpha)
    ]
    color_groups = ub.group_items(range(len(ptcolors)), ptcolors)

    circlekw = {
        'radius': radius,
        'fill': True,
        'ec': None,
    }
    if 'fc' in kwargs:
        import warnings
        warnings.warn('Warning: specifying fc to draw_points overrides '
                      'the color argument. Use color instead')
    circlekw.update(kwargs)
    fc = circlekw.pop('fc', None)  # hack

    collections = []
    for pcolor, idxs in color_groups.items():
        # hack for fc
        if fc is not None:
            pcolor = fc
        patches = [
            mpl.patches.Circle((x, y), fc=pcolor, **circlekw)
            for x, y in xy[idxs]
        ]
        col = mpl.collections.PatchCollection(patches, match_original=True)
        collections.append(col)
        ax.add_collection(col)
    return collections

def from_coco(KW18, coco_dset):
    import kwimage
    raw = {col: None for col in KW18.DEFAULT_COLUMNS}

    anns = coco_dset.dataset['annotations']
    boxes = kwimage.Boxes(np.array([ann['bbox'] for ann in anns]), 'xywh')
    tlbr = boxes.to_tlbr()
    cxywh = tlbr.to_cxywh()

    tl_x, tl_y, br_x, br_y = tlbr.data.T
    cx = cxywh.data[:, 0]
    cy = cxywh.data[:, 1]

    # Create track ids if not given
    track_ids = np.array([ann.get('track_id', np.nan) for ann in anns])
    missing = np.isnan(track_ids)
    valid_track_ids = track_ids[~missing]
    if len(valid_track_ids) == 0:
        next_track_id = 1
    else:
        next_track_id = valid_track_ids.max() + 1
    num_need = np.sum(missing)
    new_track_ids = np.arange(next_track_id, next_track_id + num_need)
    track_ids[missing] = new_track_ids
    track_ids = track_ids.astype(int)

    scores = np.array([ann.get('score', -1) for ann in anns])
    image_ids = np.array([ann['image_id'] for ann in anns])
    cids = np.array([ann.get('category_id', -1) for ann in anns])

    num = len(anns)
    raw['track_id'] = track_ids
    raw['track_length'] = np.full(num, fill_value=-1)
    raw['frame_number'] = image_ids

    raw['tracking_plane_loc_x'] = cx
    raw['tracking_plane_loc_y'] = cy

    raw['velocity_x'] = np.full(num, fill_value=0)
    raw['velocity_y'] = np.full(num, fill_value=0)

    raw['image_loc_x'] = cx
    raw['image_loc_y'] = cy

    raw['img_bbox_tl_x'] = tl_x
    raw['img_bbox_tl_y'] = tl_y
    raw['img_bbox_br_x'] = br_x
    raw['img_bbox_br_y'] = br_y
    raw['area'] = boxes.area.ravel()

    raw['world_loc_x'] = np.full(num, fill_value=-1)
    raw['world_loc_y'] = np.full(num, fill_value=-1)
    raw['world_loc_z'] = np.full(num, fill_value=-1)

    raw['timestamp'] = np.full(num, fill_value=-1)

    raw['confidence'] = scores
    raw['object_type_id'] = cids

    raw = {k: v for k, v in raw.items() if v is not None}

    # Fill in the length of each track by grouping rows with the same track id
    track_ids, groupxs = kwarray.group_indices(raw['track_id'])
    for groupx in groupxs:
        raw['track_length'][groupx] = len(groupx)

    self = KW18(raw)
    return self

def _entropy_refine(depth, nodes, jdxs):
    """
    Recursively descend the class tree starting at the coarsest level.

    At each level we decide if the items will take a category at this
    level of granularity or try to take a more fine-grained label.

    Args:
        depth (int): current depth in the tree
        nodes (list): set of sibling nodes at this level
        jdxs (ArrayLike): item indices that made it to this level
            (note: idxs are used for class indices)
    """
    if DEBUG:
        print(ub.color_text('* REFINE nodes={}'.format(nodes), 'blue'))
    # Look at the probabilities of each node at this level
    idxs = sorted(self.node_to_idx[node] for node in nodes)
    if ignore_class_idxs:
        ignore_nodes = set(ub.take(self.idx_to_node, ignore_class_idxs))
        idxs = sorted(set(idxs) - set(ignore_class_idxs))
        if len(idxs) == 0:
            raise ValueError('Cannot ignore all top-level classes')
    probs = flat_class_probs[jdxs][:, idxs]

    # Choose the highest probability category to predict at this level
    pred_conf, pred_cx = impl.max_argmax(probs, axis=1)
    pred_idxs = np.array(idxs)[impl.numpy(pred_cx)]

    # Group each example which predicted the same class at this level
    group_idxs, groupxs = kwarray.group_indices(pred_idxs)
    if DEBUG:
        groupxs = list(ub.take(groupxs, group_idxs.argsort()))
        group_idxs = group_idxs[group_idxs.argsort()]
        # print('groupxs = {!r}'.format(groupxs))
        # print('group_idxs = {!r}'.format(group_idxs))

    for idx, groupx in zip(group_idxs, groupxs):
        # Get the children of this node (idx)
        node = self.idx_to_node[idx]
        children = sorted(self.graph.successors(node))
        if ignore_class_idxs:
            children = sorted(set(children) - ignore_nodes)

        if children:
            # Check if it would be simple to refine the coarse category
            # current prediction into one of its finer-grained child
            # categories. Do this by considering the entropy at this
            # level if we replace this coarse-node with the child
            # fine-nodes. Then compare that entropy to what we would
            # get if we were perfectly uncertain about the child node
            # prediction (i.e. the worst case). If the entropy we get
            # is much lower than the worst case, then it is simple to
            # descend the tree and predict a finer-grained label.

            # Expand this node into all of its children
            child_idxs = set(self.node_to_idx[child] for child in children)

            # Get example indices (jdxs) assigned to category idx
            groupx.sort()
            group_jdxs = jdxs[groupx]

            # Expand this parent node, but keep the parent's siblings
            ommer_idxs = sorted(set(idxs) - {idx})  # Note: ommer = Aunt/Uncle
            expanded_idxs = sorted(ommer_idxs) + sorted(child_idxs)
            expanded_probs = flat_class_probs[group_jdxs][:, expanded_idxs]

            # Compute the entropy of the expanded distribution
            h_expanded = _criterion(expanded_probs)

            # Probability assigned to the parent
            p_parent = flat_class_probs[group_jdxs][:, idx:idx + 1]
            # Get the absolute probabilities assigned to the parent's siblings
            ommer_probs = flat_class_probs[group_jdxs][:, sorted(ommer_idxs)]

            # Compute the worst-case entropy after expanding the node.
            # In the worst case the parent probability is distributed
            # uniformly among all of its children
            c = len(children)
            child_probs_worst = impl.tile(p_parent / c, reps=[1, c])
            expanded_probs_worst = impl.hstack([ommer_probs, child_probs_worst])
            h_expanded_worst = _criterion(expanded_probs_worst)

            # Normalize the entropy we got by the worst case.
            # eps = float(np.finfo(np.float32).min)
            eps = 1e-30
            complexity_ratio = h_expanded / (h_expanded_worst + eps)
            simplicity_ratio = 1 - complexity_ratio

            # If the simplicity ratio is over a threshold refine the parent
            refine_flags = simplicity_ratio > thresh

            if always_refine_idxs is not None:
                if idx in always_refine_idxs:
                    refine_flags[:] = 1

            if len(child_idxs) == 1:
                # hack: always refine when there is one child, in this
                # case the simplicity measure will always be zero,
                # which is likely a problem with this criterion.
                refine_flags[:] = 1

            refine_flags = kwarray.ArrayAPI.numpy(refine_flags).astype(bool)

            if DEBUG:
                print('-----------')
                print('idx = {!r}'.format(idx))
                print('node = {!r}'.format(self.idx_to_node[idx]))
                print('ommer_idxs = {!r}'.format(ommer_idxs))
                print('ommer_nodes = {!r}'.format(
                    list(ub.take(self.idx_to_node, ommer_idxs))))
                print('depth = {!r}'.format(depth))
                import pandas as pd
                print('expanded_probs =\n{}'.format(
                    ub.repr2(expanded_probs, precision=2, with_dtype=0,
                             supress_small=True)))
                df = pd.DataFrame({
                    'h': h_expanded,
                    'h_worst': h_expanded_worst,
                    'c_ratio': complexity_ratio,
                    's_ratio': simplicity_ratio,
                    'flags': refine_flags.astype(np.uint8)
                })
                print(df)
                print('-----------')

            if np.any(refine_flags):
                refine_jdxs = group_jdxs[refine_flags]
                refine_idxs, refine_conf = _entropy_refine(
                    depth + 1, children, refine_jdxs)
                # Overwrite coarse decisions with refined decisions.
                refine_groupx = groupx[refine_flags]
                pred_idxs[refine_groupx] = refine_idxs
                pred_conf[refine_groupx] = refine_conf
    return pred_idxs, pred_conf

def main():
    """
    Say we have a process that produces an assignment between true detections
    within images and some set of predictions.
    """
    import numpy as np
    import ubelt as ub

    # Create demo detection metrics
    from kwcoco.metrics import DetectionMetrics
    dmet = DetectionMetrics.demo(
        nimgs=1000, nboxes=(0, 10), n_fp=(0, 10), n_fn=(0, 10))

    # We might have some sort of mapping between images and the predicted and
    # true boxes (note gid means imaGe id).
    gid_to_true = dmet.gid_to_true_dets
    gid_to_pred = dmet.gid_to_pred_dets
    print('gid_to_true = {}'.format(str(gid_to_true)[0:100] + ' ...'))
    print('gid_to_pred = {}'.format(str(gid_to_pred)[0:100] + ' ...'))
    """
    gid_to_true = {0: <Detections(5) at 0x7fe08c335a10>, 1: <Detections(5) at 0x7fe08c3359d0>, 2: <Detections(8) at 0x ...
    gid_to_pred = {0: <Detections(2) at 0x7fe08c335990>, 1: <Detections(6) at 0x7fe08c335dd0>, 2: <Detections(13) at 0 ...
    """

    # Each detection might have data like this
    print('gid_to_true[0].data = {}'.format(ub.repr2(gid_to_true[0].data, nl=1)))
    """
    gid_to_true[0].data = {
        'boxes': <Boxes(cxywh,
                     array([[74.07547  , 61.581673 , 24.438194 , 47.287003 ],
                            [28.509544 , 26.718906 ,  3.487833 , 43.095215 ],
                            [60.247677 , 65.802795 , 42.938393 , 36.610165 ],
                            [35.281883 , 80.26636  ,  4.0845375, 31.898323 ],
                            [30.69794  , 83.549904 , 34.32573  ,  7.9176483]], dtype=float32))>,
        'class_idxs': np.array([1, 1, 1, 1, 1], dtype=np.int64),
        'weights': np.array([1, 1, 1, 1, 1], dtype=np.int32),
    }
    """

    # We can compute an association between each box and get a flat table
    table = dmet.confusion_vectors().data

    # The table of values might look something like this.
    # Again, note the gids correspond to imaGe ids
    # txs correspond to indexes of true detections in that image
    # pxs correspond to indexes of predicted detections in that image
    # A -1 in an index value means the row is unassociated
    print(table.pandas()[['gid', 'txs', 'pxs']])
    """
          gid  txs  pxs
    0       0    3    0
    1       0    4    1
    2       0    0   -1
    3       0    1   -1
    4       0    2   -1
    ...   ...  ...  ...
    9881  999   -1    1
    9882  999   -1    3
    9883  999   -1    2
    9884  999    0   -1
    9885  999    1   -1
    """

    # Say we need to know some attribute (e.g. the bounding boxes) for all of
    # the true associations, but the table is already flattened (multiple
    # duplicate gids per row). How do we access that data?

    # We could use a list comprehension and look up the Detections object for
    # that image and then look up the index within the image:
    data_attr_v1 = np.array(
        [[-1] * 4 if tx == -1 else gid_to_true[gid].data['boxes'].data[tx]
         for gid, tx in zip(table['gid'], table['txs'])])

    # But that means we are accessing the __getitem__ of gid_to_true a lot.
    # Is there a better way?

    # Yes, we can group the table by image id.
    import kwarray
    data_attr_v2 = np.full((len(table), 4), fill_value=-1.0)
    unique_gids, groupxs = kwarray.group_indices(table['gid'])
    for gid, groupx in zip(unique_gids, groupxs):
        true_det = gid_to_true[gid]
        image_txs = table['txs'][groupx]
        valid_flags = image_txs != -1
        valid_txs = image_txs[valid_flags]
        valid_groupx = groupx[valid_flags]
        valid_attr = true_det.data['boxes'].data[valid_txs]
        data_attr_v2[valid_groupx] = valid_attr

    # We can see both codeblocks produce the same result, but which is faster?
    assert np.all(data_attr_v2 == data_attr_v1)

    import timerit
    ti = timerit.Timerit(50, bestof=10, verbose=2)

    for timer in ti.reset('list-comprehension'):
        with timer:
            data_attr_v1 = np.array(
                [[-1] * 4 if tx == -1 else gid_to_true[gid].data['boxes'].data[tx]
                 for gid, tx in zip(table['gid'], table['txs'])])

    for timer in ti.reset('grouping'):
        with timer:
            data_attr_v2 = np.full((len(table), 4), fill_value=-1.0)
            unique_gids, groupxs = kwarray.group_indices(table['gid'])
            for gid, groupx in zip(unique_gids, groupxs):
                true_det = gid_to_true[gid]
                image_txs = table['txs'][groupx]
                valid_flags = image_txs != -1
                valid_txs = image_txs[valid_flags]
                valid_groupx = groupx[valid_flags]
                valid_attr = true_det.data['boxes'].data[valid_txs]
                data_attr_v2[valid_groupx] = valid_attr

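# Illustrative aside (not part of the original script): the grouping trick
# can also be sketched with plain numpy. np.unique with return_inverse plus a
# stable argsort recovers the same "one index array per gid" structure that
# kwarray.group_indices returns, which is why the grouped loop only touches
# gid_to_true once per image instead of once per table row. The gids array
# here is made up for illustration.
import numpy as np

gids = np.array([7, 3, 7, 3, 3, 9])
unique_gids, inverse = np.unique(gids, return_inverse=True)
sortx = np.argsort(inverse, kind='stable')
boundaries = np.cumsum(np.bincount(inverse))[:-1]
groupxs = np.split(sortx, boundaries)
# unique_gids -> array([3, 7, 9])
# groupxs     -> [array([1, 3, 4]), array([0, 2]), array([5])]
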
def draw(self, color='blue', ax=None, alpha=None, radius=1, **kwargs):
    """
    TODO: can use kwplot.draw_points

    Example:
        >>> # xdoc: +REQUIRES(module:kwplot)
        >>> from kwimage.structs.points import *  # NOQA
        >>> pts = Points.random(10)
        >>> # xdoc: +REQUIRES(--show)
        >>> pts.draw(radius=0.01)

        >>> from kwimage.structs.points import *  # NOQA
        >>> self = Points.random(10, classes=['a', 'b', 'c'])
        >>> self.draw(radius=0.01, color='classes')
    """
    import kwimage
    import matplotlib as mpl
    from matplotlib import pyplot as plt
    if ax is None:
        ax = plt.gca()
    xy = self.data['xy'].data.reshape(-1, 2)

    # More grouped patches == more efficient runtime
    if alpha is None:
        alpha = [1.0] * len(xy)
    elif not ub.iterable(alpha):
        alpha = [alpha] * len(xy)

    if color == 'distinct':
        colors = kwimage.Color.distinct(len(alpha))
    elif color == 'classes':
        # TODO: read colors from categories if they exist
        try:
            class_idxs = self.data['class_idxs']
            cls_colors = kwimage.Color.distinct(len(self.meta['classes']))
        except KeyError:
            raise Exception(
                'cannot draw class colors without class_idxs and classes')
        _keys, _vals = kwarray.group_indices(class_idxs)
        colors = list(ub.take(cls_colors, class_idxs))
    else:
        colors = [color] * len(alpha)

    ptcolors = [kwimage.Color(c, alpha=a).as01('rgba')
                for c, a in zip(colors, alpha)]
    color_groups = ub.group_items(range(len(ptcolors)), ptcolors)

    circlekw = {
        'radius': radius,
        'fill': True,
        'ec': None,
    }
    if 'fc' in kwargs:
        warnings.warn(
            'Warning: specifying fc to Points.draw overrides '
            'the color argument. Use color instead')
    circlekw.update(kwargs)
    fc = circlekw.pop('fc', None)  # hack

    collections = []
    for pcolor, idxs in color_groups.items():
        # hack for fc
        if fc is not None:
            pcolor = fc
        patches = [
            mpl.patches.Circle((x, y), fc=pcolor, **circlekw)
            for x, y in xy[idxs]
        ]
        col = mpl.collections.PatchCollection(patches, match_original=True)
        collections.append(col)
        ax.add_collection(col)
    return collections

def draw_on(self, image, color='white', radius=None, copy=False):
    """
    CommandLine:
        xdoctest -m ~/code/kwimage/kwimage/structs/points.py Points.draw_on --show

    Example:
        >>> # xdoc: +REQUIRES(module:kwplot)
        >>> from kwimage.structs.points import *  # NOQA
        >>> s = 128
        >>> image = np.zeros((s, s))
        >>> self = Points.random(10).scale(s)
        >>> image = self.draw_on(image)
        >>> # xdoc: +REQUIRES(--show)
        >>> import kwplot
        >>> kwplot.figure(fnum=1, doclf=True)
        >>> kwplot.autompl()
        >>> kwplot.imshow(image)
        >>> self.draw(radius=3, alpha=.5)
        >>> kwplot.show_if_requested()

    Example:
        >>> # xdoc: +REQUIRES(module:kwplot)
        >>> from kwimage.structs.points import *  # NOQA
        >>> s = 128
        >>> image = np.zeros((s, s))
        >>> self = Points.random(10).scale(s)
        >>> image = self.draw_on(image, radius=3, color='distinct')
        >>> # xdoc: +REQUIRES(--show)
        >>> import kwplot
        >>> kwplot.figure(fnum=1, doclf=True)
        >>> kwplot.autompl()
        >>> kwplot.imshow(image)
        >>> self.draw(radius=3, alpha=.5, color='classes')
        >>> kwplot.show_if_requested()

    Example:
        >>> import kwimage
        >>> s = 32
        >>> self = kwimage.Points.random(10).scale(s)
        >>> color = 'blue'
        >>> # Test drawing on all channel + dtype combinations
        >>> im3 = np.zeros((s, s, 3), dtype=np.float32)
        >>> im_chans = {
        >>>     'im3': im3,
        >>>     'im1': kwimage.convert_colorspace(im3, 'rgb', 'gray'),
        >>>     'im4': kwimage.convert_colorspace(im3, 'rgb', 'rgba'),
        >>> }
        >>> inputs = {}
        >>> for k, im in im_chans.items():
        >>>     inputs[k + '_01'] = (kwimage.ensure_float01(im.copy()), {'radius': None})
        >>>     inputs[k + '_255'] = (kwimage.ensure_uint255(im.copy()), {'radius': None})
        >>> outputs = {}
        >>> for k, v in inputs.items():
        >>>     im, kw = v
        >>>     outputs[k] = self.draw_on(im, color=color, **kw)
        >>> # xdoc: +REQUIRES(--show)
        >>> import kwplot
        >>> kwplot.figure(fnum=2, doclf=True)
        >>> kwplot.autompl()
        >>> pnum_ = kwplot.PlotNums(nCols=2, nRows=len(inputs))
        >>> for k in inputs.keys():
        >>>     kwplot.imshow(inputs[k][0], fnum=2, pnum=pnum_(), title=k)
        >>>     kwplot.imshow(outputs[k], fnum=2, pnum=pnum_(), title=k)
        >>> kwplot.show_if_requested()
    """
    import kwimage
    dtype_fixer = _generic._consistent_dtype_fixer(image)

    if radius is None:
        if color == 'distinct':
            raise NotImplementedError
        image = kwimage.atleast_3channels(image)
        image = kwimage.ensure_float01(image, copy=copy)
        # value = kwimage.Color(color).as01()
        value = kwimage.Color(color)._forimage(image)
        image = self.data['xy'].fill(
            image, value, coord_axes=[1, 0], interp='bilinear')
    else:
        import cv2
        image = kwimage.atleast_3channels(image, copy=copy)
        # note: ellipse has a different return type (UMat) and does not
        # work inplace if the input is not contiguous.
        image = np.ascontiguousarray(image)

        xy_pts = self.data['xy'].data.reshape(-1, 2)

        if color == 'distinct':
            colors = kwimage.Color.distinct(len(xy_pts))
        elif color == 'classes':
            # TODO: read colors from categories if they exist
            class_idxs = self.data['class_idxs']
            _keys, _vals = kwarray.group_indices(class_idxs)
            cls_colors = kwimage.Color.distinct(len(self.meta['classes']))
            colors = list(ub.take(cls_colors, class_idxs))
            colors = [kwimage.Color(c)._forimage(image) for c in colors]
            # if image.dtype.kind == 'f':
            #     colors = [kwimage.Color(c).as01() for c in colors]
            # else:
            #     colors = [kwimage.Color(c).as255() for c in colors]
        else:
            value = kwimage.Color(color)._forimage(image)
            colors = [value] * len(xy_pts)

        # image = kwimage.ensure_float01(image)
        for xy, color_ in zip(xy_pts, colors):
            # center = tuple(map(int, xy.tolist()))
            center = tuple(xy.tolist())
            axes = (radius / 2, radius / 2)
            center = tuple(map(int, center))
            axes = tuple(map(int, axes))
            # print('center = {!r}'.format(center))
            # print('axes = {!r}'.format(axes))
            cv2.ellipse(image, center, axes, angle=0.0, startAngle=0.0,
                        endAngle=360.0, color=color_, thickness=-1)

    image = dtype_fixer(image, copy=False)
    return image