def draw_instance_contours(img, gti, gtl=None, thickness=2, alpha=1, color=None):
    """
    img = util.imread('/home/joncrall/remote/aretha/data/UrbanMapper3D/training/TAM_Tile_003_RGB.tif')
    gti = util.imread(ub.truepath('~/remote/aretha/data/UrbanMapper3D/training/TAM_Tile_003_GTI.tif'))
    gtl = util.imread('/home/joncrall/remote/aretha/data/UrbanMapper3D/training/TAM_Tile_003_GTL.tif')
    thickness = 2
    alpha = 1
    """
    import cv2

    grouped_contours = instance_contours(gti)

    if gtl is not None:
        unknown_labels = set(np.unique(gti[gtl == 65]))
    else:
        unknown_labels = set()

    known_labels = set(grouped_contours.keys()) - unknown_labels

    BGR_GREEN = (0, 255, 0)
    BGR_BLUE = (255, 0, 0)
    img = util.ensure_float01(img)
    base = np.ascontiguousarray((255 * img[:, :, 0:3]).astype(np.uint8))

    # Draw an image to overlay first
    draw_img = np.zeros(base.shape, dtype=np.uint8)
    if color is None:
        color = BGR_GREEN

    known_contours = np.array(list(ub.flatten(list(ub.take(grouped_contours, known_labels)))))
    draw_img = cv2.drawContours(
        image=draw_img, contours=known_contours,
        contourIdx=-1, color=color, thickness=thickness)

    if unknown_labels:
        unknown_contours = np.array(list(ub.flatten(ub.take(grouped_contours, unknown_labels))))
        draw_img = cv2.drawContours(
            image=draw_img, contours=unknown_contours,
            contourIdx=-1, color=BGR_BLUE, thickness=thickness)

    contour_overlay = util.ensure_alpha_channel(draw_img, alpha=0)
    contour_overlay.T[3].T[draw_img.sum(axis=2) > 0] = alpha

    # zero out the edges to avoid visualization errors
    contour_overlay[0:thickness, :, :] = 0
    contour_overlay[-thickness:, :, :] = 0
    contour_overlay[:, 0:thickness, :] = 0
    contour_overlay[:, -thickness:, :] = 0

    # img1 = contour_overlay
    # img2 = base
    # from clab import profiler
    # _ = profiler.profile_onthefly(util.overlay_alpha_images)(contour_overlay, base, keepalpha=False)

    draw_img = util.overlay_alpha_images(contour_overlay, base, keepalpha=False)
    draw_img = np.ascontiguousarray((255 * draw_img[:, :, 0:3]).astype(np.uint8))
    return draw_img
def unique(self, normalize=False):
    """
    Returns the unique channels that will need to be given or loaded
    """
    # NOTE: the branches were previously swapped; normalize=True should
    # return the alias-expanded codes.
    if normalize:
        return set(ub.flatten(self.normalize().values()))
    else:
        return set(ub.flatten(self.parse().values()))
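# A minimal usage sketch (hypothetical values; assumes this is the ChannelSpec
# class defined in this module and that 'rgb' is registered in its alias table
# as 'r|g|b'):
#
#   >>> self = ChannelSpec('b1|b2|b3|rgb')
#   >>> sorted(self.unique(normalize=False))   # raw codes as given
#   ['b1', 'b2', 'b3', 'rgb']
#   >>> sorted(self.unique(normalize=True))    # aliases expanded
#   ['b', 'b1', 'b2', 'b3', 'g', 'r']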
def demodata_infr2(defaultdb='PZ_MTEST'):
    import ibeis
    from graphid.core.annot_inference import AnnotInference
    defaultdb = 'PZ_MTEST'
    ibs = ibeis.opendb(defaultdb=defaultdb)
    annots = ibs.annots()
    names = list(annots.group_items(annots.nids).values())[0:20]

    def dummy_phi(c, n):
        x = np.arange(n)
        phi = c * x / (c * x + 1)
        phi = phi / phi.sum()
        phi = np.diff(phi)
        return phi

    phis = {c: dummy_phi(c, 30) for c in range(1, 4)}
    aids = list(ub.flatten(names))
    infr = AnnotInference(ibs, aids, autoinit=True)
    infr.init_termination_criteria(phis)
    infr.init_refresh_criteria()

    # Partially review
    n1, n2, n3, n4 = names[0:4]
    for name in names[4:]:
        for a, b in ub.iter_window(name.aids, 2):
            infr.add_feedback((a, b), POSTV)

    for name1, name2 in it.combinations(names[4:], 2):
        infr.add_feedback((name1.aids[0], name2.aids[0]), NEGTV)
    return infr
def pack(self):
    """
    Pack all of the data in this container into a single tensor.

    Returns:
        Tensor: packed data, padded with ``self.padding_value`` if
            ``self.stack`` is False.

    Example:
        >>> self = BatchContainer.demo('img')
        >>> print(self.pack())
        >>> self = BatchContainer.demo('box')
        >>> print(self.pack())
        >>> self = BatchContainer.demo('labels')
        >>> print(self.pack())
    """
    if self.stack:
        # Should be a straightforward concatenation
        packed = torch.cat(self.data, dim=0)
    else:
        # Need to account for padding values
        from netharn.data.collate import padded_collate
        inbatch = list(ub.flatten(self.data))
        packed = padded_collate(inbatch, fill_value=self.padding_value)
    return packed
def decollate_batch(batch):
    """
    Break up a collated batch of BatchContainers back into ItemContainers

    Example:
        >>> bsize = 5
        >>> batch_items = [
        >>>     {
        >>>         'im': ItemContainer.demo('img'),
        >>>         'label': ItemContainer.demo('labels'),
        >>>         'box': ItemContainer.demo('box'),
        >>>     }
        >>>     for _ in range(bsize)
        >>> ]
        >>> batch = container_collate(batch_items, num_devices=2)
        >>> decollated = decollate_batch(batch)
        >>> assert len(decollated) == len(batch_items)
        >>> assert (decollated[0]['im'].data == batch_items[0]['im'].data).all()
    """
    import ubelt as ub
    from kwcoco.util.util_json import IndexableWalker
    walker = IndexableWalker(batch)
    decollated_dict = ub.AutoDict()
    decollated_walker = IndexableWalker(decollated_dict)
    for path, batch_val in walker:
        if isinstance(batch_val, BatchContainer):
            for bx, item_val in enumerate(ub.flatten(batch_val.data)):
                decollated_walker[[bx] + path] = ItemContainer(item_val)
    decollated = list(decollated_dict.to_dict().values())
    return decollated
def graph_info(graph, ignore=None, stats=False, verbose=False):
    from graphid import util
    import pandas as pd

    node_dict = graph.nodes
    node_attrs = list(node_dict.values())
    edge_attrs = list(take_column(graph.edges(data=True), 2))

    if stats:
        node_df = pd.DataFrame(node_attrs)
        edge_df = pd.DataFrame(edge_attrs)
        if ignore is not None:
            util.delete_dict_keys(node_df, ignore)
            util.delete_dict_keys(edge_df, ignore)
        # Not really histograms anymore
        try:
            node_attr_hist = node_df.describe().to_dict()
        except ValueError:
            node_attr_hist = {}
        try:
            edge_attr_hist = edge_df.describe().to_dict()
        except ValueError:
            edge_attr_hist = {}
        key_order = ['count', 'mean', 'std', 'min', '25%', '50%', '75%', 'max']
        node_attr_hist = ub.map_dict_vals(
            lambda x: util.order_dict_by(x, key_order), node_attr_hist)
        edge_attr_hist = ub.map_dict_vals(
            lambda x: util.order_dict_by(x, key_order), edge_attr_hist)
    else:
        node_attr_hist = ub.dict_hist(ub.flatten([attr.keys() for attr in node_attrs]))
        edge_attr_hist = ub.dict_hist(ub.flatten([attr.keys() for attr in edge_attrs]))
        if ignore is not None:
            util.delete_dict_keys(edge_attr_hist, ignore)
            util.delete_dict_keys(node_attr_hist, ignore)

    node_type_hist = ub.dict_hist(list(map(type, graph.nodes())))
    info_dict = ub.odict([
        ('directed', graph.is_directed()),
        ('multi', graph.is_multigraph()),
        ('num_nodes', len(graph)),
        ('num_edges', len(list(graph.edges()))),
        ('edge_attr_hist', util.sort_dict(edge_attr_hist)),
        ('node_attr_hist', util.sort_dict(node_attr_hist)),
        ('node_type_hist', util.sort_dict(node_type_hist)),
        ('graph_attrs', graph.graph),
        ('graph_name', graph.name),
    ])
    if verbose:
        print(ub.repr2(info_dict))
    return info_dict
def paths(self, cwd=None, recursive=False):
    groups = (p.paths(cwd=cwd, recursive=recursive) for p in self.patterns)
    if self.predicate in {any}:
        yield from ub.unique(ub.flatten(groups))
    elif self.predicate in {all}:
        yield from set.intersection(*map(set, groups))
    else:
        raise NotImplementedError
def __init__(self, pblm, pccs, dim=224, augment=True):
    chip_config = {
        # preserve aspect ratio, use letterbox to fit into network
        'resize_dim': 'maxwh',
        'dim_size': dim,
        # 'resize_dim': 'wh',
        # 'dim_size': (dim, dim)
    }
    self.pccs = pccs
    all_aids = list(ub.flatten(pccs))
    all_fpaths = pblm.infr.ibs.depc_annot.get(
        'chips', all_aids, read_extern=False, colnames='img',
        config=chip_config)

    self.aid_to_fpath = dict(zip(all_aids, all_fpaths))

    # self.multitons_pccs = [pcc for pcc in pccs if len(pcc) > 1]
    self.pos_pairs = []

    # SAMPLE ALL POSSIBLE POS COMBINATIONS AND IGNORE INCOMPARABLE
    self.infr = pblm.infr
    # TODO: each sample should be weighted depending on n_aids in its pcc
    for pcc in pccs:
        if len(pcc) >= 2:
            # ut.random_combinations
            edges = np.array(
                list(it.starmap(self.infr.e_, it.combinations(pcc, 2))))
            is_comparable = self.is_comparable(edges)
            pos_edges = edges[is_comparable]
            self.pos_pairs.extend(list(pos_edges))

    rng = nh.util.ensure_rng(self.SEED, 'numpy')
    self.pyrng = nh.util.ensure_rng(self.SEED + 1, 'python')
    self.rng = rng

    # Be good data citizens, construct a dataset identifier
    depends = [
        sorted(map(sorted, self.pccs)),
    ]
    hashid = ub.hash_data(depends)[:12]
    self.input_id = '{}-{}'.format(len(self), hashid)

    if augment:
        import imgaug.augmenters as iaa
        # NOTE: we are only using `self.augmenter` to make a hyper hashid
        # in __getitem__ we invoke transform explicitly for fine control
        self.hue = nh.data.transforms.HSVShift(hue=0.1, sat=1.5, val=1.5)
        self.crop = iaa.Crop(percent=(0, .2))
        self.flip = iaa.Fliplr(p=.5)
        self.augmenter = iaa.Sequential([self.hue, self.crop, self.flip])
    else:
        self.augmenter = None
    self.letterbox = nh.data.transforms.Resize(target_size=(dim, dim),
                                               mode='letterbox')
def demodata_mtest_infr(state='empty'):
    import ibeis
    from graphid.core.annot_inference import AnnotInference
    ibs = ibeis.opendb(db='PZ_MTEST')
    annots = ibs.annots()
    names = list(annots.group_items(annots.nids).values())
    util.shuffle(names, rng=321)
    test_aids = list(ub.flatten(names[1::2]))
    infr = AnnotInference(ibs, test_aids, autoinit=True)
    infr.reset(state=state)
    return infr
def rank_inventory(inventory):
    candidates = list(ub.flatten(list(pkmn.family(ancestors=False, node=True))
                                 for pkmn in inventory))

    groups = ub.group_items(candidates, key=lambda p: p.name)

    leages = {
        'master': {'max_cp': float('inf')},
        'ultra': {'max_cp': 2500},
        'great': {'max_cp': 1500},
        'little': {'max_cp': 500},
    }

    max_level = 45  # for XL candy
    # max_level = 40  # normal

    all_dfs = []

    for name, group in groups.items():
        print('\n\n------------\n\n')
        print('name = {!r}'.format(name))
        for leage_name, leage_filters in leages.items():
            max_cp = leage_filters['max_cp']
            print('')
            print(' ========== ')
            print(' --- {} in {} --- '.format(name, leage_name))
            not_eligible = [p for p in group if p.cp is not None and p.cp > max_cp]
            eligible = [p for p in group if p.cp is None or p.cp <= max_cp]
            print('not_eligible = {!r}'.format(not_eligible))
            if len(eligible) > 0:
                first = ub.peek(eligible)
                have_ivs = eligible
                df = first.leage_rankings_for(have_ivs, max_cp=max_cp,
                                              max_level=max_level)
                all_dfs.append(df)
            else:
                print('none eligible')

    # Print out the best ranks for each set of IVs over all possible forms
    # (lets you know which ones can be transferred safely)
    iv_to_rank = ub.ddict(list)
    for df in all_dfs:
        if df is not None:
            df = df.set_index(['iva', 'ivd', 'ivs'])
            for iv, rank in zip(df.index, df['rank']):
                iv_to_rank[iv].append(rank)

    iv_to_best_rank = ub.map_vals(sorted, iv_to_rank)
    iv_to_best_rank = ub.sorted_vals(iv_to_best_rank)
    print('iv_to_best_rank = {}'.format(ub.repr2(iv_to_best_rank, nl=1, align=':')))
def demodata_tarjan_bridge():
    """
    Example:
        >>> from graphid import util
        >>> G = demodata_tarjan_bridge()
        >>> # xdoc: +REQUIRES(--show)
        >>> util.show_nx(G)
        >>> util.show_if_requested()
    """
    # define 2-connected components and bridges
    cc2 = [(1, 2, 4, 3, 1, 4), (5, 6, 7, 5), (8, 9, 10, 8),
           (17, 18, 16, 15, 17), (11, 12, 14, 13, 11, 14)]
    bridges = [(4, 8), (3, 5), (3, 17)]
    G = nx.Graph(ub.flatten(ub.iter_window(path, 2) for path in cc2 + bridges))
    return G
def normalize(self):
    """
    Replace aliases with explicit single-band-per-code specs

    Example:
        >>> self = ChannelSpec('b1|b2|b3|rgb')
        >>> self.normalize()
        >>> list(self.keys())
    """
    new_parsed = {}
    for k1, v1 in self.parse().items():
        norm_vals = list(
            ub.flatten(self._alias_lut.get(v, v).split('|') for v in v1))
        norm_key = '|'.join(norm_vals)
        new_parsed[norm_key] = norm_vals
    new_spec = ','.join(list(new_parsed.keys()))
    normed = ChannelSpec(new_spec, parsed=new_parsed)
    return normed
def __getitem__(self, index):
    # Choose a label for each item in the batch
    if not hasattr(self.rng, 'choices'):
        # python 3.5 support
        chosen_labels = [
            self.rng.choice(self.labels)
            for _ in range(self.batch_size)
        ]
    else:
        chosen_labels = self.rng.choices(self.labels, k=self.batch_size)
    # Count the number of items we need for each label
    label_freq = ub.dict_hist(chosen_labels)
    # Sample those indices
    batch_idxs = list(
        ub.flatten([
            self.label_to_subsampler[label].sample(num)
            for label, num in label_freq.items()
        ]))
    return batch_idxs
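# A standalone sketch of the python-3.5 fallback used above: random.Random
# only grew `choices` in Python 3.6, so older interpreters fall back to
# repeated `choice` calls. Both branches draw `batch_size` labels with
# replacement (the helper name here is illustrative, not part of the sampler).
import random

def _choose_labels(rng, labels, batch_size):
    if hasattr(rng, 'choices'):
        return rng.choices(labels, k=batch_size)
    return [rng.choice(labels) for _ in range(batch_size)]

_rng = random.Random(0)
print(_choose_labels(_rng, ['cat', 'dog', 'fish'], batch_size=4))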
def _balance_report(self, limit=None):
    # Print the epoch / item label frequency per epoch
    label_sequence = []
    index_sequence = []
    if limit is None:
        limit = self.num_batches
    for item_indices, _ in zip(self, range(limit)):
        item_indices = np.array(item_indices)
        item_labels = list(
            ub.flatten(ub.take(self.index_to_labels, item_indices)))
        index_sequence.extend(item_indices)
        label_sequence.extend(ub.unique(item_labels))
    label_hist = ub.dict_hist(label_sequence)
    index_hist = ub.dict_hist(index_sequence)
    label_hist = ub.sorted_vals(label_hist, reverse=True)
    index_hist = ub.sorted_vals(index_hist, reverse=True)
    index_hist = ub.dict_subset(index_hist, list(index_hist.keys())[0:5])
    print('label_hist = {}'.format(ub.repr2(label_hist, nl=1)))
    print('index_hist = {}'.format(ub.repr2(index_hist, nl=1)))
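# A small ubelt-only sketch of the counting pattern in the report above: build
# a frequency histogram and sort it by count (the toy label list is made up).
import ubelt as ub

_label_sequence = ['cat', 'dog', 'cat', 'cat', 'fish']
_label_hist = ub.dict_hist(_label_sequence)            # {'cat': 3, 'dog': 1, 'fish': 1}
_label_hist = ub.sorted_vals(_label_hist, reverse=True)
print(ub.repr2(_label_hist, nl=1))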
def take_percentile_parts(arr, front=None, mid=None, back=None):
    """
    Take parts from front, back, or middle of a list

    Example:
        >>> arr = list(range(20))
        >>> front = 3
        >>> mid = 3
        >>> back = 3
        >>> result = take_percentile_parts(arr, front, mid, back)
        >>> print(result)
        [0, 1, 2, 9, 10, 11, 17, 18, 19]
    """
    slices = []
    if front:
        slices += [snapped_slice(len(arr), 0.0, front)]
    if mid:
        slices += [snapped_slice(len(arr), 0.5, mid)]
    if back:
        slices += [snapped_slice(len(arr), 1.0, back)]
    parts = list(ub.flatten([arr[sl] for sl in slices]))
    return parts
def sort_entries(bibman):
    def freq_group(items, groupids):
        groups = ub.group_items(items, groupids)
        hist = ub.map_vals(len, groups)
        for k in ub.argsort(hist):
            yield groups[k]

    high_level_alias = {
        'incollection': 'book',
        'conference': 'confjourn',
        'journal': 'confjourn',
        'online-journal': 'confjourn',
    }
    sorted_entries = []
    entries = list(bibman.cleaned.values())
    groups = [
        high_level_alias.get(entry['pub_type'], entry['pub_type'])
        for entry in entries
    ]
    entry_groups = freq_group(entries, groups)
    for group in entry_groups:
        subids = [entry['ENTRYTYPE'] for entry in group]
        for subgroup in freq_group(group, subids):
            subsubids = [entry['pub_full'] for entry in subgroup]
            # Group publications, and then sort conferences by max date
            pub_groups = []
            pub_maxdates = []
            for ssg in freq_group(subgroup, subsubids):
                sssid = [(entry['date']) for entry in ssg]
                ssg2 = list(ub.take(ssg, ub.argsort(sssid)))
                pub_groups.append(ssg2)
                pub_maxdates.append(ssg2[-1]['date'])
            subgroup2 = list(
                ub.flatten(ut.sortedby2(pub_groups, pub_maxdates)))
            sorted_entries.extend(subgroup2)
    new_entries = ub.odict([(e['ID'], e) for e in sorted_entries])
    [e['pub_type'] for e in sorted_entries]
    bibman.cleaned = new_entries
def _simple_sample(self):
    # Simple strategy for creating examples
    infr = self.infr
    self._triple_pool = []
    for aid1, aid2 in self.infr.pos_graph.edges():
        cc = infr.pos_graph.connected_to(aid1)
        neg_edges = graphid.util.edges_outgoing(self.infr.neg_graph, [aid1, aid2])
        neg_aids = []
        for edge in neg_edges:
            neg_aids.append(set(edge) - {aid1, aid2})
        neg_aids = list(ub.flatten(neg_aids))
        if neg_aids:
            aid3 = random.choice(neg_aids)
        else:
            cc2 = next(infr.find_non_neg_redun_pccs(cc=cc, k=1))[1]
            aid3 = random.choice(list(cc2))
        # Check that we actually have the data
        if (aid1 in self.coco_dset.anns and
                aid2 in self.coco_dset.anns and
                aid3 in self.coco_dset.anns):
            self._triple_pool.append((aid1, aid2, aid3))
def __getitem__(self, index):
    if not self.shuffle:
        import kwarray
        self.rng = kwarray.ensure_rng(index, api='python')
    sub_pccs = self.rng.sample(self.multitons, self.p)

    groups = []
    for sub_pcc in sub_pccs:
        aids = self.rng.sample(sub_pcc, min(self.k, len(sub_pcc)))
        groups.append(aids)

    nhave = sum(map(len, groups))
    while nhave < self.batch_size:
        sub_pcc = self.rng.choice(self.pccs)
        aids = self.rng.sample(sub_pcc, min(self.k, len(sub_pcc)))
        groups.append(aids)
        nhave = sum(map(len, groups))
        overshoot = nhave - self.batch_size
        if overshoot:
            groups[-1] = groups[-1][:-overshoot]

    indices = sorted(ub.flatten(groups))
    return indices
def _assign_confusion_vectors(true_dets, pred_dets, bg_weight=1.0, iou_thresh=0.5, bg_cidx=-1, bias=0.0, classes=None, compat='all', prioritize='iou', ignore_classes='ignore', max_dets=None): """ Create confusion vectors for detections by assigning to ground true boxes Given predictions and truth for an image return (y_pred, y_true, y_score), which is suitable for sklearn classification metrics Args: true_dets (Detections): groundtruth with boxes, classes, and weights pred_dets (Detections): predictions with boxes, classes, and scores iou_thresh (float, default=0.5): bounding box overlap iou threshold required for assignment bias (float, default=0.0): for computing bounding box overlap, either 1 or 0 gids (List[int], default=None): which subset of images ids to compute confusion metrics on. If not specified all images are used. compat (str, default='all'): can be ('ancestors' | 'mutex' | 'all'). determines which pred boxes are allowed to match which true boxes. If 'mutex', then pred boxes can only match true boxes of the same class. If 'ancestors', then pred boxes can match true boxes that match or have a coarser label. If 'all', then any pred can match any true, regardless of its category label. prioritize (str, default='iou'): can be ('iou' | 'class' | 'correct') determines which box to assign to if mutiple true boxes overlap a predicted box. if prioritize is iou, then the true box with maximum iou (above iou_thresh) will be chosen. If prioritize is class, then it will prefer matching a compatible class above a higher iou. If prioritize is correct, then ancestors of the true class are preferred over descendents of the true class, over unreleated classes. bg_cidx (int, default=-1): The index of the background class. The index used in the truth column when a predicted bounding box does not match any true bounding box. classes (List[str] | kwcoco.CategoryTree): mapping from class indices to class names. Can also contain class heirarchy information. ignore_classes (str | List[str]): class name(s) indicating ignore regions max_dets (int): maximum number of detections to consider TODO: - [ ] This is a bottleneck function. An implementation in C / C++ / Cython would likely improve the overall system. - [ ] Implement crowd truth. Allow multiple predictions to match any truth objet marked as "iscrowd". Returns: dict: with relevant confusion vectors. This keys of this dict can be interpreted as columns of a data frame. The `txs` / `pxs` columns represent the indexes of the true / predicted annotations that were assigned as matching. Additionally each row also contains the true and predicted class index, the predicted score, the true weight and the iou of the true and predicted boxes. A `txs` value of -1 means that the predicted box was not assigned to a true annotation and a `pxs` value of -1 means that the true annotation was not assigne to any predicted annotation. 
Example: >>> # xdoctest: +REQUIRES(module:pandas) >>> import pandas as pd >>> import kwimage >>> # Given a raw numpy representation construct Detection wrappers >>> true_dets = kwimage.Detections( >>> boxes=kwimage.Boxes(np.array([ >>> [ 0, 0, 10, 10], [10, 0, 20, 10], >>> [10, 0, 20, 10], [20, 0, 30, 10]]), 'tlbr'), >>> weights=np.array([1, 0, .9, 1]), >>> class_idxs=np.array([0, 0, 1, 2])) >>> pred_dets = kwimage.Detections( >>> boxes=kwimage.Boxes(np.array([ >>> [6, 2, 20, 10], [3, 2, 9, 7], >>> [3, 9, 9, 7], [3, 2, 9, 7], >>> [2, 6, 7, 7], [20, 0, 30, 10]]), 'tlbr'), >>> scores=np.array([.5, .5, .5, .5, .5, .5]), >>> class_idxs=np.array([0, 0, 1, 2, 0, 1])) >>> bg_weight = 1.0 >>> compat = 'all' >>> iou_thresh = 0.5 >>> bias = 0.0 >>> import kwcoco >>> classes = kwcoco.CategoryTree.from_mutex(list(range(3))) >>> bg_cidx = -1 >>> y = _assign_confusion_vectors(true_dets, pred_dets, bias=bias, >>> bg_weight=bg_weight, iou_thresh=iou_thresh, >>> compat=compat) >>> y = pd.DataFrame(y) >>> print(y) # xdoc: +IGNORE_WANT pred true score weight iou txs pxs 0 1 2 0.5000 1.0000 1.0000 3 5 1 0 -1 0.5000 1.0000 -1.0000 -1 4 2 2 -1 0.5000 1.0000 -1.0000 -1 3 3 1 -1 0.5000 1.0000 -1.0000 -1 2 4 0 -1 0.5000 1.0000 -1.0000 -1 1 5 0 0 0.5000 0.0000 0.6061 1 0 6 -1 0 0.0000 1.0000 -1.0000 0 -1 7 -1 1 0.0000 0.9000 -1.0000 2 -1 Ignore: from xinspect.dynamic_kwargs import get_func_kwargs globals().update(get_func_kwargs(_assign_confusion_vectors)) Example: >>> # xdoctest: +REQUIRES(module:pandas) >>> import pandas as pd >>> from kwcoco.metrics import DetectionMetrics >>> dmet = DetectionMetrics.demo(nimgs=1, nclasses=8, >>> nboxes=(0, 20), n_fp=20, >>> box_noise=.2, cls_noise=.3) >>> classes = dmet.classes >>> gid = 0 >>> true_dets = dmet.true_detections(gid) >>> pred_dets = dmet.pred_detections(gid) >>> y = _assign_confusion_vectors(true_dets, pred_dets, >>> classes=dmet.classes, >>> compat='all', prioritize='class') >>> y = pd.DataFrame(y) >>> print(y) # xdoc: +IGNORE_WANT >>> y = _assign_confusion_vectors(true_dets, pred_dets, >>> classes=dmet.classes, >>> compat='ancestors', iou_thresh=.5) >>> y = pd.DataFrame(y) >>> print(y) # xdoc: +IGNORE_WANT """ import kwarray valid_compat_keys = {'ancestors', 'mutex', 'all'} if compat not in valid_compat_keys: raise KeyError(compat) if classes is None and compat == 'ancestors': compat = 'mutex' if compat == 'mutex': prioritize = 'iou' # Group true boxes by class # Keep track which true boxes are unused / not assigned unique_tcxs, tgroupxs = kwarray.group_indices(true_dets.class_idxs) cx_to_txs = dict(zip(unique_tcxs, tgroupxs)) unique_pcxs = np.array(sorted(set(pred_dets.class_idxs))) if classes is None: import kwcoco # Build mutually exclusive category tree all_cxs = sorted( set(map(int, unique_pcxs)) | set(map(int, unique_tcxs))) all_cxs = list(range(max(all_cxs) + 1)) classes = kwcoco.CategoryTree.from_mutex(all_cxs) cx_to_ancestors = classes.idx_to_ancestor_idxs() if prioritize == 'iou': pdist_priority = None # TODO: cleanup else: pdist_priority = _fast_pdist_priority(classes, prioritize) if compat == 'mutex': # assume classes are mutually exclusive if hierarchy is not given cx_to_matchable_cxs = {cx: [cx] for cx in unique_pcxs} elif compat == 'ancestors': cx_to_matchable_cxs = { cx: sorted([cx] + sorted( ub.take(classes.node_to_idx, nx.ancestors(classes.graph, classes.idx_to_node[cx])))) for cx in unique_pcxs } elif compat == 'all': cx_to_matchable_cxs = {cx: unique_tcxs for cx in unique_pcxs} else: raise KeyError(compat) if compat == 'all': # In this case 
simply run the full pairwise iou common_true_idxs = np.arange(len(true_dets)) cx_to_matchable_txs = {cx: common_true_idxs for cx in unique_pcxs} common_ious = pred_dets.boxes.ious(true_dets.boxes, bias=bias) # common_ious = pred_dets.boxes.ious(true_dets.boxes, impl='c', bias=bias) iou_lookup = dict(enumerate(common_ious)) else: # For each pred-category find matchable true-indices cx_to_matchable_txs = {} for cx, compat_cx in cx_to_matchable_cxs.items(): matchable_cxs = cx_to_matchable_cxs[cx] compat_txs = ub.dict_take(cx_to_txs, matchable_cxs, default=[]) compat_txs = np.array(sorted(ub.flatten(compat_txs)), dtype=int) cx_to_matchable_txs[cx] = compat_txs # Batch up the IOU pre-computation between compatible truths / preds iou_lookup = {} unique_pred_cxs, pgroupxs = kwarray.group_indices(pred_dets.class_idxs) for cx, pred_idxs in zip(unique_pred_cxs, pgroupxs): true_idxs = cx_to_matchable_txs[cx] ious = pred_dets.boxes[pred_idxs].ious(true_dets.boxes[true_idxs], bias=bias) _px_to_iou = dict(zip(pred_idxs, ious)) iou_lookup.update(_px_to_iou) iou_thresh_list = ([iou_thresh] if not ub.iterable(iou_thresh) else iou_thresh) iou_thresh_to_y = {} for iou_thresh_ in iou_thresh_list: isvalid_lookup = { px: ious > iou_thresh_ for px, ious in iou_lookup.items() } y = _critical_loop(true_dets, pred_dets, iou_lookup, isvalid_lookup, cx_to_matchable_txs, bg_weight, prioritize, iou_thresh_, pdist_priority, cx_to_ancestors, bg_cidx, ignore_classes=ignore_classes, max_dets=max_dets) iou_thresh_to_y[iou_thresh_] = y if ub.iterable(iou_thresh): return iou_thresh_to_y else: return y
def compute_likely_overlaps(pfiles1, pfiles2):
    step_idx1 = ProgressiveFile.compatible_step_idx(pfiles1)
    step_idx2 = ProgressiveFile.compatible_step_idx(pfiles2)
    step_idx = min(step_idx1, step_idx2)
    grouped1 = ProgressiveFile.group_pfiles(pfiles1, step_idx=step_idx)
    grouped2 = ProgressiveFile.group_pfiles(pfiles2, step_idx=step_idx)

    thresh = 0.2
    verbose = 1

    # TODO: it would be nice if we didn't have to care about internal
    # deduplication when we attempt to find cross-set overlaps
    dups1 = ProgressiveFile.likely_duplicates(inv1.pfiles, thresh=thresh, verbose=verbose)
    dups2 = ProgressiveFile.likely_duplicates(inv2.pfiles, thresh=thresh, verbose=verbose)

    pfiles = inv1.pfiles + inv2.pfiles
    dups3 = ProgressiveFile.likely_duplicates(pfiles, thresh=thresh, verbose=verbose)

    only_on_inv2 = {}
    for key, group in dups3.items():
        if not any(item.fpath.startswith(inv1.root_fpath) for item in group):
            only_on_inv2[key] = group

    for p1 in inv1.pfiles:
        if 'Chase HQ 2 (JUE) [!].zip' in p1.fpath:
            break

    for p2 in inv2.pfiles:
        if 'Chase HQ 2 (JUE) [!].zip' in p2.fpath:
            break

    look = list(ub.flatten(only_on_inv2.values()))
    takealook = sorted([p.fpath for p in look])
    print('takealook = {}'.format(ub.repr2(takealook, nl=1)))

    keys1 = set(grouped1)
    keys2 = set(grouped2)

    missing_keys2 = keys2 - keys1
    missing_groups2 = ub.dict_subset(grouped2, missing_keys2)

    missing_fpaths2 = []
    for key, values in missing_groups2.items():
        print('key = {!r}'.format(key))
        print('values = {}'.format(ub.repr2(values, nl=1)))
        missing_fpaths2.extend(values)

    missing_fpaths2 = sorted([p.fpath for p in missing_fpaths2])
    print('missing_fpaths2 = {}'.format(ub.repr2(missing_fpaths2, nl=1)))
    # pass

    import xdev
    set_overlaps = xdev.set_overlaps(keys1, keys2)
    print('set_overlaps = {}'.format(ub.repr2(set_overlaps, nl=1)))
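# The cross-set comparison above boils down to grouping files by a partial
# hash key and diffing the key sets. A generic ubelt-only sketch of that idea,
# with fake four-character "hash prefixes" standing in for the ProgressiveFile
# step keys:
import ubelt as ub

_hashes1 = {'a.zip': 'ab12', 'b.zip': 'cd34'}
_hashes2 = {'b_copy.zip': 'cd34', 'c.zip': 'ef56'}
_grouped1 = ub.group_items(_hashes1.keys(), _hashes1.values())
_grouped2 = ub.group_items(_hashes2.keys(), _hashes2.values())
_missing_keys2 = set(_grouped2) - set(_grouped1)       # keys only present in set 2
print(ub.dict_subset(_grouped2, _missing_keys2))       # {'ef56': ['c.zip']}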
def main(): # TODO: progressive hashing data structure inv1 = Inventory('/media/joncrall/raid/', blocklist) inv2 = Inventory('/media/joncrall/media', blocklist) # inv1 = Inventory('/media/joncrall/raid/Applications/NotGames', blocklist) # inv2 = Inventory('/media/joncrall/media/Applications/NotGames', blocklist) # inv1 = Inventory('/media/joncrall/raid/Applications', blocklist) # inv2 = Inventory('/media/joncrall/media/Applications', blocklist) self = inv1 # NOQA inv1.build() inv2.build() thresh = { 'frac': 0.5, 'byte': 100 * int(2**20) # only use the first few mb to determine overlap } verbose = 1 pfiles1 = inv1.pfiles pfiles2 = inv2.pfiles overlap, only1, only2 = ProgressiveFile.likely_overlaps(pfiles1, pfiles2, thresh=thresh, verbose=verbose) stats = { 'overlap': len(overlap), 'only1': len(only1), 'only2': len(only2), } print('stats = {}'.format(ub.repr2(stats, nl=1))) only2_list = sorted([p.fpath for group in only2.values() for p in group]) print('only2_list = {}'.format(ub.repr2(only2_list, nl=1))) print('stats = {}'.format(ub.repr2(stats, nl=1))) # for pfile in inv1.pfiles: # pfile._check_integrity() import numpy as np mb_read = np.array([ pfile._parts[-1][1] / int(2**20) for pfile in ub.ProgIter(inv2.pfiles) ]) mb_read.max() mb_read.min() # Build all hashes up to a reasonable degree inv1.build_hashes(max_workers=0) maybe_dups = inv1.likely_duplicates(thresh=0.2) len(maybe_dups) maybe_dups = ub.sorted_keys(maybe_dups, key=lambda x: x[2]) import networkx as nx import itertools as it # Check which directories are most likely to be duplicates graph = nx.Graph() for key, group in ub.ProgIter(maybe_dups.items(), total=len(maybe_dups), desc='build dup dir graph'): if key[0] == '': continue dpaths = [dirname(pfile.fpath) for pfile in group] for d1, d2 in it.combinations(dpaths, 2): graph.add_edge(d1, d2) edge = graph.edges[(d1, d2)] if 'dups' not in edge: edge['dups'] = 0 edge['dups'] += 1 edge_data = list(graph.edges(data=True)) for dpath in ub.ProgIter(graph.nodes, desc='find lens'): num_children = len(os.listdir(dpath)) graph.nodes[dpath]['num_children'] = num_children for d1, d2, dat in edge_data: nc1 = graph.nodes[d1]['num_children'] nc2 = graph.nodes[d2]['num_children'] ndups = dat['dups'] dup_score = (dat['dups'] / min(nc1, nc2)) dat['dup_score'] = dup_score if dup_score > 0.9: print('dup_score = {!r}'.format(dup_score)) print('d1 = {!r}'.format(d1)) print('d2 = {!r}'.format(d2)) print('nc1 = {!r}'.format(nc1)) print('nc2 = {!r}'.format(nc2)) print('ndups = {!r}'.format(ndups)) print('edge_data = {}'.format(ub.repr2(edge_data, nl=2))) print('maybe_dups = {}'.format(ub.repr2(maybe_dups.keys(), nl=3))) for key, group in maybe_dups.items(): if key[0] == '': continue print('key = {!r}'.format(key)) print('group = {}'.format(ub.repr2(group, nl=1))) for pfile in group: pfile.refined_to(float('inf')) print('key = {!r}'.format(key)) inv2.build_hashes(max_workers=6, mode='thread') inv1.pfiles = [ p for p in ub.ProgIter(inv1.pfiles, desc='exist check') if exists(p.fpath) ] inv2.pfiles = [ p for p in ub.ProgIter(inv2.pfiles, desc='exist check') if exists(p.fpath) ] pfiles1 = inv1.pfiles pfiles2 = inv2.pfiles def compute_likely_overlaps(pfiles1, pfiles2): step_idx1 = ProgressiveFile.compatible_step_idx(pfiles1) step_idx2 = ProgressiveFile.compatible_step_idx(pfiles2) step_idx = min(step_idx1, step_idx2) grouped1 = ProgressiveFile.group_pfiles(pfiles1, step_idx=step_idx) grouped2 = ProgressiveFile.group_pfiles(pfiles2, step_idx=step_idx) thresh = 0.2 verbose = 1 # TODO: it would be nice if we 
didn't have to care about internal # deduplication when we attempt to find cross-set overlaps dups1 = ProgressiveFile.likely_duplicates(inv1.pfiles, thresh=thresh, verbose=verbose) dups2 = ProgressiveFile.likely_duplicates(inv2.pfiles, thresh=thresh, verbose=verbose) pfiles = inv1.pfiles + inv2.pfiles dups3 = ProgressiveFile.likely_duplicates(pfiles, thresh=thresh, verbose=verbose) only_on_inv2 = {} for key, group in dups3.items(): if not any( item.fpath.startswith(inv1.root_fpath) for item in group): only_on_inv2[key] = group for p1 in inv1.pfiles: if 'Chase HQ 2 (JUE) [!].zip' in p1.fpath: break for p2 in inv2.pfiles: if 'Chase HQ 2 (JUE) [!].zip' in p2.fpath: break look = list(ub.flatten(only_on_inv2.values())) takealook = sorted([p.fpath for p in look]) print('takealook = {}'.format(ub.repr2(takealook, nl=1))) keys1 = set(grouped1) keys2 = set(grouped2) missing_keys2 = keys2 - keys1 missing_groups2 = ub.dict_subset(grouped2, missing_keys2) missing_fpaths2 = [] for key, values in missing_groups2.items(): print('key = {!r}'.format(key)) print('values = {}'.format(ub.repr2(values, nl=1))) missing_fpaths2.extend(values) missing_fpaths2 = sorted([p.fpath for p in missing_fpaths2]) print('missing_fpaths2 = {}'.format(ub.repr2(missing_fpaths2, nl=1))) # pass import xdev set_overlaps = xdev.set_overlaps(keys1, keys2) print('set_overlaps = {}'.format(ub.repr2(set_overlaps, nl=1))) # We want to know what files in set2 do not exist in set1 if 0: fpath = inv1.all_fpaths[0] pfile = ProgressiveFile(fpath) fpath1 = '/media/joncrall/raid/unsorted/yet-another-backup/card-usb-drive/Transfer/Zebras/DownloadedLibraries/lightspeed/solve_triu.m' fpath2 = '/media/joncrall/raid/unsorted/yet-another-backup/card-usb-drive/Zebras/downloaded_libraries/lightspeed/solve_triu.m' fpath1 = '/media/joncrall/raid/Applications/Wii/WiiHacksAndStuff/CurrentHacks/Falco/DarkFalco02.pcs' fpath2 = '/media/joncrall/raid/Applications/Wii/WiiHacksAndStuff/CurrentHacks/Ivysaur/Kraid-v2-Ivy.pcs' pfile = pfile1 = ProgressiveFile(fpath1) pfile2 = ProgressiveFile(fpath2) pfile.maybe_equal(pfile2, thresh=0.1) fpath_demodata = inv1.all_fpaths[::len(inv1.all_fpaths) // 500] # fpaths = hash_groups1_dup['ef46db3751d8e999'] pfiles_demodata = [ProgressiveFile(f) for f in fpath_demodata] def progressive_duplicates(pfiles, idx=1): step_ids = [pfile.refined_to(idx) for pfile in ub.ProgIter(pfiles)] final_groups = {} grouped = ub.group_items(pfiles, step_ids) for key, group in grouped.items(): if len(group) > 1: if all(not g.can_refine for g in group): # Group is ~100% a real duplicate final_groups[key] = group else: pfiles = group deduped = progressive_duplicates(pfiles, idx=idx + 1) final_groups.update(deduped) else: final_groups[key] = group return final_groups pfiles = pfiles_demodata final_groups = progressive_duplicates(pfiles) for key, group in final_groups.items(): if len(group) > 1: print('key = {!r}'.format(key)) print('group = {}'.format(ub.repr2(group, nl=1))) inv1.build_hashes() inv2.build_hashes() hash_groups1 = ub.group_items(inv1.all_fpaths, inv1.all_hashes) hash_groups2 = ub.group_items(inv2.all_fpaths, inv2.all_hashes) hash_groups1_dup = { k: v for k, v in hash_groups1.items() if len(v) > 1 } hash_groups2_dup = { k: v for k, v in hash_groups2.items() if len(v) > 1 } len(hash_groups1_dup) len(hash_groups2_dup) # common = set(hash_groups1) & set(hash_groups2) # xdev.set_overlaps(hash_groups1, hash_groups2) fnames1 = ub.group_items(inv1.all_fpaths, key=basename) fnames2 = ub.group_items(inv2.all_fpaths, key=basename) missing = 
ub.dict_diff(fnames2, fnames1) sorted(ub.flatten(missing.values())) len(missing) fpath_demodata = inv1.all_fpaths[::len(inv1.all_fpaths) // 500] def internal_deduplicate(self): hash_groups = ub.group_items(self.all_fpaths, self.all_hashes) hash_groups_dup = { k: v for k, v in hash_groups.items() if len(v) > 1 } from os.path import dirname hash_groups_dup['ef46db3751d8e999'] for key, values in hash_groups_dup.items(): for v in values: if v.endswith('.avi'): break [basename(v) for v in values] [dirname(v) for v in values]
def unique(self):
    """
    Returns the unique channels that will need to be given or loaded
    """
    return set(ub.flatten(self.parse().values()))
aug_graph = graph.copy()

# remove cut edges from augmented graph
edge_to_iscut = nx.get_edge_attributes(aug_graph, 'is_cut')
cut_edges = [
    (u, v) for (u, v, d) in aug_graph.edges(data=True)
    if not (d.get('is_cut') or
            d.get('decision', 'unreviewed') in ['nomatch'])
]
cut_edges = [edge for edge, flag in edge_to_iscut.items() if flag]
aug_graph.remove_edges_from(cut_edges)

# Enumerate cliques inside labels
unflat_edges = [
    list(ub.iter_window(nodes, 2)) for nodes in label_to_nodes.values()
]
node_pairs = [tup for tup in ub.flatten(unflat_edges) if tup[0] != tup[1]]

# Remove candidate MST edges that exist in the original graph
orig_edges = list(aug_graph.edges())
candidate_mst_edges = [
    edge for edge in node_pairs if not aug_graph.has_edge(*edge)
]

# randomness prevents chains and visually looks better
rng = np.random.RandomState(42)

def _randint():
    return 0
    return rng.randint(0, 100)
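# A self-contained sketch of the candidate-edge construction above: chain each
# label's nodes with a sliding window of size 2 and drop self-loops (the toy
# label_to_nodes mapping is made up for illustration).
import ubelt as ub

_label_to_nodes = {1: [101, 102, 103], 2: [201, 202]}
_unflat_edges = [list(ub.iter_window(nodes, 2)) for nodes in _label_to_nodes.values()]
_node_pairs = [tup for tup in ub.flatten(_unflat_edges) if tup[0] != tup[1]]
print(_node_pairs)  # [(101, 102), (102, 103), (201, 202)]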
def demodata_bridge():
    # define 2-connected components and bridges
    cc2 = [(1, 2, 4, 3, 1, 4), (8, 9, 10, 8), (11, 12, 13, 11)]
    bridges = [(4, 8), (3, 5), (20, 21), (22, 23, 24)]
    G = nx.Graph(ub.flatten(ub.iter_window(path, 2) for path in cc2 + bridges))
    return G
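# A quick sanity check of the demo graph, assuming networkx (imported as nx)
# is >= 2.0 so that nx.bridges is available: the edges listed in `bridges`,
# plus both edges of the 22-23-24 chain, should come back as cut edges.
G = demodata_bridge()
found = {tuple(sorted(e)) for e in nx.bridges(G)}
print(found)  # expected to include (3, 5), (4, 8), (20, 21), (22, 23), (23, 24)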
def main(bib_fpath=None): r""" intro point to fixbib script CommmandLine: fixbib python -m fixtex bib python -m fixtex bib --dryrun python -m fixtex bib --dryrun --debug """ if bib_fpath is None: bib_fpath = 'My Library.bib' # DEBUG = ub.argflag('--debug') # Read in text and ensure ascii format dirty_text = ut.readfrom(bib_fpath) from fixtex.fix_tex import find_used_citations, testdata_fpaths if exists('custom_extra.bib'): extra_parser = bparser.BibTexParser(ignore_nonstandard_types=False) parser = bparser.BibTexParser() ut.delete_keys(parser.alt_dict, ['url', 'urls']) print('Parsing extra bibtex file') extra_text = ut.readfrom('custom_extra.bib') extra_database = extra_parser.parse(extra_text, partial=False) print('Finished parsing extra') extra_dict = extra_database.get_entry_dict() else: extra_dict = None #udata = dirty_text.decode("utf-8") #dirty_text = udata.encode("ascii", "ignore") #dirty_text = udata # parser = bparser.BibTexParser() # bib_database = parser.parse(dirty_text) # d = bib_database.get_entry_dict() print('BIBTEXPARSER LOAD') parser = bparser.BibTexParser(ignore_nonstandard_types=False, common_strings=True) ut.delete_keys(parser.alt_dict, ['url', 'urls']) print('Parsing bibtex file') bib_database = parser.parse(dirty_text, partial=False) print('Finished parsing') bibtex_dict = bib_database.get_entry_dict() old_keys = list(bibtex_dict.keys()) new_keys = [] for key in ub.ProgIter(old_keys, label='fixing keys'): new_key = key new_key = new_key.replace(':', '') new_key = new_key.replace('-', '_') new_key = re.sub('__*', '_', new_key) new_keys.append(new_key) # assert len(ut.find_duplicate_items(new_keys)) == 0, 'new keys created conflict' assert len(ub.find_duplicates(new_keys)) == 0, 'new keys created conflict' for key, new_key in zip(old_keys, new_keys): if key != new_key: entry = bibtex_dict[key] entry['ID'] = new_key bibtex_dict[new_key] = entry del bibtex_dict[key] # The bibtext is now clean. 
Print it to stdout #print(clean_text) verbose = None if verbose is None: verbose = 1 # Find citations from the tex documents key_list = None if key_list is None: cacher = ub.Cacher('texcite1', enabled=0) data = cacher.tryload() if data is None: fpaths = testdata_fpaths() key_list, inverse = find_used_citations(fpaths, return_inverse=True) # ignore = ['JP', '?', 'hendrick'] # for item in ignore: # try: # key_list.remove(item) # except ValueError: # pass if verbose: print('Found %d citations used in the document' % (len(key_list), )) data = key_list, inverse cacher.save(data) key_list, inverse = data # else: # key_list = None unknown_pubkeys = [] debug_author = ub.argval('--debug-author', default=None) # ./fix_bib.py --debug_author=Kappes if verbose: print('Fixing %d/%d bibtex entries' % (len(key_list), len(bibtex_dict))) # debug = True debug = False if debug_author is not None: debug = False known_keys = list(bibtex_dict.keys()) missing_keys = set(key_list) - set(known_keys) if extra_dict is not None: missing_keys.difference_update(set(extra_dict.keys())) if missing_keys: print('The library is missing keys found in tex files %s' % (ub.repr2(missing_keys), )) # Search for possible typos: candidate_typos = {} sedlines = [] for key in missing_keys: candidates = ut.closet_words(key, known_keys, num=3, subset=True) if len(candidates) > 1: top = candidates[0] if ut.edit_distance(key, top) == 1: # "sed -i -e 's/{}/{}/g' *.tex".format(key, top) import os replpaths = ' '.join( [relpath(p, os.getcwd()) for p in inverse[key]]) sedlines.append("sed -i -e 's/{}/{}/g' {}".format( key, top, replpaths)) candidate_typos[key] = candidates print('Cannot find key = %r' % (key, )) print('Did you mean? %r' % (candidates, )) print('Quick fixes') print('\n'.join(sedlines)) # group by file just = max([0] + list(map(len, missing_keys))) missing_fpaths = [inverse[key] for key in missing_keys] for fpath in sorted(set(ub.flatten(missing_fpaths))): # ut.fix_embed_globals() subkeys = [k for k in missing_keys if fpath in inverse[k]] print('') ut.cprint('--- Missing Keys ---', 'blue') ut.cprint('fpath = %r' % (fpath, ), 'blue') ut.cprint('{} | {}'.format('Missing'.ljust(just), 'Did you mean?'), 'blue') for key in subkeys: print('{} | {}'.format(ut.highlight_text(key.ljust(just), 'red'), ' '.join(candidate_typos[key]))) # for key in list(bibtex_dict.keys()): if extra_dict is not None: # Extra database takes precidence over regular key_list = list(ut.unique(key_list + list(extra_dict.keys()))) for k, v in extra_dict.items(): bibtex_dict[k] = v full = ub.argflag('--full') for key in key_list: try: entry = bibtex_dict[key] except KeyError: continue self = BibTexCleaner(key, entry, full=full) if debug_author is not None: debug = debug_author in entry.get('author', '') if debug: ut.cprint(' --- ENTRY ---', 'yellow') print(ub.repr2(entry, nl=1)) entry = self.fix() # self.clip_abstract() # self.shorten_keys() # self.fix_authors() # self.fix_year() # old_pubval = self.fix_pubkey() # if old_pubval: # unknown_pubkeys.append(old_pubval) # self.fix_arxiv() # self.fix_general() # self.fix_paper_types() if debug: print(ub.repr2(entry, nl=1)) ut.cprint(' --- END ENTRY ---', 'yellow') bibtex_dict[key] = entry unwanted_keys = set(bibtex_dict.keys()) - set(key_list) if verbose: print('Removing unwanted %d entries' % (len(unwanted_keys))) ut.delete_dict_keys(bibtex_dict, unwanted_keys) if 0: d1 = bibtex_dict.copy() full = True for key, entry in d1.items(): self = BibTexCleaner(key, entry, full=full) pub = self.publication() if pub is None: 
print(self.entry['ENTRYTYPE']) old = self.fix_pubkey() x1 = self._pubval() x2 = self.standard_pubval(full=full) # if x2 is not None and len(x2) > 5: # print(ub.repr2(self.entry)) if x1 != x2: print('x2 = %r' % (x2, )) print('x1 = %r' % (x1, )) print(ub.repr2(self.entry)) # if 'CVPR' in self.entry.get('booktitle', ''): # if 'CVPR' != self.entry.get('booktitle', ''): # break if old: print('old = %r' % (old, )) d1[key] = self.entry if full: d1 = bibtex_dict.copy() import numpy as np import pandas as pd df = pd.DataFrame.from_dict(d1, orient='index') paged_items = df[~pd.isnull(df['pub_accro'])] has_pages = ~pd.isnull(paged_items['pages']) print('have pages {} / {}'.format(has_pages.sum(), len(has_pages))) print(ub.repr2(paged_items[~has_pages]['title'].values.tolist())) entrytypes = dict(list(df.groupby('pub_type'))) if False: # entrytypes['misc'] g = entrytypes['online'] g = g[g.columns[~np.all(pd.isnull(g), axis=0)]] entrytypes['book'] entrytypes['thesis'] g = entrytypes['article'] g = entrytypes['incollection'] g = entrytypes['conference'] def lookup_pub(e): if e == 'article': return 'journal', 'journal' elif e == 'incollection': return 'booksection', 'booktitle' elif e == 'conference': return 'conference', 'booktitle' return None, None for e, g in entrytypes.items(): print('e = %r' % (e, )) g = g[g.columns[~np.all(pd.isnull(g), axis=0)]] if 'pub_full' in g.columns: place_title = g['pub_full'].tolist() print(ub.repr2(ub.dict_hist(place_title))) else: print('Unknown publications') if 'report' in entrytypes: g = entrytypes['report'] missing = g[pd.isnull(g['title'])] if len(missing): print('Missing Title') print(ub.repr2(missing[['title', 'author']].values.tolist())) if 'journal' in entrytypes: g = entrytypes['journal'] g = g[g.columns[~np.all(pd.isnull(g), axis=0)]] missing = g[pd.isnull(g['journal'])] if len(missing): print('Missing Journal') print(ub.repr2(missing[['title', 'author']].values.tolist())) if 'conference' in entrytypes: g = entrytypes['conference'] g = g[g.columns[~np.all(pd.isnull(g), axis=0)]] missing = g[pd.isnull(g['booktitle'])] if len(missing): print('Missing Booktitle') print(ub.repr2(missing[['title', 'author']].values.tolist())) if 'incollection' in entrytypes: g = entrytypes['incollection'] g = g[g.columns[~np.all(pd.isnull(g), axis=0)]] missing = g[pd.isnull(g['booktitle'])] if len(missing): print('Missing Booktitle') print(ub.repr2(missing[['title', 'author']].values.tolist())) if 'thesis' in entrytypes: g = entrytypes['thesis'] g = g[g.columns[~np.all(pd.isnull(g), axis=0)]] missing = g[pd.isnull(g['institution'])] if len(missing): print('Missing Institution') print(ub.repr2(missing[['title', 'author']].values.tolist())) # import utool # utool.embed() # Overwrite BibDatabase structure bib_database._entries_dict = bibtex_dict bib_database.entries = list(bibtex_dict.values()) #conftitle_to_types_set_hist = {key: set(val) for key, val in conftitle_to_types_hist.items()} #print(ub.repr2(conftitle_to_types_set_hist)) print('Unknown conference keys:') print(ub.repr2(sorted(unknown_pubkeys))) print('len(unknown_pubkeys) = %r' % (len(unknown_pubkeys), )) writer = BibTexWriter() writer.contents = ['comments', 'entries'] writer.indent = ' ' writer.order_entries_by = ('type', 'author', 'year') new_bibtex_str = bibtexparser.dumps(bib_database, writer) # Need to check #jegou_aggregating_2012 # Fix the Journal Abreviations # References: # https://www.ieee.org/documents/trans_journal_names.pdf # Write out clean bibfile in ascii format clean_bib_fpath = 
ub.augpath(bib_fpath.replace(' ', '_'), suffix='_clean') if not ub.argflag('--dryrun'): ut.writeto(clean_bib_fpath, new_bibtex_str)
def __init__(self, categories=None, fg_scale=0.5, fg_intensity=0.9, rng=None):
    """
    Args:
        categories (List[Dict]): List of coco category dictionaries
    """
    self.rng = kwarray.ensure_rng(rng)
    self.fg_scale = fg_scale
    self.fg_intensity = fg_intensity

    self._category_to_elemfunc = {
        'superstar': lambda x: Rasters.superstar(),
        'eff': lambda x: Rasters.eff(),
        'box': lambda x: (skimage.morphology.square(x), None),
        'star': lambda x: (star(x), None),
        'circle': lambda x: (skimage.morphology.disk(x), None),
        'octagon': lambda x: (skimage.morphology.octagon(x // 2, int(x / (2 * np.sqrt(2)))), None),
        'diamond': lambda x: (skimage.morphology.diamond(x), None),
    }
    # Make generation of shapes a bit faster?
    # Maybe there are too many input combinations for this?
    # If we only allow certain size generations it should be ok
    # for key in self._category_to_elemfunc.keys():
    #     self._category_to_elemfunc[key] = ub.memoize(self._category_to_elemfunc[key])

    # keep track of which keypoints belong to which categories
    self.categories = categories
    self.cname_to_kp = {
        c['name']: c.get('keypoints', []) for c in self.categories
    }

    self.obj_catnames = sorted([c['name'] for c in self.categories])
    self.kp_catnames = sorted(ub.flatten(self.cname_to_kp.values()))

    kpname_to_cat = {
        c['name']: c for c in CategoryPatterns._default_keypoint_categories
    }
    self.keypoint_categories = list(ub.take(kpname_to_cat, self.kp_catnames))

    # flatten list of all keypoint categories
    # self.kp_catnames = list(
    #     ub.flatten([self.cname_to_kp.get(cname, [])
    #                 for cname in self.obj_catnames])
    # )

    self.cname_to_cid = {cat['name']: cat['id'] for cat in self.categories}
    self.cname_to_cx = {
        cat['name']: cx for cx, cat in enumerate(self.categories)
    }
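# A minimal look at the scikit-image structuring elements used by the lambdas
# above (requires scikit-image; shapes follow its documented conventions).
import skimage.morphology

_elem = skimage.morphology.disk(3)       # (7, 7) binary circle of radius 3
print(_elem.shape, _elem.dtype)
print(skimage.morphology.diamond(2))     # (5, 5) binary diamond of radius 2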
def assert_recovery_invariant(infr, msg=''):
    if not DEBUG_INCON:
        return
    # infr.print('assert_recovery_invariant', 200)
    inconsistent_ccs = list(infr.inconsistent_components())
    incon_cc = set(ub.flatten(inconsistent_ccs))  # NOQA
def measure_metrics(infr):
    real_pos_edges = []

    n_true_merges = infr.test_state['n_true_merges']
    confusion = infr.test_state['confusion']

    n_tp = confusion[POSTV][POSTV]
    confusion[POSTV]
    columns = set(confusion.keys())
    reviewd_cols = columns - {UNREV}
    non_postv = reviewd_cols - {POSTV}
    non_negtv = reviewd_cols - {NEGTV}

    n_fn = sum(ub.take(confusion[POSTV], non_postv))
    n_fp = sum(ub.take(confusion[NEGTV], non_negtv))

    n_error_edges = sum(confusion[r][c] + confusion[c][r]
                        for r, c in it.combinations(reviewd_cols, 2))
    # assert n_fn + n_fp == n_error_edges

    pred_n_pcc_mst_edges = n_true_merges

    # Find all annotations involved in a mistake
    assert n_error_edges == len(infr.mistake_edges)
    direct_mistake_aids = {a for edge in infr.mistake_edges for a in edge}
    mistake_nids = set(infr.node_labels(*direct_mistake_aids))
    mistake_aids = set(
        ub.flatten([infr.pos_graph.component(nid) for nid in mistake_nids]))

    pos_acc = pred_n_pcc_mst_edges / infr.real_n_pcc_mst_edges
    metrics = {
        'n_decision': infr.test_state['n_decision'],
        'n_manual': infr.test_state['n_manual'],
        'n_algo': infr.test_state['n_algo'],
        'phase': infr.loop_phase,
        'pos_acc': pos_acc,
        'n_merge_total': infr.real_n_pcc_mst_edges,
        'n_merge_remain': infr.real_n_pcc_mst_edges - n_true_merges,
        'n_true_merges': n_true_merges,
        'recovering': infr.is_recovering(),
        # 'recovering2': infr.test_state['recovering'],
        'merge_remain': 1 - pos_acc,
        'n_mistake_aids': len(mistake_aids),
        'frac_mistake_aids': len(mistake_aids) / len(infr.aids),
        'n_mistake_nids': len(mistake_nids),
        'n_errors': n_error_edges,
        'n_fn': n_fn,
        'n_fp': n_fp,
        'refresh_support': len(infr.refresh.manual_decisions),
        'pprob_any': infr.refresh.prob_any_remain(),
        'mu': infr.refresh._ewma,
        'test_action': infr.test_state['test_action'],
        'action': infr.test_state.get('action', None),
        'user_id': infr.test_state['user_id'],
        'pred_decision': infr.test_state['pred_decision'],
        'true_decision': infr.test_state['true_decision'],
        'n_neg_redun': infr.neg_redun_metagraph.number_of_edges(),
        'n_neg_redun1': (infr.neg_metagraph.number_of_edges() -
                         infr.neg_metagraph.number_of_selfloops()),
    }
    return metrics
def new_video_sample_grid(dset, window_dims, window_overlap=0.0,
                          classes_of_interest=None,
                          ignore_coverage_thresh=0.6,
                          negative_classes={'ignore', 'background'}):
    """
    Create a space time-grid to sample with

    Example:
        >>> from ndsampler.coco_regions import *  # NOQA
        >>> import kwcoco
        >>> dset = kwcoco.CocoDataset.demo('vidshapes8-multispectral', num_frames=5)
        >>> dset.conform()
        >>> window_dims = (2, 224, 224)
        >>> sample_grid = new_video_sample_grid(dset, window_dims)
        >>> print('sample_grid = {}'.format(ub.repr2(sample_grid, nl=2)))
        >>> # Now try to load a sample
        >>> tr = sample_grid['positives'][0]
        >>> import ndsampler
        >>> sampler = ndsampler.CocoSampler(dset)
        >>> tr_ = sampler._infer_target_attributes(tr)
        >>> print('tr_ = {}'.format(ub.repr2(tr_, nl=1)))
        >>> sample = sampler.load_sample(tr)
        >>> assert sample['im'].shape == (2, 224, 224, 5)

    Ignore:
        import xdev
        globals().update(xdev.get_func_kwargs(new_video_sample_grid))
    """
    import kwarray
    from ndsampler import isect_indexer
    keepbound = True

    if classes_of_interest:
        raise NotImplementedError

    # Create a sliding window object for each specific image (because they may
    # have different sizes, technically we could memoize this)
    vidid_to_slider = {}
    for vidid, video in dset.index.videos.items():
        gids = dset.index.vidid_to_gids[vidid]
        num_frames = len(gids)
        full_dims = [num_frames, video['height'], video['width']]
        window_dims_ = full_dims if window_dims == 'full' else window_dims
        slider = kwarray.SlidingWindow(full_dims, window_dims_,
                                       overlap=window_overlap,
                                       keepbound=keepbound,
                                       allow_overshoot=True)
        vidid_to_slider[vidid] = slider

    _isect_index = isect_indexer.FrameIntersectionIndex.from_coco(dset)

    positives = []
    negatives = []
    for vidid, slider in vidid_to_slider.items():
        regions = list(slider)
        gids = dset.index.vidid_to_gids[vidid]
        boxes = []
        box_gids = []
        for region in regions:
            t_sl, y_sl, x_sl = region
            region_gids = gids[t_sl]
            box_gids.append(region_gids)
            boxes.append([x_sl.start, y_sl.start, x_sl.stop, y_sl.stop])
        boxes = kwimage.Boxes(np.array(boxes), 'ltrb')

        for region, region_gids, box in zip(regions, box_gids, boxes):
            # Check to see what annotations this window-box overlaps with
            region_aids = []
            for gid in region_gids:
                # TODO: memoize to prevent dup queries (box is not hashable)
                aids = _isect_index.overlapping_aids(gid, box)
                region_aids.append(aids)
            pos_aids = sorted(ub.flatten(region_aids))
            space_slice = region[1:3]
            time_slice = region[0]

            tr = {
                'vidid': vidid,
                'time_slice': time_slice,
                'space_slice': space_slice,
                # 'slices': region,
                'gids': region_gids,
                'aids': pos_aids,
            }
            if len(pos_aids):
                positives.append(tr)
            else:
                negatives.append(tr)

    print('Found {} positives'.format(len(positives)))
    print('Found {} negatives'.format(len(negatives)))
    sample_grid = {
        'positives': positives,
        'negatives': negatives,
    }
    return sample_grid
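# A tiny standalone look at the slider used above (requires kwarray). Each
# yielded region is a tuple of slices over (time, height, width); the shapes
# here are made up for illustration.
import kwarray

_slider = kwarray.SlidingWindow((5, 224, 224), (2, 128, 128),
                                overlap=0.0, keepbound=True,
                                allow_overshoot=True)
for _region in list(_slider)[:3]:
    _t_sl, _y_sl, _x_sl = _region
    print(_t_sl, _y_sl, _x_sl)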
def pvp_inventory(): """ The idea is you put info about your candidates here and we find good mons to power up. """ inventory = [ Pokemon('Magnezone', (14, 14, 14), cp=1815, form='Normal'), Pokemon('Magnemite', (7, 14, 9), cp=792), Pokemon('Magnemite', (10, 14, 13), cp=747), Pokemon('Magnemite', (13, 9, 15), cp=602), Pokemon('Magneton', (13, 14, 13), cp=550, form='Shadow'), Pokemon('Magnemite', (15, 13, 7), cp=293, form='Shadow'), Pokemon('Magnemite', (2, 14, 15), cp=283, form='Shadow'), ] inventory = [ Pokemon('sirfetch’d', (4, 11, 12), cp=1924, form='Galarian'), Pokemon('farfetch’d', (12, 15, 15), cp=1495, form='Galarian'), Pokemon('farfetch’d', (14, 14, 15), cp=948, form='Galarian'), ] inventory = [ Pokemon('bulbasaur', (7, 13, 12), cp=382, form='Shadow'), Pokemon('bulbasaur', (4, 8, 13), cp=366, form='Shadow'), Pokemon('bulbasaur', (7, 12, 8), cp=227, form='Shadow'), ] inventory = [ Pokemon('Clefable', (12, 13, 12), cp=1828), Pokemon('Clefairy', (4, 2, 7), cp=389), ] inventory = [ Pokemon('Jigglypuff', (10, 14, 15), cp=631), Pokemon('Jigglypuff', (10, 12, 15), cp=286), ] inventory = [ Pokemon('poliwag', (10, 13, 14), cp=335), Pokemon('poliwag', (10, 14, 13), cp=335), ] inventory = [ Pokemon('drifloon', (15, 15, 1), cp=695), Pokemon('drifloon', (0, 9, 14), cp=527), Pokemon('drifloon', (15, 15, 12), cp=509), Pokemon('drifloon', (14, 15, 14), cp=508), Pokemon('drifloon', (14, 11, 14), cp=497), Pokemon('drifloon', (11, 13, 12), cp=489, shiny=True), Pokemon('drifloon', (0, 4, 8), cp=336), Pokemon('drifloon', (12, 10, 12), cp=118), ] inventory = [ Pokemon('shelmet', (10, 15, 8), cp=351), Pokemon('shelmet', (0, 13, 0), cp=166), Pokemon('shelmet', (15, 10, 12), cp=158), ] inventory = [ Pokemon('Karrablast', (10, 4, 12), cp=824), Pokemon('Karrablast', (13, 13, 13), cp=655), Pokemon('Karrablast', (13, 14, 15), cp=16), ] inventory = [ Pokemon('Ralts', (14, 14, 13)), Pokemon('Ralts', (14, 11, 12)), Pokemon('Ralts', (0, 11, 0), shadow=True), Pokemon('Ralts', (1, 14, 2), shadow=True), Pokemon('Ralts', (12, 12, 6), shadow=True), Pokemon('Ralts', (5, 14, 14)), Pokemon('Ralts', (7, 11, 11)), ] inventory = [ Pokemon('Toxicroak', (11, 13, 14)), Pokemon('Croagunk', (9, 11, 13), cp=794), Pokemon('Croagunk', (8, 6, 8), cp=429), ] inventory = [ Pokemon('Snorlax', (7, 6, 13), shadow=True), Pokemon('Snorlax', (0, 0, 13), shadow=0), Pokemon('Snorlax', (8, 15, 14), shadow=0, cp=1155), Pokemon('Snorlax', (8, 12, 11), shadow=0, cp=2106), Pokemon('Snorlax', (9, 15, 10), shadow=0, cp=2487), Pokemon('Snorlax', (1, 15, 14), shadow=0, cp=1372), Pokemon('Snorlax', (7, 11, 15), shadow=0, cp=3044), Pokemon('Snorlax', (2, 15, 1), shadow=1), Pokemon('Munchlax', (14, 11, 14), shadow=0, cp=1056), ] inventory = [ Pokemon('Obstagoon', (11, 15, 13), cp=1478, form='Galarian'), Pokemon('zigzagoon', (10, 14, 14), cp=268, form='Galarian'), Pokemon('zigzagoon', (11, 12, 13), cp=268, form='Galarian'), Pokemon('zigzagoon', (11, 12, 15), cp=270, form='Galarian'), Pokemon('zigzagoon', (12, 11, 15), cp=272, form='Galarian'), ] inventory = [ Pokemon('Meditite', (5, 12, 4), cp=25), Pokemon('Medicham', (14, 12, 12), cp=1116), Pokemon('Medicham', (15, 15, 10), cp=966), ] for self in inventory: list(self.family()) candidates = list( ub.flatten(list(pkmn.family(ancestors=False)) for pkmn in inventory)) + inventory groups = ub.group_items(candidates, key=lambda p: p.name) leages = { 'master': { 'max_cp': float('inf') }, 'ultra': { 'max_cp': 2500 }, 'great': { 'max_cp': 1500 }, 'little': { 'max_cp': 500 }, } max_level = 45 # for XL candy 
max_level = 40 # normal for name, group in groups.items(): print('\n\n------------\n\n') print('name = {!r}'.format(name)) for leage_name, leage_filters in leages.items(): max_cp = leage_filters['max_cp'] print('') print(' ========== ') print(' --- {} in {} --- '.format(name, leage_name)) not_eligible = [ p for p in group if p.cp is not None and p.cp > max_cp ] print('not_eligible = {!r}'.format(not_eligible)) have_ivs = [p.ivs for p in group if p.cp is None or p.cp <= max_cp] if len(have_ivs) > 0: first = ub.peek(group) first.leage_rankings_for(have_ivs, max_cp=max_cp, max_level=max_level) else: print('none eligable')