def load_image_with_annots(self, image_id=None, rng=None):
    """ Returns a random image and its annotations """
    if image_id is not None and self.seed is not None:
        rng = kwarray.ensure_rng(self.seed * len(self) + image_id)
    rng = kwarray.ensure_rng(rng)
    img, anns = demodata_toy_img(gsize=self._full_imgsize,
                                 categories=self.catpats,
                                 gray=self.gray,
                                 rng=rng, n_annots=(0, 10))
    _node_to_id = self.catgraph.node_to_id
    img['id'] = int(rng.rand() * self._n_images)
    img['file_name'] = '{}.png'.format(img['id'])
    for ann in anns:
        ann['category_id'] = _node_to_id[ann['category_name']]
    return img, anns

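# The `self.seed * len(self) + image_id` mixing above gives each image its own
# deterministic random stream. A minimal sketch of the same idea; the names
# `seed`, `n_images`, and `image_id` are stand-ins, not part of the class above.
import kwarray

seed, n_images, image_id = 42, 100, 7
rng_a = kwarray.ensure_rng(seed * n_images + image_id)
rng_b = kwarray.ensure_rng(seed * n_images + image_id)
# The same (seed, image_id) pair always reproduces the same draws
assert rng_a.randint(0, 1000) == rng_b.randint(0, 1000)
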
def get_positive(self, index=None, rng=None):
    """
    Get localization information for a positive region

    Args:
        index (int or None): indexes into the current positive pool,
            or if None returns a random positive
        rng (RandomState): used only if index is None

    Returns:
        Dict: tr: target info dictionary

    Example:
        >>> from ndsampler import coco_sampler
        >>> rng = kwarray.ensure_rng(0)
        >>> self = coco_sampler.CocoSampler.demo().regions
        >>> tr = self.get_positive(0, rng=rng)
        >>> print(ub.repr2(tr, precision=2))
    """
    if index is None:
        rng = kwarray.ensure_rng(rng)
        index = rng.randint(0, self.n_positives)
    if self._pos_select_idxs is not None:
        index = self._pos_select_idxs[index]
    tr = self.targets.iloc[index]
    return tr

def __init__(self, index_to_label, batch_size=1, num_batches='auto',
             quantile=0.5, shuffle=False, rng=None):
    import kwarray
    rng = kwarray.ensure_rng(rng, api='python')
    label_to_indices = kwarray.group_items(
        np.arange(len(index_to_label)), index_to_label)
    label_to_freq = ub.map_vals(len, label_to_indices)
    label_to_subsampler = {
        label: RingSampler(indices, shuffle=shuffle, rng=rng)
        for label, indices in label_to_indices.items()
    }
    self.label_to_freq = label_to_freq
    self.index_to_label = index_to_label
    self.batch_size = batch_size
    self.shuffle = shuffle
    self.rng = rng
    self.label_to_indices = label_to_indices
    self.label_to_subsampler = label_to_subsampler
    if num_batches == 'auto':
        self.num_batches = self._auto_num_batches(quantile)
    else:
        self.num_batches = num_batches
    self.labels = list(self.label_to_indices.keys())

def random(Coords, num=1, dim=2, rng=None, meta=None):
    """
    Makes random coordinates; typically for testing purposes
    """
    rng = kwarray.ensure_rng(rng)
    self = Coords(data=rng.rand(num, dim), meta=meta)
    return self

def random(cls, nesting=(2, 5), rng=None):
    """
    CommandLine:
        xdoctest -m ndsampler.delayed DelayedWarp.random

    Example:
        >>> from ndsampler.delayed import *  # NOQA
        >>> self = DelayedWarp.random(nesting=(4, 7))
        >>> print('self = {!r}'.format(self))
        >>> print(ub.repr2(self.nesting(), nl=-1, sort=0))
    """
    from kwarray.distributions import DiscreteUniform, Uniform
    rng = kwarray.ensure_rng(rng)
    chan_distri = DiscreteUniform.coerce((1, 5), rng=rng)
    nest_distri = DiscreteUniform.coerce(nesting, rng=rng)
    size_distri = DiscreteUniform.coerce((8, 64), rng=rng)
    raw_distri = Uniform(rng=rng)
    leaf_c = chan_distri.sample()
    leaf_w = size_distri.sample()
    leaf_h = size_distri.sample()
    raw = raw_distri.sample(leaf_h, leaf_w, leaf_c)
    layer = raw
    depth = nest_distri.sample()
    for _ in range(depth):
        tf = Affine.random(rng=rng).matrix
        layer = DelayedWarp(layer, tf, dsize='auto')
    self = layer
    return self

def demo(cls, key='img', rng=None, **kwargs):
    """
    Create data for tests

    Example:
        >>> from netharn.data.data_containers import *  # NOQA
        >>> print(ItemContainer.demo('img'))
        >>> print(ItemContainer.demo('labels'))
        >>> print(ItemContainer.demo('box'))
    """
    import kwarray
    rng = kwarray.ensure_rng(rng)
    if key == 'img':
        shape = kwargs.get('shape', (3, 512, 512))
        data = rng.rand(*shape).astype(np.float32)
        data = torch.from_numpy(data)
        self = cls(data, stack=True)
    elif key == 'labels':
        n = rng.randint(0, 10)
        data = rng.randint(0, 10, n)
        data = torch.from_numpy(data)
        self = cls(data, stack=False)
    elif key == 'box':
        n = rng.randint(0, 10)
        data = rng.rand(n, 4)
        data = torch.from_numpy(data)
        self = cls(data, stack=False)
    else:
        raise KeyError(key)
    return self

def image_variations(image_basis):
    """
    Helper to make several variations of image inputs for opencv with
    different dtypes etc.
    """
    rng = kwarray.ensure_rng(0)
    if image_basis is None:
        image_basis = {
            'dims': [(32, 32), (37, 41), (53, 31)],
            'channels': [None, 1, 3, 4, 20, 1024],
            'dtype': ['uint8', 'int64', 'float32', 'float64'],
        }
    # TODO: how to specify conditionals?
    # conditionals = {
    #     np.uint8
    # }
    for imgkw in list(basis_product(image_basis)):
        if imgkw['channels'] is None:
            shape = imgkw['dims']
        else:
            shape = imgkw['dims'] + (imgkw['channels'], )
        dtype = np.dtype(imgkw['dtype'])
        img = rng.rand(*shape)
        if dtype.kind in {'i', 'u'}:
            img = img * 255
        img = img.astype(dtype)
        yield imgkw, img

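# A minimal usage sketch for the generator above, assuming the `basis_product`
# grid helper it relies on is importable from the same module:
import numpy as np

for imgkw, img in image_variations(None):
    # every yielded image matches the dims/dtype recorded in its keyword dict
    assert img.dtype == np.dtype(imgkw['dtype'])
    assert img.shape[:2] == imgkw['dims']
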
def __init__(self, size=4, border=1, n=100, rng=None):
    import kwarray
    rng = kwarray.ensure_rng(rng)
    h = w = size
    whiteish = 1 - (np.abs(rng.randn(n, 1, h, w) / 4) % 1)
    blackish = (np.abs(rng.randn(n, 1, h, w) / 4) % 1)
    fw = border
    slices = [slice(None, fw), slice(-fw, None)]

    # class 0 is white block inside a black frame
    data1 = whiteish.copy()
    for sl1, sl2 in it.product(slices, slices):
        data1[..., sl1, :] = blackish[..., sl1, :]
        data1[..., :, sl2] = blackish[..., :, sl2]

    # class 1 is black block inside a white frame
    data2 = blackish.copy()
    for sl1, sl2 in it.product(slices, slices):
        data2[..., sl1, :] = whiteish[..., sl1, :]
        data2[..., :, sl2] = whiteish[..., :, sl2]

    self.data = np.concatenate([data1, data2], axis=0)
    self.labels = np.array(([0] * n) + ([1] * n))

    suffix = ub.hash_data([size, border, n, rng],
                          base='abc', hasher='sha1')[0:16]
    self.input_id = 'TD2D_{}_'.format(n) + suffix

def demo(cls, n=10, p_true=0.5, p_error=0.2, rng=None):
    """
    Create random data for tests

    Example:
        >>> cfsn = BinaryConfusionVectors.demo(n=1000, p_error=0.1)
        >>> print(cfsn.data._pandas())
        >>> roc_info = cfsn.roc()
        >>> pr_info = cfsn.precision_recall()
        >>> print('roc_info = {!r}'.format(roc_info))
        >>> print('pr_info = {!r}'.format(pr_info))
        >>> # xdoctest: +REQUIRES(--show)
        >>> import kwplot
        >>> kwplot.autompl()
        >>> kwplot.figure(fnum=1, pnum=(1, 2, 1))
        >>> pr_info.draw()
        >>> kwplot.figure(fnum=1, pnum=(1, 2, 2))
        >>> roc_info.draw()
    """
    import kwarray
    rng = kwarray.ensure_rng(rng)
    score = rng.rand(n)
    data = kwarray.DataFrameArray({
        'is_true': (score > p_true).astype(np.uint8),
        'pred_score': score,
    })
    flags = rng.rand(n) < p_error
    data['is_true'][flags] = 1 - data['is_true'][flags]
    classes = ['c1', 'c2', 'c3']
    self = cls(data, cx=1, classes=classes)
    return self

def _demo_probs(self, num=5, rng=0, nonrandom=3, hackargmax=True):
    """ dummy probabilities for testing """
    import torch
    rng = kwarray.ensure_rng(rng)
    class_energy = torch.FloatTensor(rng.rand(num, len(self)))

    # Setup the first few examples to prefer being classified
    # as a fine grained class to a decreasing degree.
    # The first example is set to have equal energy.
    # The i + 2-th example is set to have an extremely high energy.
    start = 0
    nonrandom = min(nonrandom, (num - start))
    if nonrandom > 0:
        path = sorted(ub.take(self.node_to_idx, nx.dag_longest_path(self.graph)))

        class_energy[start] = 1 / len(class_energy[start])
        if hackargmax:
            # HACK: even though we want to test uniform distributions, it
            # makes regression tests difficult because torch and numpy return
            # a different argmax when the array has more than one max value.
            # Add a VERY small epsilon to make max values distinct.
            class_energy[start] += torch.linspace(0, .00001, len(class_energy[start]))

        if nonrandom > 1:
            for i in range(nonrandom - 2):
                class_energy[start + i + 1][path] += 2 ** (i / 4)
            class_energy[start + i + 2][path] += 2 ** 20

    class_probs = self.hierarchical_softmax(class_energy, dim=1)
    return class_probs

def _demo_item(self, dims=(4, 4), rng=None):
    """
    Create an input that satisfies this spec

    Returns:
        dict: an item like it might appear when it is returned from the
            `__getitem__` method of a :class:`torch...Dataset`.

    Example:
        >>> dims = (1, 1)
        >>> ChannelSpec.coerce(3)._demo_item(dims, rng=0)
        >>> ChannelSpec.coerce('r|g|b|disparity')._demo_item(dims, rng=0)
        >>> ChannelSpec.coerce('rgb|disparity')._demo_item(dims, rng=0)
        >>> ChannelSpec.coerce('rgb,disparity')._demo_item(dims, rng=0)
        >>> ChannelSpec.coerce('rgb')._demo_item(dims, rng=0)
        >>> ChannelSpec.coerce('gray')._demo_item(dims, rng=0)
    """
    import torch
    import kwarray
    rng = kwarray.ensure_rng(rng)
    item_shapes = self._item_shapes(dims)
    item = {
        key: torch.from_numpy(rng.rand(*shape))
        for key, shape in item_shapes.items()
    }
    return item

def worker_init_fn(worker_id):
    for i in range(worker_id + 1):
        seed = np.random.randint(0, int(2 ** 32) - 1)
    seed = seed + worker_id
    kwarray.seed_global(seed)
    if self.augmenter:
        rng = kwarray.ensure_rng(None)
        self.augmenter.seed_(rng)

def load_negative(self, index=None, pad=None, window_dims=None, rng=None):
    if index is not None and self.seed is not None:
        rng = kwarray.ensure_rng(self.seed * len(self) + index)
    sample = self._load_toy_sample(window_dims, pad, rng,
                                   centerobj='neg',
                                   n_annots=self._n_annots_neg)
    return sample

def __init__(self, items, shuffle=False, rng=None):
    import kwarray
    if len(items) == 0:
        raise Exception('no items to sample')
    self.rng = kwarray.ensure_rng(rng)
    self.items = np.array(items)
    self.shuffle = shuffle
    self.indices = np.arange(len(items))
    self._pos = None
    self.refresh()

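# Hedged usage sketch: based on the earlier batch-sampler snippet, this
# constructor appears to belong to `RingSampler`; the class name and the
# behavior of `refresh()` are assumptions here, not confirmed by this snippet.
sampler = RingSampler(['a', 'b', 'c', 'd'], shuffle=True, rng=0)
print(sampler.items)    # the input items stored as a numpy array
print(sampler.indices)  # indices into items; refresh() presumably reorders these when shuffle=True
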
def random_ordered_tree(n, seed=None, pool=None):
    import kwarray
    rng = kwarray.ensure_rng(seed, 'python')
    tree = nx.dfs_tree(nx.random_tree(n, seed=seed))
    otree = nx.OrderedDiGraph()
    otree.add_edges_from(tree.edges)
    if pool is not None:
        for node in otree.nodes:
            otree.nodes[node]['label'] = rng.choice(pool)
    return otree

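# Minimal usage sketch, assuming an older networkx release that still provides
# OrderedDiGraph and random_tree (both removed in newer versions):
import networkx as nx

otree = random_ordered_tree(5, seed=0, pool=['a', 'b', 'c'])
print(list(otree.edges))
print(nx.get_node_attributes(otree, 'label'))
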
def random(cls, shape=None, rng=None):
    import kwarray
    rng = kwarray.ensure_rng(rng)
    self = cls(None)
    if isinstance(shape, int):
        shape = (shape, shape)
    if shape is None:
        shape = self.shape
    self.matrix = rng.rand(*shape)
    return self

def worker_init_fn(worker_id, augmenter=None):
    for i in range(worker_id + 1):
        seed = np.random.randint(0, int(2 ** 31) - 1)
    seed = seed + worker_id
    kwarray.seed_global(seed)

    worker_info = torch.utils.data.get_worker_info()
    self = worker_info.dataset
    if self.augmenter:
        rng = kwarray.ensure_rng(None)
        self.augmenter.seed_(rng)

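# Usage sketch: a DataLoader calls worker_init_fn(worker_id) in each worker
# process, so every worker gets a distinct global seed and augmenter state.
# `MyDataset` is a hypothetical dataset class with an `augmenter` attribute.
import torch

loader = torch.utils.data.DataLoader(
    MyDataset(), batch_size=4, num_workers=4,
    worker_init_fn=worker_init_fn)
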
def test_class_torch():
    import numpy as np
    import torch
    import ubelt as ub
    import kwarray
    import kwimage

    thresh = .5
    num = 500
    rng = kwarray.ensure_rng(0)
    cpu_boxes = kwimage.Boxes.random(num, scale=400.0, rng=rng,
                                     format='tlbr', tensor=True)
    cpu_tlbr = cpu_boxes.to_tlbr().data
    # cpu_scores = torch.Tensor(rng.rand(len(cpu_tlbr)))
    # make all scores unique to ensure comparability
    cpu_scores = torch.Tensor(np.linspace(0, 1, len(cpu_tlbr)))
    cpu_cls = torch.LongTensor(rng.randint(0, 10, len(cpu_tlbr)))

    tlbr = cpu_boxes.to_tlbr().data.to('cuda')
    scores = cpu_scores.to('cuda')
    classes = cpu_cls.to('cuda')

    keep1 = []
    for idxs in ub.group_items(range(len(classes)), classes.cpu().numpy()).values():
        # cls_tlbr = tlbr.take(idxs, axis=0)
        # cls_scores = scores.take(idxs, axis=0)
        cls_tlbr = tlbr[idxs]
        cls_scores = scores[idxs]
        cls_keep = torch_nms(cls_tlbr, cls_scores, thresh=thresh, bias=0)
        keep1.extend(list(ub.compress(idxs, cls_keep.cpu().numpy())))
    keep1 = sorted(keep1)

    keep_ = torch_nms(tlbr, scores, classes=classes, thresh=thresh, bias=0)
    keep2 = np.where(keep_.cpu().numpy())[0].tolist()

    keep3 = kwimage.non_max_supression(tlbr.cpu().numpy(),
                                       scores.cpu().numpy(),
                                       classes=classes.cpu().numpy(),
                                       thresh=thresh, bias=0, impl='gpu')

    print(len(keep1))
    print(len(keep2))
    print(len(keep3))

    print(set(keep1) - set(keep2))
    print(set(keep2) - set(keep1))

def random(self, n=3, rng=None, tight=False):
    """
    Create a random MultiPolygon

    Returns:
        MultiPolygon
    """
    import kwarray
    rng = kwarray.ensure_rng(rng)
    data = [Polygon.random(rng=rng, tight=tight) for _ in range(n)]
    self = MultiPolygon(data)
    return self

def random(cls, size=None, min=None, max=None, rng=None):
    min = np.iinfo(int).min if min is None else min
    max = np.iinfo(int).max if max is None else max
    rng = ensure_rng(rng)
    if size is None:
        n, d = map(int, rng.randint(min, max, size=2))
        return cls(n, d)
    else:
        items = np.empty(size, dtype=Rational)
        ns, ds = rng.randint(min, max, size=(2, items.size))
        items.ravel()[:] = [Rational(int(n), int(d))
                            for n, d in zip(ns, ds)]
        return items

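# Hedged usage sketch for the classmethod above (`Rational` is the enclosing
# class, which this snippet does not define; positive bounds are used so the
# random denominator cannot be zero):
r = Rational.random(min=1, max=10, rng=0)
print(r)
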
def get_item(self, index, rng=None):
    """
    Loads from positives and then negatives.
    """
    if index is None:
        rng = kwarray.ensure_rng(rng)
        index = rng.randint(0, self.n_samples)

    if index < self.n_positives:
        sample = self.get_positive(index, rng=rng)
    else:
        index = index - self.n_positives
        sample = self.get_negative(index, rng=rng)
    return sample

def select_positive_regions(targets, window_dims=(300, 300), thresh=0.0,
                            rng=None, verbose=0):
    """
    Reduce positive example redundancy by selecting disparate positive samples

    Example:
        >>> from ndsampler.coco_regions import *
        >>> import kwcoco
        >>> dset = kwcoco.CocoDataset.demo('shapes8')
        >>> targets = tabular_coco_targets(dset)
        >>> window_dims = (300, 300)
        >>> selected = select_positive_regions(targets, window_dims)
        >>> print(len(selected))
        >>> print(len(dset.anns))
    """
    unique_gids, groupxs = kwarray.group_indices(targets['gid'])
    gid_to_groupx = dict(zip(unique_gids, groupxs))
    wh, ww = window_dims
    rng = kwarray.ensure_rng(rng)

    selection = []

    # Get all the bounding boxes
    cxs, cys = ub.take(targets, ['cx', 'cy'])
    n = len(targets)
    cxs = cxs.astype(np.float32)
    cys = cys.astype(np.float32)
    wws = np.full(n, ww, dtype=np.float32)
    whs = np.full(n, wh, dtype=np.float32)
    cxywh = np.hstack([a[:, None] for a in [cxs, cys, wws, whs]])
    boxes = kwimage.Boxes(cxywh, 'cxywh').to_tlbr()

    iter_ = ub.ProgIter(gid_to_groupx.items(), enabled=verbose,
                        label='select positive regions',
                        total=len(gid_to_groupx), adjust=0, freq=32)
    for gid, groupx in iter_:
        # Select all candidate windows in this image
        cand_windows = boxes.take(groupx, axis=0)
        # Randomize which candidate windows have the highest scores so the
        # selection can vary each epoch.
        cand_scores = rng.rand(len(cand_windows))
        cand_dets = kwimage.Detections(boxes=cand_windows, scores=cand_scores)
        # Non-max suppression is really similar to set-cover
        keep = cand_dets.non_max_supression(thresh=thresh)
        selection.extend(groupx[keep])

    selection = np.array(sorted(selection))
    return selection

def main(cls, cmdline=True, **kw):
    """
    Example:
        >>> kw = {'src': 'special:shapes8',
        >>>       'dst1': 'train.json', 'dst2': 'test.json'}
        >>> cmdline = False
        >>> cls = CocoSplitCLI
        >>> cls.main(cmdline, **kw)
    """
    import kwcoco
    import kwarray
    from kwcoco.util import util_sklearn

    config = cls.CLIConfig(kw, cmdline=cmdline)
    print('config = {}'.format(ub.repr2(dict(config), nl=1)))

    if config['src'] is None:
        raise Exception('must specify source: {}'.format(config['src']))

    print('reading fpath = {!r}'.format(config['src']))
    dset = kwcoco.CocoDataset.coerce(config['src'])
    annots = dset.annots()
    gids = annots.gids
    cids = annots.cids

    # Balanced category split
    rng = kwarray.ensure_rng(config['rng'])
    shuffle = rng is not None
    self = util_sklearn.StratifiedGroupKFold(n_splits=config['factor'],
                                             random_state=rng,
                                             shuffle=shuffle)
    split_idxs = list(self.split(X=gids, y=cids, groups=gids))
    idxs1, idxs2 = split_idxs[0]

    gids1 = sorted(ub.unique(ub.take(gids, idxs1)))
    gids2 = sorted(ub.unique(ub.take(gids, idxs2)))

    dset1 = dset.subset(gids1)
    dset2 = dset.subset(gids2)

    dset1.fpath = config['dst1']
    print('Writing dset1 = {!r}'.format(dset1.fpath))
    dset1.dump(dset1.fpath, newlines=True)

    dset2.fpath = config['dst2']
    print('Writing dset2 = {!r}'.format(dset2.fpath))
    dset2.dump(dset2.fpath, newlines=True)

def _stratified_split(gids, cids, n_splits=2, rng=None):
    """ helper to split while trying to maintain class balance within images """
    rng = kwarray.ensure_rng(rng)
    from ndsampler.utils import util_sklearn
    selector = util_sklearn.StratifiedGroupKFold(n_splits=n_splits,
                                                 random_state=rng,
                                                 shuffle=True)

    # from sklearn import model_selection
    # selector = model_selection.StratifiedKFold(
    #     n_splits=n_splits, random_state=rng, shuffle=True)

    skf_list = list(selector.split(X=gids, y=cids, groups=gids))
    trainx, testx = skf_list[0]

    if 0:
        _train_gids = set(ub.take(gids, trainx))
        _test_gids = set(ub.take(gids, testx))
        print('_train_gids = {!r}'.format(_train_gids))
        print('_test_gids = {!r}'.format(_test_gids))
    return trainx, testx

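# Hedged usage sketch: `gids` group annotations by image and `cids` are their
# category ids; both arrays here are made-up stand-ins for real dataset ids.
import numpy as np

gids = np.array([1, 1, 2, 2, 3, 3, 4, 4])
cids = np.array([0, 1, 0, 1, 0, 1, 0, 1])
trainx, testx = _stratified_split(gids, cids, n_splits=2, rng=0)
# each image (group) ends up entirely in one split or the other
print(sorted(set(gids[trainx])), sorted(set(gids[testx])))
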
def random(cls, rng=None):
    """
    Example:
        >>> self = Segmentation.random()
        >>> print('self = {!r}'.format(self))
        >>> # xdoc: +REQUIRES(--show)
        >>> import kwplot
        >>> kwplot.autompl()
        >>> kwplot.figure(fnum=1, doclf=True)
        >>> self.draw()
        >>> kwplot.show_if_requested()
    """
    import kwarray
    import kwimage
    rng = kwarray.ensure_rng(rng)
    if rng.rand() > 0.5:
        data = kwimage.Polygon.random()
    else:
        data = kwimage.Mask.random()
    return cls.coerce(data)

def __init__(self, num, dims=2, rng=None, **kwargs):
    self.rng = kwarray.ensure_rng(rng)
    self.num = num
    self.dims = dims
    # self.config = {
    #     'perception_thresh': 0.08,
    #     'max_speed': 0.005,
    #     'max_force': 0.0003,
    # }
    self.config = {
        'perception_thresh': 0.2,
        'max_speed': 0.01,
        'max_force': 0.001,
        'damping': 0.99,
    }
    self.config.update(ub.dict_isect(kwargs, self.config))
    self.pos = None
    self.vel = None
    self.acc = None

def random(cls, num_parts=3, rng=None):
    """
    CommandLine:
        xdoctest -m ndsampler.delayed DelayedChannelConcat.random

    Example:
        >>> self = DelayedChannelConcat.random()
        >>> print('self = {!r}'.format(self))
        >>> print(ub.repr2(self.nesting(), nl=-1, sort=0))
    """
    rng = kwarray.ensure_rng(rng)
    self_w = rng.randint(8, 64)
    self_h = rng.randint(8, 64)
    components = []
    for _ in range(num_parts):
        subcomp = DelayedWarp.random(rng=rng)
        tf = Affine.random(rng=rng).matrix
        comp = subcomp.delayed_warp(tf, dsize=(self_w, self_h))
        components.append(comp)
    self = DelayedChannelConcat(components)
    return self

def random(Points, num=1, classes=None, rng=None):
    """
    Makes random points; typically for testing purposes

    Example:
        >>> import kwimage
        >>> self = kwimage.Points.random(classes=[1, 2, 3])
        >>> self.data
        >>> print('self.data = {!r}'.format(self.data))
    """
    rng = kwarray.ensure_rng(rng)
    if ub.iterable(num):
        shape = tuple(num) + (2,)
    else:
        shape = (num, 2)
    self = Points(xy=rng.rand(*shape))
    self.data['visible'] = np.full(len(self), fill_value=2)
    if classes is not None:
        class_idxs = (rng.rand(len(self)) * len(classes)).astype(int)
        self.data['class_idxs'] = class_idxs
        self.meta['classes'] = classes
    return self

def _gen_cluttered_func(n=100):
    lines = []
    import ubelt as ub
    import kwarray
    rng = kwarray.ensure_rng(0)
    varnames = []
    for i in range(n):
        mode = rng.choice(['int', 'float', 'str'])
        if mode == 'int':
            value = rng.randint(0, 100000)
        if mode == 'str':
            value = ub.hash_data(rng.randint(0, 100000))[0:10]
        if mode == 'float':
            value = rng.randn() * 1000
        varname = 'var{:03d}'.format(i)
        line = '{} = {!r}'.format(varname, value)
        lines.append(line)
        varnames.append(varname)
    clutter_vars = ub.indent('\n'.join(lines))

    template = ub.codeblock(
        '''
        def {FUNCNAME}():
        {CLUTTER}
            ignore_inf_loss_parts = d['ignore_inf_loss_parts']
            for i in range(num_inner_loops):
                if ignore_inf_loss_parts:
                    pass
            # return {RETVAL}
        ''')

    retval = '[{}]'.format(','.join(varnames))
    funcname = 'clutter_{}'.format(n)
    text = template.format(FUNCNAME=funcname, CLUTTER=clutter_vars,
                           RETVAL=retval)
    return text, funcname

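# Hedged usage sketch: the generated source can be exec'd to define the
# function, but its body references free names (`d`, `num_inner_loops`), so it
# is only meant to be compiled and inspected here, not called as-is.
text, funcname = _gen_cluttered_func(n=10)
ns = {}
exec(compile(text, '<generated>', 'exec'), ns)
print(funcname, callable(ns[funcname]))
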
def _make_intmask_demodata(rng=None):
    """
    Creates demo data to test the script
    """
    import kwarray
    rng = kwarray.ensure_rng(rng)  # seeded random number generator
    dpath = ub.ensure_app_cache_dir('kwcoco/tests/masks')
    shape = (128, 128)
    num_masks = 10

    def _random_multi_obj_mask(shape, rng):
        """
        Create random int mask objects that can contain multiple objects.
        Each object is a different positive integer

        Ignore:
            kwplot.imshow(kwimage.normalize(data))
            kwplot.imshow(data)
        """
        num_objects = rng.randint(0, 5)
        data = np.zeros(shape, dtype=np.uint8)
        for obj_idx in range(0, num_objects + 1):
            # Make a binary mask and add it as a new object
            binmask = kwimage.Mask.random(shape=shape, rng=rng).data
            data[binmask > 0] = obj_idx
        return data

    fpaths = [
        join(dpath, 'mask_{:04d}.png'.format(mask_idx))
        for mask_idx in range(num_masks)
    ]
    for fpath in fpaths:
        data = _random_multi_obj_mask(shape, rng=rng)
        kwimage.imwrite(fpath, data)
    return dpath

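# Minimal usage sketch, assuming the module-level imports used above
# (ub, np, kwimage, and os.path.join) are available: this writes the demo
# integer-mask PNGs into the kwcoco test cache and lists what was produced.
import os

dpath = _make_intmask_demodata(rng=0)
print(sorted(os.listdir(dpath)))
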