def setup_sampler(config):
    """
    Build train/vali CocoSamplers for the configured database.

    Args:
        config (dict): must contain 'dbname'; 'ggr2' and 'ggr2-revised'
            are the supported values here.

    Returns:
        Tuple[Dict[str, ndsampler.CocoSampler], str]:
            samplers keyed by 'train' / 'vali', and the working directory.

    Raises:
        KeyError: if config['dbname'] is not a known dataset name.
    """
    workdir = nh.configure_workdir(
        config, workdir=join('~/work/siam-ibeis2', config['dbname']))

    # TODO: cleanup and hook into ibeis AI
    if config['dbname'] == 'ggr2':
        print('Creating torch CocoDataset')
        root = ub.expandpath('~/data/')
        print('root = {!r}'.format(root))
        train_dset = ndsampler.CocoDataset(
            data=join(root, 'ggr2-coco/annotations/instances_train2018.json'),
            img_root=join(root, 'ggr2-coco/images/train2018'),
        )
        train_dset.hashid = 'ggr2-coco-train2018'
        vali_dset = ndsampler.CocoDataset(
            data=join(root, 'ggr2-coco/annotations/instances_val2018.json'),
            img_root=join(root, 'ggr2-coco/images/val2018'),
        )
        vali_dset.hashid = 'ggr2-coco-val2018'
        print('Creating samplers')
        samplers = {
            'train': ndsampler.CocoSampler(train_dset, workdir=workdir),
            'vali': ndsampler.CocoSampler(vali_dset, workdir=workdir),
        }
    # BUGFIX: this branch was a separate `if`, so the trailing `else`
    # raised KeyError even when dbname == 'ggr2' had already built samplers.
    elif config['dbname'] == 'ggr2-revised':
        print('Creating torch CocoDataset')
        root = ub.expandpath('~/data/ggr2.coco.revised')
        print('root = {!r}'.format(root))
        train_dset = ndsampler.CocoDataset(
            data=join(root, 'annotations/instances_train2019.json'),
            img_root=join(root, 'images/train2019'),
        )
        train_dset.hashid = 'ggr2-coco-revised-train2019'
        vali_dset = ndsampler.CocoDataset(
            data=join(root, 'annotations/instances_val2019.json'),
            img_root=join(root, 'images/val2019'),
        )
        vali_dset.hashid = 'ggr2-coco-revised-val2019'
        print('Creating samplers')
        samplers = {
            'train': ndsampler.CocoSampler(train_dset, workdir=workdir),
            'vali': ndsampler.CocoSampler(vali_dset, workdir=workdir),
        }
    else:
        raise KeyError(config['dbname'])
    return samplers, workdir
def _to_coco(dmet):
    """ Convert to a coco representation of truth and predictions """
    import ndsampler
    true = ndsampler.CocoDataset()
    pred = ndsampler.CocoDataset()

    # Mirror the class hierarchy into both datasets.
    for node in dmet.classes:
        cid = dmet.classes.index(node)
        parents = list(dmet.classes.graph.pred[node])
        if not parents:
            supercategory = None
        else:
            assert len(parents) == 1
            supercategory = parents[0]
        for coco_dset in (true, pred):
            coco_dset.add_category(node, id=cid, supercategory=supercategory)

    # Register every image in both datasets with a shared gid.
    for imgname, gid in dmet._imgname_to_gid.items():
        true.add_image(imgname, id=gid)
        pred.add_image(imgname, id=gid)

    # Map class indexes to coco category ids.
    idx_to_id = {
        idx: dmet.classes.index(node)
        for idx, node in enumerate(dmet.classes.idx_to_node)
    }

    # Transfer predicted detections (default score is 1.0 when absent).
    for gid, pred_dets in dmet.gid_to_pred_dets.items():
        if 'scores' in pred_dets.data:
            scores = pred_dets.scores
        else:
            scores = np.ones(len(pred_dets))
        cids = list(ub.take(idx_to_id, pred_dets.class_idxs))
        xywh_list = pred_dets.boxes.to_xywh().data.tolist()
        for bbox, cid, score in zip(xywh_list, cids, scores):
            pred.add_annotation(gid, cid, bbox=bbox, score=score)

    # Transfer true detections (default weight is 1.0 when absent).
    for gid, true_dets in dmet.gid_to_true_dets.items():
        boxes = true_dets.boxes
        if 'weights' in true_dets.data:
            weights = true_dets.weights
        else:
            weights = np.ones(len(boxes))
        cids = list(ub.take(idx_to_id, true_dets.class_idxs))
        xywh_list = boxes.to_xywh().data.tolist()
        for bbox, cid, weight in zip(xywh_list, cids, weights):
            true.add_annotation(gid, cid, bbox=bbox, weight=weight)

    return pred, true
def parse_mscoco():
    """
    Smoke-test that our CocoDataset implementation can handle the real
    mscoco data (downloads images referenced by the val2014 annotations).
    """
    root = ub.expandpath('~/data/standard_datasets/mscoco/')
    fpath = join(root, 'annotations/instances_val2014.json')
    img_root = normpath(ub.ensuredir((root, 'images', 'val2014')))
    # fpath = join(root, 'annotations/stuff_val2017.json')
    # img_root = normpath(ub.ensuredir((root, 'images', 'val2017')))

    import ujson
    # BUGFIX: close the annotation file instead of leaking the handle
    # (previously passed open(fpath, 'rb') directly to ujson.load).
    with open(fpath, 'rb') as file:
        dataset = ujson.load(file)

    import ndsampler
    dset = ndsampler.CocoDataset(dataset)
    dset.img_root = img_root

    gid_iter = iter(dset.imgs.keys())
    gid = ub.peek(gid_iter)

    # Download every image referenced by the annotations
    for gid in ub.ProgIter(gid_iter):
        img = dset.imgs[gid]
        ub.grabdata(img['coco_url'], dpath=img_root, verbose=0)
        anns = [dset.anns[aid] for aid in dset.gid_to_aids[gid]]
        dset.show_image(gid=gid)

    # Inspect the last image/annotation as a sanity check
    # NOTE(review): the original (whitespace-mangled) source is ambiguous
    # about whether this inspection ran inside the loop; placed after it.
    ann = anns[0]
    segmentation = ann['segmentation']  # NOQA

    from PIL import Image
    gpath = join(dset.img_root, img['file_name'])
    with Image.open(gpath) as pil_img:
        np_img = np.array(pil_img)  # NOQA
def demo(WindowedSamplerDataset, key='habcam', **kwargs):
    """
    Build a WindowedSamplerDataset around either the local habcam
    validation split or one of ndsampler's named demo samplers.
    """
    import ndsampler
    if key == 'habcam':
        # Local habcam validation data; requires the file on disk.
        dset_fpath = ub.expandpath('~/data/noaa/Habcam_2015_g027250_a00102917_c0001_v2_vali.mscoco.json')
        workdir = ub.expandpath('~/work/bioharn')
        coco_dset = ndsampler.CocoDataset(dset_fpath)
        sampler = ndsampler.CocoSampler(coco_dset, workdir=workdir,
                                        backend=None)
    else:
        sampler = ndsampler.CocoSampler.demo(key)
    return WindowedSamplerDataset(sampler, **kwargs)
def detect_cli(config=None):
    """
    Run detection over a coco dataset (or raw image paths) and dump the
    predictions as mscoco json, optionally drawing visualizations.

    Args:
        config (dict | None): overrides for DetectPredictCLIConfig;
            defaults to an empty dict (cmdline args are also parsed).

    CommandLine:
        python -m bioharn.detect_predict --help

    CommandLine:
        python -m bioharn.detect_predict \
            --dataset=~/data/noaa/Habcam_2015_g027250_a00102917_c0001_v2_test.mscoco.json \
            --deployed=/home/joncrall/work/bioharn/fit/runs/bioharn-det-v11-test-cascade/myovdqvi/deploy_MM_CascadeRCNN_myovdqvi_035_MVKVVR.zip \
            --out_dpath=~/work/bioharn/habcam_test_out \
            --draw=100 \
            --input_dims=512,512 \
            --xpu=0 --batch_size=1

    Ignore:
        >>> config = {}
        >>> config['dataset'] = '~/data/noaa/Habcam_2015_g027250_a00102917_c0001_v2_vali.mscoco.json'
        >>> config['deployed'] = '/home/joncrall/work/bioharn/fit/runs/bioharn-det-v11-test-cascade/myovdqvi/deploy_MM_CascadeRCNN_myovdqvi_035_MVKVVR.zip'
        >>> config['out_dpath'] = 'out'
    """
    import kwarray
    import ndsampler
    from os.path import basename, join, exists, isfile, isdir  # NOQA

    # BUGFIX: `config={}` was a mutable default argument shared across
    # calls; use None as the sentinel instead.
    if config is None:
        config = {}

    config = DetectPredictCLIConfig(config, cmdline=True)
    print('config = {}'.format(ub.repr2(config.asdict())))

    out_dpath = ub.expandpath(config.get('out_dpath'))

    import six
    if isinstance(config['dataset'], six.string_types):
        if config['dataset'].endswith('.json'):
            # Existing coco dataset case
            dataset_fpath = ub.expandpath(config['dataset'])
            coco_dset = ndsampler.CocoDataset(dataset_fpath)
            # Running prediction is much faster if you can build a sampler.
            sampler_backend = {
                'type': 'cog',
                'config': {
                    'compress': 'JPEG',
                },
                '_hack_old_names': False,  # flip to true to use legacy caches
            }
            sampler_backend = None
            print('coco hashid = {}'.format(coco_dset._build_hashid()))
        else:
            sampler_backend = None
            if exists(config['dataset']) and isfile(config['dataset']):
                # Single image case
                image_fpath = ub.expandpath(config['dataset'])
                coco_dset = ndsampler.CocoDataset()
                coco_dset.add_image(image_fpath)
    elif isinstance(config['dataset'], list):
        # Multiple image case
        gpaths = config['dataset']
        gpaths = [ub.expandpath(g) for g in gpaths]
        coco_dset = ndsampler.CocoDataset()
        for gpath in gpaths:
            coco_dset.add_image(gpath)
    else:
        raise TypeError(config['dataset'])

    draw = config.get('draw')
    workdir = ub.expandpath(config.get('workdir'))

    det_outdir = ub.ensuredir((out_dpath, 'pred'))

    pred_config = ub.dict_subset(config, DetectPredictConfig.default)

    print('Create sampler')
    sampler = ndsampler.CocoSampler(coco_dset, workdir=workdir,
                                    backend=sampler_backend)
    print('prepare frames')
    sampler.frames.prepare(workers=config['workers'])

    print('Create predictor')
    predictor = DetectPredictor(pred_config)
    print('Ensure model')
    predictor._ensure_model()

    # Start from the input dataset but with no annotations; predictions
    # are accumulated into this copy.
    pred_dataset = coco_dset.dataset.copy()
    pred_dataset['annotations'] = []
    pred_dset = ndsampler.CocoDataset(pred_dataset)

    # self = predictor
    predictor.config['verbose'] = 1
    pred_gen = predictor.predict_sampler(sampler)
    buffered_gen = AsyncBufferedGenerator(pred_gen, size=coco_dset.n_images)

    gid_to_pred = {}
    prog = ub.ProgIter(buffered_gen, total=coco_dset.n_images,
                       desc='buffered detect')
    for img_idx, (gid, dets) in enumerate(prog):
        gid_to_pred[gid] = dets

        for ann in dets.to_coco():
            ann['image_id'] = gid
            try:
                catname = ann['category_name']
                ann['category_id'] = pred_dset._resolve_to_cid(catname)
            except KeyError:
                if 'category_id' not in ann:
                    # BUGFIX: index ann directly rather than referencing
                    # `catname`, which is unbound (NameError) when the
                    # KeyError came from the 'category_name' lookup itself.
                    cid = pred_dset.add_category(ann['category_name'])
                    ann['category_id'] = cid
            pred_dset.add_annotation(**ann)

        # Dump a per-image detection file as we go
        single_img_coco = pred_dset.subset([gid])
        single_pred_dpath = ub.ensuredir((det_outdir, 'single_image'))
        single_pred_fpath = join(
            single_pred_dpath,
            'detections_gid_{:08d}.mscoco.json'.format(gid))
        single_img_coco.dump(single_pred_fpath, newlines=True)

        # draw=True draws everything; an integer draws the first N images
        if draw is True or (draw and img_idx < draw):
            draw_outdir = ub.ensuredir((out_dpath, 'draw'))
            img_fpath = coco_dset.load_image_fpath(gid)
            gname = basename(img_fpath)
            viz_fname = ub.augpath(gname, prefix='detect_', ext='.jpg')
            viz_fpath = join(draw_outdir, viz_fname)

            image = kwimage.imread(img_fpath)

            # Keep detections above threshold, but always show the top 10
            flags = dets.scores > .2
            flags[kwarray.argmaxima(dets.scores, num=10)] = True
            top_dets = dets.compress(flags)
            toshow = top_dets.draw_on(image, alpha=None)
            # kwplot.imshow(toshow)
            kwimage.imwrite(viz_fpath, toshow, space='rgb')

    pred_fpath = join(det_outdir, 'detections.mscoco.json')
    print('Dump detections to pred_fpath = {!r}'.format(pred_fpath))
    pred_dset.dump(pred_fpath, newlines=True)
def setup_harness(**kwargs):
    """
    Build a MatchingHarness (netharn FitHarn) for siamese GGR matching.

    Kwargs (all optional, shown with defaults):
        nice (str, 'untitled'): run name
        batch_size (int, 6), bstep (int, 1), workers (int, 0)
        decay (float, 0.0005), lr (float, 0.001), dim (int, 416)
        xpu ('argv'): device spec resolved via nh.XPU.cast
        workdir (str | None): defaults to ~/work/siam-ibeis2/<dbname>
        dbname (str, 'ggr2'): 'ggr2' uses hard-coded coco paths; anything
            else falls back to randomized_ibeis_dset

    CommandLine:
        python ~/code/netharn/netharn/examples/ggr_matching.py setup_harness

    Example:
        >>> harn = setup_harness(dbname='PZ_MTEST')
        >>> harn.initialize()
    """
    # Pull hyperparameters out of kwargs, coercing to the expected types
    nice = kwargs.get('nice', 'untitled')
    batch_size = int(kwargs.get('batch_size', 6))
    bstep = int(kwargs.get('bstep', 1))
    workers = int(kwargs.get('workers', 0))
    decay = float(kwargs.get('decay', 0.0005))
    lr = float(kwargs.get('lr', 0.001))
    dim = int(kwargs.get('dim', 416))
    xpu = kwargs.get('xpu', 'argv')
    workdir = kwargs.get('workdir', None)
    dbname = kwargs.get('dbname', 'ggr2')

    if workdir is None:
        workdir = ub.truepath(os.path.join('~/work/siam-ibeis2', dbname))
    ub.ensuredir(workdir)

    if dbname == 'ggr2':
        # NOTE: hard-coded machine-specific data paths
        print('Creating torch CocoDataset')
        train_dset = ndsampler.CocoDataset(
            data='/media/joncrall/raid/data/ggr2-coco/annotations/instances_train2018.json',
            img_root='/media/joncrall/raid/data/ggr2-coco/images/train2018',
        )
        train_dset.hashid = 'ggr2-coco-train2018'
        vali_dset = ndsampler.CocoDataset(
            data='/media/joncrall/raid/data/ggr2-coco/annotations/instances_val2018.json',
            img_root='/media/joncrall/raid/data/ggr2-coco/images/val2018',
        )
        vali_dset.hashid = 'ggr2-coco-val2018'

        print('Creating samplers')
        train_sampler = ndsampler.CocoSampler(train_dset, workdir=workdir)
        vali_sampler = ndsampler.CocoSampler(vali_dset, workdir=workdir)

        print('Creating torch Datasets')
        datasets = {
            'train': MatchingCocoDataset(train_sampler, train_dset, workdir,
                                         dim=dim, augment=True),
            'vali': MatchingCocoDataset(vali_sampler, vali_dset, workdir,
                                        dim=dim),
        }
    else:
        from ibeis_utils import randomized_ibeis_dset
        datasets = randomized_ibeis_dset(dbname, dim=dim)

    for k, v in datasets.items():
        print('* len({}) = {}'.format(k, len(v)))

    if workers > 0:
        # Avoid opencv thread contention with DataLoader worker processes
        import cv2
        cv2.setNumThreads(0)

    loaders = {
        key: torch.utils.data.DataLoader(dset, batch_size=batch_size,
                                         num_workers=workers,
                                         shuffle=(key == 'train'),
                                         pin_memory=True)
        for key, dset in datasets.items()
    }

    xpu = nh.XPU.cast(xpu)

    # Declarative hyperparameter spec consumed by netharn
    hyper = nh.HyperParams(**{
        'nice': nice,
        'workdir': workdir,
        'datasets': datasets,
        'loaders': loaders,
        'xpu': xpu,
        'model': (MatchingNetworkLP, {
            'p': 2,  # L2 distance between embeddings
            'input_shape': (1, 3, dim, dim),
        }),
        'criterion': (nh.criterions.ContrastiveLoss, {
            'margin': 4,
            'weight': None,
        }),
        'optimizer': (torch.optim.SGD, {
            'lr': lr,
            'weight_decay': decay,
            'momentum': 0.9,
            'nesterov': True,
        }),
        'initializer': (nh.initializers.NoOp, {}),
        'scheduler': (nh.schedulers.Exponential, {
            'gamma': 0.99,
            'stepsize': 2,
        }),
        # 'scheduler': (nh.schedulers.ListedLR, {
        #     'points': {
        #         1: lr * 1.0,
        #         19: lr * 1.1,
        #         20: lr * 0.1,
        #     },
        #     'interpolate': True
        # }),
        'monitor': (nh.Monitor, {
            'minimize': ['loss', 'pos_dist', 'brier'],
            'maximize': ['accuracy', 'neg_dist', 'mcc'],
            'patience': 40,
            'max_epoch': 40,
        }),
        # 'augment': datasets['train'].augmenter,
        'dynamics': {
            # Controls how many batches to process before taking a step in the
            # gradient direction. Effectively simulates a batch_size that is
            # `bstep` times bigger.
            'batch_step': bstep,
        },
        'other': {
            'n_classes': 2,
        },
    })
    harn = MatchingHarness(hyper=hyper)
    harn.config['prog_backend'] = 'progiter'
    harn.intervals['log_iter_train'] = 1
    harn.intervals['log_iter_test'] = None
    harn.intervals['log_iter_vali'] = None
    return harn
def eval_detections_cli(**kw):
    """
    Score predicted detections against a truth coco dataset and optionally
    plot MCC*, ROC, PR, and F1 curves.

    Kwargs:
        true (PathLike): path to the truth coco dataset
        pred (PathLike): path to the predicted coco dataset
        out_dpath (PathLike): output directory (default './out')
        cmdline (bool): also parse sys.argv (default True)

    CommandLine:
        xdoctest -m ~/code/netharn/netharn/metrics/detect_metrics.py eval_detections_cli
    """
    import scriptconfig as scfg
    import ndsampler

    class EvalDetectionCLI(scfg.Config):
        default = {
            'true': scfg.Path(None, help='true coco dataset'),
            'pred': scfg.Path(None, help='predicted coco dataset'),
            'out_dpath': scfg.Path('./out', help='output directory')
        }

    config = EvalDetectionCLI()
    cmdline = kw.pop('cmdline', True)
    config.load(kw, cmdline=cmdline)

    true_coco = ndsampler.CocoDataset(config['true'])
    pred_coco = ndsampler.CocoDataset(config['pred'])

    from netharn.metrics.detect_metrics import DetectionMetrics
    dmet = DetectionMetrics.from_coco(true_coco, pred_coco)

    voc_info = dmet.score_voc()

    cls_info = voc_info['perclass'][0]
    tp = cls_info['tp']
    # FIX: `fp` was redundantly assigned twice in the original
    fp = cls_info['fp']
    fn = cls_info['fn']
    tpr = cls_info['tpr']
    ppv = cls_info['ppv']

    # Compute the MCC as TN->inf
    thresh = cls_info['thresholds']

    # https://erotemic.wordpress.com/2019/10/23/closed-form-of-the-mcc-when-tn-inf/
    mcc_lim = tp / (np.sqrt(fn + tp) * np.sqrt(fp + tp))
    f1 = 2 * (ppv * tpr) / (ppv + tpr)

    draw = False
    if draw:
        # Mark the thresholds that maximize MCC* (red) and F1 (black)
        mcc_idx = mcc_lim.argmax()
        f1_idx = f1.argmax()

        import kwplot
        plt = kwplot.autoplt()

        kwplot.multi_plot(
            xdata=thresh, ydata=mcc_lim,
            xlabel='threshold', ylabel='mcc*',
            fnum=1, pnum=(1, 4, 1),
            title='MCC*', color=['blue'],
        )
        plt.plot(thresh[mcc_idx], mcc_lim[mcc_idx], 'r*', markersize=20)
        plt.plot(thresh[f1_idx], mcc_lim[f1_idx], 'k*', markersize=20)

        kwplot.multi_plot(
            xdata=fp, ydata=tpr,
            xlabel='fp (fa)', ylabel='tpr (pd)',
            fnum=1, pnum=(1, 4, 2),
            title='ROC', color=['blue'],
        )
        plt.plot(fp[mcc_idx], tpr[mcc_idx], 'r*', markersize=20)
        plt.plot(fp[f1_idx], tpr[f1_idx], 'k*', markersize=20)

        kwplot.multi_plot(
            xdata=tpr, ydata=ppv,
            xlabel='tpr (recall)', ylabel='ppv (precision)',
            fnum=1, pnum=(1, 4, 3),
            title='PR', color=['blue'],
        )
        plt.plot(tpr[mcc_idx], ppv[mcc_idx], 'r*', markersize=20)
        plt.plot(tpr[f1_idx], ppv[f1_idx], 'k*', markersize=20)

        kwplot.multi_plot(
            xdata=thresh, ydata=f1,
            xlabel='threshold', ylabel='f1',
            fnum=1, pnum=(1, 4, 4),
            title='F1', color=['blue'],
        )
        plt.plot(thresh[mcc_idx], f1[mcc_idx], 'r*', markersize=20)
        plt.plot(thresh[f1_idx], f1[f1_idx], 'k*', markersize=20)
def convert_camvid_raw_to_coco(camvid_raw_info):
    """
    Converts the raw camvid format to an MSCOCO based format, (
    which lets use use ndsampler's COCO backend).

    Args:
        camvid_raw_info (dict): as returned by grab_raw_camvid; must contain
            'dset_root', 'img_paths', 'label_path', and 'mask_paths'.

    Returns:
        ndsampler.CocoDataset: dataset with per-class mask annotations.

    Example:
        >>> # xdoctest: +REQUIRES(--download)
        >>> camvid_raw_info = grab_raw_camvid()
        >>> # test with a reduced set of data
        >>> del camvid_raw_info['img_paths'][2:]
        >>> del camvid_raw_info['mask_paths'][2:]
        >>> dset = convert_camvid_raw_to_coco(camvid_raw_info)
        >>> # xdoctest: +REQUIRES(--show)
        >>> import kwplot
        >>> plt = kwplot.autoplt()
        >>> kwplot.figure(fnum=1, pnum=(1, 2, 1))
        >>> dset.show_image(gid=1)
        >>> kwplot.figure(fnum=1, pnum=(1, 2, 2))
        >>> dset.show_image(gid=2)
    """
    import re
    import kwimage
    import ndsampler
    print('Converting CamVid to MS-COCO format')

    dset_root, img_paths, label_path, mask_paths = ub.take(
        camvid_raw_info,
        'dset_root, img_paths, label_path, mask_paths'.split(', '))

    # Pair each image path with its label-mask path
    img_infos = {
        'img_fname': img_paths,
        'mask_fname': mask_paths,
    }
    keys = list(img_infos.keys())
    next_vals = list(zip(*img_infos.values()))
    image_items = [{k: v for k, v in zip(keys, vals)} for vals in next_vals]

    dataset = {
        'img_root': dset_root,
        'images': [],
        'categories': [],
        'annotations': [],
    }

    # The label file maps "R G B<TAB>name" per line
    lines = ub.readfrom(label_path).split('\n')
    lines = [line for line in lines if line]
    for line in lines:
        color_text, name = re.split('\t+', line)
        r, g, b = map(int, color_text.split(' '))
        color = (r, g, b)

        # Parse the special camvid format: the category id encodes the
        # RGB color as a single 24-bit integer
        cid = (r << 16) + (g << 8) + (b << 0)
        cat = {
            'id': cid,
            'name': name,
            'color': color,
        }
        dataset['categories'].append(cat)

    for gid, img_item in enumerate(image_items, start=1):
        img = {
            'id': gid,
            'file_name': img_item['img_fname'],
            # nonstandard image field
            'segmentation': img_item['mask_fname'],
        }
        dataset['images'].append(img)

    dset = ndsampler.CocoDataset(dataset)
    # 'Void' is black (0, 0, 0) so it maps to category id 0
    dset.rename_categories({'Void': 'background'})

    assert dset.name_to_cat['background']['id'] == 0
    dset.name_to_cat['background'].setdefault('alias', []).append('Void')

    if False:
        _define_camvid_class_hierarcy(dset)

    if 1:
        # TODO: Binarize CCs (and efficiently encode if possible)
        import numpy as np

        bad_info = []
        once = False

        # Add images
        dset.remove_all_annotations()
        for gid, img in ub.ProgIter(dset.imgs.items(),
                                    desc='parse label masks'):
            mask_fpath = join(dset_root, img['segmentation'])

            # Decode the RGB mask into per-pixel category ids
            rgb_mask = kwimage.imread(mask_fpath, space='rgb')
            r, g, b = rgb_mask.T.astype(np.int64)
            cid_mask = np.ascontiguousarray(rgb_to_cid(r, g, b).T)

            # cid 0 is background; skip it
            cids = set(np.unique(cid_mask)) - {0}

            for cid in cids:
                if cid not in dset.cats:
                    if gid == 618:
                        # Handle a known issue with image 618
                        c_mask = (cid == cid_mask).astype(np.uint8)
                        total_bad = c_mask.sum()
                        if total_bad < 32:
                            if not once:
                                print('gid 618 has a few known bad pixels, ignoring them')
                                once = True
                            continue
                        else:
                            raise Exception('more bad pixels than expected')
                    else:
                        raise Exception(
                            'UNKNOWN cid = {!r} in gid={!r}'.format(cid, gid))

                    # bad_rgb = cid_to_rgb(cid)
                    # print('bad_rgb = {!r}'.format(bad_rgb))
                    # print('WARNING UNKNOWN cid = {!r} in gid={!r}'.format(cid, gid))
                    # bad_info.append({
                    #     'gid': gid,
                    #     'cid': cid,
                    # })
                else:
                    # One annotation per (image, category): a binary mask
                    # of all pixels belonging to the category
                    ann = {
                        'category_id': cid,
                        'image_id': gid
                        # 'segmentation': mask.to_coco()
                    }
                    assert cid in dset.cats
                    c_mask = (cid == cid_mask).astype(np.uint8)
                    mask = kwimage.Mask(c_mask, 'c_mask')

                    box = kwimage.Boxes([mask.get_xywh()], 'xywh')
                    # box = mask.to_boxes()

                    ann['bbox'] = ub.peek(box.to_coco())
                    ann['segmentation'] = mask.to_coco()
                    dset.add_annotation(**ann)

    if 0:
        # Debugging helper: inspect the bad colors in image 618
        bad_cids = [i['cid'] for i in bad_info]
        print(sorted([c['color'] for c in dataset['categories']]))
        print(sorted(set([cid_to_rgb(i['cid']) for i in bad_info])))

        gid = 618
        img = dset.imgs[gid]
        mask_fpath = join(dset_root, img['segmentation'])
        rgb_mask = kwimage.imread(mask_fpath, space='rgb')
        r, g, b = rgb_mask.T.astype(np.int64)
        cid_mask = np.ascontiguousarray(rgb_to_cid(r, g, b).T)
        cid_hist = ub.dict_hist(cid_mask.ravel())

        bad_cid_hist = {}
        for cid in bad_cids:
            bad_cid_hist[cid] = cid_hist.pop(cid)

        import kwplot
        kwplot.autompl()
        kwplot.imshow(rgb_mask)

    if 0:
        # Debugging helper: interactively browse the converted dataset
        import kwplot
        plt = kwplot.autoplt()
        plt.clf()
        dset.show_image(1)

        import xdev
        gid_list = list(dset.imgs)
        for gid in xdev.InteractiveIter(gid_list):
            dset.show_image(gid)
            xdev.InteractiveIter.draw()

    dset._build_index()
    dset._build_hashid()
    return dset
def grab_coco_camvid():
    """
    Download camvid, convert it to an MS-COCO json file, and return the
    loaded CocoDataset. Both the conversion and the index build are cached.

    Returns:
        ndsampler.CocoDataset: the camvid dataset with indexes built.

    Example:
        >>> # xdoctest: +REQUIRES(--download)
        >>> dset = grab_coco_camvid()
        >>> print('dset = {!r}'.format(dset))
        >>> # xdoctest: +REQUIRES(--show)
        >>> import kwplot
        >>> plt = kwplot.autoplt()
        >>> plt.clf()
        >>> dset.show_image(gid=1)

    Ignore:
        import xdev
        gid_list = list(dset.imgs)
        for gid in xdev.InteractiveIter(gid_list):
            dset.show_image(gid)
            xdev.InteractiveIter.draw()
    """
    import ndsampler
    cache_dpath = ub.ensure_app_cache_dir('netharn', 'camvid')
    coco_fpath = join(cache_dpath, 'camvid.mscoco.json')

    # Need to manually bump this if you make a change to loading
    SCRIPT_VERSION = 'v4'

    # Ubelt's stamp-based caches are super cheap and let you take control of
    # the data format.
    stamp = ub.CacheStamp('camvid_coco', cfgstr=SCRIPT_VERSION,
                          dpath=cache_dpath, product=coco_fpath,
                          hasher='sha1', verbose=3)
    if stamp.expired():
        # Regenerate the coco json file from the raw camvid data
        camvid_raw_info = grab_raw_camvid()
        dset = convert_camvid_raw_to_coco(camvid_raw_info)
        with ub.Timer('dumping MS-COCO dset to: {}'.format(coco_fpath)):
            dset.dump(coco_fpath)
        # Mark this process as completed by saving a small file containing the
        # hash of the "product" you are stamping.
        stamp.renew()

    # We can also cache the index build step independently. This uses
    # ubelt.Cacher, which is pickle based, and writes the actual object to
    # disk. Each type of caching has its own uses and tradeoffs.
    cacher = ub.Cacher('prebuilt-coco', cfgstr=SCRIPT_VERSION,
                       dpath=cache_dpath, verbose=3)
    dset = cacher.tryload()
    if dset is None:
        print('Reading coco_fpath = {!r}'.format(coco_fpath))
        dset = ndsampler.CocoDataset(coco_fpath, tag='camvid')
        # Directly save the file to disk.
        dset._build_index()
        dset._build_hashid()
        cacher.save(dset)

    camvid_dset = dset
    print('Loaded camvid_dset = {!r}'.format(camvid_dset))
    return camvid_dset
def grab_tiny_imagenet_as_coco():
    """
    Download tiny-imagenet-200 and convert its train/vali splits into
    CocoDataset objects with bounding-box annotations.

    Returns:
        Dict[str, ndsampler.CocoDataset]: keyed by 'train' and 'vali'.
    """
    import ubelt as ub

    url = 'http://cs231n.stanford.edu/tiny-imagenet-200.zip'
    dpath = ub.ensure_app_cache_dir('netharn', 'tiny-imagenet-200')
    dset_root = join(dpath, 'tiny-imagenet-200')

    zip_fpath = ub.grabdata(url, dpath=dpath)

    if not exists(dset_root):
        import zipfile
        # BUGFIX: use a context manager so the archive handle is closed
        # even if extraction fails (was manual open/close).
        with zipfile.ZipFile(zip_fpath, 'r') as zip_ref:
            zip_ref.extractall(dpath)

    tiny_imgnet_info = {
        'train': join(dset_root, 'train'),
        'test': join(dset_root, 'test'),
        'vali': join(dset_root, 'val'),
        'wnids': join(dset_root, 'wnids.txt'),
        'words': join(dset_root, 'words.txt'),
    }

    import glob
    train_annots = list(glob.glob(join(tiny_imgnet_info['train'],
                                       '*/*boxes.txt')))
    vali_annots = list(glob.glob(join(tiny_imgnet_info['vali'],
                                      'val_annotations.txt')))

    import ndsampler
    img_root = {
        'train': join(tiny_imgnet_info['train']),
        'vali': join(tiny_imgnet_info['vali'], 'images'),
        'test': join(tiny_imgnet_info['test'], 'images'),
    }
    gpaths = {
        'train': list(glob.glob(join(tiny_imgnet_info['train'],
                                     '*/images/*.JPEG'))),
        'vali': list(glob.glob(join(tiny_imgnet_info['vali'],
                                    'images/*.JPEG'))),
        'test': list(glob.glob(join(tiny_imgnet_info['test'],
                                    'images/*.JPEG')))
    }
    annots_text = {
        'train': ''.join(ub.readfrom(fpath) for fpath in train_annots),
        'vali': ''.join(ub.readfrom(fpath) for fpath in vali_annots),
    }

    coco_datasets = {
        'train': ndsampler.CocoDataset(tag='tiny-imagenet-train'),
        'vali': ndsampler.CocoDataset(tag='tiny-imagenet-vali'),
    }

    # Categories are the wordnet ids; same category set for both splits
    for catname in (_ for _ in
                    ub.readfrom(tiny_imgnet_info['wnids']).split('\n') if _):
        for dset in coco_datasets.values():
            dset.add_category(name=catname)

    for tag in ['train', 'vali']:
        gpaths_ = gpaths[tag]
        annots_ = annots_text[tag]
        dset = coco_datasets[tag]
        dset.img_root = img_root[tag]

        for gpath in gpaths_:
            dset.add_image(file_name=gpath)

        # Annotation lines are tab-separated; train has
        # <gname> <x> <y> <w> <h>, vali has <gname> <catname> <x> <y> <w> <h>
        for line in (_ for _ in annots_.split('\n') if _):
            parts = line.split('\t')
            if tag == 'train':
                gname = parts[0]
                # train images encode their category in the filename
                catname = gname.split('_')[0]
                x, y, w, h = list(map(float, parts[1:]))
                gpath = join(img_root[tag], catname, 'images', gname)
            else:
                gname, catname = parts[0:2]
                x, y, w, h = list(map(float, parts[2:]))
                gpath = join(img_root[tag], gname)

            # The raw boxes are inclusive pixel extents; +1 converts to width
            bbox = (x, y, w + 1, h + 1)

            cat = dset.name_to_cat[catname]
            img = dset.index.file_name_to_img[gpath]
            dset.add_annotation(image_id=img['id'], bbox=bbox,
                                category_id=cat['id'])

        dset._ensure_imgsize()
        dset._build_hashid()
        print('dset.hashid = {!r}'.format(dset.hashid))

    return coco_datasets