Example #1
def setup_sampler(config):
    workdir = nh.configure_workdir(config,
                                   workdir=join('~/work/siam-ibeis2',
                                                config['dbname']))

    # TODO: cleanup and hook into ibeis AI
    if config['dbname'] == 'ggr2':
        print('Creating torch CocoDataset')

        root = ub.expandpath('~/data/')
        print('root = {!r}'.format(root))

        train_dset = ndsampler.CocoDataset(
            data=join(root, 'ggr2-coco/annotations/instances_train2018.json'),
            img_root=join(root, 'ggr2-coco/images/train2018'),
        )
        train_dset.hashid = 'ggr2-coco-train2018'

        vali_dset = ndsampler.CocoDataset(
            data=join(root, 'ggr2-coco/annotations/instances_val2018.json'),
            img_root=join(root, 'ggr2-coco/images/val2018'),
        )
        vali_dset.hashid = 'ggr2-coco-val2018'

        print('Creating samplers')
        samplers = {
            'train': ndsampler.CocoSampler(train_dset, workdir=workdir),
            'vali': ndsampler.CocoSampler(vali_dset, workdir=workdir),
        }
    elif config['dbname'] == 'ggr2-revised':
        print('Creating torch CocoDataset')

        root = ub.expandpath('~/data/ggr2.coco.revised')
        print('root = {!r}'.format(root))

        train_dset = ndsampler.CocoDataset(
            data=join(root, 'annotations/instances_train2019.json'),
            img_root=join(root, 'images/train2019'),
        )
        train_dset.hashid = 'ggr2-coco-revised-train2019'

        vali_dset = ndsampler.CocoDataset(
            data=join(root, 'annotations/instances_val2019.json'),
            img_root=join(root, 'images/val2019'),
        )
        vali_dset.hashid = 'ggr2-coco-revised-val2019'

        print('Creating samplers')
        samplers = {
            'train': ndsampler.CocoSampler(train_dset, workdir=workdir),
            'vali': ndsampler.CocoSampler(vali_dset, workdir=workdir),
        }
    else:
        raise KeyError(config['dbname'])

    return samplers, workdir
Example #2
    def _to_coco(dmet):
        """
        Convert to a coco representation of truth and predictions
        """
        import ndsampler
        true = ndsampler.CocoDataset()
        pred = ndsampler.CocoDataset()

        for node in dmet.classes:
            # cid = dmet.classes.graph.node[node]['id']
            cid = dmet.classes.index(node)
            supercategory = list(dmet.classes.graph.pred[node])
            if len(supercategory) == 0:
                supercategory = None
            else:
                assert len(supercategory) == 1
                supercategory = supercategory[0]
            true.add_category(node, id=cid, supercategory=supercategory)
            pred.add_category(node, id=cid, supercategory=supercategory)

        for imgname, gid in dmet._imgname_to_gid.items():
            true.add_image(imgname, id=gid)
            pred.add_image(imgname, id=gid)

        idx_to_id = {
            idx: dmet.classes.index(node)
            for idx, node in enumerate(dmet.classes.idx_to_node)
        }

        for gid, pred_dets in dmet.gid_to_pred_dets.items():
            pred_boxes = pred_dets.boxes
            if 'scores' in pred_dets.data:
                pred_scores = pred_dets.scores
            else:
                pred_scores = np.ones(len(pred_dets))
            pred_cids = list(ub.take(idx_to_id, pred_dets.class_idxs))
            pred_xywh = pred_boxes.to_xywh().data.tolist()
            for bbox, cid, score in zip(pred_xywh, pred_cids, pred_scores):
                pred.add_annotation(gid, cid, bbox=bbox, score=score)

        for gid, true_dets in dmet.gid_to_true_dets.items():
            true_boxes = true_dets.boxes
            if 'weights' in true_dets.data:
                true_weights = true_dets.weights
            else:
                true_weights = np.ones(len(true_boxes))
            true_cids = list(ub.take(idx_to_id, true_dets.class_idxs))
            true_xywh = true_boxes.to_xywh().data.tolist()
            for bbox, cid, weight in zip(true_xywh, true_cids, true_weights):
                true.add_annotation(gid, cid, bbox=bbox, weight=weight)

        return pred, true
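
A quick way to exercise this conversion, assuming the DetectionMetrics.demo
classmethod available in netharn.metrics.detect_metrics (it synthesizes random
truth and predictions; the keyword arguments here are illustrative):

from netharn.metrics.detect_metrics import DetectionMetrics
dmet = DetectionMetrics.demo(nimgs=4, nboxes=8, n_fp=2)
pred, true = dmet._to_coco()
print(true)  # CocoDataset wrapping the synthetic truth annotations
print(pred)  # CocoDataset wrapping the scored predictions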
Example #3
def parse_mscoco():
    # Test that our implementation can handle the real mscoco data
    root = ub.expandpath('~/data/standard_datasets/mscoco/')

    fpath = join(root, 'annotations/instances_val2014.json')
    img_root = normpath(ub.ensuredir((root, 'images', 'val2014')))

    # fpath = join(root, 'annotations/stuff_val2017.json')
    # img_root = normpath(ub.ensuredir((root, 'images', 'val2017')))

    import ujson
    with open(fpath, 'rb') as file:
        dataset = ujson.load(file)

    import ndsampler
    dset = ndsampler.CocoDataset(dataset)
    dset.img_root = img_root

    gid_iter = iter(dset.imgs.keys())

    gid = ub.peek(gid_iter)

    for gid in ub.ProgIter(gid_iter):
        img = dset.imgs[gid]
        ub.grabdata(img['coco_url'], dpath=img_root, verbose=0)
        anns = [dset.anns[aid] for aid in dset.gid_to_aids[gid]]
        dset.show_image(gid=gid)

    # Inspect annotations from the last image visited by the loop
    ann = anns[0]

    segmentation = ann['segmentation']

    from PIL import Image
    gpath = join(dset.img_root, img['file_name'])
    with Image.open(gpath) as pil_img:
        np_img = np.array(pil_img)
Example #4
def demo(WindowedSamplerDataset, key='habcam', **kwargs):
    import ndsampler
    if key == 'habcam':
        dset_fpath = ub.expandpath('~/data/noaa/Habcam_2015_g027250_a00102917_c0001_v2_vali.mscoco.json')
        workdir = ub.expandpath('~/work/bioharn')
        dset = ndsampler.CocoDataset(dset_fpath)
        sampler = ndsampler.CocoSampler(dset, workdir=workdir, backend=None)
    else:
        sampler = ndsampler.CocoSampler.demo(key)
    self = WindowedSamplerDataset(sampler, **kwargs)
    return self
Example #5
def detect_cli(config={}):
    """
    CommandLine:
        python -m bioharn.detect_predict --help

    CommandLine:
        python -m bioharn.detect_predict \
            --dataset=~/data/noaa/Habcam_2015_g027250_a00102917_c0001_v2_test.mscoco.json \
            --deployed=/home/joncrall/work/bioharn/fit/runs/bioharn-det-v11-test-cascade/myovdqvi/deploy_MM_CascadeRCNN_myovdqvi_035_MVKVVR.zip \
            --out_dpath=~/work/bioharn/habcam_test_out \
            --draw=100 \
            --input_dims=512,512 \
            --xpu=0 --batch_size=1

    Ignore:
        >>> config = {}
        >>> config['dataset'] = '~/data/noaa/Habcam_2015_g027250_a00102917_c0001_v2_vali.mscoco.json'
        >>> config['deployed'] = '/home/joncrall/work/bioharn/fit/runs/bioharn-det-v11-test-cascade/myovdqvi/deploy_MM_CascadeRCNN_myovdqvi_035_MVKVVR.zip'
        >>> config['out_dpath'] = 'out'
    """
    import kwarray
    import ndsampler
    from os.path import basename, join, exists, isfile, isdir  # NOQA

    config = DetectPredictCLIConfig(config, cmdline=True)
    print('config = {}'.format(ub.repr2(config.asdict())))

    out_dpath = ub.expandpath(config.get('out_dpath'))

    import six
    if isinstance(config['dataset'], six.string_types):
        if config['dataset'].endswith('.json'):
            dataset_fpath = ub.expandpath(config['dataset'])
            coco_dset = ndsampler.CocoDataset(dataset_fpath)
            # Running prediction is much faster if you can build a sampler.
            sampler_backend = {
                'type': 'cog',
                'config': {
                    'compress': 'JPEG',
                },
                '_hack_old_names': False,  # flip to true to use legacy caches
            }
            # NOTE: the COG backend configured above is currently disabled
            sampler_backend = None
            print('coco hashid = {}'.format(coco_dset._build_hashid()))
        else:
            sampler_backend = None
            if exists(config['dataset']) and isfile(config['dataset']):
                # Single image case
                image_fpath = ub.expandpath(config['dataset'])
                coco_dset = ndsampler.CocoDataset()
                coco_dset.add_image(image_fpath)
            else:
                raise ValueError(config['dataset'])
    elif isinstance(config['dataset'], list):
        # Multiple image case
        gpaths = config['dataset']
        gpaths = [ub.expandpath(g) for g in gpaths]
        coco_dset = ndsampler.CocoDataset()
        for gpath in gpaths:
            coco_dset.add_image(gpath)
    else:
        raise TypeError(config['dataset'])

    draw = config.get('draw')
    workdir = ub.expandpath(config.get('workdir'))

    det_outdir = ub.ensuredir((out_dpath, 'pred'))

    pred_config = ub.dict_subset(config, DetectPredictConfig.default)

    print('Create sampler')
    sampler = ndsampler.CocoSampler(coco_dset, workdir=workdir,
                                    backend=sampler_backend)
    print('prepare frames')
    sampler.frames.prepare(workers=config['workers'])

    print('Create predictor')
    predictor = DetectPredictor(pred_config)
    print('Ensure model')
    predictor._ensure_model()

    pred_dataset = coco_dset.dataset.copy()
    pred_dataset['annotations'] = []
    pred_dset = ndsampler.CocoDataset(pred_dataset)

    # self = predictor
    predictor.config['verbose'] = 1
    pred_gen = predictor.predict_sampler(sampler)
    buffered_gen = AsyncBufferedGenerator(pred_gen, size=coco_dset.n_images)

    gid_to_pred = {}
    prog = ub.ProgIter(buffered_gen, total=coco_dset.n_images,
                       desc='buffered detect')
    for img_idx, (gid, dets) in enumerate(prog):
        gid_to_pred[gid] = dets

        for ann in dets.to_coco():
            ann['image_id'] = gid
            try:
                catname = ann['category_name']
                ann['category_id'] = pred_dset._resolve_to_cid(catname)
            except KeyError:
                if 'category_id' not in ann:
                    cid = pred_dset.add_category(catname)
                    ann['category_id'] = cid
            pred_dset.add_annotation(**ann)

        single_img_coco = pred_dset.subset([gid])
        single_pred_dpath = ub.ensuredir((det_outdir, 'single_image'))
        single_pred_fpath = join(single_pred_dpath, 'detections_gid_{:08d}.mscoco.json'.format(gid))
        single_img_coco.dump(single_pred_fpath, newlines=True)

        if draw is True or (draw and img_idx < draw):
            draw_outdir = ub.ensuredir((out_dpath, 'draw'))
            img_fpath = coco_dset.load_image_fpath(gid)
            gname = basename(img_fpath)
            viz_fname = ub.augpath(gname, prefix='detect_', ext='.jpg')
            viz_fpath = join(draw_outdir, viz_fname)

            image = kwimage.imread(img_fpath)

            flags = dets.scores > .2
            flags[kwarray.argmaxima(dets.scores, num=10)] = True
            top_dets = dets.compress(flags)
            toshow = top_dets.draw_on(image, alpha=None)
            # kwplot.imshow(toshow)
            kwimage.imwrite(viz_fpath, toshow, space='rgb')

    pred_fpath = join(det_outdir, 'detections.mscoco.json')
    print('Dump detections to pred_fpath = {!r}'.format(pred_fpath))
    pred_dset.dump(pred_fpath, newlines=True)
Example #6
def setup_harness(**kwargs):
    """
    CommandLine:
        python ~/code/netharn/netharn/examples/ggr_matching.py setup_harness

    Example:
        >>> harn = setup_harness(dbname='PZ_MTEST')
        >>> harn.initialize()
    """
    nice = kwargs.get('nice', 'untitled')
    batch_size = int(kwargs.get('batch_size', 6))
    bstep = int(kwargs.get('bstep', 1))
    workers = int(kwargs.get('workers', 0))
    decay = float(kwargs.get('decay', 0.0005))
    lr = float(kwargs.get('lr', 0.001))
    dim = int(kwargs.get('dim', 416))
    xpu = kwargs.get('xpu', 'argv')
    workdir = kwargs.get('workdir', None)
    dbname = kwargs.get('dbname', 'ggr2')

    if workdir is None:
        workdir = ub.truepath(os.path.join('~/work/siam-ibeis2', dbname))
    ub.ensuredir(workdir)

    if dbname == 'ggr2':
        print('Creating torch CocoDataset')
        train_dset = ndsampler.CocoDataset(
            data='/media/joncrall/raid/data/ggr2-coco/annotations/instances_train2018.json',
            img_root='/media/joncrall/raid/data/ggr2-coco/images/train2018',
        )
        train_dset.hashid = 'ggr2-coco-train2018'
        vali_dset = ndsampler.CocoDataset(
            data='/media/joncrall/raid/data/ggr2-coco/annotations/instances_val2018.json',
            img_root='/media/joncrall/raid/data/ggr2-coco/images/val2018',
        )
        vali_dset.hashid = 'ggr2-coco-val2018'

        print('Creating samplers')
        train_sampler = ndsampler.CocoSampler(train_dset, workdir=workdir)
        vali_sampler = ndsampler.CocoSampler(vali_dset, workdir=workdir)

        print('Creating torch Datasets')
        datasets = {
            'train': MatchingCocoDataset(train_sampler, train_dset, workdir,
                                         dim=dim, augment=True),
            'vali': MatchingCocoDataset(vali_sampler, vali_dset, workdir,
                                        dim=dim),
        }
    else:
        from ibeis_utils import randomized_ibeis_dset
        datasets = randomized_ibeis_dset(dbname, dim=dim)

    for k, v in datasets.items():
        print('* len({}) = {}'.format(k, len(v)))

    if workers > 0:
        import cv2
        cv2.setNumThreads(0)

    loaders = {
        key: torch.utils.data.DataLoader(dset,
                                         batch_size=batch_size,
                                         num_workers=workers,
                                         shuffle=(key == 'train'),
                                         pin_memory=True)
        for key, dset in datasets.items()
    }

    xpu = nh.XPU.cast(xpu)

    hyper = nh.HyperParams(
        **{
            'nice': nice,
            'workdir': workdir,
            'datasets': datasets,
            'loaders': loaders,
            'xpu': xpu,
            'model': (MatchingNetworkLP, {
                'p': 2,
                'input_shape': (1, 3, dim, dim),
            }),
            'criterion': (nh.criterions.ContrastiveLoss, {
                'margin': 4,
                'weight': None,
            }),
            'optimizer': (torch.optim.SGD, {
                'lr': lr,
                'weight_decay': decay,
                'momentum': 0.9,
                'nesterov': True,
            }),
            'initializer': (nh.initializers.NoOp, {}),
            'scheduler': (nh.schedulers.Exponential, {
                'gamma': 0.99,
                'stepsize': 2,
            }),
            # 'scheduler': (nh.schedulers.ListedLR, {
            #     'points': {
            #         1:   lr * 1.0,
            #         19:  lr * 1.1,
            #         20:  lr * 0.1,
            #     },
            #     'interpolate': True
            # }),
            'monitor': (nh.Monitor, {
                'minimize': ['loss', 'pos_dist', 'brier'],
                'maximize': ['accuracy', 'neg_dist', 'mcc'],
                'patience': 40,
                'max_epoch': 40,
            }),

            # 'augment': datasets['train'].augmenter,
            'dynamics': {
                # Controls how many batches to process before taking a step in the
                # gradient direction. Effectively simulates a batch_size that is
                # `bstep` times bigger (see the sketch after this example).
                'batch_step': bstep,
            },
            'other': {
                'n_classes': 2,
            },
        })
    harn = MatchingHarness(hyper=hyper)
    harn.config['prog_backend'] = 'progiter'
    harn.intervals['log_iter_train'] = 1
    harn.intervals['log_iter_test'] = None
    harn.intervals['log_iter_vali'] = None

    return harn
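
The 'batch_step' dynamic above amounts to gradient accumulation. Below is a
minimal, self-contained PyTorch sketch of the same idea (netharn applies this
internally; the toy model and data here are purely illustrative):

import torch

model = torch.nn.Linear(8, 2)
optimizer = torch.optim.SGD(model.parameters(), lr=0.01)
loss_fn = torch.nn.CrossEntropyLoss()
loader = [(torch.randn(6, 8), torch.randint(0, 2, (6,))) for _ in range(8)]

bstep = 4  # accumulate gradients over 4 batches before stepping
optimizer.zero_grad()
for i, (inputs, targets) in enumerate(loader):
    loss = loss_fn(model(inputs), targets)
    (loss / bstep).backward()  # scale so accumulated grads match the mean
    if (i + 1) % bstep == 0:
        optimizer.step()       # effective batch size = 6 * bstep = 24
        optimizer.zero_grad()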
Example #7
def eval_detections_cli(**kw):
    """
    CommandLine:
        xdoctest -m ~/code/netharn/netharn/metrics/detect_metrics.py eval_detections_cli
    """
    import scriptconfig as scfg
    import ndsampler

    class EvalDetectionCLI(scfg.Config):
        default = {
            'true': scfg.Path(None, help='true coco dataset'),
            'pred': scfg.Path(None, help='predicted coco dataset'),
            'out_dpath': scfg.Path('./out', help='output directory')
        }

    config = EvalDetectionCLI()
    cmdline = kw.pop('cmdline', True)
    config.load(kw, cmdline=cmdline)

    true_coco = ndsampler.CocoDataset(config['true'])
    pred_coco = ndsampler.CocoDataset(config['pred'])

    from netharn.metrics.detect_metrics import DetectionMetrics
    dmet = DetectionMetrics.from_coco(true_coco, pred_coco)

    voc_info = dmet.score_voc()

    cls_info = voc_info['perclass'][0]
    tp = cls_info['tp']
    fp = cls_info['fp']
    fn = cls_info['fn']

    tpr = cls_info['tpr']
    ppv = cls_info['ppv']

    # Compute the MCC as TN->inf
    thresh = cls_info['thresholds']

    # https://erotemic.wordpress.com/2019/10/23/closed-form-of-the-mcc-when-tn-inf/
    mcc_lim = tp / (np.sqrt(fn + tp) * np.sqrt(fp + tp))
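    # Derivation sketch: MCC = (TP*TN - FP*FN) /
    #     sqrt((TP+FP) * (TP+FN) * (TN+FP) * (TN+FN))
    # As TN -> inf, the numerator grows like TP*TN and the denominator like
    # TN * sqrt((TP+FP) * (TP+FN)), so the ratio converges to
    #     TP / sqrt((TP+FN) * (TP+FP))
    # which is exactly `mcc_lim` (equivalently sqrt(ppv * tpr), the
    # Fowlkes-Mallows index).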
    f1 = 2 * (ppv * tpr) / (ppv + tpr)

    draw = False
    if draw:

        mcc_idx = mcc_lim.argmax()
        f1_idx = f1.argmax()

        import kwplot
        plt = kwplot.autoplt()

        kwplot.multi_plot(
            xdata=thresh,
            ydata=mcc_lim,
            xlabel='threshold',
            ylabel='mcc*',
            fnum=1,
            pnum=(1, 4, 1),
            title='MCC*',
            color=['blue'],
        )
        plt.plot(thresh[mcc_idx], mcc_lim[mcc_idx], 'r*', markersize=20)
        plt.plot(thresh[f1_idx], mcc_lim[f1_idx], 'k*', markersize=20)

        kwplot.multi_plot(
            xdata=fp,
            ydata=tpr,
            xlabel='fp (fa)',
            ylabel='tpr (pd)',
            fnum=1,
            pnum=(1, 4, 2),
            title='ROC',
            color=['blue'],
        )
        plt.plot(fp[mcc_idx], tpr[mcc_idx], 'r*', markersize=20)
        plt.plot(fp[f1_idx], tpr[f1_idx], 'k*', markersize=20)

        kwplot.multi_plot(
            xdata=tpr,
            ydata=ppv,
            xlabel='tpr (recall)',
            ylabel='ppv (precision)',
            fnum=1,
            pnum=(1, 4, 3),
            title='PR',
            color=['blue'],
        )
        plt.plot(tpr[mcc_idx], ppv[mcc_idx], 'r*', markersize=20)
        plt.plot(tpr[f1_idx], ppv[f1_idx], 'k*', markersize=20)

        kwplot.multi_plot(
            xdata=thresh,
            ydata=f1,
            xlabel='threshold',
            ylabel='f1',
            fnum=1,
            pnum=(1, 4, 4),
            title='F1',
            color=['blue'],
        )
        plt.plot(thresh[mcc_idx], f1[mcc_idx], 'r*', markersize=20)
        plt.plot(thresh[f1_idx], f1[f1_idx], 'k*', markersize=20)
Example #8
def convert_camvid_raw_to_coco(camvid_raw_info):
    """
    Converts the raw CamVid format to an MSCOCO-based format (which lets us
    use ndsampler's COCO backend).

    Example:
        >>> # xdoctest: +REQUIRES(--download)
        >>> camvid_raw_info = grab_raw_camvid()
        >>> # test with a reduced set of data
        >>> del camvid_raw_info['img_paths'][2:]
        >>> del camvid_raw_info['mask_paths'][2:]
        >>> dset = convert_camvid_raw_to_coco(camvid_raw_info)
        >>> # xdoctest: +REQUIRES(--show)
        >>> import kwplot
        >>> plt = kwplot.autoplt()
        >>> kwplot.figure(fnum=1, pnum=(1, 2, 1))
        >>> dset.show_image(gid=1)
        >>> kwplot.figure(fnum=1, pnum=(1, 2, 2))
        >>> dset.show_image(gid=2)
    """
    import re
    import kwimage
    import ndsampler
    print('Converting CamVid to MS-COCO format')

    dset_root, img_paths, label_path, mask_paths = ub.take(
        camvid_raw_info,
        'dset_root, img_paths, label_path, mask_paths'.split(', '))

    img_infos = {
        'img_fname': img_paths,
        'mask_fname': mask_paths,
    }
    keys = list(img_infos.keys())
    next_vals = list(zip(*img_infos.values()))
    image_items = [{k: v for k, v in zip(keys, vals)} for vals in next_vals]

    dataset = {
        'img_root': dset_root,
        'images': [],
        'categories': [],
        'annotations': [],
    }

    lines = ub.readfrom(label_path).split('\n')
    lines = [line for line in lines if line]
    for line in lines:
        color_text, name = re.split('\t+', line)
        r, g, b = map(int, color_text.split(' '))
        color = (r, g, b)

        # Parse the special camvid color-id format (see the helper sketch
        # after this example)
        cid = (r << 16) + (g << 8) + (b << 0)
        cat = {
            'id': cid,
            'name': name,
            'color': color,
        }
        dataset['categories'].append(cat)

    for gid, img_item in enumerate(image_items, start=1):
        img = {
            'id': gid,
            'file_name': img_item['img_fname'],
            # nonstandard image field
            'segmentation': img_item['mask_fname'],
        }
        dataset['images'].append(img)

    dset = ndsampler.CocoDataset(dataset)
    dset.rename_categories({'Void': 'background'})

    assert dset.name_to_cat['background']['id'] == 0
    dset.name_to_cat['background'].setdefault('alias', []).append('Void')

    if False:
        _define_camvid_class_hierarcy(dset)

    if 1:
        # TODO: Binarize CCs (and efficiently encode if possible)
        import numpy as np

        bad_info = []
        once = False

        # Add images
        dset.remove_all_annotations()
        for gid, img in ub.ProgIter(dset.imgs.items(),
                                    desc='parse label masks'):
            mask_fpath = join(dset_root, img['segmentation'])

            rgb_mask = kwimage.imread(mask_fpath, space='rgb')
            r, g, b = rgb_mask.T.astype(np.int64)
            cid_mask = np.ascontiguousarray(rgb_to_cid(r, g, b).T)

            cids = set(np.unique(cid_mask)) - {0}

            for cid in cids:
                if cid not in dset.cats:
                    if gid == 618:
                        # Handle a known issue with image 618
                        c_mask = (cid == cid_mask).astype(np.uint8)
                        total_bad = c_mask.sum()
                        if total_bad < 32:
                            if not once:
                                print(
                                    'gid 618 has a few known bad pixels, ignoring them'
                                )
                                once = True
                            continue
                        else:
                            raise Exception('more bad pixels than expected')
                    else:
                        raise Exception(
                            'UNKNOWN cid = {!r} in gid={!r}'.format(cid, gid))

                    # bad_rgb = cid_to_rgb(cid)
                    # print('bad_rgb = {!r}'.format(bad_rgb))
                    # print('WARNING UNKNOWN cid = {!r} in gid={!r}'.format(cid, gid))
                    # bad_info.append({
                    #     'gid': gid,
                    #     'cid': cid,
                    # })
                else:
                    ann = {
                        'category_id': cid,
                        'image_id': gid
                        # 'segmentation': mask.to_coco()
                    }
                    assert cid in dset.cats
                    c_mask = (cid == cid_mask).astype(np.uint8)
                    mask = kwimage.Mask(c_mask, 'c_mask')

                    box = kwimage.Boxes([mask.get_xywh()], 'xywh')
                    # box = mask.to_boxes()

                    ann['bbox'] = ub.peek(box.to_coco())
                    ann['segmentation'] = mask.to_coco()
                    dset.add_annotation(**ann)

        if 0:
            bad_cids = [i['cid'] for i in bad_info]
            print(sorted([c['color'] for c in dataset['categories']]))
            print(sorted(set([cid_to_rgb(i['cid']) for i in bad_info])))

            gid = 618
            img = dset.imgs[gid]
            mask_fpath = join(dset_root, img['segmentation'])
            rgb_mask = kwimage.imread(mask_fpath, space='rgb')
            r, g, b = rgb_mask.T.astype(np.int64)
            cid_mask = np.ascontiguousarray(rgb_to_cid(r, g, b).T)
            cid_hist = ub.dict_hist(cid_mask.ravel())

            bad_cid_hist = {}
            for cid in bad_cids:
                bad_cid_hist[cid] = cid_hist.pop(cid)

            import kwplot
            kwplot.autompl()
            kwplot.imshow(rgb_mask)

    if 0:
        import kwplot
        plt = kwplot.autoplt()
        plt.clf()
        dset.show_image(1)

        import xdev
        gid_list = list(dset.imgs)
        for gid in xdev.InteractiveIter(gid_list):
            dset.show_image(gid)
            xdev.InteractiveIter.draw()

    dset._build_index()
    dset._build_hashid()
    return dset
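
The rgb_to_cid and cid_to_rgb helpers referenced above are defined elsewhere
in this module (not shown). A minimal sketch consistent with the
(r << 16) + (g << 8) + b encoding used when parsing the label file:

def rgb_to_cid(r, g, b):
    """Pack three 8-bit color channels into a single integer id."""
    return (r << 16) + (g << 8) + (b << 0)


def cid_to_rgb(cid):
    """Invert rgb_to_cid by unpacking the three 8-bit channels."""
    return ((cid >> 16) & 0xFF, (cid >> 8) & 0xFF, cid & 0xFF)


assert cid_to_rgb(rgb_to_cid(64, 128, 64)) == (64, 128, 64)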
Example #9
def grab_coco_camvid():
    """
    Example:
        >>> # xdoctest: +REQUIRES(--download)
        >>> dset = grab_coco_camvid()
        >>> print('dset = {!r}'.format(dset))
        >>> # xdoctest: +REQUIRES(--show)
        >>> import kwplot
        >>> plt = kwplot.autoplt()
        >>> plt.clf()
        >>> dset.show_image(gid=1)

    Ignore:
        import xdev
        gid_list = list(dset.imgs)
        for gid in xdev.InteractiveIter(gid_list):
            dset.show_image(gid)
            xdev.InteractiveIter.draw()
    """
    import ndsampler
    cache_dpath = ub.ensure_app_cache_dir('netharn', 'camvid')
    coco_fpath = join(cache_dpath, 'camvid.mscoco.json')

    # Need to manually bump this if you make a change to loading
    SCRIPT_VERSION = 'v4'

    # Ubelt's stamp-based caches are super cheap and let you take control of
    # the data format (a standalone sketch follows this example).
    stamp = ub.CacheStamp('camvid_coco',
                          cfgstr=SCRIPT_VERSION,
                          dpath=cache_dpath,
                          product=coco_fpath,
                          hasher='sha1',
                          verbose=3)
    if stamp.expired():
        camvid_raw_info = grab_raw_camvid()
        dset = convert_camvid_raw_to_coco(camvid_raw_info)
        with ub.Timer('dumping MS-COCO dset to: {}'.format(coco_fpath)):
            dset.dump(coco_fpath)
        # Mark this process as completed by saving a small file containing the
        # hash of the "product" you are stamping.
        stamp.renew()

    # We can also cache the index build step independently. This uses
    # ubelt.Cacher, which is pickle based, and writes the actual object to
    # disk. Each type of caching has its own uses and tradeoffs.
    cacher = ub.Cacher('prebuilt-coco',
                       cfgstr=SCRIPT_VERSION,
                       dpath=cache_dpath,
                       verbose=3)
    dset = cacher.tryload()
    if dset is None:
        print('Reading coco_fpath = {!r}'.format(coco_fpath))
        dset = ndsampler.CocoDataset(coco_fpath, tag='camvid')
        # Build the index and hashid before pickling so the cached object
        # comes back ready to use.
        dset._build_index()
        dset._build_hashid()
        cacher.save(dset)

    camvid_dset = dset
    print('Loaded camvid_dset = {!r}'.format(camvid_dset))
    return camvid_dset
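
The two caching utilities above serve different purposes: ub.CacheStamp marks
that a file on disk is up to date, while ub.Cacher pickles a Python object. A
minimal standalone sketch of the stamp pattern (write_product is a
hypothetical function that produces the cached file):

import ubelt as ub
from os.path import join

dpath = ub.ensure_app_cache_dir('my_app')
product = join(dpath, 'result.json')
stamp = ub.CacheStamp('my_process', cfgstr='v1', dpath=dpath, product=product)
if stamp.expired():
    write_product(product)  # hypothetical: writes the product file
    stamp.renew()           # records the product hash; future runs skip this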
Example #10
def grab_tiny_imagenet_as_coco():
    import ubelt as ub
    from os.path import join, exists

    url = 'http://cs231n.stanford.edu/tiny-imagenet-200.zip'
    dpath = ub.ensure_app_cache_dir('netharn', 'tiny-imagenet-200')
    dset_root = join(dpath, 'tiny-imagenet-200')

    zip_fpath = ub.grabdata(url, dpath=dpath)

    if not exists(dset_root):
        import zipfile
        with zipfile.ZipFile(zip_fpath, 'r') as zip_ref:
            zip_ref.extractall(dpath)

    tiny_imgnet_info = {
        'train': join(dset_root, 'train'),
        'test': join(dset_root, 'test'),
        'vali': join(dset_root, 'val'),

        'wnids': join(dset_root, 'wnids.txt'),
        'words': join(dset_root, 'words.txt'),
    }

    import glob
    train_annots = list(glob.glob(join(tiny_imgnet_info['train'], '*/*boxes.txt')))
    vali_annots = list(glob.glob(join(tiny_imgnet_info['vali'], 'val_annotations.txt')))

    import ndsampler

    img_root = {
        'train': join(tiny_imgnet_info['train']),
        'vali': join(tiny_imgnet_info['vali'], 'images'),
        'test': join(tiny_imgnet_info['test'], 'images'),
    }
    gpaths = {
        'train': list(glob.glob(join(tiny_imgnet_info['train'], '*/images/*.JPEG'))),
        'vali': list(glob.glob(join(tiny_imgnet_info['vali'], 'images/*.JPEG'))),
        'test': list(glob.glob(join(tiny_imgnet_info['test'], 'images/*.JPEG')))
    }
    annots_text = {
        'train': ''.join(ub.readfrom(fpath) for fpath in train_annots),
        'vali': ''.join(ub.readfrom(fpath) for fpath in vali_annots),
    }
    coco_datasets = {
        'train': ndsampler.CocoDataset(tag='tiny-imagenet-train'),
        'vali': ndsampler.CocoDataset(tag='tiny-imagenet-vali'),
    }

    for catname in (_ for _ in ub.readfrom(tiny_imgnet_info['wnids']).split('\n') if _):
        for dset in coco_datasets.values():
            dset.add_category(name=catname)

    for tag in ['train', 'vali']:
        gpaths_ = gpaths[tag]
        annots_ = annots_text[tag]
        dset = coco_datasets[tag]

        dset.img_root = img_root[tag]

        for gpath in gpaths_:
            dset.add_image(file_name=gpath)

        for line in (_ for _ in annots_.split('\n') if _):
            parts = line.split('\t')
            if tag == 'train':
                gname = parts[0]
                catname = gname.split('_')[0]
                x, y, w, h = list(map(float, parts[1:]))
                gpath = join(img_root[tag], catname, 'images', gname)
            else:
                gname, catname = parts[0:2]
                x, y, w, h = list(map(float, parts[2:]))
                gpath = join(img_root[tag], gname)

            bbox = (x, y, w + 1, h + 1)
            cat = dset.name_to_cat[catname]
            img = dset.index.file_name_to_img[gpath]

            dset.add_annotation(image_id=img['id'], bbox=bbox,
                                category_id=cat['id'])

        dset._ensure_imgsize()
        dset._build_hashid()
        print('dset.hashid = {!r}'.format(dset.hashid))

    return coco_datasets