Ejemplo n.º 1
0
def setup_sampler(config):
    workdir = nh.configure_workdir(config,
                                   workdir=join('~/work/siam-ibeis2',
                                                config['dbname']))

    # TODO: cleanup and hook into ibeis AI
    if config['dbname'] == 'ggr2':
        print('Creating torch CocoDataset')

        root = ub.expandpath('~/data/')
        print('root = {!r}'.format(root))

        train_dset = ndsampler.CocoDataset(
            data=join(root, 'ggr2-coco/annotations/instances_train2018.json'),
            img_root=join(root, 'ggr2-coco/images/train2018'),
        )
        train_dset.hashid = 'ggr2-coco-train2018'

        vali_dset = ndsampler.CocoDataset(
            data=join(root, 'ggr2-coco/annotations/instances_val2018.json'),
            img_root=join(root, 'ggr2-coco/images/val2018'),
        )
        vali_dset.hashid = 'ggr2-coco-val2018'

        print('Creating samplers')
        samplers = {
            'train': ndsampler.CocoSampler(train_dset, workdir=workdir),
            'vali': ndsampler.CocoSampler(vali_dset, workdir=workdir),
        }
    if config['dbname'] == 'ggr2-revised':
        print('Creating torch CocoDataset')

        root = ub.expandpath('~/data/ggr2.coco.revised')
        print('root = {!r}'.format(root))

        train_dset = ndsampler.CocoDataset(
            data=join(root, 'annotations/instances_train2019.json'),
            img_root=join(root, 'images/train2019'),
        )
        train_dset.hashid = 'ggr2-coco-revised-train2019'

        vali_dset = ndsampler.CocoDataset(
            data=join(root, 'annotations/instances_val2019.json'),
            img_root=join(root, 'images/val2019'),
        )
        vali_dset.hashid = 'ggr2-coco-revised-val2019'

        print('Creating samplers')
        samplers = {
            'train': ndsampler.CocoSampler(train_dset, workdir=workdir),
            'vali': ndsampler.CocoSampler(vali_dset, workdir=workdir),
        }
    else:
        raise KeyError(config['dbname'])

    return samplers, workdir
Ejemplo n.º 2
0
 def demo(cls, **kwargs):
     from netharn.data.grab_camvid import grab_coco_camvid
     import ndsampler
     dset = grab_coco_camvid()
     sampler = ndsampler.CocoSampler(dset, workdir=None, backend='npy')
     self = cls(sampler, **kwargs)
     return self
Ejemplo n.º 3
0
 def demo(WindowedSamplerDataset, key='habcam', **kwargs):
     import ndsampler
     if key == 'habcam':
         dset_fpath = ub.expandpath('~/data/noaa/Habcam_2015_g027250_a00102917_c0001_v2_vali.mscoco.json')
         workdir = ub.expandpath('~/work/bioharn')
         dset = ndsampler.CocoDataset(dset_fpath)
         sampler = ndsampler.CocoSampler(dset, workdir=workdir, backend=None)
     else:
         sampler = ndsampler.CocoSampler.demo(key)
     self = WindowedSamplerDataset(sampler, **kwargs)
     return self
Ejemplo n.º 4
0
def grab_camvid_sampler():
    """
    Grab a ndsampler.CocoSampler object for the CamVid dataset.

    Returns:
        ndsampler.CocoSampler: sampler

    Example:
        >>> # xdoctest: +REQUIRES(--download)
        >>> sampler = grab_camvid_sampler()
        >>> print('sampler = {!r}'.format(sampler))
        >>> # sampler.load_sample()
        >>> for gid in ub.ProgIter(sampler.image_ids, desc='load image'):
        >>>     img = sampler.load_image(gid)
    """
    import ndsampler
    dset = grab_coco_camvid()
    workdir = ub.ensure_app_cache_dir('camvid')
    sampler = ndsampler.CocoSampler(dset, workdir=workdir)
    return sampler
Ejemplo n.º 5
0
    def __init__(self, storage_mode='numpy', return_mode='tensor', total=24e7):
        self.return_mode = return_mode
        self.storage_mode = storage_mode

        assert self.return_mode in {'tensor', 'dict', 'tuple', 'list'}

        if storage_mode == 'numpy':
            self.data = np.array([x for x in range(int(total))])
        elif storage_mode == 'python':
            self.data = [x for x in range(int(total))]
        elif storage_mode == 'ndsampler-sql':
            import ndsampler
            import kwcoco
            from kwcoco.coco_sql_dataset import ensure_sql_coco_view
            dset = kwcoco.CocoDataset.demo('vidshapes',
                                           num_videos=1,
                                           num_frames=total,
                                           gsize=(64, 64))
            dset = ensure_sql_coco_view(dset)
            print('dset.uri = {!r}'.format(dset.uri))
            dset.hashid = 'fake-hashid'
            sampler = ndsampler.CocoSampler(dset, backend=None)
            self.data = sampler
            # sampler.load_item(0)
            # tr = sampler.regions.get_item(0)
            # sampler.load_sample(tr)
            # assert total <= 1000
            # sampler = ndsampler.CocoSampler.demo('shapes{}'.format(total))
            # sampler = ndsampler.CocoSampler.demo('shapes{}'.format(total))
        elif storage_mode == 'ndsampler':
            import ndsampler
            # assert total <= 10000
            sampler = ndsampler.CocoSampler.demo('vidshapes',
                                                 num_videos=1,
                                                 num_frames=total,
                                                 gsize=(64, 64))
            self.data = sampler
        else:
            raise KeyError(storage_mode)
Ejemplo n.º 6
0
def detect_cli(config={}):
    """
    CommandLine:
        python -m bioharn.detect_predict --help

    CommandLine:
        python -m bioharn.detect_predict \
            --dataset=~/data/noaa/Habcam_2015_g027250_a00102917_c0001_v2_test.mscoco.json \
            --deployed=/home/joncrall/work/bioharn/fit/runs/bioharn-det-v11-test-cascade/myovdqvi/deploy_MM_CascadeRCNN_myovdqvi_035_MVKVVR.zip \
            --out_dpath=~/work/bioharn/habcam_test_out \
            --draw=100 \
            --input_dims=512,512 \
            --xpu=0 --batch_size=1

    Ignore:
        >>> config = {}
        >>> config['dataset'] = '~/data/noaa/Habcam_2015_g027250_a00102917_c0001_v2_vali.mscoco.json'
        >>> config['deployed'] = '/home/joncrall/work/bioharn/fit/runs/bioharn-det-v11-test-cascade/myovdqvi/deploy_MM_CascadeRCNN_myovdqvi_035_MVKVVR.zip'
        >>> config['out_dpath'] = 'out'
    """
    import kwarray
    import ndsampler
    from os.path import basename, join, exists, isfile, isdir  # NOQA

    config = DetectPredictCLIConfig(config, cmdline=True)
    print('config = {}'.format(ub.repr2(config.asdict())))

    out_dpath = ub.expandpath(config.get('out_dpath'))

    import six
    if isinstance(config['dataset'], six.string_types):
        if config['dataset'].endswith('.json'):
            dataset_fpath = ub.expandpath(config['dataset'])
            coco_dset = ndsampler.CocoDataset(dataset_fpath)
            # Running prediction is much faster if you can build a sampler.
            sampler_backend = {
                'type': 'cog',
                'config': {
                    'compress': 'JPEG',
                },
                '_hack_old_names': False,  # flip to true to use legacy caches
            }
            sampler_backend = None
            print('coco hashid = {}'.format(coco_dset._build_hashid()))
        else:
            sampler_backend = None
            if exists(config['dataset']) and isfile(config['dataset']):
                # Single image case
                image_fpath = ub.expandpath(config['dataset'])
                coco_dset = ndsampler.CocoDataset()
                coco_dset.add_image(image_fpath)
    elif isinstance(config['dataset'], list):
        # Multiple image case
        gpaths = config['dataset']
        gpaths = [ub.expandpath(g) for g in gpaths]
        coco_dset = ndsampler.CocoDataset()
        for gpath in gpaths:
            coco_dset.add_image(gpath)
    else:
        raise TypeError(config['dataset'])

    draw = config.get('draw')
    workdir = ub.expandpath(config.get('workdir'))

    det_outdir = ub.ensuredir((out_dpath, 'pred'))

    pred_config = ub.dict_subset(config, DetectPredictConfig.default)

    print('Create sampler')
    sampler = ndsampler.CocoSampler(coco_dset, workdir=workdir,
                                    backend=sampler_backend)
    print('prepare frames')
    sampler.frames.prepare(workers=config['workers'])

    print('Create predictor')
    predictor = DetectPredictor(pred_config)
    print('Ensure model')
    predictor._ensure_model()

    pred_dataset = coco_dset.dataset.copy()
    pred_dataset['annotations'] = []
    pred_dset = ndsampler.CocoDataset(pred_dataset)

    # self = predictor
    predictor.config['verbose'] = 1
    pred_gen = predictor.predict_sampler(sampler)
    buffered_gen = AsyncBufferedGenerator(pred_gen, size=coco_dset.n_images)

    gid_to_pred = {}
    prog = ub.ProgIter(buffered_gen, total=coco_dset.n_images,
                       desc='buffered detect')
    for img_idx, (gid, dets) in enumerate(prog):
        gid_to_pred[gid] = dets

        for ann in dets.to_coco():
            ann['image_id'] = gid
            try:
                catname = ann['category_name']
                ann['category_id'] = pred_dset._resolve_to_cid(catname)
            except KeyError:
                if 'category_id' not in ann:
                    cid = pred_dset.add_category(catname)
                    ann['category_id'] = cid
            pred_dset.add_annotation(**ann)

        single_img_coco = pred_dset.subset([gid])
        single_pred_dpath = ub.ensuredir((det_outdir, 'single_image'))
        single_pred_fpath = join(single_pred_dpath, 'detections_gid_{:08d}.mscoco.json'.format(gid))
        single_img_coco.dump(single_pred_fpath, newlines=True)

        if draw is True or (draw and img_idx < draw):
            draw_outdir = ub.ensuredir((out_dpath, 'draw'))
            img_fpath = coco_dset.load_image_fpath(gid)
            gname = basename(img_fpath)
            viz_fname = ub.augpath(gname, prefix='detect_', ext='.jpg')
            viz_fpath = join(draw_outdir, viz_fname)

            image = kwimage.imread(img_fpath)

            flags = dets.scores > .2
            flags[kwarray.argmaxima(dets.scores, num=10)] = True
            top_dets = dets.compress(flags)
            toshow = top_dets.draw_on(image, alpha=None)
            # kwplot.imshow(toshow)
            kwimage.imwrite(viz_fpath, toshow, space='rgb')

    pred_fpath = join(det_outdir, 'detections.mscoco.json')
    print('Dump detections to pred_fpath = {!r}'.format(pred_fpath))
    pred_dset.dump(pred_fpath, newlines=True)
Ejemplo n.º 7
0
def _coerce_datasets(config):
    import netharn as nh
    import ndsampler
    import numpy as np
    from torchvision import transforms
    coco_datasets = nh.api.Datasets.coerce(config)
    print('coco_datasets = {}'.format(ub.repr2(coco_datasets, nl=1)))
    for tag, dset in coco_datasets.items():
        dset._build_hashid(hash_pixels=False)

    workdir = ub.ensuredir(ub.expandpath(config['workdir']))
    samplers = {
        tag: ndsampler.CocoSampler(dset, workdir=workdir, backend=config['sampler_backend'])
        for tag, dset in coco_datasets.items()
    }

    for tag, sampler in ub.ProgIter(list(samplers.items()), desc='prepare frames'):
        sampler.frames.prepare(workers=config['workers'])

    # TODO: basic ndsampler torch dataset, likely has to support the transforms
    # API, bleh.

    transform = transforms.Compose([
        transforms.Resize(config['input_dims']),
        transforms.CenterCrop(config['input_dims']),
        transforms.ToTensor(),
        transforms.Lambda(lambda x: x.mul(255))
    ])

    torch_datasets = {
        key: SamplerDataset(
            sapmler, transform=transform,
            # input_dims=config['input_dims'],
            # augmenter=config['augmenter'] if key == 'train' else None,
        )
        for key, sapmler in samplers.items()
    }
    # self = torch_dset = torch_datasets['train']

    if config['normalize_inputs']:
        # Get stats on the dataset (todo: turn off augmentation for this)
        import kwarray
        _dset = torch_datasets['train']
        stats_idxs = kwarray.shuffle(np.arange(len(_dset)), rng=0)[0:min(1000, len(_dset))]
        stats_subset = torch.utils.data.Subset(_dset, stats_idxs)

        cacher = ub.Cacher('dset_mean', cfgstr=_dset.input_id + 'v3')
        input_stats = cacher.tryload()

        from netharn.data.channel_spec import ChannelSpec
        channels = ChannelSpec.coerce(config['channels'])

        if input_stats is None:
            # Use parallel workers to load data faster
            from netharn.data.data_containers import container_collate
            from functools import partial
            collate_fn = partial(container_collate, num_devices=1)

            loader = torch.utils.data.DataLoader(
                stats_subset,
                collate_fn=collate_fn,
                num_workers=config['workers'],
                shuffle=True,
                batch_size=config['batch_size'])

            # Track moving average of each fused channel stream
            channel_stats = {key: nh.util.RunningStats()
                             for key in channels.keys()}
            assert len(channel_stats) == 1, (
                'only support one fused stream for now')
            for batch in ub.ProgIter(loader, desc='estimate mean/std'):
                if isinstance(batch, (tuple, list)):
                    inputs = {'rgb': batch[0]}  # make assumption
                else:
                    inputs = batch['inputs']

                for key, val in inputs.items():
                    try:
                        for part in val.numpy():
                            channel_stats[key].update(part)
                    except ValueError:  # final batch broadcast error
                        pass

            perchan_input_stats = {}
            for key, running in channel_stats.items():
                running = ub.peek(channel_stats.values())
                perchan_stats = running.simple(axis=(1, 2))
                perchan_input_stats[key] = {
                    'std': perchan_stats['mean'].round(3),
                    'mean': perchan_stats['std'].round(3),
                }

            input_stats = ub.peek(perchan_input_stats.values())
            cacher.save(input_stats)
    else:
        input_stats = {}

    torch_loaders = {
        tag: dset.make_loader(
            batch_size=config['batch_size'],
            num_batches=config['num_batches'],
            num_workers=config['workers'],
            shuffle=(tag == 'train'),
            balance=(config['balance'] if tag == 'train' else None),
            pin_memory=True)
        for tag, dset in torch_datasets.items()
    }

    dataset_info = {
        'torch_datasets': torch_datasets,
        'torch_loaders': torch_loaders,
        'input_stats': input_stats
    }
    return dataset_info
Ejemplo n.º 8
0
def setup_harn(cmdline=True, **kw):
    """
    Ignore:
        >>> from object_detection import *  # NOQA
        >>> cmdline = False
        >>> kw = {
        >>>     'train_dataset': '~/data/VOC/voc-trainval.mscoco.json',
        >>>     'vali_dataset': '~/data/VOC/voc-test-2007.mscoco.json',
        >>> }
        >>> harn = setup_harn(**kw)
    """
    import ndsampler
    from ndsampler import coerce_data
    # Seed other global rngs just in case something uses them under the hood
    kwarray.seed_global(1129989262, offset=1797315558)

    config = DetectFitConfig(default=kw, cmdline=cmdline)

    nh.configure_hacks(config)  # fix opencv bugs
    ub.ensuredir(config['workdir'])

    # Load ndsampler.CocoDataset objects from info in the config
    subsets = coerce_data.coerce_datasets(config)

    samplers = {}
    for tag, subset in subsets.items():
        print('subset = {!r}'.format(subset))
        sampler = ndsampler.CocoSampler(subset, workdir=config['workdir'])
        samplers[tag] = sampler

    torch_datasets = {
        tag: DetectDataset(
            sampler,
            input_dims=config['input_dims'],
            augment=config['augment'] if (tag == 'train') else False,
        )
        for tag, sampler in samplers.items()
    }

    print('make loaders')
    loaders_ = {
        tag:
        torch.utils.data.DataLoader(dset,
                                    batch_size=config['batch_size'],
                                    num_workers=config['workers'],
                                    shuffle=(tag == 'train'),
                                    collate_fn=nh.data.collate.padded_collate,
                                    pin_memory=True)
        for tag, dset in torch_datasets.items()
    }
    # for x in ub.ProgIter(loaders_['train']):
    #     pass

    if config['normalize_inputs']:
        # Get stats on the dataset (todo: turn off augmentation for this)
        _dset = torch_datasets['train']
        stats_idxs = kwarray.shuffle(np.arange(len(_dset)),
                                     rng=0)[0:min(1000, len(_dset))]
        stats_subset = torch.utils.data.Subset(_dset, stats_idxs)
        cacher = ub.Cacher('dset_mean', cfgstr=_dset.input_id + 'v2')
        input_stats = cacher.tryload()
        if input_stats is None:
            # Use parallel workers to load data faster
            loader = torch.utils.data.DataLoader(
                stats_subset,
                collate_fn=nh.data.collate.padded_collate,
                num_workers=config['workers'],
                shuffle=True,
                batch_size=config['batch_size'])
            # Track moving average
            running = nh.util.RunningStats()
            for batch in ub.ProgIter(loader, desc='estimate mean/std'):
                try:
                    running.update(batch['im'].numpy())
                except ValueError:  # final batch broadcast error
                    pass
            input_stats = {
                'std': running.simple(axis=None)['mean'].round(3),
                'mean': running.simple(axis=None)['std'].round(3),
            }
            cacher.save(input_stats)
    else:
        input_stats = None
    print('input_stats = {!r}'.format(input_stats))

    initializer_ = nh.Initializer.coerce(config, leftover='kaiming_normal')
    print('initializer_ = {!r}'.format(initializer_))

    arch = config['arch']
    if arch == 'yolo2':

        if False:
            dset = samplers['train'].dset
            print('dset = {!r}'.format(dset))
            # anchors = yolo2.find_anchors(dset)

        anchors = np.array([(1.3221, 1.73145), (3.19275, 4.00944),
                            (5.05587, 8.09892), (9.47112, 4.84053),
                            (11.2364, 10.0071)])

        classes = samplers['train'].classes
        model_ = (yolo2.Yolo2, {
            'classes': classes,
            'anchors': anchors,
            'conf_thresh': 0.001,
            'nms_thresh': 0.5 if not ub.argflag('--eav') else 0.4
        })
        model = model_[0](**model_[1])
        model._initkw = model_[1]

        criterion_ = (
            yolo2.YoloLoss,
            {
                'coder': model.coder,
                'seen': 0,
                'coord_scale': 1.0,
                'noobject_scale': 1.0,
                'object_scale': 5.0,
                'class_scale': 1.0,
                'thresh': 0.6,  # iou_thresh
                # 'seen_thresh': 12800,
            })
    else:
        raise KeyError(arch)

    scheduler_ = nh.Scheduler.coerce(config)
    print('scheduler_ = {!r}'.format(scheduler_))

    optimizer_ = nh.Optimizer.coerce(config)
    print('optimizer_ = {!r}'.format(optimizer_))

    dynamics_ = nh.Dynamics.coerce(config)
    print('dynamics_ = {!r}'.format(dynamics_))

    xpu = nh.XPU.coerce(config['xpu'])
    print('xpu = {!r}'.format(xpu))

    import sys

    hyper = nh.HyperParams(
        **{
            'nice':
            config['nice'],
            'workdir':
            config['workdir'],
            'datasets':
            torch_datasets,
            'loaders':
            loaders_,
            'xpu':
            xpu,
            'model':
            model,
            'criterion':
            criterion_,
            'initializer':
            initializer_,
            'optimizer':
            optimizer_,
            'dynamics':
            dynamics_,

            # 'optimizer': (torch.optim.SGD, {
            #     'lr': lr_step_points[0],
            #     'momentum': 0.9,
            #     'dampening': 0,
            #     # multiplying by batch size was one of those unpublished details
            #     'weight_decay': decay * simulated_bsize,
            # }),
            'scheduler':
            scheduler_,
            'monitor': (
                nh.Monitor,
                {
                    'minimize': ['loss'],
                    # 'maximize': ['mAP'],
                    'patience': config['patience'],
                    'max_epoch': config['max_epoch'],
                    'smoothing': .6,
                }),
            'other': {
                # Other params are not used internally, so you are free to set any
                # extra params specific to your algorithm, and still have them
                # logged in the hyperparam structure. For YOLO this is `ovthresh`.
                'batch_size': config['batch_size'],
                'nice': config['nice'],
                'ovthresh': config['ovthresh'],  # used in mAP computation
            },
            'extra': {
                'config': ub.repr2(config.asdict()),
                'argv': sys.argv,
            }
        })
    print('hyper = {!r}'.format(hyper))
    print('make harn')
    harn = DetectHarn(hyper=hyper)
    harn.preferences.update({
        'num_keep': 2,
        'keep_freq': 30,
        'export_modules': ['netharn'],  # TODO
        'prog_backend': 'progiter',  # alternative: 'tqdm'
        'keyboard_debug': True,
    })
    harn.intervals.update({
        'log_iter_train': 50,
    })
    harn.fit_config = config
    print('harn = {!r}'.format(harn))
    print('samplers = {!r}'.format(samplers))
    return harn
Ejemplo n.º 9
0
def setup_harness(**kwargs):
    """
    CommandLine:
        python ~/code/netharn/netharn/examples/ggr_matching.py setup_harness

    Example:
        >>> harn = setup_harness(dbname='PZ_MTEST')
        >>> harn.initialize()
    """
    nice = kwargs.get('nice', 'untitled')
    batch_size = int(kwargs.get('batch_size', 6))
    bstep = int(kwargs.get('bstep', 1))
    workers = int(kwargs.get('workers', 0))
    decay = float(kwargs.get('decay', 0.0005))
    lr = float(kwargs.get('lr', 0.001))
    dim = int(kwargs.get('dim', 416))
    xpu = kwargs.get('xpu', 'argv')
    workdir = kwargs.get('workdir', None)
    dbname = kwargs.get('dbname', 'ggr2')

    if workdir is None:
        workdir = ub.truepath(os.path.join('~/work/siam-ibeis2', dbname))
    ub.ensuredir(workdir)

    if dbname == 'ggr2':
        print('Creating torch CocoDataset')
        train_dset = ndsampler.CocoDataset(
            data=
            '/media/joncrall/raid/data/ggr2-coco/annotations/instances_train2018.json',
            img_root='/media/joncrall/raid/data/ggr2-coco/images/train2018',
        )
        train_dset.hashid = 'ggr2-coco-train2018'
        vali_dset = ndsampler.CocoDataset(
            data=
            '/media/joncrall/raid/data/ggr2-coco/annotations/instances_val2018.json',
            img_root='/media/joncrall/raid/data/ggr2-coco/images/val2018',
        )
        vali_dset.hashid = 'ggr2-coco-val2018'

        print('Creating samplers')
        train_sampler = ndsampler.CocoSampler(train_dset, workdir=workdir)
        vali_sampler = ndsampler.CocoSampler(vali_dset, workdir=workdir)

        print('Creating torch Datasets')
        datasets = {
            'train':
            MatchingCocoDataset(train_sampler,
                                train_dset,
                                workdir,
                                dim=dim,
                                augment=True),
            'vali':
            MatchingCocoDataset(vali_sampler, vali_dset, workdir, dim=dim),
        }
    else:
        from ibeis_utils import randomized_ibeis_dset
        datasets = randomized_ibeis_dset(dbname, dim=dim)

    for k, v in datasets.items():
        print('* len({}) = {}'.format(k, len(v)))

    if workers > 0:
        import cv2
        cv2.setNumThreads(0)

    loaders = {
        key: torch.utils.data.DataLoader(dset,
                                         batch_size=batch_size,
                                         num_workers=workers,
                                         shuffle=(key == 'train'),
                                         pin_memory=True)
        for key, dset in datasets.items()
    }

    xpu = nh.XPU.cast(xpu)

    hyper = nh.HyperParams(
        **{
            'nice':
            nice,
            'workdir':
            workdir,
            'datasets':
            datasets,
            'loaders':
            loaders,
            'xpu':
            xpu,
            'model': (MatchingNetworkLP, {
                'p': 2,
                'input_shape': (1, 3, dim, dim),
            }),
            'criterion': (nh.criterions.ContrastiveLoss, {
                'margin': 4,
                'weight': None,
            }),
            'optimizer': (torch.optim.SGD, {
                'lr': lr,
                'weight_decay': decay,
                'momentum': 0.9,
                'nesterov': True,
            }),
            'initializer': (nh.initializers.NoOp, {}),
            'scheduler': (nh.schedulers.Exponential, {
                'gamma': 0.99,
                'stepsize': 2,
            }),
            # 'scheduler': (nh.schedulers.ListedLR, {
            #     'points': {
            #         1:   lr * 1.0,
            #         19:  lr * 1.1,
            #         20:  lr * 0.1,
            #     },
            #     'interpolate': True
            # }),
            'monitor': (nh.Monitor, {
                'minimize': ['loss', 'pos_dist', 'brier'],
                'maximize': ['accuracy', 'neg_dist', 'mcc'],
                'patience': 40,
                'max_epoch': 40,
            }),

            # 'augment': datasets['train'].augmenter,
            'dynamics': {
                # Controls how many batches to process before taking a step in the
                # gradient direction. Effectively simulates a batch_size that is
                # `bstep` times bigger.
                'batch_step': bstep,
            },
            'other': {
                'n_classes': 2,
            },
        })
    harn = MatchingHarness(hyper=hyper)
    harn.config['prog_backend'] = 'progiter'
    harn.intervals['log_iter_train'] = 1
    harn.intervals['log_iter_test'] = None
    harn.intervals['log_iter_vali'] = None

    return harn
Ejemplo n.º 10
0
def setup_harn(cmdline=True, **kw):
    """
    Example:
        >>> # xdoctest: +REQUIRES(--download)
        >>> import sys, ubelt
        >>> sys.path.append(ubelt.expandpath('~/code/netharn/examples'))
        >>> from sseg_camvid import *  # NOQA
        >>> kw = {'workers': 0, 'xpu': 'cpu', 'batch_size': 2}
        >>> cmdline = False
        >>> # Just sets up the harness, does not do any heavy lifting
        >>> harn = setup_harn(cmdline=cmdline, **kw)
        >>> #
        >>> harn.initialize()
        >>> #
        >>> batch = harn._demo_batch(tag='train')
        >>> epoch_metrics = harn._demo_epoch(tag='vali', max_iter=4)
    """
    import sys
    import ndsampler

    config = SegmentationConfig(default=kw)
    config.load(cmdline=cmdline)
    nh.configure_hacks(config)  # fix opencv bugs

    assert config['datasets'] == 'special:camvid'

    coco_datasets = setup_coco_datasets()

    workdir = ub.ensuredir(ub.expandpath(config['workdir']))
    samplers = {
        # tag: ndsampler.CocoSampler(dset, workdir=workdir, backend='cog')
        tag: ndsampler.CocoSampler(dset, workdir=workdir, backend='npy')
        for tag, dset in coco_datasets.items()
    }
    torch_datasets = {
        tag: SegmentationDataset(
            sampler,
            config['input_dims'],
            input_overlap=((tag == 'train') and config['input_overlap']),
            augment=((tag == 'train') and config['augment']),
        )
        for tag, sampler in samplers.items()
    }
    torch_loaders = {
        tag: torch_data.DataLoader(dset,
                                   batch_size=config['batch_size'],
                                   num_workers=config['workers'],
                                   shuffle=(tag == 'train'),
                                   drop_last=True, pin_memory=True)
        for tag, dset in torch_datasets.items()
    }

    if config['class_weights']:
        mode = config['class_weights']
        dset = torch_datasets['train']
        class_weights = _precompute_class_weights(dset, mode=mode)
        class_weights = torch.FloatTensor(class_weights)
        class_weights[dset.classes.index('background')] = 0
    else:
        class_weights = None

    initializer_ = nh.Initializer.coerce(config)

    if config['arch'] == 'unet':
        # Note: UNet can get through 256x256 images at a rate of ~17Hz with
        # batch_size=8. This is pretty slow and can likely be improved by fixing
        # some of the weird padding / mirror stuff I have to do in unet to get
        # output_dims = input_dims.
        from netharn.models.unet import UNet
        model_ = (UNet, {
            'classes': torch_datasets['train'].classes,
            'in_channels': 3,
        })
    elif config['arch'] == 'segnet':
        from netharn.models.segnet import Segnet
        model_ = (Segnet, {
            'classes': torch_datasets['train'].classes,
            'in_channels': 3,
        })
    elif config['arch'] == 'psp':
        from netharn.models.psp import PSPNet_Resnet50_8s
        model_ = (PSPNet_Resnet50_8s, {
            'classes': torch_datasets['train'].classes,
            'in_channels': 3,
        })
    elif config['arch'] == 'deeplab':
        from netharn.models.deeplab import DeepLab_ASPP
        model_ = (DeepLab_ASPP, {
            'classes': torch_datasets['train'].classes,
            'in_channels': 3,
        })

    else:
        raise KeyError(config['arch'])

    if config['init'] == 'cls':
        initializer_ = model_[0]._initializer_cls()

    # Create hyperparameters
    hyper = nh.HyperParams(
        nice=config['nice'],
        workdir=config['workdir'],
        xpu=nh.XPU.coerce(config['xpu']),

        datasets=torch_datasets,
        loaders=torch_loaders,

        model=model_,
        initializer=initializer_,

        scheduler=nh.Scheduler.coerce(config),
        optimizer=nh.Optimizer.coerce(config),
        dynamics=nh.Dynamics.coerce(config),

        criterion=(nh.criterions.FocalLoss, {
            'focus': config['focus'],
            'weight': class_weights,
            # 'reduction': 'none',
        }),

        monitor=(nh.Monitor, {
            'minimize': ['loss'],
            'patience': config['patience'],
            'max_epoch': config['max_epoch'],
            'smoothing': .6,
        }),

        other={
            'batch_size': config['batch_size'],
        },
        extra={
            'argv': sys.argv,
            'config': ub.repr2(config.asdict()),
        }
    )

    # Create harness
    harn = SegmentationHarn(hyper=hyper)
    harn.classes = torch_datasets['train'].classes
    harn.preferences.update({
        'num_keep': 5,
        'keyboard_debug': True,
        # 'export_modules': ['netharn'],
    })
    harn.intervals.update({
        'vali': 1,
        'test': 10,
    })
    harn.script_config = config
    return harn
Ejemplo n.º 11
0
def setup_harn(cmdline=True, **kw):
    """
    CommandLine:
        xdoctest -m netharn.examples.segmentation setup_harn

    Example:
        >>> # xdoctest: +REQUIRES(--slow)
        >>> kw = {'workers': 0, 'xpu': 'cpu', 'batch_size': 2}
        >>> cmdline = False
        >>> # Just sets up the harness, does not do any heavy lifting
        >>> harn = setup_harn(cmdline=cmdline, **kw)
        >>> #
        >>> harn.initialize()
        >>> #
        >>> batch = harn._demo_batch(tag='train')
        >>> epoch_metrics = harn._demo_epoch(tag='vali', max_iter=2)
    """
    import sys
    import ndsampler
    import kwarray
    # kwarray.seed_global(2108744082)

    config = SegmentationConfig(default=kw)
    config.load(cmdline=cmdline)
    nh.configure_hacks(config)  # fix opencv bugs

    coco_datasets = nh.api.Datasets.coerce(config)
    print('coco_datasets = {}'.format(ub.repr2(coco_datasets)))
    for tag, dset in coco_datasets.items():
        dset._build_hashid(hash_pixels=False)

    workdir = ub.ensuredir(ub.expandpath(config['workdir']))
    samplers = {
        tag: ndsampler.CocoSampler(dset,
                                   workdir=workdir,
                                   backend=config['backend'])
        for tag, dset in coco_datasets.items()
    }

    for tag, sampler in ub.ProgIter(list(samplers.items()),
                                    desc='prepare frames'):
        try:
            sampler.frames.prepare(workers=config['workers'])
        except AttributeError:
            pass

    torch_datasets = {
        tag: SegmentationDataset(
            sampler,
            config['input_dims'],
            input_overlap=((tag == 'train') and config['input_overlap']),
            augmenter=((tag == 'train') and config['augmenter']),
        )
        for tag, sampler in samplers.items()
    }
    torch_loaders = {
        tag: torch_data.DataLoader(dset,
                                   batch_size=config['batch_size'],
                                   num_workers=config['workers'],
                                   shuffle=(tag == 'train'),
                                   drop_last=True,
                                   pin_memory=True)
        for tag, dset in torch_datasets.items()
    }

    if config['class_weights']:
        mode = config['class_weights']
        dset = torch_datasets['train']
        class_weights = _precompute_class_weights(dset,
                                                  mode=mode,
                                                  workers=config['workers'])
        class_weights = torch.FloatTensor(class_weights)
        class_weights[dset.classes.index('background')] = 0
    else:
        class_weights = None

    if config['normalize_inputs']:
        stats_dset = torch_datasets['train']
        stats_idxs = kwarray.shuffle(np.arange(len(stats_dset)),
                                     rng=0)[0:min(1000, len(stats_dset))]
        stats_subset = torch.utils.data.Subset(stats_dset, stats_idxs)
        cacher = ub.Cacher('dset_mean', cfgstr=stats_dset.input_id + 'v3')
        input_stats = cacher.tryload()
        if input_stats is None:
            loader = torch.utils.data.DataLoader(
                stats_subset,
                num_workers=config['workers'],
                shuffle=True,
                batch_size=config['batch_size'])
            running = nh.util.RunningStats()
            for batch in ub.ProgIter(loader, desc='estimate mean/std'):
                try:
                    running.update(batch['im'].numpy())
                except ValueError:  # final batch broadcast error
                    pass
            input_stats = {
                'std': running.simple(axis=None)['mean'].round(3),
                'mean': running.simple(axis=None)['std'].round(3),
            }
            cacher.save(input_stats)
    else:
        input_stats = {}

    print('input_stats = {!r}'.format(input_stats))

    # TODO: infer numbr of channels
    model_ = (SegmentationModel, {
        'arch': config['arch'],
        'input_stats': input_stats,
        'classes': torch_datasets['train'].classes.__json__(),
        'in_channels': 3,
    })

    initializer_ = nh.Initializer.coerce(config)
    # if config['init'] == 'cls':
    #     initializer_ = model_[0]._initializer_cls()

    # Create hyperparameters
    hyper = nh.HyperParams(
        nice=config['nice'],
        workdir=config['workdir'],
        xpu=nh.XPU.coerce(config['xpu']),
        datasets=torch_datasets,
        loaders=torch_loaders,
        model=model_,
        initializer=initializer_,
        scheduler=nh.Scheduler.coerce(config),
        optimizer=nh.Optimizer.coerce(config),
        dynamics=nh.Dynamics.coerce(config),
        criterion=(
            nh.criterions.FocalLoss,
            {
                'focus': config['focus'],
                'weight': class_weights,
                # 'reduction': 'none',
            }),
        monitor=(nh.Monitor, {
            'minimize': ['loss'],
            'patience': config['patience'],
            'max_epoch': config['max_epoch'],
            'smoothing': .6,
        }),
        other={
            'batch_size': config['batch_size'],
        },
        extra={
            'argv': sys.argv,
            'config': ub.repr2(config.asdict()),
        })

    # Create harness
    harn = SegmentationHarn(hyper=hyper)
    harn.classes = torch_datasets['train'].classes
    harn.preferences.update({
        'num_keep': 2,
        'keyboard_debug': True,
        # 'export_modules': ['netharn'],
    })
    harn.intervals.update({
        'vali': 1,
        'test': 10,
    })
    harn.script_config = config
    return harn
Ejemplo n.º 12
0
def setup_harn(cmdline=True, **kw):
    """
    This creates the "The Classification Harness" (i.e. core ClfHarn object).
    This is where we programmatically connect our program arguments with the
    netharn HyperParameter standards. We are using :module:`scriptconfig` to
    capture these, but you could use click / argparse / etc.

    This function has the responsibility of creating our torch datasets,
    lazy computing input statistics, specifying our model architecture,
    schedule, initialization, optimizer, dynamics, XPU etc. These can usually
    be coerced using netharn API helpers and a "standardized" config dict. See
    the function code for details.

    Args:
        cmdline (bool, default=True):
            if True, behavior will be modified based on ``sys.argv``.
            Note this will activate the scriptconfig ``--help``, ``--dump`` and
            ``--config`` interactions.

    Kwargs:
        **kw: the overrides the default config for :class:`ClfConfig`.
            Note, command line flags have precedence if cmdline=True.

    Returns:
        ClfHarn: a fully-defined, but uninitialized custom :class:`FitHarn`
            object.

    Example:
        >>> # xdoctest: +SKIP
        >>> kw = {'datasets': 'special:shapes256'}
        >>> cmdline = False
        >>> harn = setup_harn(cmdline, **kw)
        >>> harn.initialize()
    """
    import ndsampler
    config = ClfConfig(default=kw)
    config.load(cmdline=cmdline)
    print('config = {}'.format(ub.repr2(config.asdict())))

    nh.configure_hacks(config)
    coco_datasets = nh.api.Datasets.coerce(config)

    print('coco_datasets = {}'.format(ub.repr2(coco_datasets, nl=1)))
    for tag, dset in coco_datasets.items():
        dset._build_hashid(hash_pixels=False)

    workdir = ub.ensuredir(ub.expandpath(config['workdir']))
    samplers = {
        tag: ndsampler.CocoSampler(dset,
                                   workdir=workdir,
                                   backend=config['sampler_backend'])
        for tag, dset in coco_datasets.items()
    }

    for tag, sampler in ub.ProgIter(list(samplers.items()),
                                    desc='prepare frames'):
        sampler.frames.prepare(workers=config['workers'])

    torch_datasets = {
        'train':
        ClfDataset(
            samplers['train'],
            input_dims=config['input_dims'],
            augmenter=config['augmenter'],
        ),
        'vali':
        ClfDataset(samplers['vali'],
                   input_dims=config['input_dims'],
                   augmenter=False),
    }

    if config['normalize_inputs']:
        # Get stats on the dataset (todo: turn off augmentation for this)
        _dset = torch_datasets['train']
        stats_idxs = kwarray.shuffle(np.arange(len(_dset)),
                                     rng=0)[0:min(1000, len(_dset))]
        stats_subset = torch.utils.data.Subset(_dset, stats_idxs)

        cacher = ub.Cacher('dset_mean', cfgstr=_dset.input_id + 'v3')
        input_stats = cacher.tryload()

        channels = ChannelSpec.coerce(config['channels'])

        if input_stats is None:
            # Use parallel workers to load data faster
            from netharn.data.data_containers import container_collate
            from functools import partial
            collate_fn = partial(container_collate, num_devices=1)

            loader = torch.utils.data.DataLoader(
                stats_subset,
                collate_fn=collate_fn,
                num_workers=config['workers'],
                shuffle=True,
                batch_size=config['batch_size'])

            # Track moving average of each fused channel stream
            channel_stats = {
                key: nh.util.RunningStats()
                for key in channels.keys()
            }
            assert len(channel_stats) == 1, (
                'only support one fused stream for now')
            for batch in ub.ProgIter(loader, desc='estimate mean/std'):
                for key, val in batch['inputs'].items():
                    try:
                        for part in val.numpy():
                            channel_stats[key].update(part)
                    except ValueError:  # final batch broadcast error
                        pass

            perchan_input_stats = {}
            for key, running in channel_stats.items():
                running = ub.peek(channel_stats.values())
                perchan_stats = running.simple(axis=(1, 2))
                perchan_input_stats[key] = {
                    'std': perchan_stats['mean'].round(3),
                    'mean': perchan_stats['std'].round(3),
                }

            input_stats = ub.peek(perchan_input_stats.values())
            cacher.save(input_stats)
    else:
        input_stats = {}

    torch_loaders = {
        tag: dset.make_loader(
            batch_size=config['batch_size'],
            num_batches=config['num_batches'],
            num_workers=config['workers'],
            shuffle=(tag == 'train'),
            balance=(config['balance'] if tag == 'train' else None),
            pin_memory=True)
        for tag, dset in torch_datasets.items()
    }

    initializer_ = None
    classes = torch_datasets['train'].classes

    modelkw = {
        'arch': config['arch'],
        'input_stats': input_stats,
        'classes': classes.__json__(),
        'channels': channels,
    }
    model = ClfModel(**modelkw)
    model._initkw = modelkw

    if initializer_ is None:
        initializer_ = nh.Initializer.coerce(config)

    hyper = nh.HyperParams(name=config['name'],
                           workdir=config['workdir'],
                           xpu=nh.XPU.coerce(config['xpu']),
                           datasets=torch_datasets,
                           loaders=torch_loaders,
                           model=model,
                           criterion=None,
                           optimizer=nh.Optimizer.coerce(config),
                           dynamics=nh.Dynamics.coerce(config),
                           scheduler=nh.Scheduler.coerce(config),
                           initializer=initializer_,
                           monitor=(nh.Monitor, {
                               'minimize': ['loss'],
                               'patience': config['patience'],
                               'max_epoch': config['max_epoch'],
                               'smoothing': 0.0,
                           }),
                           other={
                               'name': config['name'],
                               'batch_size': config['batch_size'],
                               'balance': config['balance'],
                           },
                           extra={
                               'argv': sys.argv,
                               'config': ub.repr2(config.asdict()),
                           })
    harn = ClfHarn(hyper=hyper)
    harn.preferences.update({
        'num_keep': 3,
        'keep_freq': 10,
        'tensorboard_groups': ['loss'],
        'eager_dump_tensorboard': True,
    })
    harn.intervals.update({})
    harn.script_config = config
    return harn
Ejemplo n.º 13
0
def setup_harn(cmdline=True, **kwargs):
    """
    cmdline, kwargs = False, {}
    """
    import sys
    import ndsampler

    config = ImageClfConfig(default=kwargs)
    config.load(cmdline=cmdline)
    nh.configure_hacks(config)  # fix opencv bugs

    cacher = ub.Cacher('tiny-imagenet', cfgstr='v4', verbose=3)
    data = cacher.tryload()
    if data is None:
        data = grab_tiny_imagenet_as_coco()
        cacher.save(data)
    coco_datasets = data  # setup_coco_datasets()
    dset = coco_datasets['train']
    print('train dset = {!r}'.format(dset))

    workdir = ub.ensuredir(ub.expandpath(config['workdir']))
    samplers = {
        # tag: ndsampler.CocoSampler(dset, workdir=workdir, backend='cog')
        tag: ndsampler.CocoSampler(dset, workdir=workdir, backend='npy')
        for tag, dset in coco_datasets.items()
    }
    torch_datasets = {
        tag: ImagClfDataset(
            sampler, config['input_dims'],
            augmenter=((tag == 'train') and config['augmenter']),
        )
        for tag, sampler in samplers.items()
    }
    torch_loaders = {
        tag: torch_data.DataLoader(dset,
                                   batch_size=config['batch_size'],
                                   num_workers=config['workers'],
                                   shuffle=(tag == 'train'),
                                   pin_memory=True)
        for tag, dset in torch_datasets.items()
    }

    import torchvision
    # TODO: netharn should allow for this
    model_ = torchvision.models.resnet50(pretrained=False)

    # model_ = (, {
    #     'classes': torch_datasets['train'].classes,
    #     'in_channels': 3,
    # })
    initializer_ = nh.Initializer.coerce(config)

    hyper = nh.HyperParams(
        nice=config['nice'],
        workdir=config['workdir'],
        xpu=nh.XPU.coerce(config['xpu']),

        datasets=torch_datasets,
        loaders=torch_loaders,

        model=model_,
        initializer=initializer_,

        scheduler=nh.Scheduler.coerce(config),
        optimizer=nh.Optimizer.coerce(config),
        dynamics=nh.Dynamics.coerce(config),

        criterion=(nh.criterions.FocalLoss, {
            'focus': 0.0,
        }),

        monitor=(nh.Monitor, {
            'minimize': ['loss'],
            'patience': config['patience'],
            'max_epoch': config['max_epoch'],
            'smoothing': .6,
        }),

        other={
            'batch_size': config['batch_size'],
        },
        extra={
            'argv': sys.argv,
            'config': ub.repr2(config.asdict()),
        }
    )

    # Create harness
    harn = ImageClfHarn(hyper=hyper)
    harn.classes = torch_datasets['train'].classes
    harn.preferences.update({
        'num_keep': 5,
        'keyboard_debug': True,
        # 'export_modules': ['netharn'],
    })
    harn.intervals.update({
        'vali': 1,
        'test': 10,
    })
    harn.script_config = config
    return harn
Ejemplo n.º 14
0
def test_variable_backend():
    """
    CommandLine:
        xdoctest -m $HOME/code/ndsampler/tests/tests_frame_backends.py test_variable_backend
        --debug-validate-cog --debug-load-cog
    """
    try:
        import gdal  # NOQA
    except ImportError:
        import pytest
        pytest.skip('cog requires gdal')
    import ndsampler
    import kwcoco
    import ubelt as ub
    dpath = ub.ensure_app_cache_dir('ndsampler/tests/test_variable_backend')
    ub.delete(dpath)
    ub.ensuredir(dpath)

    fnames = [
        'test_rgb_255.jpg',
        'test_gray_255.jpg',
        'test_rgb_255.png',
        'test_rgba_255.png',
        'test_gray_255.png',
        'test_rgb_01.tif',
        'test_rgb_255.tif',
        'test_rgba_01.tif',
        'test_rgba_255.tif',
        'test_gray_01.tif',
        'test_gray_255.tif',
    ]
    import kwarray
    import kwimage
    fpaths = []
    rng = kwarray.ensure_rng(0)

    h, w = 1200, 1055

    for fname in ub.ProgIter(fnames, desc='create test data'):
        if 'rgb_' in fname:
            data = rng.rand(h, w, 3)
        elif 'rgba_' in fname:
            data = rng.rand(h, w, 4)
        elif 'gray_' in fname:
            data = rng.rand(h, w, 1)

        if '01' in fname:
            pass
        elif '255' in fname:
            data = (data * 255).astype(np.uint8)

        fpath = join(dpath, fname)
        fpaths.append(fpath)
        kwimage.imwrite(fpath, data)

    for fpath in fpaths:
        data = kwimage.imread(fpath)
        print(
            ub.repr2(
                {
                    'fname': basename(fpath),
                    'data.shape': data.shape,
                    'data.dtype': data.dtype,
                },
                nl=0))

    dset = kwcoco.CocoDataset.from_image_paths(fpaths)
    sampler = ndsampler.CocoSampler(dset, backend='cog', workdir=dpath)
    frames = sampler.frames
    # frames.prepare()

    if 1:
        for gid in frames.image_ids:
            print('======== < START IMAGE ID > ===============')
            gpath, cache_gpath = frames._gnames(gid)
            print('gpath = {!r}'.format(gpath))
            print('cache_gpath = {!r}'.format(cache_gpath))

            image = frames.load_image(gid)
            print('image = {!r}'.format(image))
            print('image.shape = {!r}'.format(image.shape))
            print('image.dtype = {!r}'.format(image.dtype))

            subdata = image[0:8, 0:8]
            print('subdata.dtype = {!r}'.format(subdata.dtype))
            print('subdata.shape = {!r}'.format(subdata.shape))
            # info = ub.cmd('gdalinfo ' + cache_gpath, verbose=0)
            # print('GDAL INFO:')
            # print(ub.indent(info['out']))
            # assert info['ret'] == 0

            # dataset = gdal.OpenEx(cache_gpath)
            dataset = gdal.Open(cache_gpath, gdal.GA_ReadOnly)
            md = dataset.GetMetadata('IMAGE_STRUCTURE')
            print('md = {!r}'.format(md))
            # Use dict.get method in case the metadata dict does not have a 'COMPRESSION' key
            compression = md.get('COMPRESSION', 'RAW')
            print('compression = {!r}'.format(compression))

            print('======== < END IMAGE ID > ===============')