Example #1
def setup_harn(cmdline=False, **kw):
    """
    Ignore:
        kw = {}
        cmdline = False
        harn = setup_harn()
    """
    config = StyleTransferConfig(default=kw)
    config.load(cmdline=cmdline)
    print('config = {}'.format(ub.repr2(config.asdict())))

    nh.configure_hacks(config)

    dataset_info = nh.api.DatasetInfo.coerce(config)

    # input_stats = dataset_info['input_stats']
    model = (TransformerNetwork, {})

    hyper = nh.HyperParams(name=config['name'],
                           workdir=config['workdir'],
                           xpu=nh.XPU.coerce(config['xpu']),
                           datasets=dataset_info['torch_datasets'],
                           loaders=dataset_info['torch_loaders'],
                           model=model,
                           criterion=None,
                           initializer=None,
                           optimizer=nh.Optimizer.coerce(config),
                           dynamics=nh.Dynamics.coerce(config),
                           scheduler=nh.Scheduler.coerce(config),
                           monitor=(nh.Monitor, {
                               'minimize': ['loss'],
                               'patience': config['patience'],
                               'max_epoch': config['max_epoch'],
                               'smoothing': 0.0,
                           }),
                           other={
                               'name': config['name'],
                               'batch_size': config['batch_size'],
                               'balance': config['balance'],
                           },
                           extra={
                               'argv': sys.argv,
                               'config': ub.repr2(config.asdict()),
                           })
    harn = StyleTransferHarn(hyper=hyper)
    harn.preferences.update({
        'num_keep': 3,
        'keep_freq': 10,
        'tensorboard_groups': ['loss'],
        'eager_dump_tensorboard': True,
    })
    harn.intervals.update({})
    harn.script_config = config
    return harn
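The harness returned here is fully specified but not yet initialized. A minimal driver might look like the sketch below; it assumes the standard netharn FitHarn API, where initialize() resolves hyperparameters and weights and run() starts the train loop.

# A minimal usage sketch (assumes the netharn FitHarn API).
harn = setup_harn(cmdline=False, batch_size=2)
harn.initialize()  # resolve hyperparams, workdir, and initial weights
harn.run()         # train/validate until the Monitor criteria stop it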
Example #2
def setup_harn(cmdline=True, **kw):
    """
    Ignore:
        >>> from object_detection import *  # NOQA
        >>> cmdline = False
        >>> kw = {
        >>>     'train_dataset': '~/data/VOC/voc-trainval.mscoco.json',
        >>>     'vali_dataset': '~/data/VOC/voc-test-2007.mscoco.json',
        >>> }
        >>> harn = setup_harn(**kw)
    """
    import ndsampler
    from ndsampler import coerce_data
    # Seed other global rngs just in case something uses them under the hood
    kwarray.seed_global(1129989262, offset=1797315558)

    config = DetectFitConfig(default=kw, cmdline=cmdline)

    nh.configure_hacks(config)  # fix opencv bugs
    ub.ensuredir(config['workdir'])

    # Load ndsampler.CocoDataset objects from info in the config
    subsets = coerce_data.coerce_datasets(config)

    samplers = {}
    for tag, subset in subsets.items():
        print('subset = {!r}'.format(subset))
        sampler = ndsampler.CocoSampler(subset, workdir=config['workdir'])
        samplers[tag] = sampler

    torch_datasets = {
        tag: DetectDataset(
            sampler,
            input_dims=config['input_dims'],
            augment=config['augment'] if (tag == 'train') else False,
        )
        for tag, sampler in samplers.items()
    }

    print('make loaders')
    loaders_ = {
        tag:
        torch.utils.data.DataLoader(dset,
                                    batch_size=config['batch_size'],
                                    num_workers=config['workers'],
                                    shuffle=(tag == 'train'),
                                    collate_fn=nh.data.collate.padded_collate,
                                    pin_memory=True)
        for tag, dset in torch_datasets.items()
    }
    # for x in ub.ProgIter(loaders_['train']):
    #     pass

    if config['normalize_inputs']:
        # Get stats on the dataset (todo: turn off augmentation for this)
        _dset = torch_datasets['train']
        stats_idxs = kwarray.shuffle(np.arange(len(_dset)),
                                     rng=0)[0:min(1000, len(_dset))]
        stats_subset = torch.utils.data.Subset(_dset, stats_idxs)
        cacher = ub.Cacher('dset_mean', cfgstr=_dset.input_id + 'v2')
        input_stats = cacher.tryload()
        if input_stats is None:
            # Use parallel workers to load data faster
            loader = torch.utils.data.DataLoader(
                stats_subset,
                collate_fn=nh.data.collate.padded_collate,
                num_workers=config['workers'],
                shuffle=True,
                batch_size=config['batch_size'])
            # Track moving average
            running = nh.util.RunningStats()
            for batch in ub.ProgIter(loader, desc='estimate mean/std'):
                try:
                    running.update(batch['im'].numpy())
                except ValueError:  # final batch broadcast error
                    pass
            input_stats = {
                'mean': running.simple(axis=None)['mean'].round(3),
                'std': running.simple(axis=None)['std'].round(3),
            }
            cacher.save(input_stats)
    else:
        input_stats = None
    print('input_stats = {!r}'.format(input_stats))

    initializer_ = nh.Initializer.coerce(config, leftover='kaiming_normal')
    print('initializer_ = {!r}'.format(initializer_))

    arch = config['arch']
    if arch == 'yolo2':

        if False:
            dset = samplers['train'].dset
            print('dset = {!r}'.format(dset))
            # anchors = yolo2.find_anchors(dset)

        anchors = np.array([(1.3221, 1.73145), (3.19275, 4.00944),
                            (5.05587, 8.09892), (9.47112, 4.84053),
                            (11.2364, 10.0071)])

        classes = samplers['train'].classes
        model_ = (yolo2.Yolo2, {
            'classes': classes,
            'anchors': anchors,
            'conf_thresh': 0.001,
            'nms_thresh': 0.5 if not ub.argflag('--eav') else 0.4
        })
        model = model_[0](**model_[1])
        model._initkw = model_[1]

        criterion_ = (
            yolo2.YoloLoss,
            {
                'coder': model.coder,
                'seen': 0,
                'coord_scale': 1.0,
                'noobject_scale': 1.0,
                'object_scale': 5.0,
                'class_scale': 1.0,
                'thresh': 0.6,  # iou_thresh
                # 'seen_thresh': 12800,
            })
    else:
        raise KeyError(arch)

    scheduler_ = nh.Scheduler.coerce(config)
    print('scheduler_ = {!r}'.format(scheduler_))

    optimizer_ = nh.Optimizer.coerce(config)
    print('optimizer_ = {!r}'.format(optimizer_))

    dynamics_ = nh.Dynamics.coerce(config)
    print('dynamics_ = {!r}'.format(dynamics_))

    xpu = nh.XPU.coerce(config['xpu'])
    print('xpu = {!r}'.format(xpu))

    import sys

    hyper = nh.HyperParams(
        **{
            'nice': config['nice'],
            'workdir': config['workdir'],
            'datasets': torch_datasets,
            'loaders': loaders_,
            'xpu': xpu,
            'model': model,
            'criterion': criterion_,
            'initializer': initializer_,
            'optimizer': optimizer_,
            'dynamics': dynamics_,

            # 'optimizer': (torch.optim.SGD, {
            #     'lr': lr_step_points[0],
            #     'momentum': 0.9,
            #     'dampening': 0,
            #     # multiplying by batch size was one of those unpublished details
            #     'weight_decay': decay * simulated_bsize,
            # }),
            'scheduler': scheduler_,
            'monitor': (nh.Monitor, {
                'minimize': ['loss'],
                # 'maximize': ['mAP'],
                'patience': config['patience'],
                'max_epoch': config['max_epoch'],
                'smoothing': .6,
            }),
            'other': {
                # Other params are not used internally, so you are free to set any
                # extra params specific to your algorithm, and still have them
                # logged in the hyperparam structure. For YOLO this is `ovthresh`.
                'batch_size': config['batch_size'],
                'nice': config['nice'],
                'ovthresh': config['ovthresh'],  # used in mAP computation
            },
            'extra': {
                'config': ub.repr2(config.asdict()),
                'argv': sys.argv,
            }
        })
    print('hyper = {!r}'.format(hyper))
    print('make harn')
    harn = DetectHarn(hyper=hyper)
    harn.preferences.update({
        'num_keep': 2,
        'keep_freq': 30,
        'export_modules': ['netharn'],  # TODO
        'prog_backend': 'progiter',  # alternative: 'tqdm'
        'keyboard_debug': True,
    })
    harn.intervals.update({
        'log_iter_train': 50,
    })
    harn.fit_config = config
    print('harn = {!r}'.format(harn))
    print('samplers = {!r}'.format(samplers))
    return harn
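The normalize_inputs branch above estimates the dataset mean and std incrementally with nh.util.RunningStats, so the whole dataset never has to sit in memory at once. For intuition, a direct non-incremental equivalent is sketched below; dset is a hypothetical small dataset with fixed-size images whose items are dicts with an 'im' tensor, as the loop above implies.

import numpy as np

# Hypothetical sanity check: the incremental estimate should agree
# with the direct computation on a dataset small enough to stack.
ims = np.stack([dset[i]['im'].numpy() for i in range(len(dset))])
direct_stats = {
    'mean': ims.mean().round(3),
    'std': ims.std().round(3),
}
print('direct_stats = {!r}'.format(direct_stats))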
Example #3
def setup_harn(cmdline=True, **kw):
    """
    Example:
        >>> # xdoctest: +REQUIRES(--download)
        >>> import sys, ubelt
        >>> sys.path.append(ubelt.expandpath('~/code/netharn/examples'))
        >>> from sseg_camvid import *  # NOQA
        >>> kw = {'workers': 0, 'xpu': 'cpu', 'batch_size': 2}
        >>> cmdline = False
        >>> # Just sets up the harness, does not do any heavy lifting
        >>> harn = setup_harn(cmdline=cmdline, **kw)
        >>> #
        >>> harn.initialize()
        >>> #
        >>> batch = harn._demo_batch(tag='train')
        >>> epoch_metrics = harn._demo_epoch(tag='vali', max_iter=4)
    """
    import sys
    import ndsampler

    config = SegmentationConfig(default=kw)
    config.load(cmdline=cmdline)
    nh.configure_hacks(config)  # fix opencv bugs

    assert config['datasets'] == 'special:camvid'

    coco_datasets = setup_coco_datasets()

    workdir = ub.ensuredir(ub.expandpath(config['workdir']))
    samplers = {
        # tag: ndsampler.CocoSampler(dset, workdir=workdir, backend='cog')
        tag: ndsampler.CocoSampler(dset, workdir=workdir, backend='npy')
        for tag, dset in coco_datasets.items()
    }
    torch_datasets = {
        tag: SegmentationDataset(
            sampler,
            config['input_dims'],
            input_overlap=((tag == 'train') and config['input_overlap']),
            augment=((tag == 'train') and config['augment']),
        )
        for tag, sampler in samplers.items()
    }
    torch_loaders = {
        tag: torch_data.DataLoader(dset,
                                   batch_size=config['batch_size'],
                                   num_workers=config['workers'],
                                   shuffle=(tag == 'train'),
                                   drop_last=True, pin_memory=True)
        for tag, dset in torch_datasets.items()
    }

    if config['class_weights']:
        mode = config['class_weights']
        dset = torch_datasets['train']
        class_weights = _precompute_class_weights(dset, mode=mode)
        class_weights = torch.FloatTensor(class_weights)
        class_weights[dset.classes.index('background')] = 0
    else:
        class_weights = None

    initializer_ = nh.Initializer.coerce(config)

    if config['arch'] == 'unet':
        # Note: UNet can get through 256x256 images at a rate of ~17Hz with
        # batch_size=8. This is pretty slow and can likely be improved by fixing
        # some of the weird padding / mirror stuff I have to do in unet to get
        # output_dims = input_dims.
        from netharn.models.unet import UNet
        model_ = (UNet, {
            'classes': torch_datasets['train'].classes,
            'in_channels': 3,
        })
    elif config['arch'] == 'segnet':
        from netharn.models.segnet import Segnet
        model_ = (Segnet, {
            'classes': torch_datasets['train'].classes,
            'in_channels': 3,
        })
    elif config['arch'] == 'psp':
        from netharn.models.psp import PSPNet_Resnet50_8s
        model_ = (PSPNet_Resnet50_8s, {
            'classes': torch_datasets['train'].classes,
            'in_channels': 3,
        })
    elif config['arch'] == 'deeplab':
        from netharn.models.deeplab import DeepLab_ASPP
        model_ = (DeepLab_ASPP, {
            'classes': torch_datasets['train'].classes,
            'in_channels': 3,
        })
    else:
        raise KeyError(config['arch'])

    if config['init'] == 'cls':
        initializer_ = model_[0]._initializer_cls()

    # Create hyperparameters
    hyper = nh.HyperParams(
        nice=config['nice'],
        workdir=config['workdir'],
        xpu=nh.XPU.coerce(config['xpu']),

        datasets=torch_datasets,
        loaders=torch_loaders,

        model=model_,
        initializer=initializer_,

        scheduler=nh.Scheduler.coerce(config),
        optimizer=nh.Optimizer.coerce(config),
        dynamics=nh.Dynamics.coerce(config),

        criterion=(nh.criterions.FocalLoss, {
            'focus': config['focus'],
            'weight': class_weights,
            # 'reduction': 'none',
        }),

        monitor=(nh.Monitor, {
            'minimize': ['loss'],
            'patience': config['patience'],
            'max_epoch': config['max_epoch'],
            'smoothing': .6,
        }),

        other={
            'batch_size': config['batch_size'],
        },
        extra={
            'argv': sys.argv,
            'config': ub.repr2(config.asdict()),
        }
    )

    # Create harness
    harn = SegmentationHarn(hyper=hyper)
    harn.classes = torch_datasets['train'].classes
    harn.preferences.update({
        'num_keep': 5,
        'keyboard_debug': True,
        # 'export_modules': ['netharn'],
    })
    harn.intervals.update({
        'vali': 1,
        'test': 10,
    })
    harn.script_config = config
    return harn
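Note that model_ remains a (class, kwargs) pair rather than a constructed instance; netharn keeps hyperparameters declarative this way and builds the model when the harness initializes. A sketch of the manual equivalent (the same pattern Example #2 spells out explicitly):

# Manual construction from a (class, kwargs) spec, e.g. for debugging:
model_cls, model_kw = model_
model = model_cls(**model_kw)
print('model = {!r}'.format(model))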
Example #4
def setup_harn(cmdline=True, **kw):
    """
    CommandLine:
        xdoctest -m netharn.examples.segmentation setup_harn

    Example:
        >>> # xdoctest: +REQUIRES(--slow)
        >>> kw = {'workers': 0, 'xpu': 'cpu', 'batch_size': 2}
        >>> cmdline = False
        >>> # Just sets up the harness, does not do any heavy lifting
        >>> harn = setup_harn(cmdline=cmdline, **kw)
        >>> #
        >>> harn.initialize()
        >>> #
        >>> batch = harn._demo_batch(tag='train')
        >>> epoch_metrics = harn._demo_epoch(tag='vali', max_iter=2)
    """
    import sys
    import ndsampler
    import kwarray
    # kwarray.seed_global(2108744082)

    config = SegmentationConfig(default=kw)
    config.load(cmdline=cmdline)
    nh.configure_hacks(config)  # fix opencv bugs

    coco_datasets = nh.api.Datasets.coerce(config)
    print('coco_datasets = {}'.format(ub.repr2(coco_datasets)))
    for tag, dset in coco_datasets.items():
        dset._build_hashid(hash_pixels=False)

    workdir = ub.ensuredir(ub.expandpath(config['workdir']))
    samplers = {
        tag: ndsampler.CocoSampler(dset,
                                   workdir=workdir,
                                   backend=config['backend'])
        for tag, dset in coco_datasets.items()
    }

    for tag, sampler in ub.ProgIter(list(samplers.items()),
                                    desc='prepare frames'):
        try:
            sampler.frames.prepare(workers=config['workers'])
        except AttributeError:
            pass

    torch_datasets = {
        tag: SegmentationDataset(
            sampler,
            config['input_dims'],
            input_overlap=((tag == 'train') and config['input_overlap']),
            augmenter=((tag == 'train') and config['augmenter']),
        )
        for tag, sampler in samplers.items()
    }
    torch_loaders = {
        tag: torch_data.DataLoader(dset,
                                   batch_size=config['batch_size'],
                                   num_workers=config['workers'],
                                   shuffle=(tag == 'train'),
                                   drop_last=True,
                                   pin_memory=True)
        for tag, dset in torch_datasets.items()
    }

    if config['class_weights']:
        mode = config['class_weights']
        dset = torch_datasets['train']
        class_weights = _precompute_class_weights(dset,
                                                  mode=mode,
                                                  workers=config['workers'])
        class_weights = torch.FloatTensor(class_weights)
        class_weights[dset.classes.index('background')] = 0
    else:
        class_weights = None

    if config['normalize_inputs']:
        stats_dset = torch_datasets['train']
        stats_idxs = kwarray.shuffle(np.arange(len(stats_dset)),
                                     rng=0)[0:min(1000, len(stats_dset))]
        stats_subset = torch.utils.data.Subset(stats_dset, stats_idxs)
        cacher = ub.Cacher('dset_mean', cfgstr=stats_dset.input_id + 'v3')
        input_stats = cacher.tryload()
        if input_stats is None:
            loader = torch.utils.data.DataLoader(
                stats_subset,
                num_workers=config['workers'],
                shuffle=True,
                batch_size=config['batch_size'])
            running = nh.util.RunningStats()
            for batch in ub.ProgIter(loader, desc='estimate mean/std'):
                try:
                    running.update(batch['im'].numpy())
                except ValueError:  # final batch broadcast error
                    pass
            input_stats = {
                'mean': running.simple(axis=None)['mean'].round(3),
                'std': running.simple(axis=None)['std'].round(3),
            }
            cacher.save(input_stats)
    else:
        input_stats = {}

    print('input_stats = {!r}'.format(input_stats))

    # TODO: infer number of channels
    model_ = (SegmentationModel, {
        'arch': config['arch'],
        'input_stats': input_stats,
        'classes': torch_datasets['train'].classes.__json__(),
        'in_channels': 3,
    })

    initializer_ = nh.Initializer.coerce(config)
    # if config['init'] == 'cls':
    #     initializer_ = model_[0]._initializer_cls()

    # Create hyperparameters
    hyper = nh.HyperParams(
        nice=config['nice'],
        workdir=config['workdir'],
        xpu=nh.XPU.coerce(config['xpu']),
        datasets=torch_datasets,
        loaders=torch_loaders,
        model=model_,
        initializer=initializer_,
        scheduler=nh.Scheduler.coerce(config),
        optimizer=nh.Optimizer.coerce(config),
        dynamics=nh.Dynamics.coerce(config),
        criterion=(
            nh.criterions.FocalLoss,
            {
                'focus': config['focus'],
                'weight': class_weights,
                # 'reduction': 'none',
            }),
        monitor=(nh.Monitor, {
            'minimize': ['loss'],
            'patience': config['patience'],
            'max_epoch': config['max_epoch'],
            'smoothing': .6,
        }),
        other={
            'batch_size': config['batch_size'],
        },
        extra={
            'argv': sys.argv,
            'config': ub.repr2(config.asdict()),
        })

    # Create harness
    harn = SegmentationHarn(hyper=hyper)
    harn.classes = torch_datasets['train'].classes
    harn.preferences.update({
        'num_keep': 2,
        'keyboard_debug': True,
        # 'export_modules': ['netharn'],
    })
    harn.intervals.update({
        'vali': 1,
        'test': 10,
    })
    harn.script_config = config
    return harn
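The _precompute_class_weights helper used above is defined elsewhere in the module. A common choice for such a weighting mode is inverse class frequency; the sketch below is a hypothetical stand-in for that idea, not necessarily the module's exact scheme.

import numpy as np

def inverse_frequency_weights(class_freq):
    # Hypothetical inverse-frequency weighting: rarer classes get
    # larger weights, normalized so the largest weight is 1.0.
    freq = np.asarray(class_freq, dtype=float)
    weights = freq.sum() / np.maximum(freq, 1)
    return weights / weights.max()

print(inverse_frequency_weights([900, 90, 10]))  # approx [0.011, 0.111, 1.0]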
Example #5
def setup_harn(**kwargs):
    """
    Args:
        nice (str): Custom tag for this run
        workdir (PathLike): path to dump all the intermediate results

        batch_size (int):
            Base batch size. Number of examples in GPU at any time.
        p (int): num individuals per batch
        k (int): num annots-per-individual per batch

        bstep (int): Multiplied by batch_size to simulate larger batches.
        lr (float|str): Base learning rate
        decay (float): Weight decay (L2 regularization)

        workers (int): Number of parallel data loader workers
        xpu (str): Device to train on. Can be either `'cpu'`, `'gpu'`, a number
            indicating a GPU (e.g. `0`), or a list of numbers (e.g. `[0,1,2]`)
            indicating multiple GPUs

        norm_desc (bool): if True normalizes the descriptors
        pretrained (PathLike): path to a compatible pretrained model

        margin (float): margin for loss criterion
        soft (bool): use soft margin
    """
    import ast

    def trycast(x, type):
        try:
            return type(x)
        except Exception:
            return x

    config = {}
    config['init'] = kwargs.get('init', 'kaiming_normal')
    config['pretrained'] = kwargs.get('pretrained',
                                      ub.argval('--pretrained', default=None))
    config['margin'] = kwargs.get('margin', 3.0)
    config['soft'] = kwargs.get('soft', False)
    config['xpu'] = kwargs.get('xpu', 'argv')
    config['nice'] = kwargs.get('nice', 'untitled')
    config['workdir'] = kwargs.get('workdir', None)
    config['workers'] = int(kwargs.get('workers', 1))
    config['bstep'] = int(kwargs.get('bstep', 1))
    config['optim'] = kwargs.get('optim', 'sgd')
    config['scheduler'] = kwargs.get('scheduler', 'onecycle70')
    config['lr'] = trycast(kwargs.get('lr', 0.0001), float)
    config['decay'] = float(kwargs.get('decay', 1e-5))

    config['max_epoch'] = int(kwargs.get('max_epoch', 100))

    config['num_batches'] = trycast(kwargs.get('num_batches', 1000), int)
    config['batch_size'] = int(kwargs.get('batch_size', 128))
    config['p'] = int(kwargs.get('p', 10))
    config['k'] = int(kwargs.get('k', 25))

    config['arch'] = kwargs.get('arch', 'resnet')
    config['hidden'] = trycast(kwargs.get('hidden', [128]), ast.literal_eval)
    config['desc_size'] = kwargs.get('desc_size', 256)

    config['norm_desc'] = kwargs.get('norm_desc', False)
    config['dim'] = 28

    xpu = nh.XPU.coerce(config['xpu'])
    nh.configure_hacks(config)
    datasets, workdir = setup_datasets()

    loaders = {
        tag: torch.utils.data.DataLoader(
            dset,
            batch_sampler=nh.data.batch_samplers.MatchingSamplerPK(
                dset.pccs,
                shuffle=(tag == 'train'),
                batch_size=config['batch_size'],
                num_batches=config['num_batches'],
                k=config['k'],
                p=config['p'],
            ),
            num_workers=config['workers'],
        )
        for tag, dset in datasets.items()
    }

    if config['arch'] == 'simple':
        model_ = (MNISTEmbeddingNet, {
            'input_shape': (1, 1, config['dim'], config['dim']),
            'desc_size': config['desc_size'],
        })
    elif config['arch'] == 'resnet':
        model_ = (nh.models.DescriptorNetwork, {
            'input_shape': (1, 1, config['dim'], config['dim']),
            'norm_desc': config['norm_desc'],
            'hidden_channels': config['hidden'],
            'desc_size': config['desc_size'],
        })
    else:
        raise KeyError(config['arch'])

    if config['scheduler'] == 'steplr':
        from torch.optim import lr_scheduler
        scheduler_ = (lr_scheduler.StepLR,
                      dict(step_size=8, gamma=0.1, last_epoch=-1))
    else:
        scheduler_ = nh.Scheduler.coerce(config, scheduler='onecycle70')

    # Here is the FitHarn magic.
    # The nh.HyperParams object keeps track of and helps log all declarative
    # info related to training a model.
    hyper = nh.hyperparams.HyperParams(
        nice=config['nice'],
        xpu=xpu,
        workdir=workdir,
        datasets=datasets,
        loaders=loaders,
        model=model_,
        initializer=nh.Initializer.coerce(config),
        optimizer=nh.Optimizer.coerce(config),
        scheduler=scheduler_,
        criterion=(nh.criterions.TripletLoss, {
            'margin': config['margin'],
            'soft': config['soft'],
        }),
        monitor=nh.Monitor.coerce(
            config,
            minimize=['loss', 'pos_dist', 'brier'],
            maximize=['accuracy', 'neg_dist', 'mcc'],
            patience=100,
            max_epoch=config['max_epoch'],
            smoothing=0.4,
        ),
        other={
            'batch_size': config['batch_size'],
            'num_batches': config['num_batches'],
        })

    harn = MNIST_MatchingHarness(hyper=hyper)
    return harn
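The trycast helper lets string-valued CLI overrides coerce gracefully: if the cast fails, the raw value passes through unchanged for downstream handling. Shown standalone (trycast and ast are local to setup_harn above):

assert trycast('0.01', float) == 0.01        # numeric strings are cast
assert trycast('step90', float) == 'step90'  # non-numeric strings pass through
assert trycast('[128, 64]', ast.literal_eval) == [128, 64]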
Example #6
def setup_harn(cmdline=True, **kw):
    """
    This creates "The Classification Harness" (i.e. the core ClfHarn object).
    This is where we programmatically connect our program arguments with the
    netharn HyperParameter standards. We are using :mod:`scriptconfig` to
    capture these, but you could use click / argparse / etc.

    This function has the responsibility of creating our torch datasets,
    lazily computing input statistics, specifying our model architecture,
    schedule, initialization, optimizer, dynamics, XPU etc. These can usually
    be coerced using netharn API helpers and a "standardized" config dict. See
    the function code for details.

    Args:
        cmdline (bool, default=True):
            if True, behavior will be modified based on ``sys.argv``.
            Note this will activate the scriptconfig ``--help``, ``--dump`` and
            ``--config`` interactions.

    Kwargs:
        **kw: overrides for the default config of :class:`ClfConfig`.
            Note, command line flags have precedence if cmdline=True.

    Returns:
        ClfHarn: a fully-defined, but uninitialized custom :class:`FitHarn`
            object.

    Example:
        >>> # xdoctest: +SKIP
        >>> kw = {'datasets': 'special:shapes256'}
        >>> cmdline = False
        >>> harn = setup_harn(cmdline, **kw)
        >>> harn.initialize()
    """
    import ndsampler
    config = ClfConfig(default=kw)
    config.load(cmdline=cmdline)
    print('config = {}'.format(ub.repr2(config.asdict())))

    nh.configure_hacks(config)
    coco_datasets = nh.api.Datasets.coerce(config)

    print('coco_datasets = {}'.format(ub.repr2(coco_datasets, nl=1)))
    for tag, dset in coco_datasets.items():
        dset._build_hashid(hash_pixels=False)

    workdir = ub.ensuredir(ub.expandpath(config['workdir']))
    samplers = {
        tag: ndsampler.CocoSampler(dset,
                                   workdir=workdir,
                                   backend=config['sampler_backend'])
        for tag, dset in coco_datasets.items()
    }

    for tag, sampler in ub.ProgIter(list(samplers.items()),
                                    desc='prepare frames'):
        sampler.frames.prepare(workers=config['workers'])

    torch_datasets = {
        'train':
        ClfDataset(
            samplers['train'],
            input_dims=config['input_dims'],
            augmenter=config['augmenter'],
        ),
        'vali':
        ClfDataset(samplers['vali'],
                   input_dims=config['input_dims'],
                   augmenter=False),
    }

    # Coerce the channel spec up front; it is needed to build the model
    # below even when input normalization is disabled.
    channels = ChannelSpec.coerce(config['channels'])

    if config['normalize_inputs']:
        # Get stats on the dataset (todo: turn off augmentation for this)
        _dset = torch_datasets['train']
        stats_idxs = kwarray.shuffle(np.arange(len(_dset)),
                                     rng=0)[0:min(1000, len(_dset))]
        stats_subset = torch.utils.data.Subset(_dset, stats_idxs)

        cacher = ub.Cacher('dset_mean', cfgstr=_dset.input_id + 'v3')
        input_stats = cacher.tryload()

        if input_stats is None:
            # Use parallel workers to load data faster
            from netharn.data.data_containers import container_collate
            from functools import partial
            collate_fn = partial(container_collate, num_devices=1)

            loader = torch.utils.data.DataLoader(
                stats_subset,
                collate_fn=collate_fn,
                num_workers=config['workers'],
                shuffle=True,
                batch_size=config['batch_size'])

            # Track moving average of each fused channel stream
            channel_stats = {
                key: nh.util.RunningStats()
                for key in channels.keys()
            }
            assert len(channel_stats) == 1, (
                'only support one fused stream for now')
            for batch in ub.ProgIter(loader, desc='estimate mean/std'):
                for key, val in batch['inputs'].items():
                    try:
                        for part in val.numpy():
                            channel_stats[key].update(part)
                    except ValueError:  # final batch broadcast error
                        pass

            perchan_input_stats = {}
            for key, running in channel_stats.items():
                perchan_stats = running.simple(axis=(1, 2))
                perchan_input_stats[key] = {
                    'mean': perchan_stats['mean'].round(3),
                    'std': perchan_stats['std'].round(3),
                }

            input_stats = ub.peek(perchan_input_stats.values())
            cacher.save(input_stats)
    else:
        input_stats = {}

    torch_loaders = {
        tag: dset.make_loader(
            batch_size=config['batch_size'],
            num_batches=config['num_batches'],
            num_workers=config['workers'],
            shuffle=(tag == 'train'),
            balance=(config['balance'] if tag == 'train' else None),
            pin_memory=True)
        for tag, dset in torch_datasets.items()
    }

    initializer_ = None
    classes = torch_datasets['train'].classes

    modelkw = {
        'arch': config['arch'],
        'input_stats': input_stats,
        'classes': classes.__json__(),
        'channels': channels,
    }
    model = ClfModel(**modelkw)
    model._initkw = modelkw

    if initializer_ is None:
        initializer_ = nh.Initializer.coerce(config)

    hyper = nh.HyperParams(name=config['name'],
                           workdir=config['workdir'],
                           xpu=nh.XPU.coerce(config['xpu']),
                           datasets=torch_datasets,
                           loaders=torch_loaders,
                           model=model,
                           criterion=None,
                           optimizer=nh.Optimizer.coerce(config),
                           dynamics=nh.Dynamics.coerce(config),
                           scheduler=nh.Scheduler.coerce(config),
                           initializer=initializer_,
                           monitor=(nh.Monitor, {
                               'minimize': ['loss'],
                               'patience': config['patience'],
                               'max_epoch': config['max_epoch'],
                               'smoothing': 0.0,
                           }),
                           other={
                               'name': config['name'],
                               'batch_size': config['batch_size'],
                               'balance': config['balance'],
                           },
                           extra={
                               'argv': sys.argv,
                               'config': ub.repr2(config.asdict()),
                           })
    harn = ClfHarn(hyper=hyper)
    harn.preferences.update({
        'num_keep': 3,
        'keep_freq': 10,
        'tensorboard_groups': ['loss'],
        'eager_dump_tensorboard': True,
    })
    harn.intervals.update({})
    harn.script_config = config
    return harn
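The per-channel statistics above reduce over the spatial axes only. That axis handling is easy to get wrong, so here is a plain numpy sketch of the same reduction, assuming each part is a (C, H, W) array as the update loop implies:

import numpy as np

# Hypothetical (C, H, W) image part: 3 channels of 4x4 pixels.
part = np.random.rand(3, 4, 4)

# Reducing over axes (1, 2) leaves one statistic per channel, which
# matches running.simple(axis=(1, 2)) above.
perchan = {
    'mean': part.mean(axis=(1, 2)).round(3),  # shape (3,)
    'std': part.std(axis=(1, 2)).round(3),    # shape (3,)
}
print('perchan = {!r}'.format(perchan))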
Example #7
def setup_harn(cmdline=True, **kwargs):
    """
    cmdline, kwargs = False, {}
    """
    import sys
    import ndsampler

    config = ImageClfConfig(default=kwargs)
    config.load(cmdline=cmdline)
    nh.configure_hacks(config)  # fix opencv bugs

    cacher = ub.Cacher('tiny-imagenet', cfgstr='v4', verbose=3)
    data = cacher.tryload()
    if data is None:
        data = grab_tiny_imagenet_as_coco()
        cacher.save(data)
    coco_datasets = data  # setup_coco_datasets()
    dset = coco_datasets['train']
    print('train dset = {!r}'.format(dset))

    workdir = ub.ensuredir(ub.expandpath(config['workdir']))
    samplers = {
        # tag: ndsampler.CocoSampler(dset, workdir=workdir, backend='cog')
        tag: ndsampler.CocoSampler(dset, workdir=workdir, backend='npy')
        for tag, dset in coco_datasets.items()
    }
    torch_datasets = {
        tag: ImagClfDataset(
            sampler, config['input_dims'],
            augmenter=((tag == 'train') and config['augmenter']),
        )
        for tag, sampler in samplers.items()
    }
    torch_loaders = {
        tag: torch_data.DataLoader(dset,
                                   batch_size=config['batch_size'],
                                   num_workers=config['workers'],
                                   shuffle=(tag == 'train'),
                                   pin_memory=True)
        for tag, dset in torch_datasets.items()
    }

    import torchvision
    # TODO: netharn should allow for this
    model_ = torchvision.models.resnet50(pretrained=False)

    # model_ = (, {
    #     'classes': torch_datasets['train'].classes,
    #     'in_channels': 3,
    # })
    initializer_ = nh.Initializer.coerce(config)

    hyper = nh.HyperParams(
        nice=config['nice'],
        workdir=config['workdir'],
        xpu=nh.XPU.coerce(config['xpu']),

        datasets=torch_datasets,
        loaders=torch_loaders,

        model=model_,
        initializer=initializer_,

        scheduler=nh.Scheduler.coerce(config),
        optimizer=nh.Optimizer.coerce(config),
        dynamics=nh.Dynamics.coerce(config),

        criterion=(nh.criterions.FocalLoss, {
            'focus': 0.0,
        }),

        monitor=(nh.Monitor, {
            'minimize': ['loss'],
            'patience': config['patience'],
            'max_epoch': config['max_epoch'],
            'smoothing': .6,
        }),

        other={
            'batch_size': config['batch_size'],
        },
        extra={
            'argv': sys.argv,
            'config': ub.repr2(config.asdict()),
        }
    )

    # Create harness
    harn = ImageClfHarn(hyper=hyper)
    harn.classes = torch_datasets['train'].classes
    harn.preferences.update({
        'num_keep': 5,
        'keyboard_debug': True,
        # 'export_modules': ['netharn'],
    })
    harn.intervals.update({
        'vali': 1,
        'test': 10,
    })
    harn.script_config = config
    return harn
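The ub.Cacher pattern used for the dataset (tryload, compute on a miss, save) is the general idiom for any expensive one-off computation. A minimal self-contained sketch of the same idiom:

import ubelt as ub

# Generic cache idiom, as used for 'tiny-imagenet' above. The cfgstr
# acts as the cache key: bump it (e.g. 'v1' -> 'v2') to invalidate.
cacher = ub.Cacher('expensive_result', cfgstr='v1')
result = cacher.tryload()
if result is None:
    result = sum(i * i for i in range(10000))  # stand-in for real work
    cacher.save(result)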
Example #8
File: yolo_voc.py  Project: Kitware/netharn
def setup_yolo_harness(bsize=16, workers=0):
    """
    CommandLine:
        python -m netharn.examples.yolo_voc setup_yolo_harness

    Example:
        >>> # DISABLE_DOCTEST
        >>> harn = setup_yolo_harness()
        >>> harn.initialize()
    """

    xpu = nh.XPU.coerce('argv')

    nice = ub.argval('--nice', default='Yolo2Baseline')
    batch_size = int(ub.argval('--batch_size', default=bsize))
    bstep = int(ub.argval('--bstep', default=4))
    workers = int(ub.argval('--workers', default=workers))
    decay = float(ub.argval('--decay', default=0.0005))
    lr = float(ub.argval('--lr', default=0.001))
    ovthresh = 0.5
    simulated_bsize = bstep * batch_size

    nh.configure_hacks(workers=workers)

    # We will divide the learning rate by the simulated batch size
    datasets = {
        'train': YoloVOCDataset(years=[2007, 2012], split='trainval'),
        # 'test': YoloVOCDataset(years=[2007], split='test'),
    }
    loaders = {
        key: dset.make_loader(batch_size=batch_size,
                              num_workers=workers,
                              shuffle=(key == 'train'),
                              pin_memory=True,
                              resize_rate=10 * bstep,
                              drop_last=True)
        for key, dset in datasets.items()
    }

    anchors = np.array([(1.3221, 1.73145), (3.19275, 4.00944),
                        (5.05587, 8.09892), (9.47112, 4.84053),
                        (11.2364, 10.0071)])

    if not ub.argflag('--eav'):
        lr_step_points = {
            # 0:   lr * 0.1 / simulated_bsize,  # burnin
            # 4:   lr * 1.0 / simulated_bsize,
            0: lr * 1.0 / simulated_bsize,
            154: lr * 1.0 / simulated_bsize,
            155: lr * 0.1 / simulated_bsize,
            232: lr * 0.1 / simulated_bsize,
            233: lr * 0.01 / simulated_bsize,
        }
        max_epoch = 311
        scheduler_ = (
            nh.schedulers.core.YOLOScheduler,
            {
                'points': lr_step_points,
                # 'interpolate': False,
                'interpolate': True,
                # number of epochs to burn in for; approx 1000 batches?
                'burn_in': 0.96899225 if ub.argflag('--eav') else 3.86683584,
                'dset_size': len(datasets['train']),  # when drop_last=False
                # 'dset_size': (len(datasets['train']) // simulated_bsize) * simulated_bsize,  # make a multiple of batch_size because drop_last=True
                'batch_size': batch_size,
            })
        from netharn.models.yolo2 import light_region_loss
        criterion_ = (
            light_region_loss.RegionLoss,
            {
                'num_classes': datasets['train'].num_classes,
                'anchors': anchors,
                'object_scale': 5.0,
                'noobject_scale': 1.0,

                # The eav version originally had a random *2 in the cls loss;
                # we removed that, but we can replicate it here.
                'class_scale': 1.0 if not ub.argflag('--eav') else 2.0,
                'coord_scale': 1.0,
                'thresh': 0.6,  # iou_thresh
                'seen_thresh': 12800,
                # 'small_boxes': not ub.argflag('--eav'),
                'small_boxes': True,
                'mse_factor': 0.5 if not ub.argflag('--eav') else 1.0,
            })
    else:
        lr_step_points = {
            # dividing by batch size was one of those unpublished details
            0: lr * 0.1 / simulated_bsize,
            1: lr * 1.0 / simulated_bsize,
            96: lr * 1.0 / simulated_bsize,
            97: lr * 0.1 / simulated_bsize,
            135: lr * 0.1 / simulated_bsize,
            136: lr * 0.01 / simulated_bsize,
        }
        max_epoch = 176
        scheduler_ = (nh.schedulers.ListedLR, {
            'points': lr_step_points,
            'interpolate': False,
        })
        from netharn.models.yolo2 import region_loss2
        criterion_ = (
            region_loss2.RegionLoss,
            {
                'num_classes': datasets['train'].num_classes,
                'anchors': anchors,
                'reduction': 32,
                'seen': 0,
                'coord_scale': 1.0,
                'noobject_scale': 1.0,
                'object_scale': 5.0,
                'class_scale': 1.0,
                'thresh': 0.6,  # iou_thresh
                # 'seen_thresh': 12800,
            })

    weights = ub.argval('--weights', default=None)
    if weights is None or weights == 'imagenet':
        weights = light_yolo.initial_imagenet_weights()
    elif weights == 'lightnet':
        weights = light_yolo.demo_voc_weights()
    else:
        print('weights = {!r}'.format(weights))

    hyper = nh.HyperParams(
        **{
            'nice': nice,
            'workdir': ub.expandpath('~/work/voc_yolo2'),
            'datasets': datasets,
            'loaders': loaders,
            'xpu': xpu,
            'model': (
                light_yolo.Yolo,
                {
                    'num_classes': datasets['train'].num_classes,
                    'anchors': anchors,
                    'conf_thresh': 0.001,
                    # 'conf_thresh': 0.1,  # make training a bit faster
                    'nms_thresh': 0.5 if not ub.argflag('--eav') else 0.4
                }),
            'criterion': criterion_,
            'initializer': (nh.initializers.Pretrained, {
                'fpath': weights,
            }),
            'optimizer': (
                torch.optim.SGD,
                {
                    'lr': lr_step_points[0],
                    'momentum': 0.9,
                    'dampening': 0,
                    # multiplying by batch size was one of those unpublished details
                    'weight_decay': decay * simulated_bsize,
                }),
            'scheduler': scheduler_,
            'monitor': (nh.Monitor, {
                'minimize': ['loss'],
                'maximize': ['mAP'],
                'patience': max_epoch,
                'max_epoch': max_epoch,
            }),

            # 'augment': datasets['train'].augmenter,
            'dynamics': {
                # Controls how many batches to process before taking a step in the
                # gradient direction. Effectively simulates a batch_size that is
                # `bstep` times bigger.
                'batch_step': bstep,
            },
            'other': {
                # Other params are not used internally, so you are free to set any
                # extra params specific to your algorithm, and still have them
                # logged in the hyperparam structure. For YOLO this is `ovthresh`.
                'batch_size': batch_size,
                'nice': nice,
                'ovthresh': ovthresh,  # used in mAP computation
                'input_range': 'norm01',
            },
        })
    print('max_epoch = {!r}'.format(max_epoch))
    harn = YoloHarn(hyper=hyper)
    harn.preferences['prog_backend'] = 'progiter'
    harn.intervals['log_iter_train'] = None
    harn.intervals['log_iter_test'] = None
    harn.intervals['log_iter_vali'] = None
    # tell netharn when to check for divergence
    harn.preferences['large_loss'] = 1000
    return harn
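The lr_step_points dicts above map epoch numbers to learning rates that are pre-divided by the simulated batch size. For the non-interpolated ListedLR case, the effective rate at an epoch resolves roughly as in this sketch (step_lr_at is a hypothetical helper; the real schedulers live in nh.schedulers):

def step_lr_at(epoch, points):
    # Hypothetical step-schedule lookup: use the rate from the largest
    # epoch key that does not exceed `epoch` (no interpolation).
    keys = sorted(k for k in points if k <= epoch)
    return points[keys[-1]]

lr, bstep, batch_size = 0.001, 4, 16
simulated_bsize = bstep * batch_size  # 64
points = {0: lr * 1.0 / simulated_bsize, 155: lr * 0.1 / simulated_bsize}
assert step_lr_at(100, points) == lr / simulated_bsize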
Example #9
def setup_harn(**kwargs):
    """
    CommandLine:
        python ~/code/netharn/netharn/examples/ggr_matching.py setup_harn

    Args:
        dbname (str): Name of IBEIS database to use
        nice (str): Custom tag for this run
        workdir (PathLike): path to dump all the intermediate results
        dim (int): Width and height of the network input
        batch_size (int): Base batch size. Number of examples in GPU at any time.
        bstep (int): Multiplied by batch_size to simulate larger batches.
        lr (float): Base learning rate
        decay (float): Weight decay (L2 regularization)
        workers (int): Number of parallel data loader workers
        xpu (str): Device to train on. Can be either `'cpu'`, `'gpu'`, a number
            indicating a GPU (e.g. `0`), or a list of numbers (e.g. `[0,1,2]`)
            indicating multiple GPUs
        triple (bool): if True uses triplet loss, otherwise contrastive loss
        norm_desc (bool): if True normalizes the descriptors
        pretrained (PathLike): path to a compatible pretrained model
        margin (float): margin for loss criterion
        soft (bool): use soft margin

    Example:
        >>> harn = setup_harn(dbname='PZ_MTEST')
        >>> harn.initialize()
    """
    config = parse_config(**kwargs)

    nh.configure_hacks(config)
    datasets, workdir = setup_datasets(config)

    loaders = {
        tag: dset.make_loader(
            shuffle=(tag == 'train'),
            batch_size=config['batch_size'],
            num_batches=(config['num_batches']
                         if tag == 'train' else config['num_batches'] // 10),
            k=config['k'],
            p=config['p'],
            num_workers=config['workers'],
        )
        for tag, dset in datasets.items()
    }

    if config['scheduler'] == 'steplr':
        from torch.optim import lr_scheduler
        scheduler_ = (lr_scheduler.StepLR,
                      dict(step_size=8, gamma=0.1, last_epoch=-1))
    else:
        scheduler_ = nh.Scheduler.coerce(config, scheduler='onecycle70')

    hyper = nh.HyperParams(
        **{
            'nice': config['nice'],
            'workdir': config['workdir'],
            'datasets': datasets,
            'loaders': loaders,
            'xpu': nh.XPU.coerce(config['xpu']),
            'model': (nh.models.DescriptorNetwork, {
                'input_shape': (1, 3, config['dim'], config['dim']),
                'norm_desc': config['norm_desc'],
                # 'hidden_channels': [512, 256]
                'hidden_channels': [256],
                'desc_size': 128,
            }),
            'initializer': nh.Initializer.coerce(config),
            'optimizer': nh.Optimizer.coerce(config),
            'scheduler': scheduler_,
            'criterion': (nh.criterions.TripletLoss, {
                'margin': config['margin'],
                'soft': config['soft'],
            }),
            'monitor': nh.Monitor.coerce(
                config,
                minimize=['loss', 'pos_dist', 'brier'],
                maximize=['accuracy', 'neg_dist', 'mcc'],
                patience=100,
                max_epoch=100,
            ),
            'dynamics': nh.Dynamics.coerce(config),
            'other': {
                'n_classes': 2,
            },
        })
    harn = MatchingHarness(hyper=hyper)
    harn.preferences['prog_backend'] = 'progiter'
    harn.intervals['log_iter_train'] = 1
    harn.intervals['log_iter_test'] = None
    harn.intervals['log_iter_vali'] = None

    return harn
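The k and p loader arguments implement P-K sampling for metric learning: each batch draws p individuals and k annotations per individual, so every batch contains both positive and negative pairs for the triplet loss. A small sketch of that composition on hypothetical data (the real sampler is nh.data.batch_samplers.MatchingSamplerPK):

import random

def sample_pk_batch(pccs, p, k):
    # Hypothetical P*K batch: choose p identity groups, then k
    # annotations from each, yielding a batch of about p*k items.
    groups = random.sample(pccs, p)
    return [annot for g in groups for annot in random.sample(g, k)]

pccs = [list(range(i * 10, i * 10 + 5)) for i in range(8)]  # 8 ids, 5 annots each
batch = sample_pk_batch(pccs, p=3, k=2)
assert len(batch) == 3 * 2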