def setup_harn(cmdline=False, **kw):
    """
    Ignore:
        kw = {}
        cmdline = False
        harn = setup_harn()
    """
    config = StyleTransferConfig(default=kw)
    config.load(cmdline=cmdline)
    print('config = {}'.format(ub.repr2(config.asdict())))

    nh.configure_hacks(config)
    dataset_info = nh.api.DatasetInfo.coerce(config)
    # input_stats = dataset_info['input_stats']

    model = (TransformerNetwork, {})

    hyper = nh.HyperParams(
        name=config['name'],
        workdir=config['workdir'],
        xpu=nh.XPU.coerce(config['xpu']),

        datasets=dataset_info['torch_datasets'],
        loaders=dataset_info['torch_loaders'],

        model=model,
        criterion=None,
        initializer=None,

        optimizer=nh.Optimizer.coerce(config),
        dynamics=nh.Dynamics.coerce(config),
        scheduler=nh.Scheduler.coerce(config),

        monitor=(nh.Monitor, {
            'minimize': ['loss'],
            'patience': config['patience'],
            'max_epoch': config['max_epoch'],
            'smoothing': 0.0,
        }),
        other={
            'name': config['name'],
            'batch_size': config['batch_size'],
            'balance': config['balance'],
        },
        extra={
            'argv': sys.argv,
            'config': ub.repr2(config.asdict()),
        }
    )
    harn = StyleTransferHarn(hyper=hyper)
    harn.preferences.update({
        'num_keep': 3,
        'keep_freq': 10,
        'tensorboard_groups': ['loss'],
        'eager_dump_tensorboard': True,
    })
    harn.intervals.update({})
    harn.script_config = config
    return harn
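
# Usage sketch (added for illustration, not part of the original example):
# a harness built by ``setup_harn`` is typically driven by initializing it and
# then calling ``run()``, which executes the train/vali loop until the Monitor
# criteria stop it. The ``__main__`` guard is an assumption about how this
# snippet would be invoked.
if __name__ == '__main__':
    harn = setup_harn(cmdline=True)
    harn.initialize()   # build the model, loaders, and the run directory
    harn.run()          # run the fit loop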
def setup_harn(cmdline=True, **kw):
    """
    Ignore:
        >>> from object_detection import *  # NOQA
        >>> cmdline = False
        >>> kw = {
        >>>     'train_dataset': '~/data/VOC/voc-trainval.mscoco.json',
        >>>     'vali_dataset': '~/data/VOC/voc-test-2007.mscoco.json',
        >>> }
        >>> harn = setup_harn(**kw)
    """
    import ndsampler
    from ndsampler import coerce_data
    # Seed other global rngs just in case something uses them under the hood
    kwarray.seed_global(1129989262, offset=1797315558)

    config = DetectFitConfig(default=kw, cmdline=cmdline)

    nh.configure_hacks(config)  # fix opencv bugs
    ub.ensuredir(config['workdir'])

    # Load ndsampler.CocoDataset objects from info in the config
    subsets = coerce_data.coerce_datasets(config)

    samplers = {}
    for tag, subset in subsets.items():
        print('subset = {!r}'.format(subset))
        sampler = ndsampler.CocoSampler(subset, workdir=config['workdir'])
        samplers[tag] = sampler

    torch_datasets = {
        tag: DetectDataset(
            sampler,
            input_dims=config['input_dims'],
            augment=config['augment'] if (tag == 'train') else False,
        )
        for tag, sampler in samplers.items()
    }

    print('make loaders')
    loaders_ = {
        tag: torch.utils.data.DataLoader(
            dset, batch_size=config['batch_size'],
            num_workers=config['workers'],
            shuffle=(tag == 'train'),
            collate_fn=nh.data.collate.padded_collate,
            pin_memory=True)
        for tag, dset in torch_datasets.items()
    }
    # for x in ub.ProgIter(loaders_['train']):
    #     pass

    if config['normalize_inputs']:
        # Get stats on the dataset (todo: turn off augmentation for this)
        _dset = torch_datasets['train']
        stats_idxs = kwarray.shuffle(np.arange(len(_dset)), rng=0)[0:min(1000, len(_dset))]
        stats_subset = torch.utils.data.Subset(_dset, stats_idxs)
        cacher = ub.Cacher('dset_mean', cfgstr=_dset.input_id + 'v2')
        input_stats = cacher.tryload()
        if input_stats is None:
            # Use parallel workers to load data faster
            loader = torch.utils.data.DataLoader(
                stats_subset,
                collate_fn=nh.data.collate.padded_collate,
                num_workers=config['workers'],
                shuffle=True, batch_size=config['batch_size'])
            # Track moving average
            running = nh.util.RunningStats()
            for batch in ub.ProgIter(loader, desc='estimate mean/std'):
                try:
                    running.update(batch['im'].numpy())
                except ValueError:  # final batch broadcast error
                    pass
            input_stats = {
                'mean': running.simple(axis=None)['mean'].round(3),
                'std': running.simple(axis=None)['std'].round(3),
            }
            cacher.save(input_stats)
    else:
        input_stats = None
    print('input_stats = {!r}'.format(input_stats))

    initializer_ = nh.Initializer.coerce(config, leftover='kaiming_normal')
    print('initializer_ = {!r}'.format(initializer_))

    arch = config['arch']
    if arch == 'yolo2':

        if False:
            dset = samplers['train'].dset
            print('dset = {!r}'.format(dset))
            # anchors = yolo2.find_anchors(dset)

        anchors = np.array([(1.3221, 1.73145), (3.19275, 4.00944),
                            (5.05587, 8.09892), (9.47112, 4.84053),
                            (11.2364, 10.0071)])

        classes = samplers['train'].classes
        model_ = (yolo2.Yolo2, {
            'classes': classes,
            'anchors': anchors,
            'conf_thresh': 0.001,
            'nms_thresh': 0.5 if not ub.argflag('--eav') else 0.4
        })
        model = model_[0](**model_[1])
        model._initkw = model_[1]

        criterion_ = (yolo2.YoloLoss, {
            'coder': model.coder,
            'seen': 0,
            'coord_scale': 1.0,
            'noobject_scale': 1.0,
            'object_scale': 5.0,
            'class_scale': 1.0,
            'thresh': 0.6,  # iou_thresh
            # 'seen_thresh': 12800,
        })
    else:
        raise KeyError(arch)

    scheduler_ = nh.Scheduler.coerce(config)
    print('scheduler_ = {!r}'.format(scheduler_))

    optimizer_ = nh.Optimizer.coerce(config)
    print('optimizer_ = {!r}'.format(optimizer_))

    dynamics_ = nh.Dynamics.coerce(config)
    print('dynamics_ = {!r}'.format(dynamics_))

    xpu = nh.XPU.coerce(config['xpu'])
    print('xpu = {!r}'.format(xpu))

    import sys

    hyper = nh.HyperParams(**{
        'nice': config['nice'],
        'workdir': config['workdir'],

        'datasets': torch_datasets,
        'loaders': loaders_,

        'xpu': xpu,

        'model': model,
        'criterion': criterion_,
        'initializer': initializer_,

        'optimizer': optimizer_,
        'dynamics': dynamics_,

        # 'optimizer': (torch.optim.SGD, {
        #     'lr': lr_step_points[0],
        #     'momentum': 0.9,
        #     'dampening': 0,
        #     # multiplying by batch size was one of those unpublished details
        #     'weight_decay': decay * simulated_bsize,
        # }),

        'scheduler': scheduler_,

        'monitor': (nh.Monitor, {
            'minimize': ['loss'],
            # 'maximize': ['mAP'],
            'patience': config['patience'],
            'max_epoch': config['max_epoch'],
            'smoothing': .6,
        }),

        'other': {
            # Other params are not used internally, so you are free to set any
            # extra params specific to your algorithm, and still have them
            # logged in the hyperparam structure. For YOLO this is `ovthresh`.
            'batch_size': config['batch_size'],
            'nice': config['nice'],
            'ovthresh': config['ovthresh'],  # used in mAP computation
        },
        'extra': {
            'config': ub.repr2(config.asdict()),
            'argv': sys.argv,
        }
    })
    print('hyper = {!r}'.format(hyper))
    print('make harn')
    harn = DetectHarn(hyper=hyper)
    harn.preferences.update({
        'num_keep': 2,
        'keep_freq': 30,
        'export_modules': ['netharn'],  # TODO
        'prog_backend': 'progiter',  # alternative: 'tqdm'
        'keyboard_debug': True,
    })
    harn.intervals.update({
        'log_iter_train': 50,
    })
    harn.fit_config = config
    print('harn = {!r}'.format(harn))
    print('samplers = {!r}'.format(samplers))
    return harn
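
# Standalone sketch (illustrative, with made-up data) of the cache-or-compute
# pattern used above for dataset statistics: ``ub.Cacher.tryload`` returns
# None on a cache miss, the expensive value is computed once, saved, and then
# reused on later calls that use the same ``cfgstr``.
import ubelt as ub
import numpy as np

def demo_cached_input_stats(images, cfgstr='demo_v1'):
    cacher = ub.Cacher('demo_input_stats', cfgstr=cfgstr)
    stats = cacher.tryload()
    if stats is None:
        data = np.stack(images).astype(np.float64)
        stats = {'mean': round(data.mean(), 3), 'std': round(data.std(), 3)}
        cacher.save(stats)
    return stats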
def setup_harn(cmdline=True, **kw):
    """
    Example:
        >>> # xdoctest: +REQUIRES(--download)
        >>> import sys, ubelt
        >>> sys.path.append(ubelt.expandpath('~/code/netharn/examples'))
        >>> from sseg_camvid import *  # NOQA
        >>> kw = {'workers': 0, 'xpu': 'cpu', 'batch_size': 2}
        >>> cmdline = False
        >>> # Just sets up the harness, does not do any heavy lifting
        >>> harn = setup_harn(cmdline=cmdline, **kw)
        >>> #
        >>> harn.initialize()
        >>> #
        >>> batch = harn._demo_batch(tag='train')
        >>> epoch_metrics = harn._demo_epoch(tag='vali', max_iter=4)
    """
    import sys
    import ndsampler

    config = SegmentationConfig(default=kw)
    config.load(cmdline=cmdline)
    nh.configure_hacks(config)  # fix opencv bugs

    assert config['datasets'] == 'special:camvid'

    coco_datasets = setup_coco_datasets()

    workdir = ub.ensuredir(ub.expandpath(config['workdir']))
    samplers = {
        # tag: ndsampler.CocoSampler(dset, workdir=workdir, backend='cog')
        tag: ndsampler.CocoSampler(dset, workdir=workdir, backend='npy')
        for tag, dset in coco_datasets.items()
    }
    torch_datasets = {
        tag: SegmentationDataset(
            sampler, config['input_dims'],
            input_overlap=((tag == 'train') and config['input_overlap']),
            augment=((tag == 'train') and config['augment']),
        )
        for tag, sampler in samplers.items()
    }
    torch_loaders = {
        tag: torch_data.DataLoader(
            dset, batch_size=config['batch_size'],
            num_workers=config['workers'],
            shuffle=(tag == 'train'),
            drop_last=True, pin_memory=True)
        for tag, dset in torch_datasets.items()
    }

    if config['class_weights']:
        mode = config['class_weights']
        dset = torch_datasets['train']
        class_weights = _precompute_class_weights(dset, mode=mode)
        class_weights = torch.FloatTensor(class_weights)
        class_weights[dset.classes.index('background')] = 0
    else:
        class_weights = None

    initializer_ = nh.Initializer.coerce(config)

    if config['arch'] == 'unet':
        # Note: UNet can get through 256x256 images at a rate of ~17Hz with
        # batch_size=8. This is pretty slow and can likely be improved by
        # fixing some of the weird padding / mirror stuff I have to do in
        # unet to get output_dims = input_dims.
        from netharn.models.unet import UNet
        model_ = (UNet, {
            'classes': torch_datasets['train'].classes,
            'in_channels': 3,
        })
    elif config['arch'] == 'segnet':
        from netharn.models.segnet import Segnet
        model_ = (Segnet, {
            'classes': torch_datasets['train'].classes,
            'in_channels': 3,
        })
    elif config['arch'] == 'psp':
        from netharn.models.psp import PSPNet_Resnet50_8s
        model_ = (PSPNet_Resnet50_8s, {
            'classes': torch_datasets['train'].classes,
            'in_channels': 3,
        })
    elif config['arch'] == 'deeplab':
        from netharn.models.deeplab import DeepLab_ASPP
        model_ = (DeepLab_ASPP, {
            'classes': torch_datasets['train'].classes,
            'in_channels': 3,
        })
    else:
        raise KeyError(config['arch'])

    if config['init'] == 'cls':
        initializer_ = model_[0]._initializer_cls()

    # Create hyperparameters
    hyper = nh.HyperParams(
        nice=config['nice'],
        workdir=config['workdir'],
        xpu=nh.XPU.coerce(config['xpu']),

        datasets=torch_datasets,
        loaders=torch_loaders,

        model=model_,
        initializer=initializer_,

        scheduler=nh.Scheduler.coerce(config),
        optimizer=nh.Optimizer.coerce(config),
        dynamics=nh.Dynamics.coerce(config),

        criterion=(nh.criterions.FocalLoss, {
            'focus': config['focus'],
            'weight': class_weights,
            # 'reduction': 'none',
        }),
        monitor=(nh.Monitor, {
            'minimize': ['loss'],
            'patience': config['patience'],
            'max_epoch': config['max_epoch'],
            'smoothing': .6,
        }),
        other={
            'batch_size': config['batch_size'],
        },
        extra={
            'argv': sys.argv,
            'config': ub.repr2(config.asdict()),
        }
    )

    # Create harness
    harn = SegmentationHarn(hyper=hyper)
    harn.classes = torch_datasets['train'].classes
    harn.preferences.update({
        'num_keep': 5,
        'keyboard_debug': True,
        # 'export_modules': ['netharn'],
    })
    harn.intervals.update({
        'vali': 1,
        'test': 10,
    })
    harn.script_config = config
    return harn
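
# Hypothetical sketch of the kind of weighting a helper like
# ``_precompute_class_weights`` might produce (the real helper lives elsewhere
# in this example module and may differ): inverse-log-frequency weights that
# up-weight rare classes before they are handed to the loss criterion.
import numpy as np

def demo_inverse_log_frequency_weights(pixel_counts, c=1.02):
    # pixel_counts: number of labeled pixels per class, in class order
    freqs = np.asarray(pixel_counts, dtype=np.float64)
    freqs = freqs / freqs.sum()
    return 1.0 / np.log(c + freqs)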
def setup_harn(cmdline=True, **kw):
    """
    CommandLine:
        xdoctest -m netharn.examples.segmentation setup_harn

    Example:
        >>> # xdoctest: +REQUIRES(--slow)
        >>> kw = {'workers': 0, 'xpu': 'cpu', 'batch_size': 2}
        >>> cmdline = False
        >>> # Just sets up the harness, does not do any heavy lifting
        >>> harn = setup_harn(cmdline=cmdline, **kw)
        >>> #
        >>> harn.initialize()
        >>> #
        >>> batch = harn._demo_batch(tag='train')
        >>> epoch_metrics = harn._demo_epoch(tag='vali', max_iter=2)
    """
    import sys
    import ndsampler
    import kwarray
    # kwarray.seed_global(2108744082)

    config = SegmentationConfig(default=kw)
    config.load(cmdline=cmdline)
    nh.configure_hacks(config)  # fix opencv bugs

    coco_datasets = nh.api.Datasets.coerce(config)
    print('coco_datasets = {}'.format(ub.repr2(coco_datasets)))
    for tag, dset in coco_datasets.items():
        dset._build_hashid(hash_pixels=False)

    workdir = ub.ensuredir(ub.expandpath(config['workdir']))
    samplers = {
        tag: ndsampler.CocoSampler(dset, workdir=workdir, backend=config['backend'])
        for tag, dset in coco_datasets.items()
    }

    for tag, sampler in ub.ProgIter(list(samplers.items()), desc='prepare frames'):
        try:
            sampler.frames.prepare(workers=config['workers'])
        except AttributeError:
            pass

    torch_datasets = {
        tag: SegmentationDataset(
            sampler, config['input_dims'],
            input_overlap=((tag == 'train') and config['input_overlap']),
            augmenter=((tag == 'train') and config['augmenter']),
        )
        for tag, sampler in samplers.items()
    }
    torch_loaders = {
        tag: torch_data.DataLoader(
            dset, batch_size=config['batch_size'],
            num_workers=config['workers'],
            shuffle=(tag == 'train'),
            drop_last=True, pin_memory=True)
        for tag, dset in torch_datasets.items()
    }

    if config['class_weights']:
        mode = config['class_weights']
        dset = torch_datasets['train']
        class_weights = _precompute_class_weights(dset, mode=mode,
                                                  workers=config['workers'])
        class_weights = torch.FloatTensor(class_weights)
        class_weights[dset.classes.index('background')] = 0
    else:
        class_weights = None

    if config['normalize_inputs']:
        stats_dset = torch_datasets['train']
        stats_idxs = kwarray.shuffle(np.arange(len(stats_dset)), rng=0)[0:min(1000, len(stats_dset))]
        stats_subset = torch.utils.data.Subset(stats_dset, stats_idxs)
        cacher = ub.Cacher('dset_mean', cfgstr=stats_dset.input_id + 'v3')
        input_stats = cacher.tryload()
        if input_stats is None:
            loader = torch.utils.data.DataLoader(
                stats_subset,
                num_workers=config['workers'],
                shuffle=True, batch_size=config['batch_size'])
            running = nh.util.RunningStats()
            for batch in ub.ProgIter(loader, desc='estimate mean/std'):
                try:
                    running.update(batch['im'].numpy())
                except ValueError:  # final batch broadcast error
                    pass
            input_stats = {
                'mean': running.simple(axis=None)['mean'].round(3),
                'std': running.simple(axis=None)['std'].round(3),
            }
            cacher.save(input_stats)
    else:
        input_stats = {}
    print('input_stats = {!r}'.format(input_stats))

    # TODO: infer the number of input channels
    model_ = (SegmentationModel, {
        'arch': config['arch'],
        'input_stats': input_stats,
        'classes': torch_datasets['train'].classes.__json__(),
        'in_channels': 3,
    })

    initializer_ = nh.Initializer.coerce(config)
    # if config['init'] == 'cls':
    #     initializer_ = model_[0]._initializer_cls()

    # Create hyperparameters
    hyper = nh.HyperParams(
        nice=config['nice'],
        workdir=config['workdir'],
        xpu=nh.XPU.coerce(config['xpu']),

        datasets=torch_datasets,
        loaders=torch_loaders,

        model=model_,
        initializer=initializer_,

        scheduler=nh.Scheduler.coerce(config),
        optimizer=nh.Optimizer.coerce(config),
        dynamics=nh.Dynamics.coerce(config),

        criterion=(nh.criterions.FocalLoss, {
            'focus': config['focus'],
            'weight': class_weights,
            # 'reduction': 'none',
        }),
        monitor=(nh.Monitor, {
            'minimize': ['loss'],
            'patience': config['patience'],
            'max_epoch': config['max_epoch'],
            'smoothing': .6,
        }),
        other={
            'batch_size': config['batch_size'],
        },
        extra={
            'argv': sys.argv,
            'config': ub.repr2(config.asdict()),
        })

    # Create harness
    harn = SegmentationHarn(hyper=hyper)
    harn.classes = torch_datasets['train'].classes
    harn.preferences.update({
        'num_keep': 2,
        'keyboard_debug': True,
        # 'export_modules': ['netharn'],
    })
    harn.intervals.update({
        'vali': 1,
        'test': 10,
    })
    harn.script_config = config
    return harn
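
# Small illustration (standalone; uses torch's cross_entropy as a stand-in
# for the FocalLoss configured above) of why the 'background' weight is
# zeroed: pixels labeled background then contribute nothing to the loss, so
# the dominant background class cannot swamp the rare foreground classes.
import torch
import torch.nn.functional as F

logits = torch.randn(1, 3, 4, 4)                  # batch, classes, h, w
target = torch.zeros(1, 4, 4, dtype=torch.long)   # every pixel is class 0 ("background")
weight = torch.tensor([0.0, 1.0, 1.0])            # background weight zeroed
loss = F.cross_entropy(logits, target, weight=weight, reduction='sum')
# loss is exactly 0: every target pixel belongs to the zero-weighted class
print(float(loss))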
def setup_harn(**kwargs):
    """
    Args:
        nice (str): Custom tag for this run
        workdir (PathLike): path to dump all the intermediate results
        batch_size (int): Base batch size. Number of examples in GPU at any time.
        p (int): num individuals per batch
        k (int): num annots-per-individual per batch
        bstep (int): Multiply by batch_size to simulate larger batches.
        lr (float|str): Base learning rate
        decay (float): Weight decay (L2 regularization)
        workers (int): Number of parallel data loader workers
        xpu (str): Device to train on. Can be either `'cpu'`, `'gpu'`, a number
            indicating a GPU (e.g. `0`), or a list of numbers (e.g. `[0,1,2]`)
            indicating multiple GPUs
        norm_desc (bool): if True normalizes the descriptors
        pretrained (PathLike): path to a compatible pretrained model
        margin (float): margin for loss criterion
        soft (bool): use soft margin
    """
    import ast

    def trycast(x, type):
        try:
            return type(x)
        except Exception:
            return x

    config = {}
    config['init'] = kwargs.get('init', 'kaiming_normal')
    config['pretrained'] = kwargs.get('pretrained', ub.argval('--pretrained', default=None))
    config['margin'] = kwargs.get('margin', 3.0)
    config['soft'] = kwargs.get('soft', False)
    config['xpu'] = kwargs.get('xpu', 'argv')
    config['nice'] = kwargs.get('nice', 'untitled')
    config['workdir'] = kwargs.get('workdir', None)
    config['workers'] = int(kwargs.get('workers', 1))
    config['bstep'] = int(kwargs.get('bstep', 1))
    config['optim'] = kwargs.get('optim', 'sgd')
    config['scheduler'] = kwargs.get('scheduler', 'onecycle70')
    config['lr'] = trycast(kwargs.get('lr', 0.0001), float)
    config['decay'] = float(kwargs.get('decay', 1e-5))
    config['max_epoch'] = int(kwargs.get('max_epoch', 100))
    config['num_batches'] = trycast(kwargs.get('num_batches', 1000), int)
    config['batch_size'] = int(kwargs.get('batch_size', 128))
    config['p'] = float(kwargs.get('p', 10))
    config['k'] = float(kwargs.get('k', 25))
    config['arch'] = kwargs.get('arch', 'resnet')
    config['hidden'] = trycast(kwargs.get('hidden', [128]), ast.literal_eval)
    config['desc_size'] = kwargs.get('desc_size', 256)
    config['norm_desc'] = kwargs.get('norm_desc', False)
    config['dim'] = 28

    xpu = nh.XPU.coerce(config['xpu'])
    nh.configure_hacks(config)
    datasets, workdir = setup_datasets()

    loaders = {
        tag: torch.utils.data.DataLoader(
            dset,
            batch_sampler=nh.data.batch_samplers.MatchingSamplerPK(
                dset.pccs,
                shuffle=(tag == 'train'),
                batch_size=config['batch_size'],
                num_batches=config['num_batches'],
                k=config['k'],
                p=config['p'],
            ),
            num_workers=config['workers'],
        )
        for tag, dset in datasets.items()
    }

    if config['arch'] == 'simple':
        model_ = (MNISTEmbeddingNet, {
            'input_shape': (1, 1, config['dim'], config['dim']),
            'desc_size': config['desc_size'],
        })
    elif config['arch'] == 'resnet':
        model_ = (nh.models.DescriptorNetwork, {
            'input_shape': (1, 1, config['dim'], config['dim']),
            'norm_desc': config['norm_desc'],
            'hidden_channels': config['hidden'],
            'desc_size': config['desc_size'],
        })
    else:
        raise KeyError(config['arch'])

    if config['scheduler'] == 'steplr':
        from torch.optim import lr_scheduler
        scheduler_ = (lr_scheduler.StepLR,
                      dict(step_size=8, gamma=0.1, last_epoch=-1))
    else:
        scheduler_ = nh.Scheduler.coerce(config, scheduler='onecycle70')

    # Here is the FitHarn magic.
    # The nh.HyperParams object keeps track of and helps log all declarative
    # info related to training a model.
    hyper = nh.hyperparams.HyperParams(
        nice=config['nice'],
        xpu=xpu,
        workdir=workdir,
        datasets=datasets,
        loaders=loaders,
        model=model_,
        initializer=nh.Initializer.coerce(config),
        optimizer=nh.Optimizer.coerce(config),
        scheduler=scheduler_,
        criterion=(nh.criterions.TripletLoss, {
            'margin': config['margin'],
            'soft': config['soft'],
        }),
        monitor=nh.Monitor.coerce(
            config,
            minimize=['loss', 'pos_dist', 'brier'],
            maximize=['accuracy', 'neg_dist', 'mcc'],
            patience=100,
            max_epoch=config['max_epoch'],
            smoothing=0.4,
        ),
        other={
            'batch_size': config['batch_size'],
            'num_batches': config['num_batches'],
        })
    harn = MNIST_MatchingHarness(hyper=hyper)
    harn.preferences
    return harn
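
# Hedged, illustrative sketch of the P-K batch construction idea behind
# ``nh.data.batch_samplers.MatchingSamplerPK`` used above: each batch draws
# ``p`` individuals and ``k`` annotations per individual, so the triplet loss
# always has in-batch positives and negatives. Names and structure here are
# illustrative, not netharn's actual implementation.
import random

def demo_sample_pk_batch(pccs, p=10, k=25, rng=random):
    """pccs: list of index-lists, one list per individual (positive connected component)."""
    groups = rng.sample(pccs, k=min(p, len(pccs)))
    batch = []
    for group in groups:
        batch.extend(rng.choices(group, k=k))
    return batch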
def setup_harn(cmdline=True, **kw):
    """
    This creates the "Classification Harness" (i.e. the core ClfHarn object).
    This is where we programmatically connect our program arguments with the
    netharn HyperParameter standards. We are using :mod:`scriptconfig` to
    capture these, but you could use click / argparse / etc.

    This function has the responsibility of creating our torch datasets,
    lazily computing input statistics, and specifying our model architecture,
    schedule, initialization, optimizer, dynamics, XPU, etc. These can usually
    be coerced using netharn API helpers and a "standardized" config dict. See
    the function code for details.

    Args:
        cmdline (bool, default=True):
            if True, behavior will be modified based on ``sys.argv``.
            Note this will activate the scriptconfig ``--help``, ``--dump``
            and ``--config`` interactions.

    Kwargs:
        **kw: overrides to the default config for :class:`ClfConfig`.
            Note, command line flags have precedence if cmdline=True.

    Returns:
        ClfHarn: a fully-defined, but uninitialized custom :class:`FitHarn`
            object.

    Example:
        >>> # xdoctest: +SKIP
        >>> kw = {'datasets': 'special:shapes256'}
        >>> cmdline = False
        >>> harn = setup_harn(cmdline, **kw)
        >>> harn.initialize()
    """
    import ndsampler
    config = ClfConfig(default=kw)
    config.load(cmdline=cmdline)
    print('config = {}'.format(ub.repr2(config.asdict())))

    nh.configure_hacks(config)
    coco_datasets = nh.api.Datasets.coerce(config)

    print('coco_datasets = {}'.format(ub.repr2(coco_datasets, nl=1)))
    for tag, dset in coco_datasets.items():
        dset._build_hashid(hash_pixels=False)

    workdir = ub.ensuredir(ub.expandpath(config['workdir']))
    samplers = {
        tag: ndsampler.CocoSampler(
            dset, workdir=workdir, backend=config['sampler_backend'])
        for tag, dset in coco_datasets.items()
    }

    for tag, sampler in ub.ProgIter(list(samplers.items()), desc='prepare frames'):
        sampler.frames.prepare(workers=config['workers'])

    torch_datasets = {
        'train': ClfDataset(
            samplers['train'],
            input_dims=config['input_dims'],
            augmenter=config['augmenter'],
        ),
        'vali': ClfDataset(
            samplers['vali'],
            input_dims=config['input_dims'],
            augmenter=False),
    }

    # The channel spec is needed by the model even when input stats are not
    # computed, so define it before the normalization branch.
    channels = ChannelSpec.coerce(config['channels'])

    if config['normalize_inputs']:
        # Get stats on the dataset (todo: turn off augmentation for this)
        _dset = torch_datasets['train']
        stats_idxs = kwarray.shuffle(np.arange(len(_dset)), rng=0)[0:min(1000, len(_dset))]
        stats_subset = torch.utils.data.Subset(_dset, stats_idxs)

        cacher = ub.Cacher('dset_mean', cfgstr=_dset.input_id + 'v3')
        input_stats = cacher.tryload()

        if input_stats is None:
            # Use parallel workers to load data faster
            from netharn.data.data_containers import container_collate
            from functools import partial
            collate_fn = partial(container_collate, num_devices=1)

            loader = torch.utils.data.DataLoader(
                stats_subset,
                collate_fn=collate_fn,
                num_workers=config['workers'],
                shuffle=True,
                batch_size=config['batch_size'])

            # Track moving average of each fused channel stream
            channel_stats = {key: nh.util.RunningStats()
                             for key in channels.keys()}
            assert len(channel_stats) == 1, (
                'only support one fused stream for now')
            for batch in ub.ProgIter(loader, desc='estimate mean/std'):
                for key, val in batch['inputs'].items():
                    try:
                        for part in val.numpy():
                            channel_stats[key].update(part)
                    except ValueError:  # final batch broadcast error
                        pass

            perchan_input_stats = {}
            for key, running in channel_stats.items():
                running = ub.peek(channel_stats.values())
                perchan_stats = running.simple(axis=(1, 2))
                perchan_input_stats[key] = {
                    'mean': perchan_stats['mean'].round(3),
                    'std': perchan_stats['std'].round(3),
                }

            input_stats = ub.peek(perchan_input_stats.values())
            cacher.save(input_stats)
    else:
        input_stats = {}

    torch_loaders = {
        tag: dset.make_loader(
            batch_size=config['batch_size'],
            num_batches=config['num_batches'],
            num_workers=config['workers'],
            shuffle=(tag == 'train'),
            balance=(config['balance'] if tag == 'train' else None),
            pin_memory=True)
        for tag, dset in torch_datasets.items()
    }

    initializer_ = None

    classes = torch_datasets['train'].classes

    modelkw = {
        'arch': config['arch'],
        'input_stats': input_stats,
        'classes': classes.__json__(),
        'channels': channels,
    }
    model = ClfModel(**modelkw)
    model._initkw = modelkw

    if initializer_ is None:
        initializer_ = nh.Initializer.coerce(config)

    hyper = nh.HyperParams(
        name=config['name'],
        workdir=config['workdir'],
        xpu=nh.XPU.coerce(config['xpu']),

        datasets=torch_datasets,
        loaders=torch_loaders,

        model=model,
        criterion=None,

        optimizer=nh.Optimizer.coerce(config),
        dynamics=nh.Dynamics.coerce(config),
        scheduler=nh.Scheduler.coerce(config),

        initializer=initializer_,

        monitor=(nh.Monitor, {
            'minimize': ['loss'],
            'patience': config['patience'],
            'max_epoch': config['max_epoch'],
            'smoothing': 0.0,
        }),
        other={
            'name': config['name'],
            'batch_size': config['batch_size'],
            'balance': config['balance'],
        },
        extra={
            'argv': sys.argv,
            'config': ub.repr2(config.asdict()),
        }
    )
    harn = ClfHarn(hyper=hyper)
    harn.preferences.update({
        'num_keep': 3,
        'keep_freq': 10,
        'tensorboard_groups': ['loss'],
        'eager_dump_tensorboard': True,
    })
    harn.intervals.update({})
    harn.script_config = config
    return harn
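
# Illustrative sketch (names are hypothetical, not netharn's API) of how
# per-channel ``input_stats`` like the ones cached above are usually consumed:
# a fixed normalization layer placed at the front of the network, so the
# statistics travel with the model weights.
import torch

class DemoInputNorm(torch.nn.Module):
    def __init__(self, mean, std):
        super().__init__()
        self.register_buffer('mean', torch.as_tensor(mean).view(1, -1, 1, 1))
        self.register_buffer('std', torch.as_tensor(std).view(1, -1, 1, 1))

    def forward(self, x):
        return (x - self.mean) / self.std

# e.g. DemoInputNorm(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])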
def setup_harn(cmdline=True, **kwargs):
    """
    cmdline, kwargs = False, {}
    """
    import sys
    import ndsampler

    config = ImageClfConfig(default=kwargs)
    config.load(cmdline=cmdline)
    nh.configure_hacks(config)  # fix opencv bugs

    cacher = ub.Cacher('tiny-imagenet', cfgstr='v4', verbose=3)
    data = cacher.tryload()
    if data is None:
        data = grab_tiny_imagenet_as_coco()
        cacher.save(data)
    coco_datasets = data  # setup_coco_datasets()
    dset = coco_datasets['train']
    print('train dset = {!r}'.format(dset))

    workdir = ub.ensuredir(ub.expandpath(config['workdir']))
    samplers = {
        # tag: ndsampler.CocoSampler(dset, workdir=workdir, backend='cog')
        tag: ndsampler.CocoSampler(dset, workdir=workdir, backend='npy')
        for tag, dset in coco_datasets.items()
    }
    torch_datasets = {
        tag: ImagClfDataset(
            sampler, config['input_dims'],
            augmenter=((tag == 'train') and config['augmenter']),
        )
        for tag, sampler in samplers.items()
    }
    torch_loaders = {
        tag: torch_data.DataLoader(
            dset, batch_size=config['batch_size'],
            num_workers=config['workers'],
            shuffle=(tag == 'train'),
            pin_memory=True)
        for tag, dset in torch_datasets.items()
    }

    import torchvision
    # TODO: netharn should allow for this
    model_ = torchvision.models.resnet50(pretrained=False)
    # model_ = (, {
    #     'classes': torch_datasets['train'].classes,
    #     'in_channels': 3,
    # })

    initializer_ = nh.Initializer.coerce(config)

    hyper = nh.HyperParams(
        nice=config['nice'],
        workdir=config['workdir'],
        xpu=nh.XPU.coerce(config['xpu']),

        datasets=torch_datasets,
        loaders=torch_loaders,

        model=model_,
        initializer=initializer_,

        scheduler=nh.Scheduler.coerce(config),
        optimizer=nh.Optimizer.coerce(config),
        dynamics=nh.Dynamics.coerce(config),

        criterion=(nh.criterions.FocalLoss, {
            'focus': 0.0,
        }),
        monitor=(nh.Monitor, {
            'minimize': ['loss'],
            'patience': config['patience'],
            'max_epoch': config['max_epoch'],
            'smoothing': .6,
        }),
        other={
            'batch_size': config['batch_size'],
        },
        extra={
            'argv': sys.argv,
            'config': ub.repr2(config.asdict()),
        }
    )

    # Create harness
    harn = ImageClfHarn(hyper=hyper)
    harn.classes = torch_datasets['train'].classes
    harn.preferences.update({
        'num_keep': 5,
        'keyboard_debug': True,
        # 'export_modules': ['netharn'],
    })
    harn.intervals.update({
        'vali': 1,
        'test': 10,
    })
    harn.script_config = config
    return harn
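
# The torchvision resnet50 above keeps its 1000-way ImageNet head, which does
# not necessarily match the number of classes in the COCO-converted dataset.
# A minimal sketch (not part of the original example) of adapting the head to
# the dataset's classes before handing the model to the harness:
import torch
import torchvision

def demo_resnet50_for_classes(classes, pretrained=False):
    model = torchvision.models.resnet50(pretrained=pretrained)
    model.fc = torch.nn.Linear(model.fc.in_features, len(classes))
    return model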
def setup_yolo_harness(bsize=16, workers=0):
    """
    CommandLine:
        python -m netharn.examples.yolo_voc setup_yolo_harness

    Example:
        >>> # DISABLE_DOCTEST
        >>> harn = setup_yolo_harness()
        >>> harn.initialize()
    """
    xpu = nh.XPU.coerce('argv')

    nice = ub.argval('--nice', default='Yolo2Baseline')
    batch_size = int(ub.argval('--batch_size', default=bsize))
    bstep = int(ub.argval('--bstep', 4))
    workers = int(ub.argval('--workers', default=workers))
    decay = float(ub.argval('--decay', default=0.0005))
    lr = float(ub.argval('--lr', default=0.001))
    ovthresh = 0.5
    simulated_bsize = bstep * batch_size

    nh.configure_hacks(workers=workers)

    # We will divide the learning rate by the simulated batch size
    datasets = {
        'train': YoloVOCDataset(years=[2007, 2012], split='trainval'),
        # 'test': YoloVOCDataset(years=[2007], split='test'),
    }
    loaders = {
        key: dset.make_loader(batch_size=batch_size, num_workers=workers,
                              shuffle=(key == 'train'), pin_memory=True,
                              resize_rate=10 * bstep, drop_last=True)
        for key, dset in datasets.items()
    }

    anchors = np.array([(1.3221, 1.73145), (3.19275, 4.00944),
                        (5.05587, 8.09892), (9.47112, 4.84053),
                        (11.2364, 10.0071)])

    if not ub.argflag('--eav'):
        lr_step_points = {
            # 0:   lr * 0.1 / simulated_bsize,  # burnin
            # 4:   lr * 1.0 / simulated_bsize,
            0:   lr * 1.0 / simulated_bsize,
            154: lr * 1.0 / simulated_bsize,
            155: lr * 0.1 / simulated_bsize,
            232: lr * 0.1 / simulated_bsize,
            233: lr * 0.01 / simulated_bsize,
        }
        max_epoch = 311
        scheduler_ = (nh.schedulers.core.YOLOScheduler, {
            'points': lr_step_points,
            # 'interpolate': False,
            'interpolate': True,
            # number of epochs to burn_in for. approx 1000 batches?
            'burn_in': 0.96899225 if ub.argflag('--eav') else 3.86683584,
            'dset_size': len(datasets['train']),  # when drop_last=False
            # make a multiple of batch_size because drop_last=True
            # 'dset_size': (len(datasets['train']) // simulated_bsize) * simulated_bsize,
            'batch_size': batch_size,
        })
        from netharn.models.yolo2 import light_region_loss
        criterion_ = (light_region_loss.RegionLoss, {
            'num_classes': datasets['train'].num_classes,
            'anchors': anchors,
            'object_scale': 5.0,
            'noobject_scale': 1.0,
            # eav version originally had a random *2 in cls loss,
            # we removed that, but we can replicate it here.
            'class_scale': 1.0 if not ub.argflag('--eav') else 2.0,
            'coord_scale': 1.0,
            'thresh': 0.6,  # iou_thresh
            'seen_thresh': 12800,
            # 'small_boxes': not ub.argflag('--eav'),
            'small_boxes': True,
            'mse_factor': 0.5 if not ub.argflag('--eav') else 1.0,
        })
    else:
        lr_step_points = {
            # dividing by batch size was one of those unpublished details
            0:   lr * 0.1 / simulated_bsize,
            1:   lr * 1.0 / simulated_bsize,
            96:  lr * 1.0 / simulated_bsize,
            97:  lr * 0.1 / simulated_bsize,
            135: lr * 0.1 / simulated_bsize,
            136: lr * 0.01 / simulated_bsize,
        }
        max_epoch = 176
        scheduler_ = (nh.schedulers.ListedLR, {
            'points': lr_step_points,
            'interpolate': False,
        })
        from netharn.models.yolo2 import region_loss2
        criterion_ = (region_loss2.RegionLoss, {
            'num_classes': datasets['train'].num_classes,
            'anchors': anchors,
            'reduction': 32,
            'seen': 0,
            'coord_scale': 1.0,
            'noobject_scale': 1.0,
            'object_scale': 5.0,
            'class_scale': 1.0,
            'thresh': 0.6,  # iou_thresh
            # 'seen_thresh': 12800,
        })

    weights = ub.argval('--weights', default=None)
    if weights is None or weights == 'imagenet':
        weights = light_yolo.initial_imagenet_weights()
    elif weights == 'lightnet':
        weights = light_yolo.demo_voc_weights()
    else:
        print('weights = {!r}'.format(weights))

    hyper = nh.HyperParams(**{
        'nice': nice,
        'workdir': ub.expandpath('~/work/voc_yolo2'),
        'datasets': datasets,

        'loaders': loaders,

        'xpu': xpu,

        'model': (light_yolo.Yolo, {
            'num_classes': datasets['train'].num_classes,
            'anchors': anchors,
            'conf_thresh': 0.001,
            # 'conf_thresh': 0.1,  # make training a bit faster
            'nms_thresh': 0.5 if not ub.argflag('--eav') else 0.4
        }),

        'criterion': criterion_,

        'initializer': (nh.initializers.Pretrained, {
            'fpath': weights,
        }),

        'optimizer': (torch.optim.SGD, {
            'lr': lr_step_points[0],
            'momentum': 0.9,
            'dampening': 0,
            # multiplying by batch size was one of those unpublished details
            'weight_decay': decay * simulated_bsize,
        }),

        'scheduler': scheduler_,

        'monitor': (nh.Monitor, {
            'minimize': ['loss'],
            'maximize': ['mAP'],
            'patience': max_epoch,
            'max_epoch': max_epoch,
        }),

        # 'augment': datasets['train'].augmenter,

        'dynamics': {
            # Controls how many batches to process before taking a step in the
            # gradient direction. Effectively simulates a batch_size that is
            # `bstep` times bigger.
            'batch_step': bstep,
        },

        'other': {
            # Other params are not used internally, so you are free to set any
            # extra params specific to your algorithm, and still have them
            # logged in the hyperparam structure. For YOLO this is `ovthresh`.
            'batch_size': batch_size,
            'nice': nice,
            'ovthresh': ovthresh,  # used in mAP computation
            'input_range': 'norm01',
        },
    })
    print('max_epoch = {!r}'.format(max_epoch))
    harn = YoloHarn(hyper=hyper)
    harn.preferences['prog_backend'] = 'progiter'
    harn.intervals['log_iter_train'] = None
    harn.intervals['log_iter_test'] = None
    harn.intervals['log_iter_vali'] = None
    # tell netharn when to check for divergence
    harn.preferences['large_loss'] = 1000
    return harn
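
# Worked example (illustrative numbers only) of the "simulated batch size"
# arithmetic the function above relies on: with the defaults bsize=16 and
# bstep=4 the optimizer steps every 4 batches, so the effective batch is 64,
# the configured lr is divided by 64, and the weight decay is multiplied by 64.
batch_size, bstep = 16, 4
lr, decay = 0.001, 0.0005
simulated_bsize = bstep * batch_size        # 64
effective_lr = lr / simulated_bsize         # 1.5625e-05
effective_decay = decay * simulated_bsize   # 0.032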
def setup_harn(**kwargs):
    """
    CommandLine:
        python ~/code/netharn/netharn/examples/ggr_matching.py setup_harn

    Args:
        dbname (str): Name of IBEIS database to use
        nice (str): Custom tag for this run
        workdir (PathLike): path to dump all the intermediate results
        dim (int): Width and height of the network input
        batch_size (int): Base batch size. Number of examples in GPU at any time.
        bstep (int): Multiply by batch_size to simulate larger batches.
        lr (float): Base learning rate
        decay (float): Weight decay (L2 regularization)
        workers (int): Number of parallel data loader workers
        xpu (str): Device to train on. Can be either `'cpu'`, `'gpu'`, a number
            indicating a GPU (e.g. `0`), or a list of numbers (e.g. `[0,1,2]`)
            indicating multiple GPUs
        triple (bool): if True uses triplet loss, otherwise contrastive loss
        norm_desc (bool): if True normalizes the descriptors
        pretrained (PathLike): path to a compatible pretrained model
        margin (float): margin for loss criterion
        soft (bool): use soft margin

    Example:
        >>> harn = setup_harn(dbname='PZ_MTEST')
        >>> harn.initialize()
    """
    config = parse_config(**kwargs)

    nh.configure_hacks(config)
    datasets, workdir = setup_datasets(config)

    loaders = {
        tag: dset.make_loader(
            shuffle=(tag == 'train'),
            batch_size=config['batch_size'],
            num_batches=(config['num_batches'] if tag == 'train'
                         else config['num_batches'] // 10),
            k=config['k'],
            p=config['p'],
            num_workers=config['workers'],
        )
        for tag, dset in datasets.items()
    }

    if config['scheduler'] == 'steplr':
        from torch.optim import lr_scheduler
        scheduler_ = (lr_scheduler.StepLR,
                      dict(step_size=8, gamma=0.1, last_epoch=-1))
    else:
        scheduler_ = nh.Scheduler.coerce(config, scheduler='onecycle70')

    hyper = nh.HyperParams(**{
        'nice': config['nice'],
        'workdir': config['workdir'],
        'datasets': datasets,
        'loaders': loaders,
        'xpu': nh.XPU.coerce(config['xpu']),
        'model': (nh.models.DescriptorNetwork, {
            'input_shape': (1, 3, config['dim'], config['dim']),
            'norm_desc': config['norm_desc'],
            # 'hidden_channels': [512, 256]
            'hidden_channels': [256],
            'desc_size': 128,
        }),
        'initializer': nh.Initializer.coerce(config),
        'optimizer': nh.Optimizer.coerce(config),
        'scheduler': scheduler_,
        'criterion': (nh.criterions.TripletLoss, {
            'margin': config['margin'],
            'soft': config['soft'],
        }),
        'monitor': nh.Monitor.coerce(
            config,
            minimize=['loss', 'pos_dist', 'brier'],
            maximize=['accuracy', 'neg_dist', 'mcc'],
            patience=100,
            max_epoch=100,
        ),
        'dynamics': nh.Dynamics.coerce(config),
        'other': {
            'n_classes': 2,
        },
    })
    harn = MatchingHarness(hyper=hyper)
    harn.preferences['prog_backend'] = 'progiter'
    harn.intervals['log_iter_train'] = 1
    harn.intervals['log_iter_test'] = None
    harn.intervals['log_iter_vali'] = None
    return harn