def _demodata_toy_sesssion(workdir, name='demo_session', lr=1e-4):
    """
    Ignore:
        workdir = ub.ensure_app_cache_dir('netharn/tests/sessions')
    """
    # This will train a toy model with toy data using netharn
    import netharn as nh
    hyper = nh.HyperParams(**{
        'workdir': workdir,
        'name': name,
        'xpu': nh.XPU.coerce('cpu'),
        'datasets': {
            'train': nh.data.ToyData2d(size=3, rng=0),
            'vali': nh.data.ToyData2d(size=3, rng=0),
        },
        'loaders': {'batch_size': 64},
        'model': (nh.models.ToyNet2d, {}),
        'optimizer': (nh.optimizers.SGD, {'lr': lr}),
        'criterion': (nh.criterions.FocalLoss, {}),
        'initializer': (nh.initializers.KaimingNormal, {}),
        'monitor': (nh.Monitor, {'max_epoch': 1}),
    })
    harn = nh.FitHarn(hyper)
    harn.preferences['use_tensorboard'] = False
    harn.preferences['timeout'] = 1
    harn.run()  # TODO: make this run faster if we don't need to rerun
def _demodata_trained_dpath():
    # This will train a toy model with toy data using netharn
    import glob
    import netharn as nh
    import ubelt as ub
    from os.path import join
    hyper = nh.HyperParams(**{
        'workdir': ub.ensure_app_cache_dir('netharn/tests/deploy'),
        'nice': 'deploy_demo_static',
        'xpu': nh.XPU.cast('cpu'),
        'datasets': {'train': nh.data.ToyData2d(size=3, rng=0)},
        'loaders': {'batch_size': 64},
        'model': (nh.models.ToyNet2d, {}),
        'optimizer': (nh.optimizers.SGD, {'lr': 0.0001}),
        'criterion': (nh.criterions.FocalLoss, {}),
        'initializer': (nh.initializers.KaimingNormal, {}),
        'monitor': (nh.Monitor, {'max_epoch': 1}),
    })
    harn = nh.FitHarn(hyper)
    harn.run()  # TODO: make this run faster if we don't need to rerun
    if len(list(glob.glob(join(harn.train_dpath, '*.py')))) > 1:
        # If multiple models are deployed some hash changed. Need to reset
        harn.initialize(reset='delete')
        harn.run()  # don't relearn if we already finished this one
    return harn.train_dpath
def _demodata_toy_harn():
    # This will train a toy model with toy data using netharn
    import netharn as nh
    import ubelt as ub
    hyper = nh.HyperParams(**{
        'workdir': ub.ensure_app_cache_dir('torch_liberator/tests/deploy'),
        'name': 'demo_liberator_static',
        'xpu': nh.XPU.coerce('cpu'),
        'datasets': {'train': nh.data.ToyData2d(size=3, rng=0)},
        'loaders': {'batch_size': 64},
        'model': (nh.models.ToyNet2d, {}),
        'optimizer': (nh.optimizers.SGD, {'lr': 0.0001}),
        'criterion': (nh.criterions.FocalLoss, {}),
        'initializer': (nh.initializers.KaimingNormal, {}),
        'monitor': (nh.Monitor, {'max_epoch': 1}),
    })
    harn = nh.FitHarn(hyper)
    harn.preferences['use_tensorboard'] = False
    harn.preferences['log_gradients'] = False
    harn.preferences['timeout'] = 1
    return harn
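# The three demo helpers above follow the same pattern: bundle toy data and a
# toy model into nh.HyperParams, wrap that in nh.FitHarn, and either run it or
# return it. A minimal smoke-test sketch of how the returned toy harness might
# be exercised (the helper name below is hypothetical; assumes netharn and its
# toy data/model classes are importable):
def _sketch_toy_harn_smoke_test():
    harn = _demodata_toy_harn()
    harn.initialize()        # builds model, optimizer, monitor, etc.
    harn.run()               # trains for at most one epoch (timeout=1 second)
    return harn.train_dpath  # directory containing checkpoints and logs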
def setup_harn(cmdline=False, **kw):
    """
    Ignore:
        kw = {}
        cmdline = False
        harn = setup_harn()
    """
    import sys
    config = StyleTransferConfig(default=kw)
    config.load(cmdline=cmdline)
    print('config = {}'.format(ub.repr2(config.asdict())))

    nh.configure_hacks(config)
    dataset_info = nh.api.DatasetInfo.coerce(config)
    # input_stats = dataset_info['input_stats']

    model = (TransformerNetwork, {})

    hyper = nh.HyperParams(
        name=config['name'],
        workdir=config['workdir'],
        xpu=nh.XPU.coerce(config['xpu']),
        datasets=dataset_info['torch_datasets'],
        loaders=dataset_info['torch_loaders'],
        model=model,
        criterion=None,
        initializer=None,
        optimizer=nh.Optimizer.coerce(config),
        dynamics=nh.Dynamics.coerce(config),
        scheduler=nh.Scheduler.coerce(config),
        monitor=(nh.Monitor, {
            'minimize': ['loss'],
            'patience': config['patience'],
            'max_epoch': config['max_epoch'],
            'smoothing': 0.0,
        }),
        other={
            'name': config['name'],
            'batch_size': config['batch_size'],
            'balance': config['balance'],
        },
        extra={
            'argv': sys.argv,
            'config': ub.repr2(config.asdict()),
        })
    harn = StyleTransferHarn(hyper=hyper)
    harn.preferences.update({
        'num_keep': 3,
        'keep_freq': 10,
        'tensorboard_groups': ['loss'],
        'eager_dump_tensorboard': True,
    })
    harn.intervals.update({})
    harn.script_config = config
    return harn
def setup_harn(cmdline=True, **kw): """ Example: >>> # xdoctest: +REQUIRES(--download) >>> import sys, ubelt >>> sys.path.append(ubelt.expandpath('~/code/netharn/examples')) >>> from sseg_camvid import * # NOQA >>> kw = {'workers': 0, 'xpu': 'cpu', 'batch_size': 2} >>> cmdline = False >>> # Just sets up the harness, does not do any heavy lifting >>> harn = setup_harn(cmdline=cmdline, **kw) >>> # >>> harn.initialize() >>> # >>> batch = harn._demo_batch(tag='train') >>> epoch_metrics = harn._demo_epoch(tag='vali', max_iter=4) """ import sys import ndsampler config = SegmentationConfig(default=kw) config.load(cmdline=cmdline) nh.configure_hacks(config) # fix opencv bugs assert config['datasets'] == 'special:camvid' coco_datasets = setup_coco_datasets() workdir = ub.ensuredir(ub.expandpath(config['workdir'])) samplers = { # tag: ndsampler.CocoSampler(dset, workdir=workdir, backend='cog') tag: ndsampler.CocoSampler(dset, workdir=workdir, backend='npy') for tag, dset in coco_datasets.items() } torch_datasets = { tag: SegmentationDataset( sampler, config['input_dims'], input_overlap=((tag == 'train') and config['input_overlap']), augment=((tag == 'train') and config['augment']), ) for tag, sampler in samplers.items() } torch_loaders = { tag: torch_data.DataLoader(dset, batch_size=config['batch_size'], num_workers=config['workers'], shuffle=(tag == 'train'), drop_last=True, pin_memory=True) for tag, dset in torch_datasets.items() } if config['class_weights']: mode = config['class_weights'] dset = torch_datasets['train'] class_weights = _precompute_class_weights(dset, mode=mode) class_weights = torch.FloatTensor(class_weights) class_weights[dset.classes.index('background')] = 0 else: class_weights = None initializer_ = nh.Initializer.coerce(config) if config['arch'] == 'unet': # Note: UNet can get through 256x256 images at a rate of ~17Hz with # batch_size=8. This is pretty slow and can likely be improved by fixing # some of the weird padding / mirror stuff I have to do in unet to get # output_dims = input_dims. 
from netharn.models.unet import UNet model_ = (UNet, { 'classes': torch_datasets['train'].classes, 'in_channels': 3, }) elif config['arch'] == 'segnet': from netharn.models.segnet import Segnet model_ = (Segnet, { 'classes': torch_datasets['train'].classes, 'in_channels': 3, }) elif config['arch'] == 'psp': from netharn.models.psp import PSPNet_Resnet50_8s model_ = (PSPNet_Resnet50_8s, { 'classes': torch_datasets['train'].classes, 'in_channels': 3, }) elif config['arch'] == 'deeplab': from netharn.models.deeplab import DeepLab_ASPP model_ = (DeepLab_ASPP, { 'classes': torch_datasets['train'].classes, 'in_channels': 3, }) else: raise KeyError(config['arch']) if config['init'] == 'cls': initializer_ = model_[0]._initializer_cls() # Create hyperparameters hyper = nh.HyperParams( nice=config['nice'], workdir=config['workdir'], xpu=nh.XPU.coerce(config['xpu']), datasets=torch_datasets, loaders=torch_loaders, model=model_, initializer=initializer_, scheduler=nh.Scheduler.coerce(config), optimizer=nh.Optimizer.coerce(config), dynamics=nh.Dynamics.coerce(config), criterion=(nh.criterions.FocalLoss, { 'focus': config['focus'], 'weight': class_weights, # 'reduction': 'none', }), monitor=(nh.Monitor, { 'minimize': ['loss'], 'patience': config['patience'], 'max_epoch': config['max_epoch'], 'smoothing': .6, }), other={ 'batch_size': config['batch_size'], }, extra={ 'argv': sys.argv, 'config': ub.repr2(config.asdict()), } ) # Create harness harn = SegmentationHarn(hyper=hyper) harn.classes = torch_datasets['train'].classes harn.preferences.update({ 'num_keep': 5, 'keyboard_debug': True, # 'export_modules': ['netharn'], }) harn.intervals.update({ 'vali': 1, 'test': 10, }) harn.script_config = config return harn
def check_inconsistency(): import netharn as nh import numpy as np import torch import ubelt as ub from netharn.models.yolo2 import light_yolo from netharn.models.yolo2 import light_region_loss yolo_voc = ub.import_module_from_path(ub.truepath('~/code/netharn/examples/yolo_voc.py')) xpu = nh.XPU.cast('argv') nice = ub.argval('--nice', default='Yolo2Baseline') batch_size = 8 bstep = 8 workers = 0 decay = 0.0005 lr = 0.001 ovthresh = 0.5 simulated_bsize = bstep * batch_size # We will divide the learning rate by the simulated batch size datasets = { # 'train': yolo_voc.YoloVOCDataset(years=[2007, 2012], split='trainval'), 'test': yolo_voc.YoloVOCDataset(years=[2007], split='test'), } loaders = { key: dset.make_loader(batch_size=batch_size, num_workers=workers, shuffle=(key == 'train'), pin_memory=True, resize_rate=10 * bstep, drop_last=True) for key, dset in datasets.items() } if workers > 0: import cv2 cv2.setNumThreads(0) assert simulated_bsize == 64, 'must be 64' lr_step_points = { 0: 0, # Hack to see performance before any learning 1: 0, 2: lr * 1.0 / simulated_bsize, 3: lr * 1.0 / simulated_bsize, } max_epoch = 3 # Anchors anchors = np.array([(1.3221, 1.73145), (3.19275, 4.00944), (5.05587, 8.09892), (9.47112, 4.84053), (11.2364, 10.0071)]) hyper = nh.HyperParams(**{ 'nice': nice, 'workdir': ub.truepath('~/work/devcheck_yolo'), 'datasets': datasets, 'xpu': xpu, # a single dict is applied to all datset loaders 'loaders': loaders, 'model': (light_yolo.Yolo, { # 'num_classes': datasets['train'].num_classes, 'num_classes': 20, 'anchors': anchors, # 'conf_thresh': 0.001, 'conf_thresh': 0.1, # make training a bit faster # nms_thresh=0.5 to reproduce original yolo # nms_thresh=0.4 to reproduce lightnet 'nms_thresh': 0.5 if not ub.argflag('--eav') else 0.4 }), 'criterion': (light_region_loss.RegionLoss, { # 'num_classes': datasets['train'].num_classes, 'num_classes': 20, 'anchors': anchors, 'object_scale': 5.0, 'noobject_scale': 1.0, 'class_scale': 1.0, 'coord_scale': 1.0, 'thresh': 0.6, # iou_thresh }), 'initializer': (nh.initializers.Pretrained, { # 'fpath': light_yolo.initial_imagenet_weights(), 'fpath': light_yolo.demo_voc_weights(), }), 'optimizer': (torch.optim.SGD, { 'lr': lr_step_points[0], 'momentum': 0.9, 'dampening': 0, # multiplying by batch size was one of those unpublished details 'weight_decay': decay * simulated_bsize, }), 'scheduler': (nh.schedulers.core.YOLOScheduler, { 'points': lr_step_points, 'interpolate': True, 'burn_in': 1, # 'dset_size': len(datasets['train']), # when drop_last=False 'dset_size': len(datasets['test']), # when drop_last=False 'batch_size': batch_size, }), 'monitor': (nh.Monitor, { 'minimize': ['loss'], 'maximize': ['mAP'], 'patience': max_epoch, 'max_epoch': max_epoch, }), # 'augment': datasets['train'].augmenter, 'dynamics': {'batch_step': bstep}, 'other': { 'nice': nice, 'ovthresh': ovthresh, }, }) print('max_epoch = {!r}'.format(max_epoch)) harn = yolo_voc.YoloHarn(hyper=hyper) harn.config['use_tqdm'] = False harn.intervals['log_iter_train'] = None harn.intervals['log_iter_test'] = None harn.intervals['log_iter_vali'] = None harn.initialize() harn.run()
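# The "unpublished detail" mentioned in the comments above is that the SGD
# settings are rescaled by the simulated batch size (bstep * batch_size): the
# learning rate is divided by it and the weight decay is multiplied by it.
# A small sanity check of the numbers used in check_inconsistency (sketch only):
def _sketch_simulated_bsize_scaling():
    batch_size, bstep = 8, 8
    lr, decay = 0.001, 0.0005
    simulated_bsize = bstep * batch_size       # 64
    effective_lr = lr / simulated_bsize        # 1.5625e-05, used in the scheduler points
    effective_decay = decay * simulated_bsize  # 0.032, passed to torch.optim.SGD
    return simulated_bsize, effective_lr, effective_decay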
def setup_harness(**kwargs): """ CommandLine: python ~/code/netharn/netharn/examples/siam_ibeis.py setup_harness Example: >>> harn = setup_harness(dbname='PZ_MTEST') >>> harn.initialize() """ nice = kwargs.get('nice', 'untitled') bsize = int(kwargs.get('bsize', 6)) bstep = int(kwargs.get('bstep', 4)) workers = int(kwargs.get('workers', 0)) decay = float(kwargs.get('decay', 0.0005)) lr = float(kwargs.get('lr', 0.001)) dim = int(kwargs.get('dim', 416)) xpu = kwargs.get('xpu', 'cpu') workdir = kwargs.get('workdir', None) dbname = kwargs.get('dbname', 'PZ_MTEST') datasets = randomized_ibeis_dset(dbname, dim=dim) if workdir is None: workdir = ub.truepath(os.path.join('~/work/siam-ibeis2', dbname)) ub.ensuredir(workdir) for k, v in datasets.items(): print('* len({}) = {}'.format(k, len(v))) loaders = { key: torch.utils.data.DataLoader( dset, batch_size=bsize, num_workers=workers, shuffle=(key == 'train'), pin_memory=True) for key, dset in datasets.items() } xpu = nh.XPU.cast(xpu) hyper = nh.HyperParams(**{ 'nice': nice, 'workdir': workdir, 'datasets': datasets, 'loaders': loaders, 'xpu': xpu, 'model': (SiameseLP, { 'p': 2, 'input_shape': (1, 3, dim, dim), }), 'criterion': (nh.criterions.ContrastiveLoss, { 'margin': 4, 'weight': None, }), 'optimizer': (torch.optim.SGD, { 'lr': lr / 10, 'weight_decay': decay, 'momentum': 0.9, 'nesterov': True, }), 'initializer': (nh.initializers.NoOp, {}), 'scheduler': (nh.schedulers.ListedLR, { 'points': { 0: lr / 10, 1: lr, 59: lr * 1.1, 60: lr / 10, 90: lr / 100, }, 'interpolate': True }), 'monitor': (nh.Monitor, { 'minimize': ['loss', 'pos_dist'], 'maximize': ['accuracy', 'neg_dist'], 'patience': 160, 'max_epoch': 160, }), 'augment': datasets['train'].augmenter, 'dynamics': { # Controls how many batches to process before taking a step in the # gradient direction. Effectively simulates a batch_size that is # `bstep` times bigger. 'batch_step': bstep, }, 'other': { 'n_classes': 2, }, }) harn = SiamHarness(hyper=hyper) harn.config['prog_backend'] = 'progiter' harn.intervals['log_iter_train'] = 1 harn.intervals['log_iter_test'] = None harn.intervals['log_iter_vali'] = None return harn
def train(): np.random.seed(1031726816 % 4294967295) torch.manual_seed(137852547 % 4294967295) random.seed(2497950049 % 4294967295) # batch_size = int(ub.argval('--batch_size', default=128)) batch_size = int(ub.argval('--batch_size', default=64)) workers = int(ub.argval('--workers', default=6)) model_key = ub.argval('--model', default="CropNetFCAE") xpu = nh.XPU.cast("gpu") lr = 0.001 transform_train = transforms.Compose([ transforms.ToTensor(), ]) transform_test = transforms.Compose([ transforms.ToTensor(), ]) workdir = ub.ensure_app_cache_dir('netharn') datasets = { 'train': torchvision.datasets.CIFAR10(root=workdir, train=True, download=True, transform=transform_train), 'test': torchvision.datasets.CIFAR10(root=workdir, train=False, download=True, transform=transform_test), } # For some reason the torchvision objects dont have the label names CIFAR10_CLASSNAMES = [ 'airplane', 'automobile', 'bird', 'cat', 'deer', 'dog', 'frog', 'horse', 'ship', 'truck', ] datasets['train'].class_names = CIFAR10_CLASSNAMES datasets['test'].class_names = CIFAR10_CLASSNAMES n_classes = 10 # hacked in loaders = { key: torch.utils.data.DataLoader(dset, shuffle=key == 'train', num_workers=workers, batch_size=batch_size, pin_memory=True) for key, dset in datasets.items() } if workers > 0: import cv2 cv2.setNumThreads(0) initializer_ = (nh.initializers.KaimingNormal, { 'param': 0, 'mode': 'fan_in' }) # initializer_ = (initializers.LSUV, {}) available_models = { "CropNetFCAE": (CropNetFCAE, { "chip_size": 19, "bneck_size": 3, }), } model_ = available_models[model_key] hyper = nh.HyperParams( datasets=datasets, nice='cifar10_' + model_key, loaders=loaders, workdir=workdir, xpu=xpu, model=model_, optimizer=(torch.optim.SGD, { 'lr': lr, 'weight_decay': 5e-4, 'momentum': 0.9, 'nesterov': True, }), scheduler=(nh.schedulers.ListedLR, { 'points': { 0: lr, 150: lr * 0.1, 250: lr * 0.01, }, 'interpolate': False }), monitor=(nh.Monitor, { 'minimize': ['loss'], 'patience': 350, 'max_epoch': 350, }), initializer=initializer_, criterion=(torch.nn.CrossEntropyLoss, {}), # Specify anything else that is special about your hyperparams here # Especially if you make a custom_batch_runner # TODO: type of augmentation as a parameter dependency # augment=str(datasets['train'].augmenter), # other=ub.dict_union({ # # 'colorspace': datasets['train'].output_colorspace, # }, datasets['train'].center_inputs.__dict__), ) harn = CIFAR_FitHarn(hyper=hyper) harn.initialize() harn.run()
def setup_harness(bsize=16, workers=0, **kw): """ CommandLine: python ~/code/netharn/netharn/examples/yolo_voc.py setup_harness Example: >>> harn = setup_harness() >>> harn.initialize() """ xpu = nh.XPU.cast('argv') def _argval(arg, default): return ub.argval(arg, kw.get(arg.lstrip('-'), default)) nice = _argval('--nice', default='Yolo2Baseline') batch_size = int(_argval('--batch_size', default=bsize)) bstep = int(_argval('--bstep', 1)) workers = int(_argval('--workers', default=workers)) decay = float(_argval('--decay', default=0.0005)) lr = float(_argval('--lr', default=0.001)) workdir = _argval('--workdir', default=ub.truepath('~/work/viame/yolo')) ovthresh = 0.5 coco_dsets = load_coco_datasets() datasets = { 'train': YoloCocoDataset(coco_dsets['train'], train=True), 'vali': YoloCocoDataset(coco_dsets['vali']), } anchors = np.asarray([(1.08, 1.19), (3.42, 4.41), (6.63, 11.38), (9.42, 5.11), (16.62, 10.52)], dtype=np.float) datasets['train'].check_images_exist() datasets['vali'].check_images_exist() if workers > 0: cv2.setNumThreads(0) loaders = { key: dset.make_loader(batch_size=batch_size, num_workers=workers, shuffle=(key == 'train'), pin_memory=False) for key, dset in datasets.items() } # simulated_bsize = bstep * batch_size hyper = nh.HyperParams( **{ 'nice': nice, 'workdir': workdir, 'datasets': datasets, 'xpu': xpu, # a single dict is applied to all datset loaders 'loaders': loaders, 'model': (light_yolo.Yolo, { 'num_classes': datasets['train'].num_classes, 'anchors': anchors, 'conf_thresh': 0.001, 'nms_thresh': 0.5, }), 'criterion': ( light_region_loss.RegionLoss, { 'num_classes': datasets['train'].num_classes, 'anchors': anchors, 'object_scale': 5.0, 'noobject_scale': 1.0, 'class_scale': 1.0, 'coord_scale': 1.0, 'thresh': 0.6, # iou_thresh }), 'initializer': (nh.initializers.Pretrained, { 'fpath': light_yolo.initial_imagenet_weights(), }), 'optimizer': (torch.optim.SGD, { 'lr': lr / 10, 'momentum': 0.9, 'weight_decay': decay, }), 'scheduler': (nh.schedulers.ListedLR, { 'points': { 0: lr / 10, 1: lr, 59: lr * 1.1, 60: lr / 10, 90: lr / 100, }, 'interpolate': True }), 'monitor': (nh.Monitor, { 'minimize': ['loss'], 'maximize': ['mAP'], 'patience': 160, 'max_epoch': 160, }), 'augment': datasets['train'].augmenter, 'dynamics': { # Controls how many batches to process before taking a step in the # gradient direction. Effectively simulates a batch_size that is # `bstep` times bigger. 'batch_step': bstep, }, 'other': { # Other params are not used internally, so you are free to set any # extra params specific to your algorithm, and still have them # logged in the hyperparam structure. For YOLO this is `ovthresh`. 'batch_size': batch_size, 'nice': nice, 'ovthresh': ovthresh, # used in mAP computation 'input_range': 'norm01', }, }) harn = YoloHarn(hyper=hyper) harn.config['use_tqdm'] = False harn.intervals['log_iter_train'] = None harn.intervals['log_iter_test'] = None harn.intervals['log_iter_vali'] = None return harn
def setup_harn(cmdline=True, **kw):
    """
    CommandLine:
        xdoctest -m netharn.examples.segmentation setup_harn

    Example:
        >>> # xdoctest: +REQUIRES(--slow)
        >>> kw = {'workers': 0, 'xpu': 'cpu', 'batch_size': 2}
        >>> cmdline = False
        >>> # Just sets up the harness, does not do any heavy lifting
        >>> harn = setup_harn(cmdline=cmdline, **kw)
        >>> #
        >>> harn.initialize()
        >>> #
        >>> batch = harn._demo_batch(tag='train')
        >>> epoch_metrics = harn._demo_epoch(tag='vali', max_iter=2)
    """
    import sys
    import ndsampler
    import kwarray
    # kwarray.seed_global(2108744082)

    config = SegmentationConfig(default=kw)
    config.load(cmdline=cmdline)
    nh.configure_hacks(config)  # fix opencv bugs

    coco_datasets = nh.api.Datasets.coerce(config)
    print('coco_datasets = {}'.format(ub.repr2(coco_datasets)))
    for tag, dset in coco_datasets.items():
        dset._build_hashid(hash_pixels=False)

    workdir = ub.ensuredir(ub.expandpath(config['workdir']))
    samplers = {
        tag: ndsampler.CocoSampler(dset, workdir=workdir, backend=config['backend'])
        for tag, dset in coco_datasets.items()
    }

    for tag, sampler in ub.ProgIter(list(samplers.items()), desc='prepare frames'):
        try:
            sampler.frames.prepare(workers=config['workers'])
        except AttributeError:
            pass

    torch_datasets = {
        tag: SegmentationDataset(
            sampler, config['input_dims'],
            input_overlap=((tag == 'train') and config['input_overlap']),
            augmenter=((tag == 'train') and config['augmenter']),
        )
        for tag, sampler in samplers.items()
    }

    torch_loaders = {
        tag: torch_data.DataLoader(dset,
                                   batch_size=config['batch_size'],
                                   num_workers=config['workers'],
                                   shuffle=(tag == 'train'),
                                   drop_last=True, pin_memory=True)
        for tag, dset in torch_datasets.items()
    }

    if config['class_weights']:
        mode = config['class_weights']
        dset = torch_datasets['train']
        class_weights = _precompute_class_weights(dset, mode=mode,
                                                  workers=config['workers'])
        class_weights = torch.FloatTensor(class_weights)
        class_weights[dset.classes.index('background')] = 0
    else:
        class_weights = None

    if config['normalize_inputs']:
        stats_dset = torch_datasets['train']
        stats_idxs = kwarray.shuffle(np.arange(len(stats_dset)), rng=0)[0:min(1000, len(stats_dset))]
        stats_subset = torch.utils.data.Subset(stats_dset, stats_idxs)
        cacher = ub.Cacher('dset_mean', cfgstr=stats_dset.input_id + 'v3')
        input_stats = cacher.tryload()
        if input_stats is None:
            loader = torch.utils.data.DataLoader(
                stats_subset,
                num_workers=config['workers'],
                shuffle=True,
                batch_size=config['batch_size'])
            running = nh.util.RunningStats()
            for batch in ub.ProgIter(loader, desc='estimate mean/std'):
                try:
                    running.update(batch['im'].numpy())
                except ValueError:  # final batch broadcast error
                    pass
            input_stats = {
                'mean': running.simple(axis=None)['mean'].round(3),
                'std': running.simple(axis=None)['std'].round(3),
            }
            cacher.save(input_stats)
    else:
        input_stats = {}
    print('input_stats = {!r}'.format(input_stats))

    # TODO: infer number of channels
    model_ = (SegmentationModel, {
        'arch': config['arch'],
        'input_stats': input_stats,
        'classes': torch_datasets['train'].classes.__json__(),
        'in_channels': 3,
    })

    initializer_ = nh.Initializer.coerce(config)
    # if config['init'] == 'cls':
    #     initializer_ = model_[0]._initializer_cls()

    # Create hyperparameters
    hyper = nh.HyperParams(
        nice=config['nice'],
        workdir=config['workdir'],
        xpu=nh.XPU.coerce(config['xpu']),
        datasets=torch_datasets,
        loaders=torch_loaders,
        model=model_,
        initializer=initializer_,
        scheduler=nh.Scheduler.coerce(config),
        optimizer=nh.Optimizer.coerce(config),
        dynamics=nh.Dynamics.coerce(config),
        criterion=(nh.criterions.FocalLoss, {
            'focus': config['focus'],
            'weight': class_weights,
            # 'reduction': 'none',
        }),
        monitor=(nh.Monitor, {
            'minimize': ['loss'],
            'patience': config['patience'],
            'max_epoch': config['max_epoch'],
            'smoothing': .6,
        }),
        other={
            'batch_size': config['batch_size'],
        },
        extra={
            'argv': sys.argv,
            'config': ub.repr2(config.asdict()),
        })

    # Create harness
    harn = SegmentationHarn(hyper=hyper)
    harn.classes = torch_datasets['train'].classes
    harn.preferences.update({
        'num_keep': 2,
        'keyboard_debug': True,
        # 'export_modules': ['netharn'],
    })
    harn.intervals.update({
        'vali': 1,
        'test': 10,
    })
    harn.script_config = config
    return harn
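# Each setup_harn function above is intended to be the whole entry point of a
# training script: it translates a scriptconfig configuration into HyperParams
# and returns an uninitialized harness. A hedged sketch of the usual driver
# (the function name below is hypothetical, not part of the original code):
def _sketch_train_entrypoint():
    harn = setup_harn(cmdline=True)  # scriptconfig parses sys.argv
    harn.initialize()                # instantiate model/optimizer, restore checkpoints
    return harn.run()                # train until the Monitor's criteria terminate it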
def train(): """ Replicates parameters from https://github.com/kuangliu/pytorch-cifar The following is a table of kuangliu's reported accuracy and our measured accuracy for each model. The first column is kuangliu's reported accuracy, the second column is me running kuangliu's code, and the final column is using my own training harness (handles logging and whatnot) called netharn. model | kuangliu | rerun-kuangliu | netharn | ------------------------------------------------------- ResNet50 | 93.62% | 95.370% | 95.72% | <- how did that happen? DenseNet121 | 95.04% | 95.420% | 94.47% | DPN92 | 95.16% | 95.410% | 94.92% | """ import random import torchvision from torchvision import transforms np.random.seed(1031726816 % 4294967295) torch.manual_seed(137852547 % 4294967295) random.seed(2497950049 % 4294967295) # batch_size = int(ub.argval('--batch_size', default=128)) batch_size = int(ub.argval('--batch_size', default=64)) workers = int(ub.argval('--workers', default=2)) model_key = ub.argval('--model', default='densenet121') xpu = nh.XPU.cast('argv') lr = 0.1 transform_train = transforms.Compose([ transforms.RandomCrop(32, padding=4), transforms.RandomHorizontalFlip(), transforms.ToTensor(), transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)), ]) transform_test = transforms.Compose([ transforms.ToTensor(), transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)), ]) workdir = ub.ensure_app_cache_dir('netharn') datasets = { 'train': torchvision.datasets.CIFAR10(root=workdir, train=True, download=True, transform=transform_train), 'test': torchvision.datasets.CIFAR10(root=workdir, train=False, download=True, transform=transform_test), } # For some reason the torchvision objects dont have the label names CIFAR10_CLASSNAMES = [ 'airplane', 'automobile', 'bird', 'cat', 'deer', 'dog', 'frog', 'horse', 'ship', 'truck', ] datasets['train'].class_names = CIFAR10_CLASSNAMES datasets['test'].class_names = CIFAR10_CLASSNAMES n_classes = 10 # hacked in loaders = { key: torch.utils.data.DataLoader(dset, shuffle=key == 'train', num_workers=workers, batch_size=batch_size, pin_memory=True) for key, dset in datasets.items() } if workers > 0: import cv2 cv2.setNumThreads(0) initializer_ = (nh.initializers.KaimingNormal, { 'param': 0, 'mode': 'fan_in' }) # initializer_ = (initializers.LSUV, {}) available_models = { 'densenet121': (nh.models.densenet.DenseNet, { 'nblocks': [6, 12, 24, 16], 'growth_rate': 12, 'reduction': 0.5, 'num_classes': n_classes, }), 'resnet50': (nh.models.resnet.ResNet, { 'num_blocks': [3, 4, 6, 3], 'num_classes': n_classes, 'block': 'Bottleneck', }), 'dpn26': (nh.models.dual_path_net.DPN, dict( cfg={ 'in_planes': (96, 192, 384, 768), 'out_planes': (256, 512, 1024, 2048), 'num_blocks': (2, 2, 2, 2), 'dense_depth': (16, 32, 24, 128), 'num_classes': n_classes, })), 'dpn92': (nh.models.dual_path_net.DPN, dict( cfg={ 'in_planes': (96, 192, 384, 768), 'out_planes': (256, 512, 1024, 2048), 'num_blocks': (3, 4, 20, 3), 'dense_depth': (16, 32, 24, 128), 'num_classes': n_classes, })), } model_ = available_models[model_key] hyper = nh.HyperParams( datasets=datasets, nice='cifar10_' + model_key, loaders=loaders, workdir=workdir, xpu=xpu, model=model_, optimizer=(torch.optim.SGD, { 'lr': lr, 'weight_decay': 5e-4, 'momentum': 0.9, 'nesterov': True, }), scheduler=(nh.schedulers.ListedLR, { 'points': { 0: lr, 150: lr * 0.1, 250: lr * 0.01, }, 'interpolate': False }), monitor=(nh.Monitor, { 'minimize': ['loss'], 'patience': 350, 'max_epoch': 350, }), 
initializer=initializer_, criterion=(torch.nn.CrossEntropyLoss, {}), # Specify anything else that is special about your hyperparams here # Especially if you make a custom_batch_runner # TODO: type of augmentation as a parameter dependency # augment=str(datasets['train'].augmenter), # other=ub.dict_union({ # # 'colorspace': datasets['train'].output_colorspace, # }, datasets['train'].center_inputs.__dict__), ) harn = CIFAR_FitHarn(hyper=hyper) harn.initialize() harn.run()
def setup_harness(**kwargs): """ CommandLine: python ~/code/netharn/netharn/examples/ggr_matching.py setup_harness Example: >>> harn = setup_harness(dbname='PZ_MTEST') >>> harn.initialize() """ nice = kwargs.get('nice', 'untitled') batch_size = int(kwargs.get('batch_size', 6)) bstep = int(kwargs.get('bstep', 1)) workers = int(kwargs.get('workers', 0)) decay = float(kwargs.get('decay', 0.0005)) lr = float(kwargs.get('lr', 0.001)) dim = int(kwargs.get('dim', 416)) xpu = kwargs.get('xpu', 'argv') workdir = kwargs.get('workdir', None) dbname = kwargs.get('dbname', 'ggr2') if workdir is None: workdir = ub.truepath(os.path.join('~/work/siam-ibeis2', dbname)) ub.ensuredir(workdir) if dbname == 'ggr2': print('Creating torch CocoDataset') train_dset = ndsampler.CocoDataset( data= '/media/joncrall/raid/data/ggr2-coco/annotations/instances_train2018.json', img_root='/media/joncrall/raid/data/ggr2-coco/images/train2018', ) train_dset.hashid = 'ggr2-coco-train2018' vali_dset = ndsampler.CocoDataset( data= '/media/joncrall/raid/data/ggr2-coco/annotations/instances_val2018.json', img_root='/media/joncrall/raid/data/ggr2-coco/images/val2018', ) vali_dset.hashid = 'ggr2-coco-val2018' print('Creating samplers') train_sampler = ndsampler.CocoSampler(train_dset, workdir=workdir) vali_sampler = ndsampler.CocoSampler(vali_dset, workdir=workdir) print('Creating torch Datasets') datasets = { 'train': MatchingCocoDataset(train_sampler, train_dset, workdir, dim=dim, augment=True), 'vali': MatchingCocoDataset(vali_sampler, vali_dset, workdir, dim=dim), } else: from ibeis_utils import randomized_ibeis_dset datasets = randomized_ibeis_dset(dbname, dim=dim) for k, v in datasets.items(): print('* len({}) = {}'.format(k, len(v))) if workers > 0: import cv2 cv2.setNumThreads(0) loaders = { key: torch.utils.data.DataLoader(dset, batch_size=batch_size, num_workers=workers, shuffle=(key == 'train'), pin_memory=True) for key, dset in datasets.items() } xpu = nh.XPU.cast(xpu) hyper = nh.HyperParams( **{ 'nice': nice, 'workdir': workdir, 'datasets': datasets, 'loaders': loaders, 'xpu': xpu, 'model': (MatchingNetworkLP, { 'p': 2, 'input_shape': (1, 3, dim, dim), }), 'criterion': (nh.criterions.ContrastiveLoss, { 'margin': 4, 'weight': None, }), 'optimizer': (torch.optim.SGD, { 'lr': lr, 'weight_decay': decay, 'momentum': 0.9, 'nesterov': True, }), 'initializer': (nh.initializers.NoOp, {}), 'scheduler': (nh.schedulers.Exponential, { 'gamma': 0.99, 'stepsize': 2, }), # 'scheduler': (nh.schedulers.ListedLR, { # 'points': { # 1: lr * 1.0, # 19: lr * 1.1, # 20: lr * 0.1, # }, # 'interpolate': True # }), 'monitor': (nh.Monitor, { 'minimize': ['loss', 'pos_dist', 'brier'], 'maximize': ['accuracy', 'neg_dist', 'mcc'], 'patience': 40, 'max_epoch': 40, }), # 'augment': datasets['train'].augmenter, 'dynamics': { # Controls how many batches to process before taking a step in the # gradient direction. Effectively simulates a batch_size that is # `bstep` times bigger. 'batch_step': bstep, }, 'other': { 'n_classes': 2, }, }) harn = MatchingHarness(hyper=hyper) harn.config['prog_backend'] = 'progiter' harn.intervals['log_iter_train'] = 1 harn.intervals['log_iter_test'] = None harn.intervals['log_iter_vali'] = None return harn
def train(): import random import torchvision from torchvision import transforms xpu = nh.XPU.coerce('argv') config = { 'lr': float(ub.argval('--lr', default=0.1)), 'batch_size': int(ub.argval('--batch_size', default=64)), 'workers': int(ub.argval('--workers', default=2)), 'arch': ub.argval('--arch', default='resnet50'), 'dataset': ub.argval('--dataset', default='coco'), 'workdir': ub.argval('--workdir', default=ub.get_app_cache_dir('netharn')), 'seed': int(ub.argval('--seed', default=137852547)), 'deterministic': False, } # The work directory is where all intermediate results are dumped. ub.ensuredir(config['workdir']) # Take care of random seeding and ensuring appropriate determinisim torch.manual_seed((config['seed'] + 0) % int(2 ** 32 - 1)) random.seed((config['seed'] + 2360097502) % int(2 ** 32 - 1)) np.random.seed((config['seed'] + 893874269) % int(2 ** 32 - 1)) if torch.backends.cudnn.enabled: # TODO: ensure the CPU mode is also deterministic torch.backends.cudnn.deterministic = config['deterministic'] # Define augmentation strategy transform_train = transforms.Compose([ transforms.RandomCrop(32, padding=4), transforms.RandomHorizontalFlip(), transforms.ToTensor(), transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)), ]) transform_test = transforms.Compose([ transforms.ToTensor(), transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)), ]) if config['dataset'] == 'coco': DATASET = torchvision.datasets.CocoDetection # TODO: download dset = DATASET(root=config['workdir'], download=True) meta_fpath = os.path.join(dset.root, dset.base_folder, 'meta') meta_dict = pickle.load(open(meta_fpath, 'rb')) categories = meta_dict['fine_label_names'] # categories = [ # 'apple', 'aquarium_fish', 'baby', 'bear', 'beaver', 'bed', 'bee', # 'beetle', 'bicycle', 'bottle', 'bowl', 'boy', 'bridge', 'bus', # 'butterfly', 'camel', 'can', 'castle', 'caterpillar', 'cattle', # 'chair', 'chimpanzee', 'clock', 'cloud', 'cockroach', 'couch', # 'crab', 'crocodile', 'cup', 'dinosaur', 'dolphin', 'elephant', # 'flatfish', 'forest', 'fox', 'girl', 'hamster', 'house', # 'kangaroo', 'keyboard', 'lamp', 'lawn_mower', 'leopard', 'lion', # 'lizard', 'lobster', 'man', 'maple_tree', 'motorcycle', 'mountain', # 'mouse', 'mushroom', 'oak_tree', 'orange', 'orchid', 'otter', # 'palm_tree', 'pear', 'pickup_truck', 'pine_tree', 'plain', 'plate', # 'poppy', 'porcupine', 'possum', 'rabbit', 'raccoon', 'ray', 'road', # 'rocket', 'rose', 'sea', 'seal', 'shark', 'shrew', 'skunk', # 'skyscraper', 'snail', 'snake', 'spider', 'squirrel', 'streetcar', # 'sunflower', 'sweet_pepper', 'table', 'tank', 'telephone', # 'television', 'tiger', 'tractor', 'train', 'trout', 'tulip', # 'turtle', 'wardrobe', 'whale', 'willow_tree', 'wolf', 'woman', # 'worm'] else: raise KeyError(config['dataset']) datasets = { 'train': DATASET(root=config['workdir'], train=True, transform=transform_train), 'test': DATASET(root=config['workdir'], train=False, transform=transform_test), } # For some reason the torchvision objects do not make the category names # easilly available. We set them here for ease of use. datasets['train'].categories = categories datasets['test'].categories = categories loaders = { key: torch.utils.data.DataLoader(dset, shuffle=key == 'train', num_workers=config['workers'], batch_size=config['batch_size'], pin_memory=True) for key, dset in datasets.items() } if config['workers'] > 0: # Solves pytorch deadlock issue #1355. 
import cv2 cv2.setNumThreads(0) # Choose which network architecture to train available_architectures = { 'densenet121': (nh.models.densenet.DenseNet, { 'nblocks': [6, 12, 24, 16], 'growth_rate': 12, 'reduction': 0.5, 'num_classes': len(categories), }), 'resnet50': (nh.models.resnet.ResNet, { 'num_blocks': [3, 4, 6, 3], 'num_classes': len(categories), 'block': 'Bottleneck', }), 'dpn26': (nh.models.dual_path_net.DPN, dict(cfg={ 'in_planes': (96, 192, 384, 768), 'out_planes': (256, 512, 1024, 2048), 'num_blocks': (2, 2, 2, 2), 'dense_depth': (16, 32, 24, 128), 'num_classes': len(categories), })), 'dpn92': (nh.models.dual_path_net.DPN, dict(cfg={ 'in_planes': (96, 192, 384, 768), 'out_planes': (256, 512, 1024, 2048), 'num_blocks': (3, 4, 20, 3), 'dense_depth': (16, 32, 24, 128), 'num_classes': len(categories), })), } model_ = available_architectures[config['arch']] # Note there are lots of different initializers including a special # pretrained initializer. initializer_ = (nh.initializers.KaimingNormal, {'param': 0, 'mode': 'fan_in'}) # Notice that arguments to hyperparameters are typically specified as a # tuple of (type, Dict), where the dictionary are the keyword arguments # that can be used to instanciate an instance of that class. While # this may be slightly awkward, it enables netharn to track hyperparameters # more effectively. Note that it is possible to simply pass an already # constructed instance of a class, but this causes information loss. hyper = nh.HyperParams( # Datasets must be preconstructed datasets=datasets, nice='cifar10_' + config['arch'], # Loader preconstructed loaders=loaders, workdir=config['workdir'], xpu=xpu, # The 6 major hyper components are best specified as a Tuple[type, dict] model=model_, optimizer=(torch.optim.SGD, { 'lr': config['lr'], 'weight_decay': 5e-4, 'momentum': 0.9, 'nesterov': True, }), scheduler=(nh.schedulers.ListedLR, { 'points': { 0: config['lr'], 150: config['lr'] * 0.1, 250: config['lr'] * 0.01, }, 'interpolate': False }), monitor=(nh.Monitor, { 'minimize': ['loss'], 'patience': 350, 'max_epoch': 350, }), initializer=initializer_, criterion=(torch.nn.CrossEntropyLoss, {}), # The rests of the keyword arguments are simply dictionaries used to # track other information. # Specify what augmentations you are performing for experiment tracking augment=datasets['train'].augmenter, other={ # Specify anything else that is special about your hyperparams here # Especially if you make a custom_batch_runner }, ) # Creating an instance of a Fitharn object is typically fast. harn = Coco_FitHarn(hyper=hyper) # Initializing a FitHarn object can take a little time, but not too much. # This is where instances of the model, optimizer, scheduler, monitor, and # initializer are created. This is also where we check if there is a # pre-existing checkpoint that we can restart from. harn.initialize() # This starts the main loop which will run until a the monitor's terminator # criterion is satisfied. If the initialize step loaded a checkpointed that # already met the termination criterion, then this will simply return. deploy_fpath = harn.run() # The returned deploy_fpath is the path to an exported netharn model. # This model is the on with the best weights according to the monitor. print('deploy_fpath = {!r}'.format(deploy_fpath))
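# As the comments above explain, netharn hyperparameters are usually passed as
# (type, kwargs) pairs rather than constructed objects so the configuration
# stays hashable for experiment tracking. An illustrative sketch of what that
# deferred instantiation amounts to (netharn does this internally with more
# bookkeeping; the names here are only for illustration):
def _sketch_type_kwargs_pair():
    import torch
    optimizer_ = (torch.optim.SGD, {'lr': 0.1, 'momentum': 0.9})
    model = torch.nn.Linear(4, 2)
    opt_cls, opt_kw = optimizer_
    # The class is only instantiated once the model parameters exist.
    optimizer = opt_cls(model.parameters(), **opt_kw)
    return optimizer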
def setup_harness(bsize=16, workers=0): """ CommandLine: python ~/code/netharn/netharn/examples/yolo_voc.py setup_harness Example: >>> # DISABLE_DOCTSET >>> harn = setup_harness() >>> harn.initialize() """ xpu = nh.XPU.cast('argv') nice = ub.argval('--nice', default='Yolo2Baseline') batch_size = int(ub.argval('--batch_size', default=bsize)) bstep = int(ub.argval('--bstep', 4)) workers = int(ub.argval('--workers', default=workers)) decay = float(ub.argval('--decay', default=0.0005)) lr = float(ub.argval('--lr', default=0.001)) ovthresh = 0.5 # We will divide the learning rate by the simulated batch size datasets = { 'train': YoloVOCDataset(years=[2007, 2012], split='trainval'), 'test': YoloVOCDataset(years=[2007], split='test'), } loaders = { key: dset.make_loader(batch_size=batch_size, num_workers=workers, shuffle=(key == 'train'), pin_memory=True) for key, dset in datasets.items() } if workers > 0: import cv2 cv2.setNumThreads(0) simulated_bsize = bstep * batch_size hyper = nh.HyperParams( **{ 'nice': nice, 'workdir': ub.truepath('~/work/voc_yolo2'), 'datasets': datasets, # 'xpu': 'distributed(todo: fancy network stuff)', # 'xpu': 'cpu', # 'xpu': 'gpu:0,1,2,3', 'xpu': xpu, # a single dict is applied to all datset loaders 'loaders': loaders, 'model': ( light_yolo.Yolo, { 'num_classes': datasets['train'].num_classes, 'anchors': datasets['train'].anchors, 'conf_thresh': 0.001, # 'nms_thresh': 0.5, # reproduce original yolo 'nms_thresh': 0.4, # reproduce lightnet }), 'criterion': ( light_region_loss.RegionLoss, { 'num_classes': datasets['train'].num_classes, 'anchors': datasets['train'].anchors, 'object_scale': 5.0, 'noobject_scale': 1.0, 'class_scale': 1.0, 'coord_scale': 1.0, 'thresh': 0.6, # iou_thresh }), 'initializer': ( nh.initializers.Pretrained, { # 'fpath': light_yolo.demo_voc_weights(), 'fpath': light_yolo.initial_imagenet_weights(), }), 'optimizer': ( torch.optim.SGD, { 'lr': lr / 10, 'momentum': 0.9, 'dampening': 0, # multiplying by batch size was one of those unpublished details 'weight_decay': decay * simulated_bsize, }), # Pascal 2007 + 2012 trainval has 16551 images # Pascal 2007 test has 4952 images # In the original YOLO, one batch is 64 images, # so one epoch is 16551 / 64 = 259 iterations. # # From the original YOLO VOC v2 config # https://github.com/pjreddie/darknet/blob/master/cfg/yolov2-voc.cfg # learning_rate=0.001 # burn_in=1000 # max_batches = 80200 # policy=steps # steps=40000,60000 # scales=.1,.1 # # However, the LIGHTNET values are # LR_STEPS = [250, 25000, 35000] # # Based in this, the iter to batch conversion is # # ((np.array([250, 25000, 35000, 1000, 40000, 60000, 80200]) / 256) + 1).astype(np.int) # array([ 1, 98, 137, 4, 157, 235, 314]) 'scheduler': ( nh.schedulers.ListedLR, { 'points': { # dividing by batch size was one of those unpublished details # 0: lr * 0.1 / simulated_bsize, # burnin # 4: lr * 1.0 / simulated_bsize, # 157: lr * 0.1 / simulated_bsize, # 235: lr * 0.001 / simulated_bsize, 0: lr * 0.1 / simulated_bsize, 1: lr * 1.0 / simulated_bsize, 60: lr * 0.1 / simulated_bsize, 90: lr * 0.001 / simulated_bsize, }, 'interpolate': False }), 'monitor': (nh.Monitor, { 'minimize': ['loss'], 'maximize': ['mAP'], 'patience': 314, 'max_epoch': 314, }), 'augment': datasets['train'].augmenter, 'dynamics': { # Controls how many batches to process before taking a step in the # gradient direction. Effectively simulates a batch_size that is # `bstep` times bigger. 
'batch_step': bstep, }, 'other': { # Other params are not used internally, so you are free to set any # extra params specific to your algorithm, and still have them # logged in the hyperparam structure. For YOLO this is `ovthresh`. 'batch_size': batch_size, 'nice': nice, 'ovthresh': ovthresh, # used in mAP computation 'input_range': 'norm01', }, }) harn = YoloHarn(hyper=hyper) harn.config['use_tqdm'] = False harn.intervals['log_iter_train'] = 1 harn.intervals['log_iter_test'] = None harn.intervals['log_iter_vali'] = None return harn
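# The long comment in the function above converts darknet/lightnet batch counts
# into approximate epoch numbers. A worked version of that arithmetic, using
# the 16551 trainval images and simulated batch size of 64 quoted there
# (sketch only; the exact rounding differs slightly between the comments):
def _sketch_batches_to_epochs():
    import numpy as np
    iters_per_epoch = 16551 / 64                       # ~258.6 iterations per epoch
    darknet_steps = np.array([1000, 40000, 60000, 80200])
    lightnet_steps = np.array([250, 25000, 35000])
    return (np.round(darknet_steps / iters_per_epoch),   # ~[  4. 155. 232. 310.]
            np.round(lightnet_steps / iters_per_epoch))  # ~[  1.  97. 135.]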
def setup_harn(cmdline=True, **kw): """ This creates the "The Classification Harness" (i.e. core ClfHarn object). This is where we programmatically connect our program arguments with the netharn HyperParameter standards. We are using :module:`scriptconfig` to capture these, but you could use click / argparse / etc. This function has the responsibility of creating our torch datasets, lazy computing input statistics, specifying our model architecture, schedule, initialization, optimizer, dynamics, XPU etc. These can usually be coerced using netharn API helpers and a "standardized" config dict. See the function code for details. Args: cmdline (bool, default=True): if True, behavior will be modified based on ``sys.argv``. Note this will activate the scriptconfig ``--help``, ``--dump`` and ``--config`` interactions. Kwargs: **kw: the overrides the default config for :class:`ClfConfig`. Note, command line flags have precedence if cmdline=True. Returns: ClfHarn: a fully-defined, but uninitialized custom :class:`FitHarn` object. Example: >>> # xdoctest: +SKIP >>> kw = {'datasets': 'special:shapes256'} >>> cmdline = False >>> harn = setup_harn(cmdline, **kw) >>> harn.initialize() """ import ndsampler config = ClfConfig(default=kw) config.load(cmdline=cmdline) print('config = {}'.format(ub.repr2(config.asdict()))) nh.configure_hacks(config) coco_datasets = nh.api.Datasets.coerce(config) print('coco_datasets = {}'.format(ub.repr2(coco_datasets, nl=1))) for tag, dset in coco_datasets.items(): dset._build_hashid(hash_pixels=False) workdir = ub.ensuredir(ub.expandpath(config['workdir'])) samplers = { tag: ndsampler.CocoSampler(dset, workdir=workdir, backend=config['sampler_backend']) for tag, dset in coco_datasets.items() } for tag, sampler in ub.ProgIter(list(samplers.items()), desc='prepare frames'): sampler.frames.prepare(workers=config['workers']) torch_datasets = { 'train': ClfDataset( samplers['train'], input_dims=config['input_dims'], augmenter=config['augmenter'], ), 'vali': ClfDataset(samplers['vali'], input_dims=config['input_dims'], augmenter=False), } if config['normalize_inputs']: # Get stats on the dataset (todo: turn off augmentation for this) _dset = torch_datasets['train'] stats_idxs = kwarray.shuffle(np.arange(len(_dset)), rng=0)[0:min(1000, len(_dset))] stats_subset = torch.utils.data.Subset(_dset, stats_idxs) cacher = ub.Cacher('dset_mean', cfgstr=_dset.input_id + 'v3') input_stats = cacher.tryload() channels = ChannelSpec.coerce(config['channels']) if input_stats is None: # Use parallel workers to load data faster from netharn.data.data_containers import container_collate from functools import partial collate_fn = partial(container_collate, num_devices=1) loader = torch.utils.data.DataLoader( stats_subset, collate_fn=collate_fn, num_workers=config['workers'], shuffle=True, batch_size=config['batch_size']) # Track moving average of each fused channel stream channel_stats = { key: nh.util.RunningStats() for key in channels.keys() } assert len(channel_stats) == 1, ( 'only support one fused stream for now') for batch in ub.ProgIter(loader, desc='estimate mean/std'): for key, val in batch['inputs'].items(): try: for part in val.numpy(): channel_stats[key].update(part) except ValueError: # final batch broadcast error pass perchan_input_stats = {} for key, running in channel_stats.items(): running = ub.peek(channel_stats.values()) perchan_stats = running.simple(axis=(1, 2)) perchan_input_stats[key] = { 'std': perchan_stats['mean'].round(3), 'mean': perchan_stats['std'].round(3), } 
input_stats = ub.peek(perchan_input_stats.values()) cacher.save(input_stats) else: input_stats = {} torch_loaders = { tag: dset.make_loader( batch_size=config['batch_size'], num_batches=config['num_batches'], num_workers=config['workers'], shuffle=(tag == 'train'), balance=(config['balance'] if tag == 'train' else None), pin_memory=True) for tag, dset in torch_datasets.items() } initializer_ = None classes = torch_datasets['train'].classes modelkw = { 'arch': config['arch'], 'input_stats': input_stats, 'classes': classes.__json__(), 'channels': channels, } model = ClfModel(**modelkw) model._initkw = modelkw if initializer_ is None: initializer_ = nh.Initializer.coerce(config) hyper = nh.HyperParams(name=config['name'], workdir=config['workdir'], xpu=nh.XPU.coerce(config['xpu']), datasets=torch_datasets, loaders=torch_loaders, model=model, criterion=None, optimizer=nh.Optimizer.coerce(config), dynamics=nh.Dynamics.coerce(config), scheduler=nh.Scheduler.coerce(config), initializer=initializer_, monitor=(nh.Monitor, { 'minimize': ['loss'], 'patience': config['patience'], 'max_epoch': config['max_epoch'], 'smoothing': 0.0, }), other={ 'name': config['name'], 'batch_size': config['batch_size'], 'balance': config['balance'], }, extra={ 'argv': sys.argv, 'config': ub.repr2(config.asdict()), }) harn = ClfHarn(hyper=hyper) harn.preferences.update({ 'num_keep': 3, 'keep_freq': 10, 'tensorboard_groups': ['loss'], 'eager_dump_tensorboard': True, }) harn.intervals.update({}) harn.script_config = config return harn
def setup_harn(cmdline=True, **kwargs): """ cmdline, kwargs = False, {} """ import sys import ndsampler config = ImageClfConfig(default=kwargs) config.load(cmdline=cmdline) nh.configure_hacks(config) # fix opencv bugs cacher = ub.Cacher('tiny-imagenet', cfgstr='v4', verbose=3) data = cacher.tryload() if data is None: data = grab_tiny_imagenet_as_coco() cacher.save(data) coco_datasets = data # setup_coco_datasets() dset = coco_datasets['train'] print('train dset = {!r}'.format(dset)) workdir = ub.ensuredir(ub.expandpath(config['workdir'])) samplers = { # tag: ndsampler.CocoSampler(dset, workdir=workdir, backend='cog') tag: ndsampler.CocoSampler(dset, workdir=workdir, backend='npy') for tag, dset in coco_datasets.items() } torch_datasets = { tag: ImagClfDataset( sampler, config['input_dims'], augmenter=((tag == 'train') and config['augmenter']), ) for tag, sampler in samplers.items() } torch_loaders = { tag: torch_data.DataLoader(dset, batch_size=config['batch_size'], num_workers=config['workers'], shuffle=(tag == 'train'), pin_memory=True) for tag, dset in torch_datasets.items() } import torchvision # TODO: netharn should allow for this model_ = torchvision.models.resnet50(pretrained=False) # model_ = (, { # 'classes': torch_datasets['train'].classes, # 'in_channels': 3, # }) initializer_ = nh.Initializer.coerce(config) hyper = nh.HyperParams( nice=config['nice'], workdir=config['workdir'], xpu=nh.XPU.coerce(config['xpu']), datasets=torch_datasets, loaders=torch_loaders, model=model_, initializer=initializer_, scheduler=nh.Scheduler.coerce(config), optimizer=nh.Optimizer.coerce(config), dynamics=nh.Dynamics.coerce(config), criterion=(nh.criterions.FocalLoss, { 'focus': 0.0, }), monitor=(nh.Monitor, { 'minimize': ['loss'], 'patience': config['patience'], 'max_epoch': config['max_epoch'], 'smoothing': .6, }), other={ 'batch_size': config['batch_size'], }, extra={ 'argv': sys.argv, 'config': ub.repr2(config.asdict()), } ) # Create harness harn = ImageClfHarn(hyper=hyper) harn.classes = torch_datasets['train'].classes harn.preferences.update({ 'num_keep': 5, 'keyboard_debug': True, # 'export_modules': ['netharn'], }) harn.intervals.update({ 'vali': 1, 'test': 10, }) harn.script_config = config return harn
def setup_yolo_harness(bsize=16, workers=0): """ CommandLine: python ~/code/netharn/examples/yolo_voc.py setup_yolo_harness Example: >>> # DISABLE_DOCTSET >>> harn = setup_yolo_harness() >>> harn.initialize() """ xpu = nh.XPU.cast('argv') nice = ub.argval('--nice', default='Yolo2Baseline') batch_size = int(ub.argval('--batch_size', default=bsize)) bstep = int(ub.argval('--bstep', 4)) workers = int(ub.argval('--workers', default=workers)) decay = float(ub.argval('--decay', default=0.0005)) lr = float(ub.argval('--lr', default=0.001)) ovthresh = 0.5 simulated_bsize = bstep * batch_size # We will divide the learning rate by the simulated batch size datasets = { 'train': YoloVOCDataset(years=[2007, 2012], split='trainval'), 'test': YoloVOCDataset(years=[2007], split='test'), } loaders = { key: dset.make_loader(batch_size=batch_size, num_workers=workers, shuffle=(key == 'train'), pin_memory=True, resize_rate=10 * bstep, drop_last=True) for key, dset in datasets.items() } if workers > 0: import cv2 cv2.setNumThreads(0) # assert simulated_bsize == 64, 'must be 64' # Pascal 2007 + 2012 trainval has 16551 images # Pascal 2007 test has 4952 images # In the original YOLO, one batch is 64 images, therefore: # # ONE EPOCH is 16551 / 64 = 258.609375 = 259 iterations. # # From the original YOLO VOC v2 config # https://github.com/pjreddie/darknet/blob/master/cfg/yolov2-voc.cfg # learning_rate=0.001 # burn_in=1000 # max_batches = 80200 # policy=steps # steps=40000,60000 # scales=.1,.1 # # However, the LIGHTNET values are # LR_STEPS = [250, 25000, 35000] # # The DARNKET STEPS ARE: # DN_STEPS = 1000, 40000, 60000, 80200 # # Based in this, the iter to batch conversion is # # Key lightnet batch numbers # >>> np.array([250, 25000, 30000, 35000, 45000]) / (16512 / 64) # array([0.9689, 96.899, 116.2790, 135.658, 174.4186]) # -> Round # array([ 1., 97., 135.]) # >>> np.array([1000, 40000, 60000, 80200]) / 258 # array([ 3.86683584, 154.67343363, 232.01015044, 310.12023443]) # -> Round # array(4, 157, 232, 310]) # array([ 3.87596899, 155.03875969, 232.55813953, 310.85271318]) if not ub.argflag('--eav'): lr_step_points = { # 0: lr * 0.1 / simulated_bsize, # burnin # 4: lr * 1.0 / simulated_bsize, 0: lr * 1.0 / simulated_bsize, 154: lr * 1.0 / simulated_bsize, 155: lr * 0.1 / simulated_bsize, 232: lr * 0.1 / simulated_bsize, 233: lr * 0.01 / simulated_bsize, } max_epoch = 311 scheduler_ = ( nh.schedulers.core.YOLOScheduler, { 'points': lr_step_points, # 'interpolate': False, 'interpolate': True, 'burn_in': 0.96899225 if ub.argflag('--eav') else 3.86683584, # number of epochs to burn_in for. approx 1000 batches? 
'dset_size': len(datasets['train']), # when drop_last=False # 'dset_size': (len(datasets['train']) // simulated_bsize) * simulated_bsize, # make a multiple of batch_size because drop_last=True 'batch_size': batch_size, }) else: lr_step_points = { # dividing by batch size was one of those unpublished details 0: lr * 0.1 / simulated_bsize, 1: lr * 1.0 / simulated_bsize, 96: lr * 1.0 / simulated_bsize, 97: lr * 0.1 / simulated_bsize, 135: lr * 0.1 / simulated_bsize, 136: lr * 0.01 / simulated_bsize, } max_epoch = 176 scheduler_ = (nh.schedulers.ListedLR, { 'points': lr_step_points, 'interpolate': False, }) weights = ub.argval('--weights', default=None) if weights is None or weights == 'imagenet': weights = light_yolo.initial_imagenet_weights() elif weights == 'lightnet': weights = light_yolo.demo_voc_weights() else: print('weights = {!r}'.format(weights)) # Anchors anchors = np.array([(1.3221, 1.73145), (3.19275, 4.00944), (5.05587, 8.09892), (9.47112, 4.84053), (11.2364, 10.0071)]) from netharn.models.yolo2 import region_loss2 # from netharn.models.yolo2 import light_region_loss hyper = nh.HyperParams( **{ 'nice': nice, 'workdir': ub.truepath('~/work/voc_yolo2'), 'datasets': datasets, # 'xpu': 'distributed(todo: fancy network stuff)', # 'xpu': 'cpu', # 'xpu': 'gpu:0,1,2,3', 'xpu': xpu, # a single dict is applied to all datset loaders 'loaders': loaders, 'model': ( light_yolo.Yolo, { 'num_classes': datasets['train'].num_classes, 'anchors': anchors, 'conf_thresh': 0.001, # 'conf_thresh': 0.1, # make training a bit faster 'nms_thresh': 0.5 if not ub.argflag('--eav') else 0.4 }), 'criterion': ( region_loss2.RegionLoss, { 'num_classes': datasets['train'].num_classes, 'anchors': anchors, 'reduction': 32, 'seen': 0, 'coord_scale': 1.0, 'noobject_scale': 1.0, 'object_scale': 5.0, 'class_scale': 1.0, 'thresh': 0.6, # iou_thresh # 'seen_thresh': 12800, }), # 'criterion': (light_region_loss.RegionLoss, { # 'num_classes': datasets['train'].num_classes, # 'anchors': anchors, # 'object_scale': 5.0, # 'noobject_scale': 1.0, # # eav version originally had a random *2 in cls loss, # # we removed, that but we can replicate it here. # 'class_scale': 1.0 if not ub.argflag('--eav') else 2.0, # 'coord_scale': 1.0, # 'thresh': 0.6, # iou_thresh # 'seen_thresh': 12800, # # 'small_boxes': not ub.argflag('--eav'), # 'small_boxes': True, # 'mse_factor': 0.5 if not ub.argflag('--eav') else 1.0, # }), 'initializer': (nh.initializers.Pretrained, { 'fpath': weights, }), 'optimizer': ( torch.optim.SGD, { 'lr': lr_step_points[0], 'momentum': 0.9, 'dampening': 0, # multiplying by batch size was one of those unpublished details 'weight_decay': decay * simulated_bsize, }), 'scheduler': scheduler_, 'monitor': (nh.Monitor, { 'minimize': ['loss'], 'maximize': ['mAP'], 'patience': max_epoch, 'max_epoch': max_epoch, }), 'augment': datasets['train'].augmenter, 'dynamics': { # Controls how many batches to process before taking a step in the # gradient direction. Effectively simulates a batch_size that is # `bstep` times bigger. 'batch_step': bstep, }, 'other': { # Other params are not used internally, so you are free to set any # extra params specific to your algorithm, and still have them # logged in the hyperparam structure. For YOLO this is `ovthresh`. 
'batch_size': batch_size, 'nice': nice, 'ovthresh': ovthresh, # used in mAP computation 'input_range': 'norm01', }, }) print('max_epoch = {!r}'.format(max_epoch)) harn = YoloHarn(hyper=hyper) harn.config['prog_backend'] = 'progiter' harn.intervals['log_iter_train'] = None harn.intervals['log_iter_test'] = None harn.intervals['log_iter_vali'] = None harn.config[ 'large_loss'] = 1000 # tell netharn when to check for divergence return harn
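# The burn_in constants used by the YOLO schedulers above (3.86683584 and
# 0.96899225) are burn-in batch counts converted to epochs. Their derivation,
# using the image counts quoted in the surrounding comments (sketch only):
def _sketch_burn_in_epochs():
    darknet_burn_in = 1000 / (16551 / 64)   # ~3.86683584 epochs (darknet burn_in=1000)
    lightnet_burn_in = 250 / (16512 / 64)   # ~0.96899225 epochs (lightnet step 250)
    return darknet_burn_in, lightnet_burn_in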
def setup_yolo_harness(bsize=16, workers=0): """ CommandLine: python -m netharn.examples.yolo_voc setup_yolo_harness Example: >>> # DISABLE_DOCTSET >>> harn = setup_yolo_harness() >>> harn.initialize() """ xpu = nh.XPU.coerce('argv') nice = ub.argval('--nice', default='Yolo2Baseline') batch_size = int(ub.argval('--batch_size', default=bsize)) bstep = int(ub.argval('--bstep', 4)) workers = int(ub.argval('--workers', default=workers)) decay = float(ub.argval('--decay', default=0.0005)) lr = float(ub.argval('--lr', default=0.001)) ovthresh = 0.5 simulated_bsize = bstep * batch_size nh.configure_hacks(workers=workers) # We will divide the learning rate by the simulated batch size datasets = { 'train': YoloVOCDataset(years=[2007, 2012], split='trainval'), # 'test': YoloVOCDataset(years=[2007], split='test'), } loaders = { key: dset.make_loader(batch_size=batch_size, num_workers=workers, shuffle=(key == 'train'), pin_memory=True, resize_rate=10 * bstep, drop_last=True) for key, dset in datasets.items() } anchors = np.array([(1.3221, 1.73145), (3.19275, 4.00944), (5.05587, 8.09892), (9.47112, 4.84053), (11.2364, 10.0071)]) if not ub.argflag('--eav'): lr_step_points = { # 0: lr * 0.1 / simulated_bsize, # burnin # 4: lr * 1.0 / simulated_bsize, 0: lr * 1.0 / simulated_bsize, 154: lr * 1.0 / simulated_bsize, 155: lr * 0.1 / simulated_bsize, 232: lr * 0.1 / simulated_bsize, 233: lr * 0.01 / simulated_bsize, } max_epoch = 311 scheduler_ = ( nh.schedulers.core.YOLOScheduler, { 'points': lr_step_points, # 'interpolate': False, 'interpolate': True, 'burn_in': 0.96899225 if ub.argflag('--eav') else 3.86683584, # number of epochs to burn_in for. approx 1000 batches? 'dset_size': len(datasets['train']), # when drop_last=False # 'dset_size': (len(datasets['train']) // simulated_bsize) * simulated_bsize, # make a multiple of batch_size because drop_last=True 'batch_size': batch_size, }) from netharn.models.yolo2 import light_region_loss criterion_ = ( light_region_loss.RegionLoss, { 'num_classes': datasets['train'].num_classes, 'anchors': anchors, 'object_scale': 5.0, 'noobject_scale': 1.0, # eav version originally had a random *2 in cls loss, # we removed, that but we can replicate it here. 
'class_scale': 1.0 if not ub.argflag('--eav') else 2.0, 'coord_scale': 1.0, 'thresh': 0.6, # iou_thresh 'seen_thresh': 12800, # 'small_boxes': not ub.argflag('--eav'), 'small_boxes': True, 'mse_factor': 0.5 if not ub.argflag('--eav') else 1.0, }) else: lr_step_points = { # dividing by batch size was one of those unpublished details 0: lr * 0.1 / simulated_bsize, 1: lr * 1.0 / simulated_bsize, 96: lr * 1.0 / simulated_bsize, 97: lr * 0.1 / simulated_bsize, 135: lr * 0.1 / simulated_bsize, 136: lr * 0.01 / simulated_bsize, } max_epoch = 176 scheduler_ = (nh.schedulers.ListedLR, { 'points': lr_step_points, 'interpolate': False, }) from netharn.models.yolo2 import region_loss2 criterion_ = ( region_loss2.RegionLoss, { 'num_classes': datasets['train'].num_classes, 'anchors': anchors, 'reduction': 32, 'seen': 0, 'coord_scale': 1.0, 'noobject_scale': 1.0, 'object_scale': 5.0, 'class_scale': 1.0, 'thresh': 0.6, # iou_thresh # 'seen_thresh': 12800, }) weights = ub.argval('--weights', default=None) if weights is None or weights == 'imagenet': weights = light_yolo.initial_imagenet_weights() elif weights == 'lightnet': weights = light_yolo.demo_voc_weights() else: print('weights = {!r}'.format(weights)) hyper = nh.HyperParams( **{ 'nice': nice, 'workdir': ub.expandpath('~/work/voc_yolo2'), 'datasets': datasets, 'loaders': loaders, 'xpu': xpu, 'model': ( light_yolo.Yolo, { 'num_classes': datasets['train'].num_classes, 'anchors': anchors, 'conf_thresh': 0.001, # 'conf_thresh': 0.1, # make training a bit faster 'nms_thresh': 0.5 if not ub.argflag('--eav') else 0.4 }), 'criterion': criterion_, 'initializer': (nh.initializers.Pretrained, { 'fpath': weights, }), 'optimizer': ( torch.optim.SGD, { 'lr': lr_step_points[0], 'momentum': 0.9, 'dampening': 0, # multiplying by batch size was one of those unpublished details 'weight_decay': decay * simulated_bsize, }), 'scheduler': scheduler_, 'monitor': (nh.Monitor, { 'minimize': ['loss'], 'maximize': ['mAP'], 'patience': max_epoch, 'max_epoch': max_epoch, }), # 'augment': datasets['train'].augmenter, 'dynamics': { # Controls how many batches to process before taking a step in the # gradient direction. Effectively simulates a batch_size that is # `bstep` times bigger. 'batch_step': bstep, }, 'other': { # Other params are not used internally, so you are free to set any # extra params specific to your algorithm, and still have them # logged in the hyperparam structure. For YOLO this is `ovthresh`. 'batch_size': batch_size, 'nice': nice, 'ovthresh': ovthresh, # used in mAP computation 'input_range': 'norm01', }, }) print('max_epoch = {!r}'.format(max_epoch)) harn = YoloHarn(hyper=hyper) harn.preferences['prog_backend'] = 'progiter' harn.intervals['log_iter_train'] = None harn.intervals['log_iter_test'] = None harn.intervals['log_iter_vali'] = None harn.preferences[ 'large_loss'] = 1000 # tell netharn when to check for divergence return harn
def setup_harness(bsize=16, workers=0):
    """
    CommandLine:
        python ~/code/netharn/netharn/examples/yolo_voc.py setup_harness

    Example:
        >>> harn = setup_harness()
        >>> harn.initialize()
    """
    xpu = nh.XPU.cast('argv')

    nice = ub.argval('--nice', default='Yolo2Baseline')
    batch_size = int(ub.argval('--batch_size', default=bsize))
    bstep = int(ub.argval('--bstep', default=1))
    workers = int(ub.argval('--workers', default=workers))
    decay = float(ub.argval('--decay', default=0.0005))
    lr = float(ub.argval('--lr', default=0.001))
    ovthresh = 0.5

    # We will divide the learning rate by the simulated batch size
    datasets = {
        'train': YoloVOCDataset(split='trainval'),
        'test': YoloVOCDataset(split='test'),
    }
    loaders = {
        key: dset.make_loader(batch_size=batch_size, num_workers=workers,
                              shuffle=(key == 'train'), pin_memory=True)
        for key, dset in datasets.items()
    }

    # simulated_bsize = bstep * batch_size
    hyper = nh.HyperParams(**{
        'nice': nice,
        'workdir': ub.truepath('~/work/voc_yolo2'),
        'datasets': datasets,

        # 'xpu': 'distributed(todo: fancy network stuff)',
        # 'xpu': 'cpu',
        # 'xpu': 'gpu:0,1,2,3',
        'xpu': xpu,

        # a single dict is applied to all dataset loaders
        'loaders': loaders,

        'model': (light_yolo.Yolo, {
            'num_classes': datasets['train'].num_classes,
            'anchors': datasets['train'].anchors,
            'conf_thresh': 0.001,
            'nms_thresh': 0.5,
        }),

        'criterion': (light_region_loss.RegionLoss, {
            'num_classes': datasets['train'].num_classes,
            'anchors': datasets['train'].anchors,
            'object_scale': 5.0,
            'noobject_scale': 1.0,
            'class_scale': 1.0,
            'coord_scale': 1.0,
            'thresh': 0.6,  # iou_thresh
        }),

        'initializer': (nh.initializers.Pretrained, {
            # 'fpath': light_yolo.demo_weights(),
            'fpath': light_yolo.initial_imagenet_weights(),
        }),

        'optimizer': (torch.optim.SGD, {
            'lr': lr / 10,
            'momentum': 0.9,
            'weight_decay': decay,
        }),

        'scheduler': (nh.schedulers.ListedLR, {
            'points': {
                # dividing by batch size was one of those unpublished details
                # 0: lr / simulated_bsize,
                # 5: .01 / simulated_bsize,
                # 60: .011 / simulated_bsize,
                # 90: .001 / simulated_bsize,
                0: lr / 10,
                1: lr,
                59: lr * 1.1,
                60: lr / 10,
                90: lr / 100,
            },
            'interpolate': True,
        }),

        'monitor': (nh.Monitor, {
            'minimize': ['loss'],
            'maximize': ['mAP'],
            'patience': 160,
            'max_epoch': 160,
        }),

        'augment': datasets['train'].augmenter,

        'dynamics': {
            # Controls how many batches to process before taking a step in
            # the gradient direction. Effectively simulates a batch_size that
            # is `bstep` times bigger.
            'batch_step': bstep,
        },

        'other': {
            # Other params are not used internally, so you are free to set
            # any extra params specific to your algorithm, and still have
            # them logged in the hyperparam structure. For YOLO this is
            # `ovthresh`.
            'batch_size': batch_size,
            'nice': nice,
            'ovthresh': ovthresh,  # used in mAP computation
            'input_range': 'norm01',
        },
    })
    harn = YoloHarn(hyper=hyper)
    harn.config['use_tqdm'] = False
    harn.intervals['log_iter_train'] = 1
    harn.intervals['log_iter_test'] = None
    harn.intervals['log_iter_vali'] = None
    return harn
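
Because `setup_harness` pulls most of its settings from `sys.argv` via `ub.argval` / `ub.argflag`, it can also be driven from Python by injecting flags before the call. A hedged sketch (the flag values are placeholders, and the VOC data must already be available for `YoloVOCDataset` to construct):

import sys
# Simulate command-line flags; normally these come from the CLI shown above.
sys.argv += ['--nice', 'my_voc_run', '--batch_size', '8', '--workers', '2']
harn = setup_harness()
harn.initialize()
# harn.run()  # uncomment to actually train
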
def setup_harn(cmdline=True, **kw):
    """
    Ignore:
        >>> from object_detection import *  # NOQA
        >>> cmdline = False
        >>> kw = {
        >>>     'train_dataset': '~/data/VOC/voc-trainval.mscoco.json',
        >>>     'vali_dataset': '~/data/VOC/voc-test-2007.mscoco.json',
        >>> }
        >>> harn = setup_harn(**kw)
    """
    import ndsampler
    from ndsampler import coerce_data
    # Seed other global rngs just in case something uses them under the hood
    kwarray.seed_global(1129989262, offset=1797315558)

    config = DetectFitConfig(default=kw, cmdline=cmdline)

    nh.configure_hacks(config)  # fix opencv bugs
    ub.ensuredir(config['workdir'])

    # Load ndsampler.CocoDataset objects from info in the config
    subsets = coerce_data.coerce_datasets(config)

    samplers = {}
    for tag, subset in subsets.items():
        print('subset = {!r}'.format(subset))
        sampler = ndsampler.CocoSampler(subset, workdir=config['workdir'])
        samplers[tag] = sampler

    torch_datasets = {
        tag: DetectDataset(
            sampler,
            input_dims=config['input_dims'],
            augment=config['augment'] if (tag == 'train') else False,
        )
        for tag, sampler in samplers.items()
    }

    print('make loaders')
    loaders_ = {
        tag: torch.utils.data.DataLoader(
            dset,
            batch_size=config['batch_size'],
            num_workers=config['workers'],
            shuffle=(tag == 'train'),
            collate_fn=nh.data.collate.padded_collate,
            pin_memory=True)
        for tag, dset in torch_datasets.items()
    }
    # for x in ub.ProgIter(loaders_['train']):
    #     pass

    if config['normalize_inputs']:
        # Get stats on the dataset (todo: turn off augmentation for this)
        _dset = torch_datasets['train']
        stats_idxs = kwarray.shuffle(np.arange(len(_dset)), rng=0)[0:min(1000, len(_dset))]
        stats_subset = torch.utils.data.Subset(_dset, stats_idxs)
        cacher = ub.Cacher('dset_mean', cfgstr=_dset.input_id + 'v2')
        input_stats = cacher.tryload()
        if input_stats is None:
            # Use parallel workers to load data faster
            loader = torch.utils.data.DataLoader(
                stats_subset,
                collate_fn=nh.data.collate.padded_collate,
                num_workers=config['workers'],
                shuffle=True,
                batch_size=config['batch_size'])
            # Track moving average
            running = nh.util.RunningStats()
            for batch in ub.ProgIter(loader, desc='estimate mean/std'):
                try:
                    running.update(batch['im'].numpy())
                except ValueError:  # final batch broadcast error
                    pass
            input_stats = {
                'mean': running.simple(axis=None)['mean'].round(3),
                'std': running.simple(axis=None)['std'].round(3),
            }
            cacher.save(input_stats)
    else:
        input_stats = None
    print('input_stats = {!r}'.format(input_stats))

    initializer_ = nh.Initializer.coerce(config, leftover='kaiming_normal')
    print('initializer_ = {!r}'.format(initializer_))

    arch = config['arch']
    if arch == 'yolo2':
        if False:
            dset = samplers['train'].dset
            print('dset = {!r}'.format(dset))
            # anchors = yolo2.find_anchors(dset)

        anchors = np.array([(1.3221, 1.73145), (3.19275, 4.00944),
                            (5.05587, 8.09892), (9.47112, 4.84053),
                            (11.2364, 10.0071)])

        classes = samplers['train'].classes
        model_ = (yolo2.Yolo2, {
            'classes': classes,
            'anchors': anchors,
            'conf_thresh': 0.001,
            'nms_thresh': 0.5 if not ub.argflag('--eav') else 0.4,
        })
        model = model_[0](**model_[1])
        model._initkw = model_[1]

        criterion_ = (yolo2.YoloLoss, {
            'coder': model.coder,
            'seen': 0,
            'coord_scale': 1.0,
            'noobject_scale': 1.0,
            'object_scale': 5.0,
            'class_scale': 1.0,
            'thresh': 0.6,  # iou_thresh
            # 'seen_thresh': 12800,
        })
    else:
        raise KeyError(arch)

    scheduler_ = nh.Scheduler.coerce(config)
    print('scheduler_ = {!r}'.format(scheduler_))

    optimizer_ = nh.Optimizer.coerce(config)
    print('optimizer_ = {!r}'.format(optimizer_))

    dynamics_ = nh.Dynamics.coerce(config)
    print('dynamics_ = {!r}'.format(dynamics_))

    xpu = nh.XPU.coerce(config['xpu'])
    print('xpu = {!r}'.format(xpu))

    import sys

    hyper = nh.HyperParams(**{
        'nice': config['nice'],
        'workdir': config['workdir'],

        'datasets': torch_datasets,
        'loaders': loaders_,
        'xpu': xpu,

        'model': model,
        'criterion': criterion_,
        'initializer': initializer_,
        'optimizer': optimizer_,
        'dynamics': dynamics_,

        # 'optimizer': (torch.optim.SGD, {
        #     'lr': lr_step_points[0],
        #     'momentum': 0.9,
        #     'dampening': 0,
        #     # multiplying by batch size was one of those unpublished details
        #     'weight_decay': decay * simulated_bsize,
        # }),

        'scheduler': scheduler_,

        'monitor': (nh.Monitor, {
            'minimize': ['loss'],
            # 'maximize': ['mAP'],
            'patience': config['patience'],
            'max_epoch': config['max_epoch'],
            'smoothing': .6,
        }),

        'other': {
            # Other params are not used internally, so you are free to set
            # any extra params specific to your algorithm, and still have
            # them logged in the hyperparam structure. For YOLO this is
            # `ovthresh`.
            'batch_size': config['batch_size'],
            'nice': config['nice'],
            'ovthresh': config['ovthresh'],  # used in mAP computation
        },
        'extra': {
            'config': ub.repr2(config.asdict()),
            'argv': sys.argv,
        }
    })
    print('hyper = {!r}'.format(hyper))
    print('make harn')
    harn = DetectHarn(hyper=hyper)
    harn.preferences.update({
        'num_keep': 2,
        'keep_freq': 30,
        'export_modules': ['netharn'],  # TODO
        'prog_backend': 'progiter',  # alternative: 'tqdm'
        'keyboard_debug': True,
    })
    harn.intervals.update({
        'log_iter_train': 50,
    })
    harn.fit_config = config
    print('harn = {!r}'.format(harn))
    print('samplers = {!r}'.format(samplers))
    return harn
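
The `input_stats` block above uses ubelt's cache-or-compute pattern so the expensive mean/std estimate only runs once per dataset configuration. A standalone sketch of just that pattern, with an illustrative cache name and stand-in values rather than the real statistics computation:

import ubelt as ub
# tryload() returns None on a cache miss; save() writes to the ubelt app cache.
cacher = ub.Cacher('demo_input_stats', cfgstr='toy_dataset_v1')
stats = cacher.tryload()
if stats is None:
    stats = {'mean': 0.485, 'std': 0.229}  # stand-in for the real estimate
    cacher.save(stats)
print(stats)
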
def setup_harn(**kwargs):
    """
    CommandLine:
        python ~/code/netharn/netharn/examples/ggr_matching.py setup_harn

    Args:
        dbname (str): Name of IBEIS database to use
        nice (str): Custom tag for this run
        workdir (PathLike): path to dump all the intermediate results
        dim (int): Width and height of the network input
        batch_size (int): Base batch size. Number of examples in GPU at any time.
        bstep (int): Multiply by batch_size to simulate larger batches.
        lr (float): Base learning rate
        decay (float): Weight decay (L2 regularization)
        workers (int): Number of parallel data loader workers
        xpu (str): Device to train on. Can be either `'cpu'`, `'gpu'`, a number
            indicating a GPU (e.g. `0`), or a list of numbers (e.g. `[0,1,2]`)
            indicating multiple GPUs
        triple (bool): if True uses triplet loss, otherwise contrastive loss
        norm_desc (bool): if True normalizes the descriptors
        pretrained (PathLike): path to a compatible pretrained model
        margin (float): margin for loss criterion
        soft (bool): use soft margin

    Example:
        >>> harn = setup_harn(dbname='PZ_MTEST')
        >>> harn.initialize()
    """
    config = parse_config(**kwargs)

    nh.configure_hacks(config)
    datasets, workdir = setup_datasets(config)

    loaders = {
        tag: dset.make_loader(
            shuffle=(tag == 'train'),
            batch_size=config['batch_size'],
            num_batches=(config['num_batches'] if tag == 'train'
                         else config['num_batches'] // 10),
            k=config['k'],
            p=config['p'],
            num_workers=config['workers'],
        )
        for tag, dset in datasets.items()
    }

    if config['scheduler'] == 'steplr':
        from torch.optim import lr_scheduler
        scheduler_ = (lr_scheduler.StepLR,
                      dict(step_size=8, gamma=0.1, last_epoch=-1))
    else:
        scheduler_ = nh.Scheduler.coerce(config, scheduler='onecycle70')

    hyper = nh.HyperParams(**{
        'nice': config['nice'],
        'workdir': config['workdir'],
        'datasets': datasets,
        'loaders': loaders,
        'xpu': nh.XPU.coerce(config['xpu']),

        'model': (nh.models.DescriptorNetwork, {
            'input_shape': (1, 3, config['dim'], config['dim']),
            'norm_desc': config['norm_desc'],
            # 'hidden_channels': [512, 256],
            'hidden_channels': [256],
            'desc_size': 128,
        }),

        'initializer': nh.Initializer.coerce(config),
        'optimizer': nh.Optimizer.coerce(config),
        'scheduler': scheduler_,

        'criterion': (nh.criterions.TripletLoss, {
            'margin': config['margin'],
            'soft': config['soft'],
        }),

        'monitor': nh.Monitor.coerce(
            config,
            minimize=['loss', 'pos_dist', 'brier'],
            maximize=['accuracy', 'neg_dist', 'mcc'],
            patience=100,
            max_epoch=100,
        ),

        'dynamics': nh.Dynamics.coerce(config),

        'other': {
            'n_classes': 2,
        },
    })
    harn = MatchingHarness(hyper=hyper)
    harn.preferences['prog_backend'] = 'progiter'
    harn.intervals['log_iter_train'] = 1
    harn.intervals['log_iter_test'] = None
    harn.intervals['log_iter_vali'] = None
    return harn
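
Since configuration flows through `parse_config(**kwargs)`, any of the documented Args can be overridden as keyword arguments. A hedged usage sketch along the lines of the Example block, assuming the `PZ_MTEST` demo IBEIS database is available locally (the override values are illustrative only):

harn = setup_harn(dbname='PZ_MTEST', batch_size=32, margin=1.0)
harn.initialize()
# harn.run()  # start training once initialization succeeds
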