def setup_sampler(config):
    """
    Build train/vali CocoSamplers for the configured database.

    Args:
        config (dict): must contain 'dbname', one of 'ggr2' or
            'ggr2-revised'.

    Returns:
        Tuple[dict, str]: (samplers, workdir) where samplers maps
            'train'/'vali' tags to ndsampler.CocoSampler objects.

    Raises:
        KeyError: if config['dbname'] is not a known database name.
    """
    workdir = nh.configure_workdir(
        config, workdir=join('~/work/siam-ibeis2', config['dbname']))

    # TODO: cleanup and hook into ibeis AI
    if config['dbname'] == 'ggr2':
        print('Creating torch CocoDataset')
        root = ub.expandpath('~/data/')
        print('root = {!r}'.format(root))
        train_dset = ndsampler.CocoDataset(
            data=join(root, 'ggr2-coco/annotations/instances_train2018.json'),
            img_root=join(root, 'ggr2-coco/images/train2018'),
        )
        train_dset.hashid = 'ggr2-coco-train2018'
        vali_dset = ndsampler.CocoDataset(
            data=join(root, 'ggr2-coco/annotations/instances_val2018.json'),
            img_root=join(root, 'ggr2-coco/images/val2018'),
        )
        vali_dset.hashid = 'ggr2-coco-val2018'

        print('Creating samplers')
        samplers = {
            'train': ndsampler.CocoSampler(train_dset, workdir=workdir),
            'vali': ndsampler.CocoSampler(vali_dset, workdir=workdir),
        }
    # BUGFIX: this was previously a plain `if`, so the trailing `else`
    # raised KeyError even after the 'ggr2' branch succeeded. It must be
    # `elif` so exactly one branch of the chain is taken.
    elif config['dbname'] == 'ggr2-revised':
        print('Creating torch CocoDataset')
        root = ub.expandpath('~/data/ggr2.coco.revised')
        print('root = {!r}'.format(root))
        train_dset = ndsampler.CocoDataset(
            data=join(root, 'annotations/instances_train2019.json'),
            img_root=join(root, 'images/train2019'),
        )
        train_dset.hashid = 'ggr2-coco-revised-train2019'
        vali_dset = ndsampler.CocoDataset(
            data=join(root, 'annotations/instances_val2019.json'),
            img_root=join(root, 'images/val2019'),
        )
        vali_dset.hashid = 'ggr2-coco-revised-val2019'

        print('Creating samplers')
        samplers = {
            'train': ndsampler.CocoSampler(train_dset, workdir=workdir),
            'vali': ndsampler.CocoSampler(vali_dset, workdir=workdir),
        }
    else:
        raise KeyError(config['dbname'])

    return samplers, workdir
def demo(cls, **kwargs):
    """
    Construct a demo instance backed by the CamVid dataset.

    Args:
        **kwargs: forwarded to the ``cls`` constructor.

    Returns:
        an instance of ``cls`` wrapping a CamVid CocoSampler.
    """
    from netharn.data.grab_camvid import grab_coco_camvid
    import ndsampler
    camvid_dset = grab_coco_camvid()
    camvid_sampler = ndsampler.CocoSampler(
        camvid_dset, workdir=None, backend='npy')
    return cls(camvid_sampler, **kwargs)
def demo(WindowedSamplerDataset, key='habcam', **kwargs):
    """
    Construct a demo WindowedSamplerDataset.

    Args:
        key (str): 'habcam' loads a local habcam mscoco file; any other
            value is forwarded to ``ndsampler.CocoSampler.demo``.
        **kwargs: forwarded to the ``WindowedSamplerDataset`` constructor.
    """
    import ndsampler
    if key != 'habcam':
        sampler = ndsampler.CocoSampler.demo(key)
    else:
        dset_fpath = ub.expandpath('~/data/noaa/Habcam_2015_g027250_a00102917_c0001_v2_vali.mscoco.json')
        workdir = ub.expandpath('~/work/bioharn')
        coco_dset = ndsampler.CocoDataset(dset_fpath)
        sampler = ndsampler.CocoSampler(
            coco_dset, workdir=workdir, backend=None)
    return WindowedSamplerDataset(sampler, **kwargs)
def grab_camvid_sampler():
    """
    Grab a ndsampler.CocoSampler object for the CamVid dataset.

    Returns:
        ndsampler.CocoSampler: sampler

    Example:
        >>> # xdoctest: +REQUIRES(--download)
        >>> sampler = grab_camvid_sampler()
        >>> print('sampler = {!r}'.format(sampler))
        >>> # sampler.load_sample()
        >>> for gid in ub.ProgIter(sampler.image_ids, desc='load image'):
        >>>     img = sampler.load_image(gid)
    """
    import ndsampler
    cache_dpath = ub.ensure_app_cache_dir('camvid')
    coco_dset = grab_coco_camvid()
    return ndsampler.CocoSampler(coco_dset, workdir=cache_dpath)
def __init__(self, storage_mode='numpy', return_mode='tensor', total=24e7):
    """
    Args:
        storage_mode (str): how the backing data is stored; one of
            'numpy', 'python', 'ndsampler', or 'ndsampler-sql'.
        return_mode (str): one of 'tensor', 'dict', 'tuple', or 'list'.
        total (float | int): number of items in the backing store
            (truncated to int).

    Raises:
        KeyError: if storage_mode is unrecognized.
    """
    self.return_mode = return_mode
    self.storage_mode = storage_mode
    assert self.return_mode in {'tensor', 'dict', 'tuple', 'list'}
    if storage_mode == 'numpy':
        # np.arange avoids materializing a throwaway python list
        # (`total` defaults to 240 million items)
        self.data = np.arange(int(total))
    elif storage_mode == 'python':
        # list(range(n)) is the idiomatic (and faster) spelling of
        # [x for x in range(n)]
        self.data = list(range(int(total)))
    elif storage_mode == 'ndsampler-sql':
        import ndsampler
        import kwcoco
        from kwcoco.coco_sql_dataset import ensure_sql_coco_view
        dset = kwcoco.CocoDataset.demo(
            'vidshapes', num_videos=1, num_frames=total, gsize=(64, 64))
        dset = ensure_sql_coco_view(dset)
        print('dset.uri = {!r}'.format(dset.uri))
        dset.hashid = 'fake-hashid'
        sampler = ndsampler.CocoSampler(dset, backend=None)
        self.data = sampler
        # sampler.load_item(0)
        # tr = sampler.regions.get_item(0)
        # sampler.load_sample(tr)
        # assert total <= 1000
        # sampler = ndsampler.CocoSampler.demo('shapes{}'.format(total))
        # sampler = ndsampler.CocoSampler.demo('shapes{}'.format(total))
    elif storage_mode == 'ndsampler':
        import ndsampler
        # assert total <= 10000
        sampler = ndsampler.CocoSampler.demo(
            'vidshapes', num_videos=1, num_frames=total, gsize=(64, 64))
        self.data = sampler
    else:
        raise KeyError(storage_mode)
def detect_cli(config={}):
    """
    Run detection over a coco dataset (or loose images) and dump results.

    CommandLine:
        python -m bioharn.detect_predict --help

    CommandLine:
        python -m bioharn.detect_predict \
            --dataset=~/data/noaa/Habcam_2015_g027250_a00102917_c0001_v2_test.mscoco.json \
            --deployed=/home/joncrall/work/bioharn/fit/runs/bioharn-det-v11-test-cascade/myovdqvi/deploy_MM_CascadeRCNN_myovdqvi_035_MVKVVR.zip \
            --out_dpath=~/work/bioharn/habcam_test_out \
            --draw=100 \
            --input_dims=512,512 \
            --xpu=0 --batch_size=1

    Ignore:
        >>> config = {}
        >>> config['dataset'] = '~/data/noaa/Habcam_2015_g027250_a00102917_c0001_v2_vali.mscoco.json'
        >>> config['deployed'] = '/home/joncrall/work/bioharn/fit/runs/bioharn-det-v11-test-cascade/myovdqvi/deploy_MM_CascadeRCNN_myovdqvi_035_MVKVVR.zip'
        >>> config['out_dpath'] = 'out'
    """
    # NOTE: the mutable default `config={}` is harmless here because it is
    # immediately rebound (never mutated), but `config=None` would be safer.
    import kwarray
    import ndsampler
    from os.path import basename, join, exists, isfile, isdir  # NOQA
    config = DetectPredictCLIConfig(config, cmdline=True)
    print('config = {}'.format(ub.repr2(config.asdict())))

    out_dpath = ub.expandpath(config.get('out_dpath'))

    import six
    # Coerce config['dataset'] (path str, image path, or list of image
    # paths) into a CocoDataset
    if isinstance(config['dataset'], six.string_types):
        if config['dataset'].endswith('.json'):
            dataset_fpath = ub.expandpath(config['dataset'])
            coco_dset = ndsampler.CocoDataset(dataset_fpath)
            # Running prediction is much faster if you can build a sampler.
            sampler_backend = {
                'type': 'cog',
                'config': {
                    'compress': 'JPEG',
                },
                '_hack_old_names': False,  # flip to true to use legacy caches
            }
            # NOTE(review): the dict above is immediately clobbered here, so
            # the cog backend is effectively disabled — presumably a debug
            # leftover; confirm whether the override should be removed.
            sampler_backend = None
            print('coco hashid = {}'.format(coco_dset._build_hashid()))
        else:
            sampler_backend = None
            if exists(config['dataset']) and isfile(config['dataset']):
                # Single image case
                image_fpath = ub.expandpath(config['dataset'])
                coco_dset = ndsampler.CocoDataset()
                coco_dset.add_image(image_fpath)
            # NOTE(review): if the string is neither a .json nor an existing
            # file, `coco_dset` is never assigned and a NameError is raised
            # further down — consider raising a clear error here instead.
    elif isinstance(config['dataset'], list):
        # Multiple image case
        gpaths = config['dataset']
        gpaths = [ub.expandpath(g) for g in gpaths]
        coco_dset = ndsampler.CocoDataset()
        for gpath in gpaths:
            coco_dset.add_image(gpath)
    else:
        raise TypeError(config['dataset'])

    draw = config.get('draw')
    workdir = ub.expandpath(config.get('workdir'))

    det_outdir = ub.ensuredir((out_dpath, 'pred'))

    # Restrict the CLI config to the keys the predictor understands
    pred_config = ub.dict_subset(config, DetectPredictConfig.default)

    print('Create sampler')
    sampler = ndsampler.CocoSampler(coco_dset, workdir=workdir,
                                    backend=sampler_backend)
    print('prepare frames')
    sampler.frames.prepare(workers=config['workers'])

    print('Create predictor')
    predictor = DetectPredictor(pred_config)
    print('Ensure model')
    predictor._ensure_model()

    # Start from a copy of the input dataset with annotations stripped; the
    # predicted detections will be accumulated into this dataset.
    pred_dataset = coco_dset.dataset.copy()
    pred_dataset['annotations'] = []
    pred_dset = ndsampler.CocoDataset(pred_dataset)

    # self = predictor
    predictor.config['verbose'] = 1
    pred_gen = predictor.predict_sampler(sampler)
    # Overlap prediction with the (slow) result-writing below
    buffered_gen = AsyncBufferedGenerator(pred_gen, size=coco_dset.n_images)

    gid_to_pred = {}
    prog = ub.ProgIter(buffered_gen, total=coco_dset.n_images,
                       desc='buffered detect')
    for img_idx, (gid, dets) in enumerate(prog):
        gid_to_pred[gid] = dets

        for ann in dets.to_coco():
            ann['image_id'] = gid
            try:
                catname = ann['category_name']
                ann['category_id'] = pred_dset._resolve_to_cid(catname)
            except KeyError:
                # NOTE(review): if the KeyError came from the missing
                # 'category_name' key (not from _resolve_to_cid), `catname`
                # is unbound here and add_category raises NameError — verify
                # that dets.to_coco() always emits 'category_name'.
                if 'category_id' not in ann:
                    cid = pred_dset.add_category(catname)
                    ann['category_id'] = cid
            pred_dset.add_annotation(**ann)

        # Dump a per-image mscoco file as soon as each image finishes
        single_img_coco = pred_dset.subset([gid])
        single_pred_dpath = ub.ensuredir((det_outdir, 'single_image'))
        single_pred_fpath = join(
            single_pred_dpath, 'detections_gid_{:08d}.mscoco.json'.format(gid))
        single_img_coco.dump(single_pred_fpath, newlines=True)

        # draw=True draws every image; an integer draws only the first
        # `draw` images
        if draw is True or (draw and img_idx < draw):
            draw_outdir = ub.ensuredir((out_dpath, 'draw'))
            img_fpath = coco_dset.load_image_fpath(gid)
            gname = basename(img_fpath)
            viz_fname = ub.augpath(gname, prefix='detect_', ext='.jpg')
            viz_fpath = join(draw_outdir, viz_fname)

            image = kwimage.imread(img_fpath)

            # Keep confident detections, but always show the top 10
            flags = dets.scores > .2
            flags[kwarray.argmaxima(dets.scores, num=10)] = True
            top_dets = dets.compress(flags)
            toshow = top_dets.draw_on(image, alpha=None)
            # kwplot.imshow(toshow)
            kwimage.imwrite(viz_fpath, toshow, space='rgb')

    # Final combined dump of all predicted detections
    pred_fpath = join(det_outdir, 'detections.mscoco.json')
    print('Dump detections to pred_fpath = {!r}'.format(pred_fpath))
    pred_dset.dump(pred_fpath, newlines=True)
def _coerce_datasets(config):
    """
    Build torch datasets/loaders (and input statistics) from a config.

    Args:
        config (dict): standardized config with keys including 'workdir',
            'sampler_backend', 'input_dims', 'normalize_inputs', 'channels',
            'workers', 'batch_size', 'num_batches', and 'balance'.

    Returns:
        dict: with keys 'torch_datasets', 'torch_loaders', 'input_stats'.
    """
    import netharn as nh
    import ndsampler
    import numpy as np
    from torchvision import transforms
    coco_datasets = nh.api.Datasets.coerce(config)
    print('coco_datasets = {}'.format(ub.repr2(coco_datasets, nl=1)))
    for tag, dset in coco_datasets.items():
        dset._build_hashid(hash_pixels=False)

    workdir = ub.ensuredir(ub.expandpath(config['workdir']))
    samplers = {
        tag: ndsampler.CocoSampler(
            dset, workdir=workdir, backend=config['sampler_backend'])
        for tag, dset in coco_datasets.items()
    }

    for tag, sampler in ub.ProgIter(list(samplers.items()),
                                    desc='prepare frames'):
        sampler.frames.prepare(workers=config['workers'])

    # TODO: basic ndsampler torch dataset, likely has to support the transforms
    # API, bleh.
    transform = transforms.Compose([
        transforms.Resize(config['input_dims']),
        transforms.CenterCrop(config['input_dims']),
        transforms.ToTensor(),
        transforms.Lambda(lambda x: x.mul(255))
    ])

    torch_datasets = {
        key: SamplerDataset(
            sampler, transform=transform,
            # input_dims=config['input_dims'],
            # augmenter=config['augmenter'] if key == 'train' else None,
        )
        for key, sampler in samplers.items()
    }
    # self = torch_dset = torch_datasets['train']

    if config['normalize_inputs']:
        # Get stats on the dataset (todo: turn off augmentation for this)
        import kwarray
        _dset = torch_datasets['train']
        stats_idxs = kwarray.shuffle(
            np.arange(len(_dset)), rng=0)[0:min(1000, len(_dset))]
        stats_subset = torch.utils.data.Subset(_dset, stats_idxs)

        # cfgstr bumped to v4 to invalidate caches computed before the
        # mean/std key-swap fix below
        cacher = ub.Cacher('dset_mean', cfgstr=_dset.input_id + 'v4')
        input_stats = cacher.tryload()

        from netharn.data.channel_spec import ChannelSpec
        channels = ChannelSpec.coerce(config['channels'])

        if input_stats is None:
            # Use parallel workers to load data faster
            from netharn.data.data_containers import container_collate
            from functools import partial
            collate_fn = partial(container_collate, num_devices=1)

            loader = torch.utils.data.DataLoader(
                stats_subset,
                collate_fn=collate_fn,
                num_workers=config['workers'],
                shuffle=True,
                batch_size=config['batch_size'])

            # Track moving average of each fused channel stream
            channel_stats = {key: nh.util.RunningStats()
                             for key in channels.keys()}
            assert len(channel_stats) == 1, (
                'only support one fused stream for now')
            for batch in ub.ProgIter(loader, desc='estimate mean/std'):
                if isinstance(batch, (tuple, list)):
                    inputs = {'rgb': batch[0]}  # make assumption
                else:
                    inputs = batch['inputs']
                for key, val in inputs.items():
                    try:
                        for part in val.numpy():
                            channel_stats[key].update(part)
                    except ValueError:  # final batch broadcast error
                        pass

            perchan_input_stats = {}
            for key, running in channel_stats.items():
                perchan_stats = running.simple(axis=(1, 2))
                # BUGFIX: the 'mean' and 'std' keys were previously swapped
                # (std held the mean and vice versa)
                perchan_input_stats[key] = {
                    'mean': perchan_stats['mean'].round(3),
                    'std': perchan_stats['std'].round(3),
                }

            input_stats = ub.peek(perchan_input_stats.values())
            cacher.save(input_stats)
    else:
        input_stats = {}

    torch_loaders = {
        tag: dset.make_loader(
            batch_size=config['batch_size'],
            num_batches=config['num_batches'],
            num_workers=config['workers'],
            shuffle=(tag == 'train'),
            balance=(config['balance'] if tag == 'train' else None),
            pin_memory=True)
        for tag, dset in torch_datasets.items()
    }

    dataset_info = {
        'torch_datasets': torch_datasets,
        'torch_loaders': torch_loaders,
        'input_stats': input_stats
    }
    return dataset_info
def setup_harn(cmdline=True, **kw):
    """
    Build the detection DetectHarn from a standardized config.

    Args:
        cmdline (bool, default=True): if True, read overrides from sys.argv.

    Kwargs:
        **kw: overrides for the :class:`DetectFitConfig` defaults.

    Returns:
        DetectHarn: fully-defined, uninitialized harness.

    Ignore:
        >>> from object_detection import *  # NOQA
        >>> cmdline = False
        >>> kw = {
        >>>     'train_dataset': '~/data/VOC/voc-trainval.mscoco.json',
        >>>     'vali_dataset': '~/data/VOC/voc-test-2007.mscoco.json',
        >>> }
        >>> harn = setup_harn(**kw)
    """
    import ndsampler
    from ndsampler import coerce_data
    # Seed other global rngs just in case something uses them under the hood
    kwarray.seed_global(1129989262, offset=1797315558)

    config = DetectFitConfig(default=kw, cmdline=cmdline)

    nh.configure_hacks(config)  # fix opencv bugs
    ub.ensuredir(config['workdir'])

    # Load ndsampler.CocoDataset objects from info in the config
    subsets = coerce_data.coerce_datasets(config)

    samplers = {}
    for tag, subset in subsets.items():
        print('subset = {!r}'.format(subset))
        sampler = ndsampler.CocoSampler(subset, workdir=config['workdir'])
        samplers[tag] = sampler

    torch_datasets = {
        tag: DetectDataset(
            sampler,
            input_dims=config['input_dims'],
            augment=config['augment'] if (tag == 'train') else False,
        )
        for tag, sampler in samplers.items()
    }

    print('make loaders')
    loaders_ = {
        tag: torch.utils.data.DataLoader(
            dset, batch_size=config['batch_size'],
            num_workers=config['workers'],
            shuffle=(tag == 'train'),
            collate_fn=nh.data.collate.padded_collate,
            pin_memory=True)
        for tag, dset in torch_datasets.items()
    }
    # for x in ub.ProgIter(loaders_['train']):
    #     pass

    if config['normalize_inputs']:
        # Get stats on the dataset (todo: turn off augmentation for this)
        _dset = torch_datasets['train']
        stats_idxs = kwarray.shuffle(
            np.arange(len(_dset)), rng=0)[0:min(1000, len(_dset))]
        stats_subset = torch.utils.data.Subset(_dset, stats_idxs)
        # cfgstr bumped to v3 to invalidate caches computed before the
        # mean/std key-swap fix below
        cacher = ub.Cacher('dset_mean', cfgstr=_dset.input_id + 'v3')
        input_stats = cacher.tryload()
        if input_stats is None:
            # Use parallel workers to load data faster
            loader = torch.utils.data.DataLoader(
                stats_subset,
                collate_fn=nh.data.collate.padded_collate,
                num_workers=config['workers'],
                shuffle=True, batch_size=config['batch_size'])
            # Track moving average
            running = nh.util.RunningStats()
            for batch in ub.ProgIter(loader, desc='estimate mean/std'):
                try:
                    running.update(batch['im'].numpy())
                except ValueError:  # final batch broadcast error
                    pass
            # BUGFIX: the 'mean' and 'std' keys were previously swapped
            # (std held the mean and vice versa)
            input_stats = {
                'mean': running.simple(axis=None)['mean'].round(3),
                'std': running.simple(axis=None)['std'].round(3),
            }
            cacher.save(input_stats)
    else:
        input_stats = None
    print('input_stats = {!r}'.format(input_stats))

    initializer_ = nh.Initializer.coerce(config, leftover='kaiming_normal')
    print('initializer_ = {!r}'.format(initializer_))

    arch = config['arch']
    if arch == 'yolo2':
        if False:
            dset = samplers['train'].dset
            print('dset = {!r}'.format(dset))
            # anchors = yolo2.find_anchors(dset)

        # Standard darknet VOC anchors (in grid-cell units)
        anchors = np.array([(1.3221, 1.73145), (3.19275, 4.00944),
                            (5.05587, 8.09892), (9.47112, 4.84053),
                            (11.2364, 10.0071)])

        classes = samplers['train'].classes
        model_ = (yolo2.Yolo2, {
            'classes': classes,
            'anchors': anchors,
            'conf_thresh': 0.001,
            'nms_thresh': 0.5 if not ub.argflag('--eav') else 0.4
        })
        model = model_[0](**model_[1])
        model._initkw = model_[1]

        criterion_ = (yolo2.YoloLoss, {
            'coder': model.coder,
            'seen': 0,
            'coord_scale': 1.0,
            'noobject_scale': 1.0,
            'object_scale': 5.0,
            'class_scale': 1.0,
            'thresh': 0.6,  # iou_thresh
            # 'seen_thresh': 12800,
        })
    else:
        raise KeyError(arch)

    scheduler_ = nh.Scheduler.coerce(config)
    print('scheduler_ = {!r}'.format(scheduler_))

    optimizer_ = nh.Optimizer.coerce(config)
    print('optimizer_ = {!r}'.format(optimizer_))

    dynamics_ = nh.Dynamics.coerce(config)
    print('dynamics_ = {!r}'.format(dynamics_))

    xpu = nh.XPU.coerce(config['xpu'])
    print('xpu = {!r}'.format(xpu))

    import sys

    hyper = nh.HyperParams(**{
        'nice': config['nice'],
        'workdir': config['workdir'],

        'datasets': torch_datasets,
        'loaders': loaders_,

        'xpu': xpu,

        'model': model,

        'criterion': criterion_,

        'initializer': initializer_,

        'optimizer': optimizer_,
        'dynamics': dynamics_,

        # 'optimizer': (torch.optim.SGD, {
        #     'lr': lr_step_points[0],
        #     'momentum': 0.9,
        #     'dampening': 0,
        #     # multiplying by batch size was one of those unpublished details
        #     'weight_decay': decay * simulated_bsize,
        # }),

        'scheduler': scheduler_,

        'monitor': (nh.Monitor, {
            'minimize': ['loss'],
            # 'maximize': ['mAP'],
            'patience': config['patience'],
            'max_epoch': config['max_epoch'],
            'smoothing': .6,
        }),

        'other': {
            # Other params are not used internally, so you are free to set any
            # extra params specific to your algorithm, and still have them
            # logged in the hyperparam structure. For YOLO this is `ovthresh`.
            'batch_size': config['batch_size'],
            'nice': config['nice'],
            'ovthresh': config['ovthresh'],  # used in mAP computation
        },
        'extra': {
            'config': ub.repr2(config.asdict()),
            'argv': sys.argv,
        }
    })
    print('hyper = {!r}'.format(hyper))
    print('make harn')
    harn = DetectHarn(hyper=hyper)
    harn.preferences.update({
        'num_keep': 2,
        'keep_freq': 30,
        'export_modules': ['netharn'],  # TODO
        'prog_backend': 'progiter',  # alternative: 'tqdm'
        'keyboard_debug': True,
    })
    harn.intervals.update({
        'log_iter_train': 50,
    })
    harn.fit_config = config
    print('harn = {!r}'.format(harn))
    print('samplers = {!r}'.format(samplers))
    return harn
def setup_harness(**kwargs):
    """
    Build the matching harness (MatchingHarness) from keyword configuration.

    CommandLine:
        python ~/code/netharn/netharn/examples/ggr_matching.py setup_harness

    Example:
        >>> harn = setup_harness(dbname='PZ_MTEST')
        >>> harn.initialize()
    """
    # Pull hyperparameters out of kwargs with defaults
    nice = kwargs.get('nice', 'untitled')
    batch_size = int(kwargs.get('batch_size', 6))
    bstep = int(kwargs.get('bstep', 1))
    workers = int(kwargs.get('workers', 0))
    decay = float(kwargs.get('decay', 0.0005))
    lr = float(kwargs.get('lr', 0.001))
    dim = int(kwargs.get('dim', 416))
    xpu = kwargs.get('xpu', 'argv')
    workdir = kwargs.get('workdir', None)
    dbname = kwargs.get('dbname', 'ggr2')

    if workdir is None:
        workdir = ub.truepath(os.path.join('~/work/siam-ibeis2', dbname))
    ub.ensuredir(workdir)

    if dbname == 'ggr2':
        print('Creating torch CocoDataset')
        # NOTE(review): hard-coded absolute paths; these only resolve on the
        # original author's machine.
        train_dset = ndsampler.CocoDataset(
            data='/media/joncrall/raid/data/ggr2-coco/annotations/instances_train2018.json',
            img_root='/media/joncrall/raid/data/ggr2-coco/images/train2018',
        )
        train_dset.hashid = 'ggr2-coco-train2018'
        vali_dset = ndsampler.CocoDataset(
            data='/media/joncrall/raid/data/ggr2-coco/annotations/instances_val2018.json',
            img_root='/media/joncrall/raid/data/ggr2-coco/images/val2018',
        )
        vali_dset.hashid = 'ggr2-coco-val2018'

        print('Creating samplers')
        train_sampler = ndsampler.CocoSampler(train_dset, workdir=workdir)
        vali_sampler = ndsampler.CocoSampler(vali_dset, workdir=workdir)

        print('Creating torch Datasets')
        datasets = {
            'train': MatchingCocoDataset(train_sampler, train_dset, workdir,
                                         dim=dim, augment=True),
            'vali': MatchingCocoDataset(vali_sampler, vali_dset, workdir,
                                        dim=dim),
        }
    else:
        # Any other dbname is treated as an ibeis database
        from ibeis_utils import randomized_ibeis_dset
        datasets = randomized_ibeis_dset(dbname, dim=dim)

    for k, v in datasets.items():
        print('* len({}) = {}'.format(k, len(v)))

    if workers > 0:
        # avoid opencv thread contention with the dataloader workers
        import cv2
        cv2.setNumThreads(0)

    loaders = {
        key: torch.utils.data.DataLoader(dset, batch_size=batch_size,
                                         num_workers=workers,
                                         shuffle=(key == 'train'),
                                         pin_memory=True)
        for key, dset in datasets.items()
    }

    xpu = nh.XPU.cast(xpu)

    hyper = nh.HyperParams(**{
        'nice': nice,
        'workdir': workdir,
        'datasets': datasets,
        'loaders': loaders,

        'xpu': xpu,

        'model': (MatchingNetworkLP, {
            'p': 2,
            'input_shape': (1, 3, dim, dim),
        }),

        'criterion': (nh.criterions.ContrastiveLoss, {
            'margin': 4,
            'weight': None,
        }),

        'optimizer': (torch.optim.SGD, {
            'lr': lr,
            'weight_decay': decay,
            'momentum': 0.9,
            'nesterov': True,
        }),

        'initializer': (nh.initializers.NoOp, {}),

        'scheduler': (nh.schedulers.Exponential, {
            'gamma': 0.99,
            'stepsize': 2,
        }),
        # 'scheduler': (nh.schedulers.ListedLR, {
        #     'points': {
        #         1: lr * 1.0,
        #         19: lr * 1.1,
        #         20: lr * 0.1,
        #     },
        #     'interpolate': True
        # }),

        'monitor': (nh.Monitor, {
            'minimize': ['loss', 'pos_dist', 'brier'],
            'maximize': ['accuracy', 'neg_dist', 'mcc'],
            'patience': 40,
            'max_epoch': 40,
        }),

        # 'augment': datasets['train'].augmenter,

        'dynamics': {
            # Controls how many batches to process before taking a step in the
            # gradient direction. Effectively simulates a batch_size that is
            # `bstep` times bigger.
            'batch_step': bstep,
        },

        'other': {
            'n_classes': 2,
        },
    })
    harn = MatchingHarness(hyper=hyper)
    harn.config['prog_backend'] = 'progiter'
    harn.intervals['log_iter_train'] = 1
    harn.intervals['log_iter_test'] = None
    harn.intervals['log_iter_vali'] = None
    return harn
def setup_harn(cmdline=True, **kw):
    """
    Build the CamVid segmentation harness from a standardized config.

    Example:
        >>> # xdoctest: +REQUIRES(--download)
        >>> import sys, ubelt
        >>> sys.path.append(ubelt.expandpath('~/code/netharn/examples'))
        >>> from sseg_camvid import *  # NOQA
        >>> kw = {'workers': 0, 'xpu': 'cpu', 'batch_size': 2}
        >>> cmdline = False
        >>> # Just sets up the harness, does not do any heavy lifting
        >>> harn = setup_harn(cmdline=cmdline, **kw)
        >>> #
        >>> harn.initialize()
        >>> #
        >>> batch = harn._demo_batch(tag='train')
        >>> epoch_metrics = harn._demo_epoch(tag='vali', max_iter=4)
    """
    import sys
    import ndsampler

    config = SegmentationConfig(default=kw)
    config.load(cmdline=cmdline)
    nh.configure_hacks(config)  # fix opencv bugs

    # This example is hard-coded to the camvid dataset
    assert config['datasets'] == 'special:camvid'

    coco_datasets = setup_coco_datasets()

    workdir = ub.ensuredir(ub.expandpath(config['workdir']))
    samplers = {
        # tag: ndsampler.CocoSampler(dset, workdir=workdir, backend='cog')
        tag: ndsampler.CocoSampler(dset, workdir=workdir, backend='npy')
        for tag, dset in coco_datasets.items()
    }

    torch_datasets = {
        tag: SegmentationDataset(
            sampler,
            config['input_dims'],
            # augmentation / window overlap only apply to the train split
            input_overlap=((tag == 'train') and config['input_overlap']),
            augment=((tag == 'train') and config['augment']),
        )
        for tag, sampler in samplers.items()
    }

    torch_loaders = {
        tag: torch_data.DataLoader(dset,
                                   batch_size=config['batch_size'],
                                   num_workers=config['workers'],
                                   shuffle=(tag == 'train'),
                                   drop_last=True, pin_memory=True)
        for tag, dset in torch_datasets.items()
    }

    if config['class_weights']:
        mode = config['class_weights']
        dset = torch_datasets['train']
        class_weights = _precompute_class_weights(dset, mode=mode)
        class_weights = torch.FloatTensor(class_weights)
        # background pixels do not contribute to the loss
        class_weights[dset.classes.index('background')] = 0
    else:
        class_weights = None

    initializer_ = nh.Initializer.coerce(config)

    # Select the model architecture; all variants share the same kw layout
    if config['arch'] == 'unet':
        # Note: UNet can get through 256x256 images at a rate of ~17Hz with
        # batch_size=8. This is pretty slow and can likely be improved by
        # fixing some of the weird padding / mirror stuff I have to do in unet
        # to get output_dims = input_dims.
        from netharn.models.unet import UNet
        model_ = (UNet, {
            'classes': torch_datasets['train'].classes,
            'in_channels': 3,
        })
    elif config['arch'] == 'segnet':
        from netharn.models.segnet import Segnet
        model_ = (Segnet, {
            'classes': torch_datasets['train'].classes,
            'in_channels': 3,
        })
    elif config['arch'] == 'psp':
        from netharn.models.psp import PSPNet_Resnet50_8s
        model_ = (PSPNet_Resnet50_8s, {
            'classes': torch_datasets['train'].classes,
            'in_channels': 3,
        })
    elif config['arch'] == 'deeplab':
        from netharn.models.deeplab import DeepLab_ASPP
        model_ = (DeepLab_ASPP, {
            'classes': torch_datasets['train'].classes,
            'in_channels': 3,
        })
    else:
        raise KeyError(config['arch'])

    # Optionally let the model class define its own initializer
    if config['init'] == 'cls':
        initializer_ = model_[0]._initializer_cls()

    # Create hyperparameters
    hyper = nh.HyperParams(
        nice=config['nice'],
        workdir=config['workdir'],
        xpu=nh.XPU.coerce(config['xpu']),
        datasets=torch_datasets,
        loaders=torch_loaders,
        model=model_,
        initializer=initializer_,
        scheduler=nh.Scheduler.coerce(config),
        optimizer=nh.Optimizer.coerce(config),
        dynamics=nh.Dynamics.coerce(config),
        criterion=(nh.criterions.FocalLoss, {
            'focus': config['focus'],
            'weight': class_weights,
            # 'reduction': 'none',
        }),
        monitor=(nh.Monitor, {
            'minimize': ['loss'],
            'patience': config['patience'],
            'max_epoch': config['max_epoch'],
            'smoothing': .6,
        }),
        other={
            'batch_size': config['batch_size'],
        },
        extra={
            'argv': sys.argv,
            'config': ub.repr2(config.asdict()),
        }
    )

    # Create harness
    harn = SegmentationHarn(hyper=hyper)
    harn.classes = torch_datasets['train'].classes
    harn.preferences.update({
        'num_keep': 5,
        'keyboard_debug': True,
        # 'export_modules': ['netharn'],
    })
    harn.intervals.update({
        'vali': 1,
        'test': 10,
    })
    harn.script_config = config
    return harn
def setup_harn(cmdline=True, **kw):
    """
    Build the segmentation harness from a standardized config.

    Args:
        cmdline (bool, default=True): if True, read overrides from sys.argv.

    Kwargs:
        **kw: overrides for the :class:`SegmentationConfig` defaults.

    Returns:
        SegmentationHarn: fully-defined, uninitialized harness.

    CommandLine:
        xdoctest -m netharn.examples.segmentation setup_harn

    Example:
        >>> # xdoctest: +REQUIRES(--slow)
        >>> kw = {'workers': 0, 'xpu': 'cpu', 'batch_size': 2}
        >>> cmdline = False
        >>> # Just sets up the harness, does not do any heavy lifting
        >>> harn = setup_harn(cmdline=cmdline, **kw)
        >>> #
        >>> harn.initialize()
        >>> #
        >>> batch = harn._demo_batch(tag='train')
        >>> epoch_metrics = harn._demo_epoch(tag='vali', max_iter=2)
    """
    import sys
    import ndsampler
    import kwarray
    # kwarray.seed_global(2108744082)

    config = SegmentationConfig(default=kw)
    config.load(cmdline=cmdline)
    nh.configure_hacks(config)  # fix opencv bugs

    coco_datasets = nh.api.Datasets.coerce(config)
    print('coco_datasets = {}'.format(ub.repr2(coco_datasets)))
    for tag, dset in coco_datasets.items():
        dset._build_hashid(hash_pixels=False)

    workdir = ub.ensuredir(ub.expandpath(config['workdir']))
    samplers = {
        tag: ndsampler.CocoSampler(
            dset, workdir=workdir, backend=config['backend'])
        for tag, dset in coco_datasets.items()
    }

    for tag, sampler in ub.ProgIter(list(samplers.items()),
                                    desc='prepare frames'):
        try:
            sampler.frames.prepare(workers=config['workers'])
        except AttributeError:
            # older samplers may not expose frames.prepare
            pass

    torch_datasets = {
        tag: SegmentationDataset(
            sampler,
            config['input_dims'],
            # window overlap / augmentation only apply to the train split
            input_overlap=((tag == 'train') and config['input_overlap']),
            augmenter=((tag == 'train') and config['augmenter']),
        )
        for tag, sampler in samplers.items()
    }

    torch_loaders = {
        tag: torch_data.DataLoader(dset,
                                   batch_size=config['batch_size'],
                                   num_workers=config['workers'],
                                   shuffle=(tag == 'train'),
                                   drop_last=True, pin_memory=True)
        for tag, dset in torch_datasets.items()
    }

    if config['class_weights']:
        mode = config['class_weights']
        dset = torch_datasets['train']
        class_weights = _precompute_class_weights(dset, mode=mode,
                                                  workers=config['workers'])
        class_weights = torch.FloatTensor(class_weights)
        # background pixels do not contribute to the loss
        class_weights[dset.classes.index('background')] = 0
    else:
        class_weights = None

    if config['normalize_inputs']:
        # Estimate per-dataset mean/std on a random subsample of the train set
        stats_dset = torch_datasets['train']
        stats_idxs = kwarray.shuffle(
            np.arange(len(stats_dset)), rng=0)[0:min(1000, len(stats_dset))]
        stats_subset = torch.utils.data.Subset(stats_dset, stats_idxs)
        # cfgstr bumped to v4 to invalidate caches computed before the
        # mean/std key-swap fix below
        cacher = ub.Cacher('dset_mean', cfgstr=stats_dset.input_id + 'v4')
        input_stats = cacher.tryload()
        if input_stats is None:
            loader = torch.utils.data.DataLoader(
                stats_subset,
                num_workers=config['workers'],
                shuffle=True, batch_size=config['batch_size'])
            running = nh.util.RunningStats()
            for batch in ub.ProgIter(loader, desc='estimate mean/std'):
                try:
                    running.update(batch['im'].numpy())
                except ValueError:  # final batch broadcast error
                    pass
            # BUGFIX: the 'mean' and 'std' keys were previously swapped
            # (std held the mean and vice versa)
            input_stats = {
                'mean': running.simple(axis=None)['mean'].round(3),
                'std': running.simple(axis=None)['std'].round(3),
            }
            cacher.save(input_stats)
    else:
        input_stats = {}
    print('input_stats = {!r}'.format(input_stats))

    # TODO: infer number of channels
    model_ = (SegmentationModel, {
        'arch': config['arch'],
        'input_stats': input_stats,
        'classes': torch_datasets['train'].classes.__json__(),
        'in_channels': 3,
    })

    initializer_ = nh.Initializer.coerce(config)
    # if config['init'] == 'cls':
    #     initializer_ = model_[0]._initializer_cls()

    # Create hyperparameters
    hyper = nh.HyperParams(
        nice=config['nice'],
        workdir=config['workdir'],
        xpu=nh.XPU.coerce(config['xpu']),
        datasets=torch_datasets,
        loaders=torch_loaders,
        model=model_,
        initializer=initializer_,
        scheduler=nh.Scheduler.coerce(config),
        optimizer=nh.Optimizer.coerce(config),
        dynamics=nh.Dynamics.coerce(config),
        criterion=(nh.criterions.FocalLoss, {
            'focus': config['focus'],
            'weight': class_weights,
            # 'reduction': 'none',
        }),
        monitor=(nh.Monitor, {
            'minimize': ['loss'],
            'patience': config['patience'],
            'max_epoch': config['max_epoch'],
            'smoothing': .6,
        }),
        other={
            'batch_size': config['batch_size'],
        },
        extra={
            'argv': sys.argv,
            'config': ub.repr2(config.asdict()),
        })

    # Create harness
    harn = SegmentationHarn(hyper=hyper)
    harn.classes = torch_datasets['train'].classes
    harn.preferences.update({
        'num_keep': 2,
        'keyboard_debug': True,
        # 'export_modules': ['netharn'],
    })
    harn.intervals.update({
        'vali': 1,
        'test': 10,
    })
    harn.script_config = config
    return harn
def setup_harn(cmdline=True, **kw):
    """
    This creates the "The Classification Harness" (i.e. core ClfHarn object).
    This is where we programmatically connect our program arguments with the
    netharn HyperParameter standards. We are using :module:`scriptconfig` to
    capture these, but you could use click / argparse / etc.

    This function has the responsibility of creating our torch datasets,
    lazy computing input statistics, specifying our model architecture,
    schedule, initialization, optimizer, dynamics, XPU etc. These can usually
    be coerced using netharn API helpers and a "standardized" config dict. See
    the function code for details.

    Args:
        cmdline (bool, default=True):
            if True, behavior will be modified based on ``sys.argv``.
            Note this will activate the scriptconfig ``--help``, ``--dump``
            and ``--config`` interactions.

    Kwargs:
        **kw: the overrides the default config for :class:`ClfConfig`.
            Note, command line flags have precedence if cmdline=True.

    Returns:
        ClfHarn: a fully-defined, but uninitialized custom :class:`FitHarn`
            object.

    Example:
        >>> # xdoctest: +SKIP
        >>> kw = {'datasets': 'special:shapes256'}
        >>> cmdline = False
        >>> harn = setup_harn(cmdline, **kw)
        >>> harn.initialize()
    """
    import ndsampler
    config = ClfConfig(default=kw)
    config.load(cmdline=cmdline)
    print('config = {}'.format(ub.repr2(config.asdict())))

    nh.configure_hacks(config)
    coco_datasets = nh.api.Datasets.coerce(config)
    print('coco_datasets = {}'.format(ub.repr2(coco_datasets, nl=1)))
    for tag, dset in coco_datasets.items():
        dset._build_hashid(hash_pixels=False)

    workdir = ub.ensuredir(ub.expandpath(config['workdir']))
    samplers = {
        tag: ndsampler.CocoSampler(
            dset, workdir=workdir, backend=config['sampler_backend'])
        for tag, dset in coco_datasets.items()
    }

    for tag, sampler in ub.ProgIter(list(samplers.items()),
                                    desc='prepare frames'):
        sampler.frames.prepare(workers=config['workers'])

    torch_datasets = {
        'train': ClfDataset(
            samplers['train'],
            input_dims=config['input_dims'],
            augmenter=config['augmenter'],
        ),
        'vali': ClfDataset(
            samplers['vali'],
            input_dims=config['input_dims'],
            augmenter=False),
    }

    # BUGFIX: `channels` is needed unconditionally for `modelkw` below, but
    # was previously only defined inside the normalize_inputs branch, which
    # raised a NameError when normalize_inputs was disabled.
    channels = ChannelSpec.coerce(config['channels'])

    if config['normalize_inputs']:
        # Get stats on the dataset (todo: turn off augmentation for this)
        _dset = torch_datasets['train']
        stats_idxs = kwarray.shuffle(
            np.arange(len(_dset)), rng=0)[0:min(1000, len(_dset))]
        stats_subset = torch.utils.data.Subset(_dset, stats_idxs)

        # cfgstr bumped to v4 to invalidate caches computed before the
        # mean/std key-swap fix below
        cacher = ub.Cacher('dset_mean', cfgstr=_dset.input_id + 'v4')
        input_stats = cacher.tryload()

        if input_stats is None:
            # Use parallel workers to load data faster
            from netharn.data.data_containers import container_collate
            from functools import partial
            collate_fn = partial(container_collate, num_devices=1)

            loader = torch.utils.data.DataLoader(
                stats_subset,
                collate_fn=collate_fn,
                num_workers=config['workers'],
                shuffle=True,
                batch_size=config['batch_size'])

            # Track moving average of each fused channel stream
            channel_stats = {key: nh.util.RunningStats()
                             for key in channels.keys()}
            assert len(channel_stats) == 1, (
                'only support one fused stream for now')
            for batch in ub.ProgIter(loader, desc='estimate mean/std'):
                for key, val in batch['inputs'].items():
                    try:
                        for part in val.numpy():
                            channel_stats[key].update(part)
                    except ValueError:  # final batch broadcast error
                        pass

            perchan_input_stats = {}
            for key, running in channel_stats.items():
                perchan_stats = running.simple(axis=(1, 2))
                # BUGFIX: the 'mean' and 'std' keys were previously swapped
                # (std held the mean and vice versa)
                perchan_input_stats[key] = {
                    'mean': perchan_stats['mean'].round(3),
                    'std': perchan_stats['std'].round(3),
                }

            input_stats = ub.peek(perchan_input_stats.values())
            cacher.save(input_stats)
    else:
        input_stats = {}

    torch_loaders = {
        tag: dset.make_loader(
            batch_size=config['batch_size'],
            num_batches=config['num_batches'],
            num_workers=config['workers'],
            shuffle=(tag == 'train'),
            balance=(config['balance'] if tag == 'train' else None),
            pin_memory=True)
        for tag, dset in torch_datasets.items()
    }

    initializer_ = None
    classes = torch_datasets['train'].classes

    modelkw = {
        'arch': config['arch'],
        'input_stats': input_stats,
        'classes': classes.__json__(),
        'channels': channels,
    }
    model = ClfModel(**modelkw)
    model._initkw = modelkw

    if initializer_ is None:
        initializer_ = nh.Initializer.coerce(config)

    hyper = nh.HyperParams(
        name=config['name'],
        workdir=config['workdir'],
        xpu=nh.XPU.coerce(config['xpu']),
        datasets=torch_datasets,
        loaders=torch_loaders,
        model=model,
        criterion=None,
        optimizer=nh.Optimizer.coerce(config),
        dynamics=nh.Dynamics.coerce(config),
        scheduler=nh.Scheduler.coerce(config),
        initializer=initializer_,
        monitor=(nh.Monitor, {
            'minimize': ['loss'],
            'patience': config['patience'],
            'max_epoch': config['max_epoch'],
            'smoothing': 0.0,
        }),
        other={
            'name': config['name'],
            'batch_size': config['batch_size'],
            'balance': config['balance'],
        },
        extra={
            'argv': sys.argv,
            'config': ub.repr2(config.asdict()),
        })
    harn = ClfHarn(hyper=hyper)
    harn.preferences.update({
        'num_keep': 3,
        'keep_freq': 10,
        'tensorboard_groups': ['loss'],
        'eager_dump_tensorboard': True,
    })
    harn.intervals.update({})
    harn.script_config = config
    return harn
def setup_harn(cmdline=True, **kwargs):
    """
    Build a netharn FitHarn configured for tiny-imagenet classification.

    Args:
        cmdline (bool): if True, allow the command line to override
            the config defaults (passed through to ``config.load``).
        **kwargs: default values for :class:`ImageClfConfig` options.

    Returns:
        ImageClfHarn: an un-run harness with datasets, loaders, model,
            and hyperparameters attached.

    Example:
        >>> # xdoctest: +SKIP
        >>> harn = setup_harn(cmdline=False)
    """
    import sys
    import ndsampler
    config = ImageClfConfig(default=kwargs)
    config.load(cmdline=cmdline)
    nh.configure_hacks(config)  # fix opencv bugs

    # Cache the coco-ified tiny-imagenet download; bump cfgstr to invalidate.
    cacher = ub.Cacher('tiny-imagenet', cfgstr='v4', verbose=3)
    data = cacher.tryload()
    if data is None:
        data = grab_tiny_imagenet_as_coco()
        cacher.save(data)
    coco_datasets = data
    dset = coco_datasets['train']
    print('train dset = {!r}'.format(dset))

    workdir = ub.ensuredir(ub.expandpath(config['workdir']))
    samplers = {
        tag: ndsampler.CocoSampler(dset, workdir=workdir, backend='npy')
        for tag, dset in coco_datasets.items()
    }
    torch_datasets = {
        tag: ImagClfDataset(
            sampler, config['input_dims'],
            # only the training split is augmented
            augmenter=((tag == 'train') and config['augmenter']),
        )
        for tag, sampler in samplers.items()
    }
    torch_loaders = {
        tag: torch_data.DataLoader(dset,
                                   batch_size=config['batch_size'],
                                   num_workers=config['workers'],
                                   shuffle=(tag == 'train'),
                                   pin_memory=True)
        for tag, dset in torch_datasets.items()
    }

    import torchvision
    # TODO: netharn should allow for this
    model_ = torchvision.models.resnet50(pretrained=False)

    initializer_ = nh.Initializer.coerce(config)

    hyper = nh.HyperParams(
        nice=config['nice'],
        workdir=config['workdir'],
        xpu=nh.XPU.coerce(config['xpu']),
        datasets=torch_datasets,
        loaders=torch_loaders,
        model=model_,
        initializer=initializer_,
        scheduler=nh.Scheduler.coerce(config),
        optimizer=nh.Optimizer.coerce(config),
        dynamics=nh.Dynamics.coerce(config),
        criterion=(nh.criterions.FocalLoss, {
            # focus=0 makes FocalLoss equivalent to cross-entropy
            'focus': 0.0,
        }),
        monitor=(nh.Monitor, {
            'minimize': ['loss'],
            'patience': config['patience'],
            'max_epoch': config['max_epoch'],
            'smoothing': .6,
        }),
        other={
            'batch_size': config['batch_size'],
        },
        extra={
            'argv': sys.argv,
            'config': ub.repr2(config.asdict()),
        }
    )

    # Create harness
    harn = ImageClfHarn(hyper=hyper)
    harn.classes = torch_datasets['train'].classes
    harn.preferences.update({
        'num_keep': 5,
        'keyboard_debug': True,
    })
    harn.intervals.update({
        'vali': 1,
        'test': 10,
    })
    harn.script_config = config
    return harn
def test_variable_backend():
    """
    Exercise the ndsampler 'cog' frame backend across image variants.

    Generates random test images covering multiple formats (jpg/png/tif),
    channel layouts (rgb/rgba/gray), and value conventions (float01/uint8),
    builds a CocoSampler over them with ``backend='cog'``, then loads each
    image back and inspects the GDAL metadata of the cached file.

    CommandLine:
        xdoctest -m $HOME/code/ndsampler/tests/tests_frame_backends.py test_variable_backend --debug-validate-cog --debug-load-cog
    """
    try:
        import gdal  # NOQA
    except ImportError:
        import pytest
        # The COG backend requires GDAL; skip rather than fail without it
        pytest.skip('cog requires gdal')
    import ndsampler
    import kwcoco
    import ubelt as ub
    # Use a fresh scratch directory so stale cache files cannot interfere
    dpath = ub.ensure_app_cache_dir('ndsampler/tests/test_variable_backend')
    ub.delete(dpath)
    ub.ensuredir(dpath)
    # File names encode channels (rgb/rgba/gray) and value range
    # (01 = floats in [0, 1], 255 = uint8) of each generated image
    fnames = [
        'test_rgb_255.jpg', 'test_gray_255.jpg',

        'test_rgb_255.png', 'test_rgba_255.png', 'test_gray_255.png',

        'test_rgb_01.tif', 'test_rgb_255.tif',
        'test_rgba_01.tif', 'test_rgba_255.tif',
        'test_gray_01.tif', 'test_gray_255.tif',
    ]
    import kwarray
    import kwimage
    fpaths = []
    # Seeded rng so the generated test images are reproducible
    rng = kwarray.ensure_rng(0)
    h, w = 1200, 1055
    for fname in ub.ProgIter(fnames, desc='create test data'):
        # Channel count is chosen from the substring in the file name
        if 'rgb_' in fname:
            data = rng.rand(h, w, 3)
        elif 'rgba_' in fname:
            data = rng.rand(h, w, 4)
        elif 'gray_' in fname:
            data = rng.rand(h, w, 1)
        # '01' variants stay float in [0, 1]; '255' variants become uint8
        if '01' in fname:
            pass
        elif '255' in fname:
            data = (data * 255).astype(np.uint8)
        fpath = join(dpath, fname)
        fpaths.append(fpath)
        kwimage.imwrite(fpath, data)

    # Echo what was actually written to disk (format conversion may alter
    # shape/dtype, e.g. jpg cannot store alpha or floats)
    for fpath in fpaths:
        data = kwimage.imread(fpath)
        print(
            ub.repr2(
                {
                    'fname': basename(fpath),
                    'data.shape': data.shape,
                    'data.dtype': data.dtype,
                },
                nl=0))

    dset = kwcoco.CocoDataset.from_image_paths(fpaths)
    sampler = ndsampler.CocoSampler(dset, backend='cog', workdir=dpath)
    frames = sampler.frames
    # frames.prepare()
    if 1:
        for gid in frames.image_ids:
            print('======== < START IMAGE ID > ===============')
            # _gnames returns the original path and the cached (cog) path
            gpath, cache_gpath = frames._gnames(gid)
            print('gpath = {!r}'.format(gpath))
            print('cache_gpath = {!r}'.format(cache_gpath))
            image = frames.load_image(gid)
            print('image = {!r}'.format(image))
            print('image.shape = {!r}'.format(image.shape))
            print('image.dtype = {!r}'.format(image.dtype))
            # Slice a small window to verify lazy region loading works
            subdata = image[0:8, 0:8]
            print('subdata.dtype = {!r}'.format(subdata.dtype))
            print('subdata.shape = {!r}'.format(subdata.shape))
            # info = ub.cmd('gdalinfo ' + cache_gpath, verbose=0)
            # print('GDAL INFO:')
            # print(ub.indent(info['out']))
            # assert info['ret'] == 0
            # dataset = gdal.OpenEx(cache_gpath)
            # Inspect the cached file's GDAL structure metadata directly
            dataset = gdal.Open(cache_gpath, gdal.GA_ReadOnly)
            md = dataset.GetMetadata('IMAGE_STRUCTURE')
            print('md = {!r}'.format(md))
            # Use dict.get method in case the metadata dict does not have a 'COMPRESSION' key
            compression = md.get('COMPRESSION', 'RAW')
            print('compression = {!r}'.format(compression))
            print('======== < END IMAGE ID > ===============')