def history(self):
    """
    Return the training metadata associated with this model, if available.

    If ``self.info`` is already populated it is returned directly.
    Otherwise this searches for a ``train_info.json`` next to (or one
    directory above) ``self.fpath`` and returns its parsed contents.

    Returns:
        dict | str: the parsed train_info.json, or the sentinel string
            ``'__UNKNOWN__'`` when no info file can be found.
    """
    if self.info is not None:
        # Metadata was already loaded; no filesystem search needed.
        return self.info

    # Lazy import: only needed when we actually have to read from disk.
    import netharn as nh

    # TODO: check for train_info.json in a few different places
    snap_fpath = ub.truepath(self.fpath)
    candidate_paths = [
        join(dirname(snap_fpath), 'train_info.json'),
        join(dirname(dirname(snap_fpath)), 'train_info.json'),
    ]
    info = None
    for info_fpath in candidate_paths:
        info_fpath = normpath(info_fpath)
        try:
            # Info might be inside of a zipfile
            info = nh.util.read_json(nh.util.zopen(info_fpath))
            break
        except Exception:
            # Best effort: silently try the next candidate location.
            pass
    if info is None:
        info = '__UNKNOWN__'
    return info
def script_workdir():
    """
    Return (creating if needed) the working directory for this script.

    The default location depends on the module-level ``DEBUG`` flag and can
    be overridden on the command line via ``--workdir``.
    """
    default_dir = '~/data/work_phase2_debug' if DEBUG else '~/data/work_phase2'
    workdir = ub.ensuredir(ub.truepath(default_dir))
    workdir = ub.argval('--workdir', default=workdir)
    return workdir
def load_coco_datasets(): import wrangle # annot_globstr = ub.truepath('~/data/viame-challenge-2018/phase0-annotations/*.json') # annot_globstr = ub.truepath('~/data/viame-challenge-2018/phase0-annotations/mbari_seq0.mscoco.json') # img_root = ub.truepath('~/data/viame-challenge-2018/phase0-imagery') # Contest training data on hermes annot_globstr = ub.truepath( '~/data/noaa/training_data/annotations/*/*-coarse-bbox-only*.json') img_root = ub.truepath('~/data/noaa/training_data/imagery/') fpaths = sorted(glob.glob(annot_globstr)) # Remove keypoints annotation data (hack) fpaths = [p for p in fpaths if not ('nwfsc' in p or 'afsc' in p)] cacher = ub.Cacher('coco_dsets', cfgstr=ub.hash_data(fpaths), appname='viame') coco_dsets = cacher.tryload() if coco_dsets is None: print('Reading raw mscoco files') import os dsets = [] for fpath in sorted(fpaths): print('reading fpath = {!r}'.format(fpath)) dset = coco_api.CocoDataset(fpath, tag='', img_root=img_root) try: assert not dset.missing_images() except AssertionError: print('fixing image file names') hack = os.path.basename(fpath).split('-')[0].split('.')[0] dset = coco_api.CocoDataset(fpath, tag=hack, img_root=join(img_root, hack)) assert not dset.missing_images(), ub.repr2( dset.missing_images()) + 'MISSING' print(ub.repr2(dset.basic_stats())) dsets.append(dset) print('Merging') merged = coco_api.CocoDataset.union(*dsets) merged.img_root = img_root # HACK: wont need to do this for the released challenge data # probably wont hurt though # if not REAL_RUN: # merged._remove_keypoint_annotations() # merged._run_fixes() train_dset, vali_dset = wrangle.make_train_vali(merged) coco_dsets = { 'train': train_dset, 'vali': vali_dset, } cacher.save(coco_dsets) return coco_dsets
def compare_loss():
    """
    Compare the lightnet reference YOLO loss against the netharn port.

    Loads the same pretrained weights into both implementations, runs one
    image through each network, and prints the resulting loss values side
    by side for manual inspection.
    """
    harn = setup_harness(bsize=2)
    harn.hyper.xpu = nh.XPU(0)
    harn.initialize()

    weights_fpath = ub.truepath(
        '~/code/lightnet/examples/yolo-voc/backup/weights_30000.pt')
    state_dict = harn.xpu.load(weights_fpath)['weights']
    harn.model.module.load_state_dict(state_dict)

    ln_test = ub.import_module_from_path(
        ub.truepath('~/code/lightnet/examples/yolo-voc/test.py'))
    TESTFILE = ub.truepath('~/code/lightnet/examples/yolo-voc/data/test.pkl')

    import lightnet as ln
    net = ln.models.Yolo(ln_test.CLASSES, weights_fpath,
                         ln_test.CONF_THRESH, ln_test.NMS_THRESH)
    net = harn.xpu.move(net)

    import os
    # The lightnet test dataset uses paths relative to its example dir
    os.chdir(ub.truepath('~/code/lightnet/examples/yolo-voc/'))
    ln_dset = ln_test.CustomDataset(TESTFILE, net)

    ln_img, ln_label = ln_dset[0]
    my_img, my_label = harn.datasets['test'][0]
    my_targets = my_label[0][None, :]
    ln_targets = [ln_label]

    # Test model forward is the same for my image
    ln_outputs = net._forward(harn.xpu.move(my_img[None, :]))
    my_outputs = harn.model(harn.xpu.move(my_img[None, :]))

    seen = net.loss.seen = 99999999
    # NOTE(review): both losses below are evaluated on ln_outputs, and
    # my_outputs is never used afterwards -- confirm whether my_loss was
    # intended to use my_outputs instead.
    ln_loss = net.loss(ln_outputs, my_targets)
    my_loss = harn.criterion(ln_outputs, my_targets, seen=seen)
    print('my_loss = {!r}'.format(my_loss))
    print('ln_loss = {!r}'.format(ln_loss))

    ln_brambox_loss = net.loss(ln_outputs, ln_targets)
    print('ln_brambox_loss = {!r}'.format(ln_brambox_loss))

    inp_size = tuple(my_img.shape[-2:][::-1])

    # Convert the brambox annotations into tensor form for visualization
    ln_tf_target = []
    for anno in ln_targets[0]:
        anno.class_label = anno.class_id
        tf = ln.data.preprocess.BramboxToTensor._tf_anno(anno, inp_size, None)
        ln_tf_target.append(tf)

    ln_boxes = nh.util.Boxes(
        np.array(ln_tf_target)[:, 1:], 'cxywh').scale(inp_size)
    my_boxes = nh.util.Boxes(
        np.array(my_targets[0])[:, 1:], 'cxywh').scale(inp_size)

    nh.util.imshow(ln_img.numpy(), colorspace='rgb', fnum=1)
    nh.util.draw_boxes(ln_boxes, color='blue')
    nh.util.draw_boxes(my_boxes, color='red')
def main():
    """
    CLI entry point for the UrbanMapper3D challenge train/test scripts.

    Dispatches to ``train(...)`` or ``test(...)`` based on the first
    positional argument, honoring both the challenge's positional argument
    spec and keyword-style overrides.
    """
    # Guard against a missing subcommand as well as explicit help flags;
    # previously sys.argv[1] raised IndexError when no args were given.
    if ub.argflag(('--help', '-h')) or len(sys.argv) < 2:
        print(ub.codeblock(
            '''
            Usage:
            python -m clab.live.final train --train_data_path=<path/to/UrbanMapper3D/training>
            python -m clab.live.final test --train_data_path=<path/to/UrbanMapper3D/testing> --test_data_path=<path/to/UrbanMapper3D/testing> --output_file=<outfile>

            Optional Args / Flags:
            --debug --serial --nopin --noprog
            --workdir=<path> --num_workers=<int> --batch_size=<int>
            '''))
        sys.exit(1)

    # Default data locations (remote mirror)
    train_data_path = ub.truepath(
        '~/remote/aretha/data/UrbanMapper3D/training')
    test_data_path = ub.truepath('~/remote/aretha/data/UrbanMapper3D/testing')
    output_file = 'prediction'

    # Conform to positional argument specs from challenge doc
    if sys.argv[1] in ['train', 'test']:
        if len(sys.argv) > 2 and exists(sys.argv[2]):
            train_data_path = sys.argv[2]
        if sys.argv[1] in ['test']:
            if len(sys.argv) > 4 and exists(sys.argv[3]):
                test_data_path = sys.argv[3]
                output_file = sys.argv[4]

    # Keyword-style flags override the positional values
    train_data_path = ub.argval('--train_data_path', default=train_data_path)
    test_data_path = ub.argval('--test_data_path', default=test_data_path)
    output_file = ub.argval('--output_file', default=output_file)

    workdir = script_workdir()

    if sys.argv[1] in ['train', 'test']:
        print('* train_data_path = {!r}'.format(train_data_path))
    if sys.argv[1] in ['test']:
        print('* test_data_path = {!r}'.format(test_data_path))
        print('* output_file = {!r}'.format(output_file))
    print(' * workdir = {!r}'.format(workdir))

    if sys.argv[1] == 'train':
        train(train_data_path)
    if sys.argv[1] == 'test':
        test(train_data_path, test_data_path, output_file)
def test_rel_file_link():
    """
    Test that a relative file symlink resolves back to its target.

    Creates ``dir1/real`` and ``dir2/link`` inside a scratch cache dir,
    creates the symlink using paths relative to the scratch dir, then
    verifies the link resolves to the real file.  On failure, directory
    state is dumped for debugging before re-raising.
    """
    dpath = ub.ensure_app_cache_dir('ubelt', 'test_rel_file_link')
    ub.delete(dpath, verbose=2)
    ub.ensuredir(dpath, verbose=2)
    real_fpath = join(ub.ensuredir((dpath, 'dir1')), 'real')
    link_fpath = join(ub.ensuredir((dpath, 'dir2')), 'link')
    ub.touch(real_fpath)

    # Work from inside the scratch dir so relative paths are meaningful
    orig = os.getcwd()
    try:
        os.chdir(dpath)
        real_path = relpath(real_fpath, dpath)
        link_path = relpath(link_fpath, dpath)
        link = ub.symlink(real_path, link_path)
        import sys
        if sys.platform.startswith('win32') and isfile(link):
            # Note: if windows hard links the file there is no way we can
            # tell that it was a symlink. Just verify it exists.
            from ubelt import _win32_links
            assert _win32_links._win32_is_hardlinked(real_fpath, link_fpath)
        else:
            pointed = ub.util_links._readlink(link)
            resolved = ub.truepath(join(dirname(link), pointed), real=True)
            assert ub.truepath(real_fpath, real=True) == resolved
    except Exception:
        # Dump diagnostic state.  Some locals may be unbound depending on
        # where the failure happened, hence the vars() membership checks.
        util_links._dirstats(dpath)
        util_links._dirstats(join(dpath, 'dir1'))
        util_links._dirstats(join(dpath, 'dir2'))
        print('TEST FAILED: test_rel_link')
        print('real_fpath = {!r}'.format(real_fpath))
        print('link_fpath = {!r}'.format(link_fpath))
        print('real_path = {!r}'.format(real_path))
        print('link_path = {!r}'.format(link_path))
        try:
            if 'link' in vars():
                print('link = {!r}'.format(link))
            if 'pointed' in vars():
                print('pointed = {!r}'.format(pointed))
            if 'resolved' in vars():
                print('resolved = {!r}'.format(resolved))
        except Exception:
            print('...rest of the names are not available')
        raise
    finally:
        util_links._dirstats(dpath)
        util_links._dirstats(join(dpath, 'dir1'))
        util_links._dirstats(join(dpath, 'dir2'))
        # Always restore the original working directory
        os.chdir(orig)
def parse_fish_data():
    """
    Stub for exploring the phase0 annotation json files.

    Currently only locates the annotation directory; the per-file work is
    commented out and the loop exits after the first file.
    """
    annot_dir = ub.truepath('~/data/viame-challenge-2018/phase0-annotations')
    assert exists(annot_dir)
    for fpath in glob.glob(join(annot_dir, '*.json')):
        # ub.cmd('sed -i "s/roi_category/category_id/g" {}'.format(fpath))
        # self = coco.COCO(fpath)
        break
def regenerate_phase1_flavors():
    """
    Assumes original data is in a good format

    Reads each phase1 ``original_*.json`` annotation file, validates its
    integrity, and regenerates the derived dataset flavors next to it.
    """
    cfg = viame_wrangler.config.WrangleConfig({
        'annots': ub.truepath(
            '~/data/viame-challenge-2018/phase1-annotations/*/original_*.json')
    })
    annots = cfg.annots
    fpaths = list(glob.glob(annots))
    print('Reading raw mscoco files')
    for fpath in fpaths:
        print('reading fpath = {!r}'.format(fpath))
        # Dataset tag, e.g. 'original_foo.mscoco.json' -> 'foo'
        dset_name = os.path.basename(fpath).replace('original_',
                                                    '').split('.')[0]
        orig_dset = CocoDataset(fpath, img_root=cfg.img_root, tag=dset_name)
        dpath = os.path.dirname(fpath)
        # Integrity checks before generating any flavors
        assert not orig_dset.missing_images()
        assert not orig_dset._find_bad_annotations()
        assert all([
            img['has_annots'] in [True, False, None]
            for img in orig_dset.imgs.values()
        ])
        print(ub.dict_hist([g['has_annots']
                            for g in orig_dset.imgs.values()]))
        make_dataset_flavors(orig_dset, dpath, dset_name)
def download_phase1_annots():
    """
    Download and unpack the phase1 annotation tarball from girder.

    References:
        http://www.viametoolkit.org/cvpr-2018-workshop-data-challenge/challenge-data-description/
        https://challenge.kitware.com/api/v1/item/5ac385f056357d4ff856e183/download
        https://challenge.kitware.com/girder#item/5ac385f056357d4ff856e183

    CommandLine:
        python ~/code/baseline-viame-2018/viame_wrangler/config.py download_phase0_annots --datadir=~/data
    """
    cfg = Config({'datadir': '~/data/viame-challenge-2018'})
    dpath = ub.truepath(cfg.datadir)
    fname = 'phase1-annotations.tar.gz'
    # Girder item id of the archive (renamed from `hash`, which shadowed
    # the builtin of the same name).
    item_id = '5ac385f056357d4ff856e183'
    url = 'https://challenge.kitware.com/api/v1'
    # FIXME: broken
    dest = _grabdata_girder(dpath, fname, item_id, url, force=False)

    # Extract the archive only if it has not been unpacked yet
    unpacked = join(dpath, fname.split('.')[0])
    if not os.path.exists(unpacked):
        info = ub.cmd('tar -xvzf "{}" -C "{}"'.format(dest, dpath),
                      verbose=2, verbout=1)
        assert info['ret'] == 0
def get_bibtex_dict():
    """
    Parse the bibtex library file and return its entries keyed by citation id.

    Raises:
        Exception: if no candidate bibtex file exists on disk.
    """
    import ubelt as ub
    # HACK: custom current bibtex file
    candidate_fpaths = [
        ub.truepath('./My_Library_clean.bib'),
        # ub.truepath('~/latex/crall-thesis-2017/My_Library_clean.bib'),
    ]
    bib_fpath = next((c for c in candidate_fpaths if exists(c)), None)
    if bib_fpath is None:
        raise Exception('cant find bibtex file')

    # import bibtexparser
    from bibtexparser import bparser
    parser = bparser.BibTexParser()
    parser.ignore_nonstandard_types = True
    bibtex_db = parser.parse(ub.readfrom(bib_fpath))
    return bibtex_db.get_entry_dict()
def ensure_voc_data(VOCDataset, dpath=None, force=False, years=(2007, 2012)):
    """
    Download the Pascal VOC 2007 data if it does not already exist.

    Args:
        VOCDataset: the dataset class this is bound to (not otherwise used)
        dpath (str, optional): data root directory (default ``~/data/VOC``)
        force (bool): re-extract archives even if the sentinel files exist
        years (Sequence[int]): which VOC releases to fetch (2007 and/or 2012)

    Returns:
        str: path to the VOCdevkit directory

    CommandLine:
        python -m netharn.data.voc VOCDataset.ensure_voc_data

    Example:
        >>> # SCRIPT
        >>> # xdoc: +REQUIRES(--voc)
        >>> from netharn.data.voc import *  # NOQA
        >>> VOCDataset.ensure_voc_data()
    """
    # NOTE: default changed from a mutable list [2007, 2012] to an
    # equivalent tuple; mutable defaults are shared across calls.
    if dpath is None:
        dpath = ub.truepath('~/data/VOC')
    devkit_dpath = join(dpath, 'VOCdevkit')
    # if force or not exists(devkit_dpath):
    ub.ensuredir(dpath)

    fpath1 = ub.grabdata(
        'http://host.robots.ox.ac.uk/pascal/VOC/voc2007/VOCdevkit_08-Jun-2007.tar',
        dpath=dpath)
    if force or not exists(join(dpath, 'VOCdevkit', 'VOCcode')):
        ub.cmd('tar xvf "{}" -C "{}"'.format(fpath1, dpath), verbout=1)

    if 2007 in years:
        # VOC 2007 train+validation data
        fpath2 = ub.grabdata(
            'http://host.robots.ox.ac.uk/pascal/VOC/voc2007/VOCtrainval_06-Nov-2007.tar',
            dpath=dpath)
        if force or not exists(join(dpath, 'VOCdevkit', 'VOC2007',
                                    'ImageSets', 'Main',
                                    'bird_trainval.txt')):
            ub.cmd('tar xvf "{}" -C "{}"'.format(fpath2, dpath), verbout=1)

        # VOC 2007 test data
        fpath3 = ub.grabdata(
            'http://host.robots.ox.ac.uk/pascal/VOC/voc2007/VOCtest_06-Nov-2007.tar',
            dpath=dpath)
        if force or not exists(join(dpath, 'VOCdevkit', 'VOC2007',
                                    'ImageSets', 'Main', 'bird_test.txt')):
            ub.cmd('tar xvf "{}" -C "{}"'.format(fpath3, dpath), verbout=1)

    if 2012 in years:
        # VOC 2012 train+validation data
        fpath4 = ub.grabdata(
            'https://pjreddie.com/media/files/VOCtrainval_11-May-2012.tar',
            dpath=dpath)
        if force or not exists(join(dpath, 'VOCdevkit', 'VOC2012',
                                    'ImageSets', 'Main',
                                    'bird_trainval.txt')):
            ub.cmd('tar xvf "{}" -C "{}"'.format(fpath4, dpath), verbout=1)
    return devkit_dpath
def _nh_data_nh_map(harn, num=10):
    """
    Evaluate mAP of a netharn YOLO model over up to `num` test batches.

    Args:
        harn: netharn harness with a trained model and a 'test' loader
        num (int, optional): stop after this many batches (None = full set)

    Returns:
        tuple: (ap, aps) -- overall multiclass AP and a per-class AP table
    """
    with torch.no_grad():
        postprocess = harn.model.module.postprocess
        # postprocess.conf_thresh = 0.001
        # postprocess.nms_thresh = 0.5
        batch_confusions = []
        moving_ave = nh.util.util_averages.CumMovingAve()
        loader = harn.loaders['test']
        prog = ub.ProgIter(iter(loader), desc='')
        for bx, batch in enumerate(prog):
            inputs, labels = harn.prepare_batch(batch)
            inp_size = np.array(inputs.shape[-2:][::-1])
            outputs = harn.model(inputs)
            # seen is set very large -- presumably to force the criterion's
            # "late training" behavior; TODO confirm against the criterion.
            loss = harn.criterion(outputs, labels['targets'],
                                  gt_weights=labels['gt_weights'],
                                  seen=1000000000)
            # Track running average of the loss components for the progbar
            moving_ave.update(ub.odict([
                ('loss', float(loss.sum())),
                ('coord', harn.criterion.loss_coord),
                ('conf', harn.criterion.loss_conf),
                ('cls', harn.criterion.loss_cls),
            ]))
            average_losses = moving_ave.average()
            desc = ub.repr2(average_losses, nl=0, precision=2, si=True)
            prog.set_description(desc, refresh=False)
            postout = postprocess(outputs)
            # Accumulate per-batch confusion records for AP computation
            for y in harn._measure_confusion(postout, labels, inp_size):
                batch_confusions.append(y)
            # batch_output.append((outputs.cpu().data.numpy().copy(), inp_size))
            # batch_labels.append([x.cpu().data.numpy().copy() for x in labels])
            if num is not None and bx >= num:
                break

        average_losses = moving_ave.average()
        print('average_losses {}'.format(ub.repr2(average_losses)))

        if False:
            # Manual visualization hook for interactive debugging
            from netharn.util import mplutil
            mplutil.qtensure()  # xdoc: +SKIP
            harn.visualize_prediction(batch, outputs, postout, thresh=.1)

        y = pd.concat([pd.DataFrame(c) for c in batch_confusions])
        precision, recall, ap = nh.metrics.detections._multiclass_ap(y)

        # Use the lightnet label list to name the per-class rows
        ln_test = ub.import_module_from_path(
            ub.truepath('~/code/lightnet/examples/yolo-voc/test.py'))
        num_classes = len(ln_test.LABELS)
        cls_labels = list(range(num_classes))
        aps = nh.metrics.ave_precisions(y, cls_labels, use_07_metric=True)
        aps = aps.rename(dict(zip(cls_labels, ln_test.LABELS)), axis=0)
        # return ap
        return ap, aps
def test_rel_dir_link():
    """
    Test that a relative directory symlink resolves back to its target.

    Creates ``dir1/real`` and ``dir2/link`` inside a scratch cache dir,
    creates the symlink using paths relative to the scratch dir, then
    verifies the link resolves to the real directory.  On failure the
    directory state is dumped for debugging before re-raising.
    """
    dpath = ub.ensure_app_cache_dir('ubelt', 'test_rel_dir_link')
    ub.delete(dpath, verbose=2)
    ub.ensuredir(dpath, verbose=2)
    real_dpath = join(ub.ensuredir((dpath, 'dir1')), 'real')
    link_dpath = join(ub.ensuredir((dpath, 'dir2')), 'link')
    ub.ensuredir(real_dpath)

    # Work from inside the scratch dir so relative paths are meaningful
    orig = os.getcwd()
    try:
        os.chdir(dpath)
        real_path = relpath(real_dpath, dpath)
        link_path = relpath(link_dpath, dpath)
        link = ub.symlink(real_path, link_path)
        # Note: on windows this is hacked.
        pointed = ub.util_links._readlink(link)
        resolved = ub.truepath(join(dirname(link), pointed), real=True)
        assert ub.truepath(real_dpath, real=True) == resolved
    except Exception:
        # Dump diagnostic state.  Some locals may be unbound depending on
        # where the failure happened, hence the vars() membership checks.
        util_links._dirstats(dpath)
        util_links._dirstats(join(dpath, 'dir1'))
        util_links._dirstats(join(dpath, 'dir2'))
        print('TEST FAILED: test_rel_link')
        print('real_dpath = {!r}'.format(real_dpath))
        print('link_dpath = {!r}'.format(link_dpath))
        print('real_path = {!r}'.format(real_path))
        print('link_path = {!r}'.format(link_path))
        try:
            if 'link' in vars():
                print('link = {!r}'.format(link))
            if 'pointed' in vars():
                print('pointed = {!r}'.format(pointed))
            if 'resolved' in vars():
                print('resolved = {!r}'.format(resolved))
        except Exception:
            print('...rest of the names are not available')
        raise
    finally:
        util_links._dirstats(dpath)
        util_links._dirstats(join(dpath, 'dir1'))
        util_links._dirstats(join(dpath, 'dir2'))
        # Always restore the original working directory
        os.chdir(orig)
def history(self):
    """
    Return the training metadata for this model if it can be located.

    Looks for a ``train_info.json`` one directory above the snapshot file
    and returns its parsed contents, or the sentinel ``'__UNKNOWN__'``
    when the file is absent.
    """
    # TODO: check for train_info.json in a few different places
    info_fpath = join(dirname(dirname(ub.truepath(self.fpath))),
                      'train_info.json')
    if not exists(info_fpath):
        return '__UNKNOWN__'
    return util.read_json(info_fpath)
def __init__(cfg, kw=None, argv=None):
    """
    Initialize the wrangle config with project defaults, then expand
    every configured path.
    """
    # cfg.phase = ub.argval('--phase', default='1', argv=argv)
    # cfg.data_dir = ub.truepath(ub.argval('--data', default='~/data', argv=argv))
    defaults = {
        'workdir': '~/work/viame-challenge-2018',
        'img_root': '~/data/viame-challenge-2018/phase1-imagery',
        'annots': '~/data/viame-challenge-2018/phase1-annotations/*.json',
    }
    super().__init__(defaults, kw, argv)
    # Expand '~' and normalize each configured path value
    for key in cfg._keys:
        cfg[key] = ub.truepath(cfg[key])
def get_task(taskname, boundary=True, arch=None):
    """
    Construct and prepare a task object by name.

    Args:
        taskname (str): currently only 'urban_mapper_3d' is supported
        boundary (bool): whether to use the boundary-variant workdir
        arch (str, optional): architecture name; only used to select a
            workdir when boundary is True

    Returns:
        UrbanMapper3D: the prepared task

    Raises:
        Exception: if no data root directory can be found
        AssertionError: for an unrecognized taskname
    """
    # the arch param is a hack
    if taskname == 'urban_mapper_3d':
        from clab.tasks.urban_mapper_3d import UrbanMapper3D
        if boundary:
            workdir = '~/data/work/urban_mapper2'
            # Guard against arch=None (the default), which previously
            # crashed on arch.startswith with an AttributeError.
            if arch is not None and arch.startswith('dense_unet'):
                workdir = '~/data/work/urban_mapper4'
        else:
            workdir = '~/data/work/urban_mapper'
        # Prefer the local data root, fall back to the remote mount
        root = ub.truepath('~/data/UrbanMapper3D')
        if not exists(root):
            root = ub.truepath('~/remote/aretha/data/UrbanMapper3D')
        if not exists(root):
            raise Exception('root {} does not exist'.format(root))
        task = UrbanMapper3D(root=root, workdir=workdir, boundary=boundary)
        print(task.classnames)
        task.prepare_fullres_inputs()
        task.preprocess()
    else:
        assert False
    return task
def generate_phase1_data_tables():
    """
    Print summary statistics for each phase1 coarse bbox-keypoint
    annotation file (annotation counts, roi shapes, populated categories).
    """
    cfg = viame_wrangler.config.WrangleConfig({
        'annots': ub.truepath(
            '~/data/viame-challenge-2018/phase1-annotations/*/*coarse*bbox-keypoint*.json'
        )
    })
    all_stats = {}
    annots = cfg.annots
    fpaths = list(glob.glob(annots))
    print('fpaths = {}'.format(ub.repr2(fpaths)))
    for fpath in fpaths:
        dset_name = os.path.basename(fpath).split('-')[0]
        dset = CocoDataset(fpath, img_root=cfg.img_root, tag=dset_name)
        # Integrity checks before computing any statistics
        assert not dset.missing_images()
        assert not dset._find_bad_annotations()
        assert all([
            img['has_annots'] in [True, False, None]
            for img in dset.imgs.values()
        ])
        print(ub.dict_hist([g['has_annots'] for g in dset.imgs.values()]))

        stats = {}
        stats.update(ub.dict_subset(dset.basic_stats(),
                                    ['n_anns', 'n_imgs']))
        # Aggregate annotation counts per roi shape and per populated category
        roi_shapes_hist = dict()
        populated_cats = dict()
        for name, item in dset.category_annotation_type_frequency().items():
            if item:
                populated_cats[name] = sum(item.values())
                for k, v in item.items():
                    roi_shapes_hist[k] = roi_shapes_hist.get(k, 0) + v
        stats['n_cats'] = populated_cats
        stats['n_roi_shapes'] = roi_shapes_hist
        stats['n_imgs_with_annots'] = ub.map_keys(
            {
                None: 'unsure',
                True: 'has_objects',
                False: 'no_objects'
            },
            ub.dict_hist([g['has_annots'] for g in dset.imgs.values()]))
        all_stats[dset_name] = stats

    print(ub.repr2(all_stats, nl=3, sk=1))
def copy_latest_snapshots():
    """
    For each training run directory, copy its most recent snapshot to a
    sibling file whose name embeds the run directory's basename.
    """
    import glob
    import shutil
    train_base = ub.truepath(
        '~/remote/aretha/data/work/urban_mapper/arch/unet/train')

    def update_latest(train_dpath):
        # Duplicate the newest snapshot under a run-tagged filename
        load_path = most_recent_snapshot(train_dpath)
        suffix = load_path.split('/')[-1]
        new_path = join(train_dpath, basename(train_dpath) + suffix)
        print('new_path = {!r}'.format(new_path))
        shutil.copy2(load_path, new_path)

    candidates = glob.glob(train_base + '/input_*/solver_*')
    for train_dpath in candidates:
        if not os.path.isdir(train_dpath):
            continue
        print('train_dpath = {!r}'.format(train_dpath))
        update_latest(train_dpath)
def run_checks():
    """
    Validate every phase1 annotation file for basic consistency.
    """
    cfg = viame_wrangler.config.WrangleConfig({
        'annots': ub.truepath(
            '~/data/viame-challenge-2018/phase1-annotations/*/*.json')
    })
    fpaths = list(glob.glob(cfg.annots))
    print('fpaths = {}'.format(ub.repr2(fpaths)))
    for fpath in fpaths:
        dset_name = os.path.basename(fpath).split('-')[0].split('.')[0]
        dset = CocoDataset(fpath, img_root=cfg.img_root, tag=dset_name)
        # Every image must exist and every annotation must be well-formed
        assert not dset.missing_images()
        assert not dset._find_bad_annotations()
        flags = [img['has_annots'] in [True, False, None]
                 for img in dset.imgs.values()]
        assert all(flags)
        if 'original' not in dset_name:
            # Derived flavors use either the fine (106) or coarse (21)
            # category taxonomy
            assert len(dset.cats) in [106, 21]
def cmake_clean(dpath='.'):
    """
    Move every top-level build artifact (except ``CMakeCache.txt`` and
    previous backups) into a fresh timestamped backup directory.
    """
    dpath = ub.truepath(dpath)
    cmake_cache_fpath = join(dpath, 'CMakeCache.txt')
    assert exists(cmake_cache_fpath)

    # Everything at the top level except the cache file and old backups
    fpath_set = set(glob.glob(join(dpath, '*'))) - {cmake_cache_fpath}
    fpath_set = {
        fpath for fpath in fpath_set
        if not basename(fpath).startswith('_cmake_build_backup_')
    }

    backup_dpath = ub.ensuredir(
        join(dpath, '_cmake_build_backup_' + ub.timestamp()))
    for fpath in ub.ProgIter(fpath_set, 'moving files'):
        shutil.move(fpath, backup_dpath)
def current_gvim_edit(op='e', fpath=''):
    r"""
    Open *fpath* in the currently running gvim instance via an xdotool
    script (``:e``, ``:sp``, etc. depending on *op*).

    CommandLine:
        python -m vimtk.xctrl XCtrl.current_gvim_edit sp ~/.bashrc
    """
    fpath = ub.compressuser(ub.truepath(fpath))
    # print('fpath = %r' % (fpath,))
    cplat.copy_text_to_clipboard(fpath)
    # Build the ex-mode command typed into gvim, e.g. ";e path"
    command_text = ';{} {}'.format(op, fpath)
    doscript = [
        ('focus', 'gvim'),
        ('key', 'Escape'),
        ('type2', command_text),
        # ('type2', ';' + op + ' '),
        # ('key', 'ctrl+v'),
        ('key', 'KP_Enter'),
    ]
    XCtrl.do(*doscript, verbose=0, sleeptime=.001)
def download_phase0_annots():
    """
    Download and unpack the phase0 annotation tarball from girder.

    CommandLine:
        python ~/code/baseline-viame-2018/viame_wrangler/config.py download_phase0_annots
    """
    cfg = Config({'datadir': ub.truepath('~/data/viame-challenge-2018')})
    dpath = cfg.datadir
    fname = 'phase0-annotations.tar.gz'
    # Girder item id of the archive (renamed from `hash`, which shadowed
    # the builtin of the same name).
    item_id = '5a9d839456357d0cb633d0e3'
    url = 'https://challenge.kitware.com/api/v1'
    dest = _grabdata_girder(dpath, fname, item_id, url)

    # Extract the archive only if it has not been unpacked yet
    unpacked = join(dpath, fname.split('.')[0])
    if not os.path.exists(unpacked):
        info = ub.cmd('tar -xvzf "{}" -C "{}"'.format(dest, dpath),
                      verbose=2, verbout=1)
        assert info['ret'] == 0
def main():
    """
    CLI for cleaning up snapshot files produced by netharn.

    Parses the command-line arguments and forwards them to
    ``_devcheck_manage_snapshots``.
    """
    import argparse
    parser = argparse.ArgumentParser(
        prog='manage_snapshots',
        description=ub.codeblock(
            '''
            Cleanup snapshots produced by netharn
            ''')
    )
    parser.add_argument(*('-w', '--workdir'), type=str,
                        help='specify the workdir for your project',
                        default=None)
    parser.add_argument(*('-f', '--force'), help='dry run',
                        action='store_false', dest='dry')
    # parser.add_argument(*('-n', '--dry'), help='dry run',
    #                     action='store_true')
    parser.add_argument(*('--recent',), help='num recent to keep',
                        type=int, default=100)
    parser.add_argument(*('--factor',),
                        help='keep one every <factor> epochs',
                        type=int, default=1)
    args, unknown = parser.parse_known_args()
    ns = args.__dict__.copy()
    print('ns = {!r}'.format(ns))
    # Only expand the workdir when one was given; previously
    # ub.truepath(None) raised a TypeError when --workdir was omitted.
    if ns['workdir'] is not None:
        ns['workdir'] = ub.truepath(ns['workdir'])
    _devcheck_manage_snapshots(**ns)
def ignore():
    """
    Ad-hoc consistency check comparing this repo's ``build_target_item``
    against the original yolo2-pytorch ``_process_batch`` on random demo
    data; raises if any produced array disagrees.
    """
    # inp_size = (96, 96)
    inp_size = (416, 416)
    W = H = inp_size[0] // 32  # output grid size at stride 32
    n_classes = 3
    import ubelt as ub
    for i in ub.ProgIter(range(1000)):
        data1, anchors = demo_npdata(5, W, H, inp_size=inp_size,
                                     C=n_classes, n=1000)
        _tup1 = build_target_item(data1, inp_size, n_classes, anchors,
                                  epoch=1)
        (_boxes1, _ious1, _classes1, _box_mask1, _iou_mask1,
         _class_mask1) = _tup1

        # Configure the reference implementation to match our settings
        orig_darknet = ub.import_module_from_path(
            ub.truepath('~/code/yolo2-pytorch/darknet.py'))
        orig_darknet.cfg.anchors = anchors
        orig_darknet.cfg.multi_scale_inp_size[0][:] = inp_size
        orig_darknet.cfg.multi_scale_out_size[0][:] = [W, H]
        orig_darknet.cfg.num_classes = n_classes

        # The reference takes the same fields in a different order
        pbox, piou, gbox, gcls, gw = data1
        data2 = (pbox, gbox, gcls, gw, piou)
        _tup2 = orig_darknet._process_batch(data2, size_index=0)
        (_boxes2, _ious2, _classes2, _box_mask2, _iou_mask2,
         _class_mask2) = _tup2

        if np.any(_box_mask1 != _box_mask2):
            raise Exception
        flags = ~np.isclose(_iou_mask1, _iou_mask2)
        if np.any(flags):
            print(np.where(flags))
            print(_iou_mask1[flags])
            print(_iou_mask2[flags])
            raise Exception
        flags = ~np.isclose(_class_mask1, _class_mask2)
        if np.any(flags):
            raise Exception
        flags = ~np.isclose(_classes2, _classes1)
        if np.any(flags):
            raise Exception
        flags = ~np.isclose(_ious1, _ious2)
        if np.any(flags):
            raise Exception
        # leftover no-op index expressions from interactive debugging
        _ious1[flags]
        _ious2[flags]
        ba = _boxes1.reshape(-1, 4)
        bb = _boxes2.reshape(-1, 4)
        flags = ~np.isclose(ba, bb)
        if np.any(flags):
            print(ba[(~np.isclose(ba, bb)).sum(axis=-1) > 0])
            print(bb[(~np.isclose(ba, bb)).sum(axis=-1) > 0])
            raise Exception
def cifar_training_datasets(output_colorspace='RGB', norm_mode='independent',
                            cifar_num=10):
    """
    Build train/vali/test CIFAR datasets sharing the train normalizer.

    Args:
        output_colorspace (str): colorspace of the produced images
        norm_mode (str): normalization strategy passed to _make_normalizer
        cifar_num (int): 10 or 100, selecting CIFAR-10 / CIFAR-100

    Returns:
        dict: keys 'train', 'vali', 'test' mapping to CIFAR_Wrapper objects

    Example:
        >>> datasets = cifar_training_datasets()
    """
    inputs, task = cifar_inputs(train=True, cifar_num=cifar_num)

    # split training into train / validation
    # 45K / 5K validation split was used in densenet and resnet papers.
    # https://arxiv.org/pdf/1512.03385.pdf page 7
    # https://arxiv.org/pdf/1608.06993.pdf page 5
    vali_frac = .1  # 10% is 5K images
    n_vali = int(len(inputs) * vali_frac)
    # n_vali = 10000
    # 10K validation as in http://torch.ch/blog/2015/07/30/cifar.html

    # the gt indexes seem to already be scrambled, I think other papers
    # sample validation from the end, so lets do that
    # The NIN paper https://arxiv.org/pdf/1312.4400.pdf in section 4 mentions
    # that it uses the last 10K images for validation
    input_idxs = np.arange(len(inputs))
    # or just uncomment this line for reproducable random sampling
    # input_idxs = util.random_indices(len(inputs), seed=1184576173)
    train_idxs = sorted(input_idxs[:-n_vali])
    vali_idxs = sorted(input_idxs[-n_vali:])

    train_inputs = inputs.take(train_idxs, tag='train')
    vali_inputs = inputs.take(vali_idxs, tag='vali')
    test_inputs, _ = cifar_inputs(train=False, cifar_num=cifar_num)

    # The dataset name and indices should fully specifiy dependencies
    train_inputs._set_id_from_dependency(
        ['cifar{}-train'.format(cifar_num), train_idxs])
    vali_inputs._set_id_from_dependency(
        ['cifar{}-train'.format(cifar_num), vali_idxs])
    test_inputs._set_id_from_dependency(['cifar{}-test'.format(cifar_num)])

    workdir = ub.ensuredir(ub.truepath('~/data/work/cifar'))
    train_dset = CIFAR_Wrapper(train_inputs, task, workdir,
                               output_colorspace=output_colorspace)
    vali_dset = CIFAR_Wrapper(vali_inputs, task, workdir,
                              output_colorspace=output_colorspace)
    test_dset = CIFAR_Wrapper(test_inputs, task, workdir,
                              output_colorspace=output_colorspace)
    print('built datasets')

    datasets = {
        'train': train_dset,
        'vali': vali_dset,
        'test': test_dset,
    }

    print('computing normalizers')
    # Fit the normalizer on train only, then share it with vali/test so no
    # test statistics leak into preprocessing
    datasets['train'].center_inputs = datasets['train']._make_normalizer(
        norm_mode)
    for key in datasets.keys():
        datasets[key].center_inputs = datasets['train'].center_inputs
    print('computed normalizers')

    # Only the training split is augmented
    datasets['train'].augment = True
    return datasets
def setup_harness(**kwargs):
    """
    Build a fully-configured MatchingHarness from keyword options.

    Recognized kwargs (all optional): nice, batch_size, bstep, workers,
    decay, lr, dim, xpu, workdir, dbname.

    Returns:
        MatchingHarness: an uninitialized harness ready for .initialize()

    CommandLine:
        python ~/code/netharn/netharn/examples/ggr_matching.py setup_harness

    Example:
        >>> harn = setup_harness(dbname='PZ_MTEST')
        >>> harn.initialize()
    """
    # Pull options out of kwargs with defaults
    nice = kwargs.get('nice', 'untitled')
    batch_size = int(kwargs.get('batch_size', 6))
    bstep = int(kwargs.get('bstep', 1))
    workers = int(kwargs.get('workers', 0))
    decay = float(kwargs.get('decay', 0.0005))
    lr = float(kwargs.get('lr', 0.001))
    dim = int(kwargs.get('dim', 416))
    xpu = kwargs.get('xpu', 'argv')
    workdir = kwargs.get('workdir', None)
    dbname = kwargs.get('dbname', 'ggr2')

    if workdir is None:
        workdir = ub.truepath(os.path.join('~/work/siam-ibeis2', dbname))
    ub.ensuredir(workdir)

    if dbname == 'ggr2':
        print('Creating torch CocoDataset')
        train_dset = ndsampler.CocoDataset(
            data=
            '/media/joncrall/raid/data/ggr2-coco/annotations/instances_train2018.json',
            img_root='/media/joncrall/raid/data/ggr2-coco/images/train2018',
        )
        train_dset.hashid = 'ggr2-coco-train2018'
        vali_dset = ndsampler.CocoDataset(
            data=
            '/media/joncrall/raid/data/ggr2-coco/annotations/instances_val2018.json',
            img_root='/media/joncrall/raid/data/ggr2-coco/images/val2018',
        )
        vali_dset.hashid = 'ggr2-coco-val2018'

        print('Creating samplers')
        train_sampler = ndsampler.CocoSampler(train_dset, workdir=workdir)
        vali_sampler = ndsampler.CocoSampler(vali_dset, workdir=workdir)

        print('Creating torch Datasets')
        datasets = {
            'train': MatchingCocoDataset(train_sampler, train_dset, workdir,
                                         dim=dim, augment=True),
            'vali': MatchingCocoDataset(vali_sampler, vali_dset, workdir,
                                        dim=dim),
        }
    else:
        from ibeis_utils import randomized_ibeis_dset
        datasets = randomized_ibeis_dset(dbname, dim=dim)

    for k, v in datasets.items():
        print('* len({}) = {}'.format(k, len(v)))

    if workers > 0:
        # Avoid opencv/dataloader thread contention when using workers
        import cv2
        cv2.setNumThreads(0)

    loaders = {
        key: torch.utils.data.DataLoader(dset, batch_size=batch_size,
                                         num_workers=workers,
                                         shuffle=(key == 'train'),
                                         pin_memory=True)
        for key, dset in datasets.items()
    }

    xpu = nh.XPU.cast(xpu)

    hyper = nh.HyperParams(
        **{
            'nice': nice,
            'workdir': workdir,
            'datasets': datasets,
            'loaders': loaders,
            'xpu': xpu,
            'model': (MatchingNetworkLP, {
                'p': 2,
                'input_shape': (1, 3, dim, dim),
            }),
            'criterion': (nh.criterions.ContrastiveLoss, {
                'margin': 4,
                'weight': None,
            }),
            'optimizer': (torch.optim.SGD, {
                'lr': lr,
                'weight_decay': decay,
                'momentum': 0.9,
                'nesterov': True,
            }),
            'initializer': (nh.initializers.NoOp, {}),
            'scheduler': (nh.schedulers.Exponential, {
                'gamma': 0.99,
                'stepsize': 2,
            }),
            # 'scheduler': (nh.schedulers.ListedLR, {
            #     'points': {
            #         1: lr * 1.0,
            #         19: lr * 1.1,
            #         20: lr * 0.1,
            #     },
            #     'interpolate': True
            # }),
            'monitor': (nh.Monitor, {
                'minimize': ['loss', 'pos_dist', 'brier'],
                'maximize': ['accuracy', 'neg_dist', 'mcc'],
                'patience': 40,
                'max_epoch': 40,
            }),
            # 'augment': datasets['train'].augmenter,
            'dynamics': {
                # Controls how many batches to process before taking a step
                # in the gradient direction. Effectively simulates a
                # batch_size that is `bstep` times bigger.
                'batch_step': bstep,
            },
            'other': {
                'n_classes': 2,
            },
        })
    harn = MatchingHarness(hyper=hyper)
    harn.config['prog_backend'] = 'progiter'
    harn.intervals['log_iter_train'] = 1
    harn.intervals['log_iter_test'] = None
    harn.intervals['log_iter_vali'] = None
    return harn
def setup_data():
    """
    Create final MSCOCO training files for the 4 challenge types:
        * fine-grained + bbox-only
        * fine-grained + bbox-keypoints
        * coarse-grained + bbox-only
        * coarse-grained + bbox-keypoints

    Returns:
        tuple: (fine, coarse, fine_bbox, coarse_bbox) CocoDataset objects

    CommandLine:
        python ~/code/baseline-viame-2018/wrangle.py setup_data --data=$HOME/data --work=$HOME/work --phase=0
    """
    # cfg = viame_wrangler.config.WrangleConfig()
    cfg = viame_wrangler.config.WrangleConfig({
        'annots': ub.truepath(
            '~/data/viame-challenge-2018/phase1-annotations/*/original_*.json')
    })
    img_root = cfg.img_root
    annots = cfg.annots
    fpaths = list(glob.glob(annots))

    print('Reading raw mscoco files')
    dsets = []
    for fpath in fpaths:
        print('reading fpath = {!r}'.format(fpath))
        dset = CocoDataset(fpath)
        dsets.append(dset)

    print('Merging')
    merged = CocoDataset.union(*dsets)
    merged.img_root = img_root

    # Set has_annots=True on all images with at least one annotation
    merged._mark_annotated_images()

    def ensure_heirarchy(dset, heirarchy):
        # Add each category from the heirarchy; when the category already
        # exists just update its supercategory to match.
        for cat in heirarchy:
            try:
                dset.add_category(**cat)
            except ValueError:
                realcat = dset.name_to_cat[cat['name']]
                realcat['supercategory'] = cat['supercategory']

    prefix = 'phase{}'.format(cfg.phase)

    def verbose_dump(dset, fpath):
        # Dump the dataset json, printing summary stats along the way
        print('Dumping {}'.format(fpath))
        if False:
            # expensive per-category breakdown, disabled by default
            # NOTE(review): statement grouping reconstructed from a
            # whitespace-mangled source -- confirm which prints belong
            # inside this disabled branch.
            print(ub.repr2(dset.category_annotation_type_frequency(),
                           nl=1, sk=1))
        print(ub.dict_hist([img['has_annots']
                            for img in dset.imgs.values()]))
        print(ub.repr2(dset.basic_stats()))
        dset.dump(fpath)

    ### FINE-GRAIND DSET ###
    fine = merged.copy()
    FineGrainedChallenge = viame_wrangler.mappings.FineGrainedChallenge
    fine.rename_categories(FineGrainedChallenge.raw_to_cat)
    ensure_heirarchy(fine, FineGrainedChallenge.heirarchy)
    verbose_dump(fine,
                 join(cfg.challenge_work_dir,
                      prefix + '-fine-bbox-keypoint.mscoco.json'))

    # remove keypoint annotations
    # Should we remove the images that have keypoints in them?
    fine_bbox = fine.copy()
    fine_bbox._remove_keypoint_annotations()
    verbose_dump(fine_bbox,
                 join(cfg.challenge_work_dir,
                      prefix + '-fine-bbox-only.mscoco.json'))

    ### COARSE DSET ###
    coarse = merged.copy()
    CoarseChallenge = viame_wrangler.mappings.CoarseChallenge
    coarse.rename_categories(CoarseChallenge.raw_to_cat)
    ensure_heirarchy(coarse, CoarseChallenge.heirarchy)
    verbose_dump(coarse,
                 join(cfg.challenge_work_dir,
                      prefix + '-coarse-bbox-keypoint.mscoco.json'))

    # remove keypoint annotations
    coarse_bbox = coarse.copy()
    coarse_bbox._remove_keypoint_annotations()
    verbose_dump(coarse_bbox,
                 join(cfg.challenge_work_dir,
                      prefix + '-coarse-bbox-only.mscoco.json'))
    return fine, coarse, fine_bbox, coarse_bbox
def run_demo():
    """
    Run the interactive graph-identification review demo.

    Builds a small synthetic identification graph, simulates an oracle
    reviewer (with one deliberate early mistake), and optionally renders a
    PNG snapshot of the graph after each review step to ``~/Desktop/demo``
    when ``--viz`` is passed.

    CommandLine:
        python -m graphid.demo.demo_script run_demo --viz
        python -m graphid.demo.demo_script run_demo

    Example:
        >>> run_demo()
    """
    from graphid import demo
    import matplotlib as mpl
    # Temporary rc params used only for the demo figures
    TMP_RC = {
        'axes.titlesize': 12,
        'axes.labelsize': int(ub.argval('--labelsize', default=8)),
        'font.family': 'sans-serif',
        'font.serif': 'CMU Serif',
        'font.sans-serif': 'CMU Sans Serif',
        'font.monospace': 'CMU Typewriter Text',
        'xtick.labelsize': 12,
        'ytick.labelsize': 12,
        # 'legend.alpha': .8,
        'legend.fontsize': 12,
        'legend.facecolor': 'w',
    }
    mpl.rcParams.update(TMP_RC)

    # ---- Synthetic data params
    params = {
        'redun.pos': 2,
        'redun.neg': 2,
    }
    # oracle_accuracy = .98
    # oracle_accuracy = .90
    # oracle_accuracy = (.8, 1.0)
    oracle_accuracy = (.85, 1.0)
    # oracle_accuracy = 1.0

    # --- draw params
    VISUALIZE = ub.argflag('--viz')
    # QUIT_OR_EMEBED = 'embed'
    QUIT_OR_EMEBED = 'quit'

    def asint(p):
        # Cast CLI string to int, passing None through unchanged
        return p if p is None else int(p)

    TARGET_REVIEW = asint(ub.argval('--target', default=None))
    START = asint(ub.argval('--start', default=None))
    END = asint(ub.argval('--end', default=None))

    # ------------------
    # rng = np.random.RandomState(42)
    # infr = demo.demodata_infr(num_pccs=4, size=3, size_std=1, p_incon=0)
    # infr = demo.demodata_infr(num_pccs=6, size=7, size_std=1, p_incon=0)
    # infr = demo.demodata_infr(num_pccs=3, size=5, size_std=.2, p_incon=0)
    infr = demo.demodata_infr(pcc_sizes=[5, 2, 4])
    infr.verbose = 100
    infr.ensure_cliques()
    infr.ensure_full()
    # Dummy scoring
    infr.init_simulation(oracle_accuracy=oracle_accuracy, name='run_demo')
    # infr_gt = infr.copy()

    dpath = ub.ensuredir(ub.truepath('~/Desktop/demo'))
    if 0:
        ub.delete(dpath)
        ub.ensuredir(dpath)

    fig_counter = it.count(0)

    def show_graph(infr, title, final=False, selected_edges=None):
        # Render the current inference graph state to the next numbered PNG.
        # No-op unless --viz was given.
        from matplotlib import pyplot as plt
        if not VISUALIZE:
            return
        # TODO: rich colored text?
        latest = '\n'.join(infr.latest_logs())
        showkw = dict(
            # fontsize=infr.graph.graph['fontsize'],
            # fontname=infr.graph.graph['fontname'],
            show_unreviewed_edges=True,
            show_inferred_same=False,
            show_inferred_diff=False,
            outof=(len(infr.aids)),
            # show_inferred_same=True,
            # show_inferred_diff=True,
            selected_edges=selected_edges,
            show_labels=True,
            simple_labels=True,
            # show_recent_review=not final,
            show_recent_review=False,
            # splines=infr.graph.graph['splines'],
            reposition=False,
            # with_colorbar=True
        )
        # Temporarily silence logging while drawing; restored afterwards.
        verbose = infr.verbose
        infr.verbose = 0
        infr_ = infr.copy()
        infr_ = infr
        infr_.verbose = verbose
        infr_.show(pickable=True, verbose=0, **showkw)
        infr.verbose = verbose
        # print('status ' + ub.repr2(infr_.status()))
        # infr.show(**showkw)
        ax = plt.gca()
        ax.set_title(title, fontsize=20)
        fig = plt.gcf()
        # fontsize = 22
        fontsize = 12
        if True:
            # postprocess xlabel: shrink the font as the log text grows
            lines = []
            for line in latest.split('\n'):
                if False and line.startswith('ORACLE ERROR'):
                    lines += ['ORACLE ERROR']
                else:
                    lines += [line]
            latest = '\n'.join(lines)
            if len(lines) > 10:
                fontsize = 16
            if len(lines) > 12:
                fontsize = 14
            if len(lines) > 14:
                fontsize = 12
            if len(lines) > 18:
                fontsize = 10
            if len(lines) > 23:
                fontsize = 8
        if True:
            # Use the xlabel area to display the latest log messages
            util.mplutil.adjust_subplots(top=.95, left=0, right=1,
                                         bottom=.45, fig=fig)
            ax.set_xlabel('\n' + latest)
            xlabel = ax.get_xaxis().get_label()
            xlabel.set_horizontalalignment('left')
            # xlabel.set_x(.025)
            # xlabel.set_x(-.6)
            xlabel.set_x(-2.0)
            # xlabel.set_fontname('CMU Typewriter Text')
            xlabel.set_fontname('Inconsolata')
            xlabel.set_fontsize(fontsize)
        ax.set_aspect('equal')

        # ax.xaxis.label.set_color('red')

        fpath = join(dpath, 'demo_{:04d}.png'.format(next(fig_counter)))
        fig.savefig(
            fpath, dpi=300,
            # transparent=True,
            edgecolor='none')
        # pt.save_figure(dpath=dpath, dpi=300)
        infr.latest_logs()

    if VISUALIZE:
        infr.update_visual_attrs(groupby='name_label')
        infr.set_node_attrs('pin', 'true')
        node_dict = infr.graph.nodes
        print(ub.repr2(node_dict[1]))

    if VISUALIZE:
        infr.latest_logs()
        # Pin Nodes into the target groundtruth position
        show_graph(infr, 'target-gt')

    print(ub.repr2(infr.status()))
    # Reset the graph to an unreviewed state before the simulation starts
    infr.clear_feedback()
    infr.clear_name_labels()
    infr.clear_edges()
    print(ub.repr2(infr.status()))
    infr.latest_logs()

    if VISUALIZE:
        infr.update_visual_attrs()

    infr.prioritize('prob_match')
    if VISUALIZE or TARGET_REVIEW is None or TARGET_REVIEW == 0:
        show_graph(infr, 'initial state')

    def on_new_candidate_edges(infr, edges):
        # hack updateing visual attrs as a callback
        if VISUALIZE:
            infr.update_visual_attrs()

    infr.on_new_candidate_edges = on_new_candidate_edges
    infr.params.update(**params)
    infr.refresh_candidate_edges()

    VIZ_ALL = (VISUALIZE and TARGET_REVIEW is None and START is None)
    print('VIZ_ALL = %r' % (VIZ_ALL, ))

    if VIZ_ALL or TARGET_REVIEW == 0:
        show_graph(infr, 'find-candidates')

    # _iter2 = enumerate(infr.generate_reviews(**params))
    # _iter2 = list(_iter2)
    # assert len(_iter2) > 0
    # prog = ub.ProgIter(_iter2, label='run_demo', bs=False, adjust=False,
    #                    enabled=False)
    count = 1
    first = 1
    # Main simulated review loop: pop prioritized edges and feed oracle
    # decisions back into the graph until TARGET_REVIEW/END is reached.
    for edge, priority in infr._generate_reviews(data=True):
        msg = 'review #%d, priority=%.3f' % (count, priority)
        print('\n----------')
        infr.print('pop edge {} with priority={:.3f}'.format(edge, priority))
        # print('remaining_reviews = %r' % (infr.remaining_reviews()),)
        # Make the next review

        if START is not None:
            VIZ_ALL = count >= START

        if END is not None and count >= END:
            break

        infr.print(msg)
        if ub.allsame(infr.pos_graph.node_labels(*edge)) and first:
            # Have oracle make a mistake early
            feedback = infr.request_oracle_review(edge, accuracy=0)
            first -= 1
        else:
            feedback = infr.request_oracle_review(edge)

        AT_TARGET = TARGET_REVIEW is not None and count >= TARGET_REVIEW - 1

        SHOW_CANDIATE_POP = True
        if SHOW_CANDIATE_POP and (VIZ_ALL or AT_TARGET):
            infr.print(ub.repr2(infr.task_probs['match_state'][edge],
                                precision=4, si=True))
            infr.print('len(queue) = %r' % (len(infr.queue)))
            # Show edge selection
            infr.print('Oracle will predict: ' + feedback['evidence_decision'])
            show_graph(infr, 'pre' + msg, selected_edges=[edge])

        if count == TARGET_REVIEW:
            infr.EMBEDME = QUIT_OR_EMEBED == 'embed'
        infr.add_feedback(edge, **feedback)
        infr.print('len(queue) = %r' % (len(infr.queue)))
        # infr.apply_nondynamic_update()

        # Show the result
        if VIZ_ALL or AT_TARGET:
            show_graph(infr, msg)
            # import sys
            # sys.exit(1)

        if count == TARGET_REVIEW:
            break
        count += 1

    infr.print('status = ' + ub.repr2(infr.status(extended=False)))
    show_graph(infr, 'post-review (#reviews={})'.format(count), final=True)

    if VISUALIZE:
        if not getattr(infr, 'EMBEDME', False):
            # import plottool as pt
            # util.mplutil.all_figures_tile()
            util.mplutil.show_if_requested()
def predict():
    """
    Run YOLO inference over the NOAA test imagery and dump COCO predictions.

    Currently hacked in due to limited harness support.

        srun -c 4 -p priority --gres=gpu:1 \
            python ~/code/baseline-viame-2018/yolo_viame.py predict \
            --gpu=0

    Side effects:
        Writes ``./viame_pred_dump.mscoco.json`` containing one COCO
        annotation per detection with ``score > 0.1``.

    Notes:
        Fixes applied in review: the output file is now opened in write
        mode ('w'); the removed numpy aliases ``np.float``/``np.int`` were
        replaced by the builtin ``float``/``int`` they aliased; numpy
        scalars are converted to builtin floats so the json dump succeeds.
    """
    # HACK: Load the training dataset to extract the categories
    # INSTEAD: Should read the categories from a deployed model
    coco_dsets = load_coco_datasets()
    categories = coco_dsets['train'].dataset['categories']

    # Create a dataset to iterate through the images to predict on
    test_gpaths = glob.glob(ub.truepath('~/data/noaa/test_data/*/*.png'))
    predict_coco_dataset = {
        'licenses': [],
        'info': [],
        'categories': categories,
        'images': [{
            'id': idx,
            'file_name': fpath,
        } for idx, fpath in enumerate(test_gpaths)],
        'annotations': [],
    }
    predict_coco_dset = coco_api.CocoDataset(predict_coco_dataset,
                                             tag='predict')
    predict_dset = YoloCocoDataset(predict_coco_dset, train=False)

    # HACK: Define the path to the model weights
    # INSTEAD: best weights should be packaged in a model deployment
    load_path = ub.truepath(
        '~/work/viame/yolo/fit/nice/baseline1/best_snapshot.pt')
    # load_path = ub.truepath(
    #     '~/work/viame/yolo/fit/nice/baseline1/torch_snapshots/_epoch_00000080.pt')

    # HACK: Define the model topology (because we know what we trained with)
    # INSTEAD: model deployment should store and abstract away the topology
    model = light_yolo.Yolo(**{
        'num_classes': predict_dset.num_classes,
        # NOTE: np.float was an alias for builtin float (removed in
        # numpy>=1.24), so using float is behaviorally identical
        'anchors': np.asarray([(1.08, 1.19), (3.42, 4.41), (6.63, 11.38),
                               (9.42, 5.11), (16.62, 10.52)],
                              dtype=float),
        'conf_thresh': 0.001,
        'nms_thresh': 0.5,
    })

    # Boilerplate code that could be abstracted away in a prediction harness
    xpu = nh.XPU.cast('gpu')
    print('xpu = {!r}'.format(xpu))
    model = xpu.mount(model)
    snapshot_state = xpu.load(load_path)
    model.load_state_dict(snapshot_state['model_state_dict'])

    batch_size = 16
    workers = 4
    predict_loader = predict_dset.make_loader(batch_size=batch_size,
                                              num_workers=workers,
                                              shuffle=False,
                                              pin_memory=False)

    letterbox = predict_dset.letterbox

    # HACK: Main prediction loop
    # INSTEAD: Use a prediction harness to abstract these in a similar way to
    # the fit harness.
    predictions = []
    with nh.util.grad_context(False):
        _iter = ub.ProgIter(predict_loader, desc='predicting')
        for bx, raw_batch in enumerate(_iter):
            batch_inputs, batch_labels = raw_batch
            inputs = xpu.variable(batch_inputs)
            labels = {k: xpu.variable(d) for k, d in batch_labels.items()}
            outputs = model(inputs)

            # Transform yolo outputs into the coco format
            postout = model.module.postprocess(outputs)
            indices = labels['indices']
            orig_sizes = labels['orig_sizes']
            inp_size = np.array(inputs.shape[-2:][::-1])
            bsize = len(inputs)
            for ix in range(bsize):
                postitem = postout[ix].data.cpu().numpy()
                orig_size = orig_sizes[ix].data.cpu().numpy()
                gx = int(indices[ix].data.cpu().numpy())
                gid = predict_dset.dset.dataset['images'][gx]['id']

                # Unpack postprocessed predictions: each row is
                # (cx, cy, w, h, score, class_index)
                sboxes = postitem.reshape(-1, 6)
                pred_cxywh = sboxes[:, 0:4]
                pred_scores = sboxes[:, 4]
                pred_cxs = sboxes[:, 5].astype(int)

                sortx = pred_scores.argsort()
                pred_scores = pred_scores.take(sortx)
                pred_cxs = pred_cxs.take(sortx)
                pred_cxywh = pred_cxywh.take(sortx, axis=0)

                # Undo the letterbox transform to get boxes in the original
                # image coordinates, clipped to the image bounds
                norm_boxes = nh.util.Boxes(pred_cxywh, 'cxywh')
                boxes = norm_boxes.scale(inp_size)
                pred_box = letterbox._boxes_letterbox_invert(
                    boxes, orig_size, inp_size)
                pred_box = pred_box.clip(0, 0, orig_size[0], orig_size[1])
                pred_xywh = pred_box.toformat('xywh').data
                # print(ub.repr2(pred_cxywh.tolist(), precision=2))
                # print(ub.repr2(pred_xywh.tolist(), precision=2))

                for xywh, cx, score in zip(pred_xywh, pred_cxs, pred_scores):
                    if score > 0.1:
                        cid = predict_dset.dset.dataset['categories'][cx]['id']
                        pred = {
                            'id': len(predictions) + 1,
                            'image_id': gid,
                            'category_id': cid,
                            # Cast numpy scalars to builtin floats so the
                            # json dump below does not fail on numpy types
                            'bbox': [float(v) for v in xywh],
                            'score': float(score),
                        }
                        predictions.append(pred)
            # if bx > 1:
            #     break

    predict_coco_dset.dataset['annotations'] = predictions
    predict_coco_dset._build_index()

    # FIX: the file must be opened for writing; the original read-mode open
    # would raise when dump() tried to write
    with open('./viame_pred_dump.mscoco.json', 'w') as file:
        predict_coco_dset.dump(file)

    if False:
        # Interactive visualization of the predicted annotations
        import utool as ut
        from matplotlib import pyplot as plt
        gids = set([a['image_id'] for a in predict_coco_dset.anns.values()])
        for gid in ut.InteractiveIter(list(gids)):
            try:
                fig = plt.figure(1)
                fig.clf()
                predict_coco_dset.show_annotation(gid=gid)
                fig.canvas.draw()
            except Exception:
                print('cannot draw')
            # NOTE(review): shows the first image of the last batch as a raw
            # debug view; placement inside this dead branch inferred from the
            # original layout — confirm against version control
            z = inputs[0].cpu().numpy().transpose(1, 2, 0)
            nh.util.imshow(z, fnum=2, colorspace='rgb')
def setup_yolo_harness(bsize=16, workers=0):
    """
    Construct a configured YoloHarn for training YOLOv2 on Pascal VOC.

    Reads CLI overrides (--nice, --batch_size, --bstep, --workers, --decay,
    --lr, --weights, --eav) and builds datasets, loaders, the model,
    criterion, optimizer, and scheduler hyperparameters.

    Args:
        bsize (int): default batch size (overridable via --batch_size)
        workers (int): default dataloader worker count (overridable via
            --workers)

    Returns:
        YoloHarn: an uninitialized harness; call ``harn.initialize()``.

    CommandLine:
        python ~/code/netharn/examples/yolo_voc.py setup_yolo_harness

    Example:
        >>> # DISABLE_DOCTSET
        >>> harn = setup_yolo_harness()
        >>> harn.initialize()
    """
    xpu = nh.XPU.cast('argv')

    nice = ub.argval('--nice', default='Yolo2Baseline')
    batch_size = int(ub.argval('--batch_size', default=bsize))
    # FIX: pass the default by keyword for consistency with the other
    # argval calls in this function
    bstep = int(ub.argval('--bstep', default=4))
    workers = int(ub.argval('--workers', default=workers))
    decay = float(ub.argval('--decay', default=0.0005))
    lr = float(ub.argval('--lr', default=0.001))
    ovthresh = 0.5

    # Effective batch size after gradient accumulation (`bstep` steps)
    simulated_bsize = bstep * batch_size

    # We will divide the learning rate by the simulated batch size
    datasets = {
        'train': YoloVOCDataset(years=[2007, 2012], split='trainval'),
        'test': YoloVOCDataset(years=[2007], split='test'),
    }
    loaders = {
        key: dset.make_loader(batch_size=batch_size, num_workers=workers,
                              shuffle=(key == 'train'), pin_memory=True,
                              resize_rate=10 * bstep, drop_last=True)
        for key, dset in datasets.items()
    }

    if workers > 0:
        # Prevent opencv from spawning threads inside dataloader workers
        import cv2
        cv2.setNumThreads(0)

    # assert simulated_bsize == 64, 'must be 64'

    # Pascal 2007 + 2012 trainval has 16551 images
    # Pascal 2007 test has 4952 images
    # In the original YOLO, one batch is 64 images, therefore:
    #
    # ONE EPOCH is 16551 / 64 = 258.609375 = 259 iterations.
    #
    # From the original YOLO VOC v2 config
    # https://github.com/pjreddie/darknet/blob/master/cfg/yolov2-voc.cfg
    #     learning_rate=0.001
    #     burn_in=1000
    #     max_batches = 80200
    #     policy=steps
    #     steps=40000,60000
    #     scales=.1,.1
    #
    # However, the LIGHTNET values are
    #   LR_STEPS = [250, 25000, 35000]
    #
    # The DARNKET STEPS ARE:
    #   DN_STEPS = 1000, 40000, 60000, 80200
    #
    # Based in this, the iter to batch conversion is
    #
    # Key lightnet batch numbers
    # >>> np.array([250, 25000, 30000, 35000, 45000]) / (16512 / 64)
    # array([0.9689, 96.899, 116.2790, 135.658, 174.4186])
    # -> Round
    # array([ 1., 97., 135.])
    # >>> np.array([1000, 40000, 60000, 80200]) / 258
    # array([ 3.86683584, 154.67343363, 232.01015044, 310.12023443])
    # -> Round
    # array(4, 157, 232, 310])
    # array([ 3.87596899, 155.03875969, 232.55813953, 310.85271318])
    if not ub.argflag('--eav'):
        # Darknet-style schedule (epoch -> lr), lr scaled by simulated bsize
        lr_step_points = {
            # 0: lr * 0.1 / simulated_bsize,  # burnin
            # 4: lr * 1.0 / simulated_bsize,
            0: lr * 1.0 / simulated_bsize,
            154: lr * 1.0 / simulated_bsize,
            155: lr * 0.1 / simulated_bsize,
            232: lr * 0.1 / simulated_bsize,
            233: lr * 0.01 / simulated_bsize,
        }
        max_epoch = 311
        scheduler_ = (nh.schedulers.core.YOLOScheduler, {
            'points': lr_step_points,
            # 'interpolate': False,
            'interpolate': True,
            # number of epochs to burn_in for. approx 1000 batches?
            'burn_in': 0.96899225 if ub.argflag('--eav') else 3.86683584,
            'dset_size': len(datasets['train']),  # when drop_last=False
            # 'dset_size': (len(datasets['train']) // simulated_bsize) * simulated_bsize,  # make a multiple of batch_size because drop_last=True
            'batch_size': batch_size,
        })
    else:
        # Lightnet-style schedule
        lr_step_points = {
            # dividing by batch size was one of those unpublished details
            0: lr * 0.1 / simulated_bsize,
            1: lr * 1.0 / simulated_bsize,
            96: lr * 1.0 / simulated_bsize,
            97: lr * 0.1 / simulated_bsize,
            135: lr * 0.1 / simulated_bsize,
            136: lr * 0.01 / simulated_bsize,
        }
        max_epoch = 176
        scheduler_ = (nh.schedulers.ListedLR, {
            'points': lr_step_points,
            'interpolate': False,
        })

    # Initial weights: imagenet pretrain (default), lightnet demo weights,
    # or an explicit filepath given on the command line
    weights = ub.argval('--weights', default=None)
    if weights is None or weights == 'imagenet':
        weights = light_yolo.initial_imagenet_weights()
    elif weights == 'lightnet':
        weights = light_yolo.demo_voc_weights()
    else:
        print('weights = {!r}'.format(weights))

    # Anchors
    anchors = np.array([(1.3221, 1.73145), (3.19275, 4.00944),
                        (5.05587, 8.09892), (9.47112, 4.84053),
                        (11.2364, 10.0071)])

    from netharn.models.yolo2 import region_loss2
    # from netharn.models.yolo2 import light_region_loss

    hyper = nh.HyperParams(**{
        'nice': nice,
        'workdir': ub.truepath('~/work/voc_yolo2'),
        'datasets': datasets,

        # 'xpu': 'distributed(todo: fancy network stuff)',
        # 'xpu': 'cpu',
        # 'xpu': 'gpu:0,1,2,3',
        'xpu': xpu,

        # a single dict is applied to all datset loaders
        'loaders': loaders,

        'model': (light_yolo.Yolo, {
            'num_classes': datasets['train'].num_classes,
            'anchors': anchors,
            'conf_thresh': 0.001,
            # 'conf_thresh': 0.1,  # make training a bit faster
            'nms_thresh': 0.5 if not ub.argflag('--eav') else 0.4
        }),

        'criterion': (region_loss2.RegionLoss, {
            'num_classes': datasets['train'].num_classes,
            'anchors': anchors,
            'reduction': 32,
            'seen': 0,
            'coord_scale': 1.0,
            'noobject_scale': 1.0,
            'object_scale': 5.0,
            'class_scale': 1.0,
            'thresh': 0.6,  # iou_thresh
            # 'seen_thresh': 12800,
        }),

        # 'criterion': (light_region_loss.RegionLoss, {
        #     'num_classes': datasets['train'].num_classes,
        #     'anchors': anchors,
        #     'object_scale': 5.0,
        #     'noobject_scale': 1.0,
        #     # eav version originally had a random *2 in cls loss,
        #     # we removed, that but we can replicate it here.
        #     'class_scale': 1.0 if not ub.argflag('--eav') else 2.0,
        #     'coord_scale': 1.0,
        #     'thresh': 0.6,  # iou_thresh
        #     'seen_thresh': 12800,
        #     # 'small_boxes': not ub.argflag('--eav'),
        #     'small_boxes': True,
        #     'mse_factor': 0.5 if not ub.argflag('--eav') else 1.0,
        # }),

        'initializer': (nh.initializers.Pretrained, {
            'fpath': weights,
        }),

        'optimizer': (torch.optim.SGD, {
            'lr': lr_step_points[0],
            'momentum': 0.9,
            'dampening': 0,
            # multiplying by batch size was one of those unpublished details
            'weight_decay': decay * simulated_bsize,
        }),

        'scheduler': scheduler_,

        'monitor': (nh.Monitor, {
            'minimize': ['loss'],
            'maximize': ['mAP'],
            'patience': max_epoch,
            'max_epoch': max_epoch,
        }),

        'augment': datasets['train'].augmenter,

        'dynamics': {
            # Controls how many batches to process before taking a step in the
            # gradient direction. Effectively simulates a batch_size that is
            # `bstep` times bigger.
            'batch_step': bstep,
        },

        'other': {
            # Other params are not used internally, so you are free to set any
            # extra params specific to your algorithm, and still have them
            # logged in the hyperparam structure. For YOLO this is `ovthresh`.
            'batch_size': batch_size,
            'nice': nice,
            'ovthresh': ovthresh,  # used in mAP computation
            'input_range': 'norm01',
        },
    })
    print('max_epoch = {!r}'.format(max_epoch))
    harn = YoloHarn(hyper=hyper)
    harn.config['prog_backend'] = 'progiter'
    harn.intervals['log_iter_train'] = None
    harn.intervals['log_iter_test'] = None
    harn.intervals['log_iter_vali'] = None
    harn.config['large_loss'] = 1000  # tell netharn when to check for divergence
    return harn