Example #1
def show_if_requested(N=1):
    """
    Used at the end of tests. Handles command line arguments for saving figures.

    References:
        http://stackoverflow.com/questions/4325733/save-a-subplot-in-matplotlib

    """
    import matplotlib.pyplot as plt
    import ubelt as ub
    # Process figures adjustments from command line before a show or a save

    save_parts = ub.argflag('--saveparts')

    fpath_ = ub.argval('--save', default=None)
    if fpath_ is None:
        fpath_ = ub.argval('--saveparts', default=None)
        if fpath_ is not None:
            save_parts = True

    if save_parts:
        raise NotImplementedError
    if fpath_ is not None:
        raise NotImplementedError
    if ub.argflag('--show'):
        plt.show()
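
Note: every example in this section leans on two small ubelt helpers:
ub.argval reads a "--key=value" (or "--key value") pair out of sys.argv, and
ub.argflag checks for the bare presence of a flag. A minimal sketch of their
behavior (the flag names and values are illustrative):

import sys
import ubelt as ub

sys.argv += ['--save=out.png', '--show']
assert ub.argval('--save', default=None) == 'out.png'
assert ub.argflag('--show')
assert not ub.argflag('--saveparts')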
Example #2
    def from_argv(XPU, check=True, **kwargs):
        """
        Determine what CPU/GPU device to use based on sys.argv

        CommandLine:
            python -m netharn.device XPU.from_argv --gpu=0,1

        Example:
            >>> xpu = XPU.from_argv()
            >>> print(xpu)
        """
        item = ub.argval('--xpu', default=ub.NoParam)
        if item is not ub.NoParam:
            xpu = XPU.coerce(item)
        else:
            anygpu = ub.argflag('--gpu')
            if anygpu:
                gpu_num = XPU.default_gpu()
            else:
                gpu_num = ub.argval('--gpu', default=None)
            if ub.argflag('--cpu'):
                xpu = XPU(None, check=check)
            elif gpu_num is None:
                xpu = XPU.from_auto(**kwargs)
            else:
                if gpu_num.lower() == 'none':
                    xpu = XPU(None)
                elif isinstance(gpu_num, six.string_types) and ',' in gpu_num:
                    _device_ids = list(map(int, gpu_num.split(',')))
                    xpu = XPU(_device_ids, check=check)
                else:
                    xpu = XPU(int(gpu_num), check=check)
        return xpu
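
The multi-GPU branch above hinges on ub.argval seeing "--gpu=0,1" as a single
comma-separated string. A minimal sketch of just that parse path, without
importing netharn (the device ids are illustrative):

import sys
import ubelt as ub

sys.argv += ['--gpu=0,1']
gpu_num = ub.argval('--gpu', default=None)       # -> '0,1'
device_ids = list(map(int, gpu_num.split(',')))  # -> [0, 1]
print(device_ids)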
Example #3
def bench_closures():
    """
    Is it faster to use a closure or pass in the variables explicitly?
    """
    import ubelt as ub
    import timerit
    import numpy as np

    # Test a nested func with vs without a closure
    def rand_complex(*shape):
        # Note: np.complex was removed in NumPy 1.24; use np.complex128
        real = np.random.rand(*shape).astype(np.complex128)
        imag = np.random.rand(*shape).astype(np.complex128) * 1j
        mat = real + imag
        return mat

    s = int(ub.argval('--s', default='1'))
    mat1 = rand_complex(s, s)
    mat2 = rand_complex(s, s)
    N = 1000
    offset = 100

    def nested_closure():
        mat3 = mat1 @ mat2
        for i in range(N):
            mat3 += i + offset

    def nested_explicit(mat1, mat2, N, offset):
        mat3 = mat1 @ mat2
        for i in range(N):
            mat3 += i + offset

    ti = timerit.Timerit(int(2**11),
                         bestof=int(2**8),
                         verbose=int(ub.argval('--verbose', default='1')))

    for timer in ti.reset('nested_explicit'):
        with timer:
            nested_explicit(mat1, mat2, N, offset)

    for timer in ti.reset('nested_closure'):
        with timer:
            nested_closure()

    print('rankings = {}'.format(ub.repr2(ti.rankings, precision=9, nl=2)))
    print('consistency = {}'.format(ub.repr2(ti.consistency, precision=9,
                                             nl=2)))

    positions = ub.ddict(list)
    for m1, v1 in ti.rankings.items():
        for pos, label in enumerate(ub.argsort(v1), start=0):
            positions[label].append(pos)
    average_position = ub.map_vals(lambda x: sum(x) / len(x), positions)
    print('average_position = {}'.format(ub.repr2(average_position)))
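
For reference, this benchmark builds on the basic timerit pattern; a minimal
sketch, assuming timerit is installed (the workload is illustrative):

import timerit

ti = timerit.Timerit(num=100, bestof=10, verbose=1)
for timer in ti.reset('sum-1k'):
    with timer:
        sum(range(1000))
print('min = {:.9f}s, mean = {:.9f}s'.format(ti.min(), ti.mean()))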
Example #4
def main():
    if ub.argflag(('--help', '-h')):
        print(
            ub.codeblock('''
            Usage:
                python -m clab.live.final train --train_data_path=<path/to/UrbanMapper3D/training>
                python -m clab.live.final test --train_data_path=<path/to/UrbanMapper3D/testing> --test_data_path=<path/to/UrbanMapper3D/testing> --output_file=<outfile>

            Optional Args / Flags:
                --debug
                --serial
                --nopin
                --noprog
                --workdir=<path>
                --num_workers=<int>
                --batch_size=<int>
            '''))
        sys.exit(1)

    train_data_path = ub.truepath(
        '~/remote/aretha/data/UrbanMapper3D/training')
    test_data_path = ub.truepath('~/remote/aretha/data/UrbanMapper3D/testing')
    output_file = 'prediction'

    # Conform to positional argument specs from challenge doc
    if sys.argv[1] in ['train', 'test']:
        if len(sys.argv) > 2 and exists(sys.argv[2]):
            train_data_path = sys.argv[2]
    if sys.argv[1] in ['test']:
        if len(sys.argv) > 4 and exists(sys.argv[3]):
            test_data_path = sys.argv[3]
            output_file = sys.argv[4]

    train_data_path = ub.argval('--train_data_path', default=train_data_path)
    test_data_path = ub.argval('--test_data_path', default=test_data_path)
    output_file = ub.argval('--output_file', default=output_file)

    workdir = script_workdir()

    if sys.argv[1] in ['train', 'test']:
        print('* train_data_path = {!r}'.format(train_data_path))
    if sys.argv[1] in ['test']:
        print('* test_data_path = {!r}'.format(test_data_path))
        print('* output_file = {!r}'.format(output_file))
    print(' * workdir = {!r}'.format(workdir))

    if sys.argv[1] == 'train':
        train(train_data_path)

    if sys.argv[1] == 'test':
        test(train_data_path, test_data_path, output_file)
Example #5
def determine_code_dpath():
    """
    Returns a good place to put the code for the internal dependencies.

    Returns:
        PathLike: the directory where you want to store your code

    In order, the methods used for determining this are:
        * the `--codedir` command line flag (may be undocumented in the CLI)
        * the `--codedpath` command line flag (may be undocumented in the CLI)
        * the CODE_DPATH environment variable
        * the CODE_DIR environment variable
        * the directory above this script (e.g. if this is in ~/code/repo/super_setup.py then code dir resolves to ~/code)
        * the user's ~/code directory.
    """
    import os

    candidates = [
        ub.argval('--codedir', default=''),
        ub.argval('--codedpath', default=''),
        os.environ.get('CODE_DPATH', ''),
        os.environ.get('CODE_DIR', ''),
    ]
    valid = [c for c in candidates if c != '']
    if len(valid) > 0:
        code_dpath = valid[0]
    else:
        try:
            # This file should be in the top level of a repo, the directory from
            # this file should be the code directory.
            this_fpath = abspath(__file__)
            code_dpath = abspath(dirname(dirname(this_fpath)))
        except NameError:
            code_dpath = ub.expandpath('~/code')

    if not exists(code_dpath):
        code_dpath = ub.expandpath(code_dpath)

    # if CODE_DIR and not exists(CODE_DIR):
    #     import warnings
    #     warnings.warn('environment variable CODE_DIR={!r} was defined, but does not exist'.format(CODE_DIR))

    if not exists(code_dpath):
        raise Exception(
            ub.codeblock("""
            Please specify a correct code_dir using the CLI or ENV.
            code_dpath={!r} does not exist.
            """.format(code_dpath)))
    return code_dpath
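
A quick demonstration of the documented precedence (a CLI flag beats an
environment variable); the paths here are illustrative:

import os
import sys
import ubelt as ub

os.environ['CODE_DPATH'] = '/tmp/from_env'
sys.argv += ['--codedir=/tmp/from_cli']
candidates = [
    ub.argval('--codedir', default=''),
    os.environ.get('CODE_DPATH', ''),
]
valid = [c for c in candidates if c != '']
print(valid[0])  # -> '/tmp/from_cli'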
Example #6
    def from_argv(XPU, **kwargs):
        """
        Respect command line gpu and cpu argument

        CommandLine:
            python -m netharn.device XPU.from_argv --gpu=0,1

        Example:
            >>> xpu = XPU.from_argv()
            >>> print(xpu)
        """
        anygpu = ub.argflag('--gpu')
        if anygpu:
            gpu_num = XPU.default_gpu()
        else:
            gpu_num = ub.argval('--gpu', default=None)
        if ub.argflag('--cpu'):
            xpu = XPU(None)
        elif gpu_num is None:
            xpu = XPU.from_auto(**kwargs)
        else:
            if gpu_num.lower() == 'none':
                xpu = XPU(None)
            elif isinstance(gpu_num, six.string_types) and ',' in gpu_num:
                devices = list(map(int, gpu_num.split(',')))
                xpu = XPU(devices)
            else:
                xpu = XPU(int(gpu_num))
        return xpu
Example #7
def script_workdir():
    if DEBUG:
        workdir = ub.ensuredir(ub.truepath('~/data/work_phase2_debug'))
    else:
        workdir = ub.ensuredir(ub.truepath('~/data/work_phase2'))
    workdir = ub.argval('--workdir', default=workdir)
    return workdir
Example #8
def detect_feats_main():
    import pyhesaff
    from pyhesaff._pyhesaff import grab_test_imgpath
    from pyhesaff._pyhesaff import argparse_hesaff_params
    import cv2
    import ubelt as ub

    img_fpath = grab_test_imgpath(ub.argval('--fname', default='astro.png'))
    kwargs = argparse_hesaff_params()
    print('kwargs = %r' % (kwargs, ))

    (kpts, vecs) = pyhesaff.detect_feats(img_fpath, **kwargs)

    if ub.argflag('--show'):
        # Show keypoints
        imgBGR = cv2.imread(img_fpath)
        default_showkw = dict(ori=False,
                              ell=True,
                              ell_linewidth=2,
                              ell_alpha=.4,
                              ell_color='distinct')
        print('default_showkw = %r' % (default_showkw, ))
        import utool as ut
        showkw = ut.argparse_dict(default_showkw)
        import plottool as pt
        pt.interact_keypoints.ishow_keypoints(imgBGR, kpts, vecs, **showkw)
        pt.show_if_requested()
Example #9
    def __init__(self, inputs, task, colorspace='RGB'):

        self.inputs = inputs
        self.task = task

        self.colorspace = colorspace

        self.loader = im_loaders.np_loader
        self.rng = np.random.RandomState(432432)

        inputs_base = ub.ensuredir((task.workdir, 'inputs'))
        inputs.base_dpath = inputs_base
        if len(inputs):
            inputs.prepare_images(force=True)
            inputs.prepare_input()
            self.input_id = inputs.input_id
            self.with_gt = self.inputs.gt_paths
        else:
            self.input_id = ''

        self.augment = None
        self.im_augment = torchvision.transforms.Compose([
            RandomGamma(rng=self.rng),
            RandomBlur(rng=self.rng),
        ])
        self.rand_aff = RandomWarpAffine(self.rng)

        if self.inputs.aux_paths:
            self.aux_keys = sorted(self.inputs.aux_paths.keys())
        else:
            self.aux_keys = []

        self.center_inputs = None
        self.use_aux_diff = ub.argflag('--use_aux_diff')
        self.use_dual_gt = ub.argval('--arch', default='unet')
Example #10
 def _parse_value(self, key, default, kwargs={}, argv=None):
     """ argv > kwargs > default """
     # constructor overrides default
     default = kwargs.get(key, default)
     # argv overrides constructor
     value = ub.argval('--' + key, default=default, argv=argv)
     setattr(self, key, value)
     self._keys.append(key)
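
A hypothetical config class wired to this method, showing the
argv > kwargs > default precedence end to end (DemoConfig and its keys are
illustrative, not part of the original code):

import ubelt as ub

class DemoConfig:
    def __init__(self, **kwargs):
        self._keys = []
        self._parse_value('workdir', '.', kwargs)
        self._parse_value('batch_size', 8, kwargs)

    def _parse_value(self, key, default, kwargs={}, argv=None):
        """ argv > kwargs > default """
        default = kwargs.get(key, default)
        value = ub.argval('--' + key, default=default, argv=argv)
        setattr(self, key, value)
        self._keys.append(key)

config = DemoConfig(batch_size=32)  # a --batch_size flag would beat 32, which beats 8
print({key: getattr(config, key) for key in config._keys})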
Example #11
def setup_datasets(workdir=None):
    if workdir is None:
        workdir = ub.expandpath('~/data/mnist/')

    # Define your dataset
    transform = torchvision.transforms.Compose([
        torchvision.transforms.ToTensor(),
        # torchvision.transforms.Normalize((0.1307,), (0.3081,))
    ])

    learn_dset = nh.data.MNIST(workdir,
                               transform=transform,
                               train=True,
                               download=True)

    test_dset = nh.data.MNIST(workdir,
                              transform=transform,
                              train=False,
                              download=True)

    # split the learning dataset into training and validation
    # take a subset of data
    factor = .15
    n_vali = int(len(learn_dset) * factor)
    learn_idx = np.arange(len(learn_dset))

    rng = np.random.RandomState(0)
    rng.shuffle(learn_idx)

    reduction = int(ub.argval('--reduction', default=1))
    vali_idx = torch.LongTensor(learn_idx[:n_vali][::reduction])
    train_idx = torch.LongTensor(learn_idx[n_vali:][::reduction])

    train_dset = torch.utils.data.Subset(learn_dset, train_idx)
    vali_dset = torch.utils.data.Subset(learn_dset, vali_idx)

    datasets = {
        'train': train_dset,
        'vali': vali_dset,
        'test': test_dset,
    }
    if not ub.argflag('--test'):
        del datasets['test']
    for tag, dset in datasets.items():
        # Construct the PCCs (positive connected components)
        # These are groups of item indices which are positive matches
        if isinstance(dset, torch.utils.data.Subset):
            labels = dset.dataset.train_labels[dset.indices]
        else:
            labels = dset.labels
        unique_labels, groupxs = kwarray.group_indices(labels.numpy())
        dset.pccs = [xs.tolist() for xs in groupxs]

    # Give the training dataset an input_id
    datasets['train'].input_id = 'mnist_' + ub.hash_data(
        train_idx.numpy())[0:8]
    return datasets, workdir
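
The PCC construction relies on kwarray.group_indices, which maps an array of
labels to one group of indices per unique label. A small sketch of that
behavior (the labels are illustrative):

import numpy as np
import kwarray

labels = np.array([3, 7, 3, 3, 7])
unique_labels, groupxs = kwarray.group_indices(labels)
print(unique_labels)                    # -> [3 7]
print([xs.tolist() for xs in groupxs])  # -> [[0, 2, 3], [1, 4]]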
Example #12
def setup_datasets(workdir=None):
    if workdir is None:
        workdir = ub.expandpath('~/data/mnist/')

    # Define your dataset
    transform = torchvision.transforms.Compose([
        torchvision.transforms.ToTensor(),
        torchvision.transforms.Normalize((0.1307, ), (0.3081, ))
    ])

    learn_dset = torchvision.datasets.MNIST(workdir,
                                            transform=transform,
                                            train=True,
                                            download=True)

    test_dset = torchvision.datasets.MNIST(workdir,
                                           transform=transform,
                                           train=False,
                                           download=True)

    # split the learning dataset into training and validation
    # take a subset of data
    factor = .15
    n_vali = int(len(learn_dset) * factor)
    learn_idx = np.arange(len(learn_dset))

    rng = np.random.RandomState(0)
    rng.shuffle(learn_idx)

    reduction = int(ub.argval('--reduction', default=1))
    vali_idx = torch.LongTensor(learn_idx[:n_vali][::reduction])
    train_idx = torch.LongTensor(learn_idx[n_vali:][::reduction])

    train_dset = torch.utils.data.Subset(learn_dset, train_idx)
    vali_dset = torch.utils.data.Subset(learn_dset, vali_idx)

    classes = [
        'zero', 'one', 'two', 'three', 'four', 'five', 'six', 'seven', 'eight',
        'nine'
    ]

    datasets = {
        'train': train_dset,
        'vali': vali_dset,
        'test': test_dset,
    }
    for tag, dset in datasets.items():
        dset.classes = classes
        dset.num_classes = len(classes)

    # Give the training dataset an input_id
    datasets['train'].input_id = 'mnist_' + ub.hash_data(
        train_idx.numpy())[0:8]
    return datasets, workdir
Example #13
def main():
    val = ub.argval('--exit-val', default=None)

    if val == 'None':
        val = None

    if val == 'err':
        raise Exception

    if val is not None:
        val = int(val)
    return val
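
Presumably this main is wired so its return value becomes the process exit
code; a sketch of that wiring (not shown in the snippet):

import sys

if __name__ == '__main__':
    sys.exit(main())  # e.g. `python script.py --exit-val=3` exits with status 3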
Example #14
 def from_argv(XPU, **kwargs):
     """
     Respect command line gpu argument
     """
     gpu_num = ub.argval('--gpu', default=None)
     if gpu_num is None:
         xpu = XPU.available(**kwargs)
     else:
         if gpu_num.lower() == 'none':
             xpu = XPU(None)
         else:
             xpu = XPU(int(gpu_num))
     return xpu
Example #15
def setup_demo_logger():
    import logging
    import logging.config
    from os.path import exists
    import ubelt as ub
    logger = logging.getLogger(__name__)

    logconf_fpath = 'logging.conf'
    if exists(logconf_fpath):
        logging.config.fileConfig(logconf_fpath)
    else:
        level = getattr(logging, ub.argval('--level', default='INFO').upper())
        logfmt = '%(levelname)s %(name)s(%(lineno)d): %(message)s'
        logging.basicConfig(format=logfmt, level=level)
    logger.debug('Setup logging in demo script')
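
For the fileConfig branch, a minimal logging.conf that
logging.config.fileConfig would accept (the handler and formatter names are
illustrative):

[loggers]
keys=root

[handlers]
keys=console

[formatters]
keys=plain

[logger_root]
level=INFO
handlers=console

[handler_console]
class=StreamHandler
formatter=plain
args=(sys.stderr,)

[formatter_plain]
format=%(levelname)s %(name)s(%(lineno)d): %(message)s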
Example #16
 def dump_text(self):
     print("Dumping Profile Information")
     try:
         output_text, summary_text = self.get_text()
     except AttributeError:
         print('profile is not on')
     else:
         #profile.dump_stats('out.lprof')
         import ubelt as ub
         print(summary_text)
         suffix = ub.argval('--profname', default='')
         if suffix:
             suffix = '_' + suffix
         ub.writeto('profile_output{}.txt'.format(suffix), output_text + '\n' + summary_text)
         ub.writeto('profile_output{}.{}.txt'.format(suffix, ub.timestamp()),
                    output_text + '\n' + summary_text)
Example #17
    pip install ubelt
    sudo apt install texlive texlive-latex-extra texlive-fonts-recommended dvipng
    sudo apt install texlive-latex-recommended texlive-latex-extra texlive-luatex latexmk -y
    sudo apt install texlive-latex-extra texlive-fonts-recommended dvipng cm-super

    luaotfload-tool --update
    pip install latex


"""
import ubelt as ub
import math
import string
from functools import partial

MODE = ub.argval('--MODE', default='full')
assert MODE in {'full', 'big', 'small'}


def build_password_strategy():
    """
    Returns information - specifically the entropy - about possible schemes,
    patterns, or strategies we might use to construct a password. We will
    analyze the security of each.

    Example:
        password_schemes = build_password_strategy()
        print('password_schemes = {}'.format(ub.repr2(password_schemes, nl=1)))
    """
    # List different candidate patterns for remembering passwords
    password_schemes = []
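
The entropy bookkeeping behind such schemes is simple: a password drawn
uniformly at random from an alphabet of size A with length L carries
L * log2(A) bits of entropy. A sketch (the scheme parameters are
illustrative):

import math

def scheme_entropy(alphabet_size, length):
    # bits of entropy for `length` symbols drawn uniformly from the alphabet
    return length * math.log2(alphabet_size)

print(scheme_entropy(26, 12))   # 12 lowercase letters -> ~56.4 bits
print(scheme_entropy(7776, 6))  # 6 diceware words     -> ~77.5 bits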
Example #18
def main():
    import click
    registery = make_netharn_registry()

    only = ub.argval('--only', default=None)
    if only is not None:
        only = only.split(',')
        registery.repos = [
            repo for repo in registery.repos if repo.name in only
        ]

    num_workers = int(ub.argval('--workers', default=8))
    if ub.argflag('--serial'):
        num_workers = 0

    protocol = ub.argval('--protocol', default=None)
    if ub.argflag('--https'):
        protocol = 'https'
    if ub.argflag('--http'):
        protocol = 'http'
    if ub.argflag('--ssh'):
        protocol = 'ssh'

    if protocol is not None:
        for repo in registery.repos:
            repo.set_protocol(protocol)

    default_context_settings = {
        'help_option_names': ['-h', '--help'],
        'allow_extra_args': True,
        'ignore_unknown_options': True
    }

    @click.group(context_settings=default_context_settings)
    def cli_group():
        pass

    @cli_group.add_command
    @click.command('pull', context_settings=default_context_settings)
    def pull():
        registery.apply('pull', num_workers=num_workers)

    @cli_group.add_command
    @click.command('ensure', context_settings=default_context_settings)
    def ensure():
        """
        Ensure is the live run of "check".
        """
        registery.apply('ensure', num_workers=num_workers)

    @cli_group.add_command
    @click.command('ensure_clone', context_settings=default_context_settings)
    def ensure_clone():
        registery.apply('ensure_clone', num_workers=num_workers)

    @cli_group.add_command
    @click.command('check', context_settings=default_context_settings)
    def check():
        """
        Check is just a dry run of "ensure".
        """
        registery.apply('check', num_workers=num_workers)

    @cli_group.add_command
    @click.command('status', context_settings=default_context_settings)
    def status():
        registery.apply('status', num_workers=num_workers)

    @cli_group.add_command
    @click.command('develop', context_settings=default_context_settings)
    def develop():
        registery.apply('develop', num_workers=0)

    @cli_group.add_command
    @click.command('doctest', context_settings=default_context_settings)
    def doctest():
        registery.apply('doctest')

    @cli_group.add_command
    @click.command('versions', context_settings=default_context_settings)
    def versions():
        registery.apply('versions')

    cli_group()
Example #19
def run_demo():
    """
    CommandLine:
        python -m graphid.demo.demo_script run_demo --viz
        python -m graphid.demo.demo_script run_demo

    Example:
        >>> run_demo()
    """
    from graphid import demo
    import matplotlib as mpl
    TMP_RC = {
        'axes.titlesize': 12,
        'axes.labelsize': int(ub.argval('--labelsize', default=8)),
        'font.family': 'sans-serif',
        'font.serif': 'CMU Serif',
        'font.sans-serif': 'CMU Sans Serif',
        'font.monospace': 'CMU Typewriter Text',
        'xtick.labelsize': 12,
        'ytick.labelsize': 12,
        # 'legend.alpha': .8,
        'legend.fontsize': 12,
        'legend.facecolor': 'w',
    }
    mpl.rcParams.update(TMP_RC)
    # ---- Synthetic data params
    params = {
        'redun.pos': 2,
        'redun.neg': 2,
    }
    # oracle_accuracy = .98
    # oracle_accuracy = .90
    # oracle_accuracy = (.8, 1.0)
    oracle_accuracy = (.85, 1.0)
    # oracle_accuracy = 1.0

    # --- draw params

    VISUALIZE = ub.argflag('--viz')
    # QUIT_OR_EMBED = 'embed'
    QUIT_OR_EMBED = 'quit'

    def asint(p):
        return p if p is None else int(p)

    TARGET_REVIEW = asint(ub.argval('--target', default=None))
    START = asint(ub.argval('--start', default=None))
    END = asint(ub.argval('--end', default=None))

    # ------------------

    # rng = np.random.RandomState(42)
    # infr = demo.demodata_infr(num_pccs=4, size=3, size_std=1, p_incon=0)
    # infr = demo.demodata_infr(num_pccs=6, size=7, size_std=1, p_incon=0)
    # infr = demo.demodata_infr(num_pccs=3, size=5, size_std=.2, p_incon=0)
    infr = demo.demodata_infr(pcc_sizes=[5, 2, 4])
    infr.verbose = 100
    infr.ensure_cliques()
    infr.ensure_full()
    # Dummy scoring

    infr.init_simulation(oracle_accuracy=oracle_accuracy, name='run_demo')
    # infr_gt = infr.copy()
    dpath = ub.ensuredir(ub.truepath('~/Desktop/demo'))
    if 0:
        ub.delete(dpath)
    ub.ensuredir(dpath)

    fig_counter = it.count(0)

    def show_graph(infr, title, final=False, selected_edges=None):
        from matplotlib import pyplot as plt
        if not VISUALIZE:
            return
        # TODO: rich colored text?
        latest = '\n'.join(infr.latest_logs())
        showkw = dict(
            # fontsize=infr.graph.graph['fontsize'],
            # fontname=infr.graph.graph['fontname'],
            show_unreviewed_edges=True,
            show_inferred_same=False,
            show_inferred_diff=False,
            outof=(len(infr.aids)),
            # show_inferred_same=True,
            # show_inferred_diff=True,
            selected_edges=selected_edges,
            show_labels=True,
            simple_labels=True,
            # show_recent_review=not final,
            show_recent_review=False,
            # splines=infr.graph.graph['splines'],
            reposition=False,
            # with_colorbar=True
        )
        verbose = infr.verbose
        infr.verbose = 0
        infr_ = infr.copy()
        infr_ = infr
        infr_.verbose = verbose
        infr_.show(pickable=True, verbose=0, **showkw)
        infr.verbose = verbose
        # print('status ' + ub.repr2(infr_.status()))
        # infr.show(**showkw)
        ax = plt.gca()
        ax.set_title(title, fontsize=20)
        fig = plt.gcf()
        # fontsize = 22
        fontsize = 12
        if True:
            # postprocess xlabel
            lines = []
            for line in latest.split('\n'):
                if False and line.startswith('ORACLE ERROR'):
                    lines += ['ORACLE ERROR']
                else:
                    lines += [line]
            latest = '\n'.join(lines)
            if len(lines) > 10:
                fontsize = 16
            if len(lines) > 12:
                fontsize = 14
            if len(lines) > 14:
                fontsize = 12
            if len(lines) > 18:
                fontsize = 10

            if len(lines) > 23:
                fontsize = 8

        if True:
            util.mplutil.adjust_subplots(top=.95,
                                         left=0,
                                         right=1,
                                         bottom=.45,
                                         fig=fig)
            ax.set_xlabel('\n' + latest)
            xlabel = ax.get_xaxis().get_label()
            xlabel.set_horizontalalignment('left')
            # xlabel.set_x(.025)
            # xlabel.set_x(-.6)
            xlabel.set_x(-2.0)
            # xlabel.set_fontname('CMU Typewriter Text')
            xlabel.set_fontname('Inconsolata')
            xlabel.set_fontsize(fontsize)
        ax.set_aspect('equal')

        # ax.xaxis.label.set_color('red')
        fpath = join(dpath, 'demo_{:04d}.png'.format(next(fig_counter)))
        fig.savefig(
            fpath,
            dpi=300,
            # transparent=True,
            edgecolor='none')

        # pt.save_figure(dpath=dpath, dpi=300)
        infr.latest_logs()

    if VISUALIZE:
        infr.update_visual_attrs(groupby='name_label')
        infr.set_node_attrs('pin', 'true')
        node_dict = infr.graph.nodes
        print(ub.repr2(node_dict[1]))

    if VISUALIZE:
        infr.latest_logs()
        # Pin Nodes into the target groundtruth position
        show_graph(infr, 'target-gt')

    print(ub.repr2(infr.status()))
    infr.clear_feedback()
    infr.clear_name_labels()
    infr.clear_edges()
    print(ub.repr2(infr.status()))
    infr.latest_logs()

    if VISUALIZE:
        infr.update_visual_attrs()

    infr.prioritize('prob_match')
    if VISUALIZE or TARGET_REVIEW is None or TARGET_REVIEW == 0:
        show_graph(infr, 'initial state')

    def on_new_candidate_edges(infr, edges):
        # hack: update visual attrs as a callback
        if VISUALIZE:
            infr.update_visual_attrs()

    infr.on_new_candidate_edges = on_new_candidate_edges

    infr.params.update(**params)
    infr.refresh_candidate_edges()

    VIZ_ALL = (VISUALIZE and TARGET_REVIEW is None and START is None)
    print('VIZ_ALL = %r' % (VIZ_ALL, ))

    if VIZ_ALL or TARGET_REVIEW == 0:
        show_graph(infr, 'find-candidates')

    # _iter2 = enumerate(infr.generate_reviews(**params))
    # _iter2 = list(_iter2)
    # assert len(_iter2) > 0

    # prog = ub.ProgIter(_iter2, label='run_demo', bs=False, adjust=False,
    #                    enabled=False)
    count = 1
    first = 1
    for edge, priority in infr._generate_reviews(data=True):
        msg = 'review #%d, priority=%.3f' % (count, priority)
        print('\n----------')
        infr.print('pop edge {} with priority={:.3f}'.format(edge, priority))
        # print('remaining_reviews = %r' % (infr.remaining_reviews()),)
        # Make the next review

        if START is not None:
            VIZ_ALL = count >= START

        if END is not None and count >= END:
            break

        infr.print(msg)
        if ub.allsame(infr.pos_graph.node_labels(*edge)) and first:
            # Have oracle make a mistake early
            feedback = infr.request_oracle_review(edge, accuracy=0)
            first -= 1
        else:
            feedback = infr.request_oracle_review(edge)

        AT_TARGET = TARGET_REVIEW is not None and count >= TARGET_REVIEW - 1

        SHOW_CANDIDATE_POP = True
        if SHOW_CANDIDATE_POP and (VIZ_ALL or AT_TARGET):
            infr.print(
                ub.repr2(infr.task_probs['match_state'][edge],
                         precision=4,
                         si=True))
            infr.print('len(queue) = %r' % (len(infr.queue)))
            # Show edge selection
            infr.print('Oracle will predict: ' + feedback['evidence_decision'])
            show_graph(infr, 'pre' + msg, selected_edges=[edge])

        if count == TARGET_REVIEW:
            infr.EMBEDME = QUIT_OR_EMBED == 'embed'
        infr.add_feedback(edge, **feedback)
        infr.print('len(queue) = %r' % (len(infr.queue)))
        # infr.apply_nondynamic_update()
        # Show the result
        if VIZ_ALL or AT_TARGET:
            show_graph(infr, msg)
            # import sys
            # sys.exit(1)
        if count == TARGET_REVIEW:
            break
        count += 1

    infr.print('status = ' + ub.repr2(infr.status(extended=False)))
    show_graph(infr, 'post-review (#reviews={})'.format(count), final=True)

    if VISUALIZE:
        if not getattr(infr, 'EMBEDME', False):
            # import plottool as pt
            # util.mplutil.all_figures_tile()
            util.mplutil.show_if_requested()
Example #20
def main():

    registery = make_netharn_registry()

    only = ub.argval('--only', default=None)
    if only is not None:
        only = only.split(',')
        registery.repos = [
            repo for repo in registery.repos if repo.name in only
        ]

    num_workers = int(ub.argval('--workers', default=8))
    if ub.argflag('--serial'):
        num_workers = 0

    protocol = ub.argval('--protocol', default=None)
    if ub.argflag('--https'):
        protocol = 'https'
    if ub.argflag('--http'):
        protocol = 'http'
    if ub.argflag('--ssh'):
        protocol = 'ssh'

    HACK_PROTOCOL = True
    if HACK_PROTOCOL:
        if protocol is None:
            # Try to determine if you are using ssh or https and default to that
            main_repo = None
            for repo in registery.repos:
                if repo.name == 'netharn':
                    main_repo = repo
                    break
            assert main_repo is not None
            for remote in main_repo.pygit.remotes:
                for url in list(remote.urls):
                    gurl1 = GitURL(url)
                    gurl2 = GitURL(main_repo.url)
                    if gurl2.parts()['path'] == gurl1.parts()['path']:
                        if gurl1.parts()['syntax'] == 'ssh':
                            protocol = 'ssh'
                        else:
                            protocol = 'https'
                        break
                if protocol is not None:
                    print('Found default protocol = {}'.format(protocol))
                    break

    if protocol is not None:
        for repo in registery.repos:
            repo.set_protocol(protocol)

    default_context_settings = {
        'help_option_names': ['-h', '--help'],
        'allow_extra_args': True,
        'ignore_unknown_options': True
    }

    @click.group(context_settings=default_context_settings)
    def cli_group():
        pass

    @cli_group.add_command
    @click.command('pull', context_settings=default_context_settings)
    def pull():
        registery.apply('pull', num_workers=num_workers)

    @cli_group.add_command
    @click.command('ensure', context_settings=default_context_settings)
    def ensure():
        """
        Ensure is the live run of "check".
        """
        registery.apply('ensure', num_workers=num_workers)

    @cli_group.add_command
    @click.command('ensure_clone', context_settings=default_context_settings)
    def ensure_clone():
        registery.apply('ensure_clone', num_workers=num_workers)

    @cli_group.add_command
    @click.command('check', context_settings=default_context_settings)
    def check():
        """
        Check is just a dry run of "ensure".
        """
        registery.apply('check', num_workers=num_workers)

    @cli_group.add_command
    @click.command('status', context_settings=default_context_settings)
    def status():
        registery.apply('status', num_workers=num_workers)

    @cli_group.add_command
    @click.command('develop', context_settings=default_context_settings)
    def develop():
        registery.apply('develop', num_workers=0)

    @cli_group.add_command
    @click.command('doctest', context_settings=default_context_settings)
    def doctest():
        registery.apply('doctest')

    @cli_group.add_command
    @click.command('versions', context_settings=default_context_settings)
    def versions():
        registery.apply('versions')

    cli_group()
Example #21
def simple_pipeline():
    """
    Processing_with_species_id.m is their main file

    OpenCV2:
        cd ~/code/VIAME/plugins/camtrawl/python
        workon_py2
        source ~/code/VIAME/build/install/setup_viame.sh
        # Ensure python and sprokit know about our module
        export PYTHONPATH=$(pwd):$PYTHONPATH
        export KWIVER_DEFAULT_LOG_LEVEL=info
        export SPROKIT_PYTHON_MODULES=kwiver.processes:viame.processes:camtrawl_processes

        python ~/code/VIAME/plugins/camtrawl/python/run_camtrawl.py --dataset=demo

    OpenCV3:
        cd ~/code/VIAME/plugins/camtrawl/python
        workon_py2
        source ~/code/VIAME/build-cv3-py2/install/setup_viame.sh
        # Ensure python and sprokit know about our module
        export PYTHONPATH=$(pwd):$PYTHONPATH
        export KWIVER_DEFAULT_LOG_LEVEL=info
        export SPROKIT_PYTHON_MODULES=kwiver.processes:viame.processes:camtrawl_processes

        python ~/code/VIAME/plugins/camtrawl/python/run_camtrawl.py --dataset=demo

        /home/joncrall/code/VIAME/build-cv3-py2/install/bin/pipeline_runner -p /home/joncrall/.cache/sprokit/temp_pipelines/temp_pipeline_file.pipe -S pythread_per_process
    """

    # Setup the input files
    import ubelt as ub
    dataset = ub.argval('--dataset', default='demo')

    if dataset == 'demo':
        import zipfile
        from os.path import commonprefix
        dpath = ub.ensure_app_cache_dir('camtrawl')
        try:
            demodata_zip = ub.grabdata(
                'http://acidalia:8000/data/camtrawl_demodata.zip', dpath=dpath)
        except Exception:
            raise ValueError(
                'Demo data is currently only available on Kitware VPN')
        with zipfile.ZipFile(demodata_zip) as zfile:
            dname = commonprefix(zfile.namelist())
            data_fpath = join(dpath, dname)
            if not exists(data_fpath):
                zfile.extractall(dpath)

        print('data_fpath = {!r}'.format(data_fpath))

        cal_fpath = join(data_fpath, 'cal.npz')
        datakw = {
            'data_fpath': data_fpath,
            'img_path1': join(data_fpath, 'left'),
            'img_path2': join(data_fpath, 'right'),
        }
        print('datakw = {!r}'.format(datakw))
    elif dataset == 'test':
        data_fpath = expanduser('~/data/autoprocess_test_set')
        cal_fpath = join(data_fpath, 'cal_201608.mat')
        datakw = {
            'data_fpath': data_fpath,
            'img_path1': join(data_fpath, 'image_data/left'),
            'img_path2': join(data_fpath, 'image_data/right'),
        }
    elif dataset == 'haul83-small':
        data_fpath = expanduser('~/data/camtrawl_stereo_sample_data_small')
        cal_fpath = join(data_fpath,
                         '201608_calibration_data/selected/Camtrawl_2016.npz')
        datakw = {
            'data_fpath': data_fpath,
            'img_path1': join(data_fpath, 'Haul_83/left'),
            'img_path2': join(data_fpath, 'Haul_83/right'),
        }
    elif dataset == 'haul83':
        data_fpath = expanduser('~/data/camtrawl_stereo_sample_data/')
        cal_fpath = join(data_fpath,
                         '201608_calibration_data/selected/Camtrawl_2016.npz')
        datakw = {
            'data_fpath': data_fpath,
            'img_path1': join(
                data_fpath,
                'Haul_83/D20160709-T021759/images/AB-800GE_00-0C-DF-06-40-BF'),  # left
            'img_path2': join(
                data_fpath,
                'Haul_83/D20160709-T021759/images/AM-800GE_00-0C-DF-06-20-47'),  # right
            'start_frame': 2000,
            'end_frame': 5000,
        }
    else:
        import argparse
        import os
        parser = argparse.ArgumentParser(description='Camtrawl pipeline demo')
        parser.add_argument(
            '--cal',
            help='path to matlab or numpy stereo calibration file',
            default='cal.npz')
        parser.add_argument('--left',
                            help='path to directory containing left images',
                            default='left')
        parser.add_argument('--right',
                            help='path to directory containing right images',
                            default='right')
        args = parser.parse_args()
        config = args.__dict__.copy()
        img_path1, img_path2, cal_fpath = ub.take(config,
                                                  ['left', 'right', 'cal'])
        data_fpath = os.path.dirname(img_path1)
        datakw = {
            'data_fpath': data_fpath,
            'img_path1': img_path1,
            'img_path2': img_path2,
        }
        if not exists(img_path1):
            raise IOError(
                'left image path {!r} does not exist'.format(img_path1))
        if not exists(img_path2):
            raise IOError(
                'right image path {!r} does not exist'.format(img_path2))
        if not exists(cal_fpath):
            raise IOError(
                'calibration file path {!r} does not exist'.format(cal_fpath))

        # raise KeyError('Unknown dataset {}'.format(dataset))

    make_image_input_files(**datakw)

    def add_stereo_camera_branch(pipe, prefix):
        """
        Helper that defines a single branch, so it can easily be duplicated.
        """
        image_list_file = join(data_fpath, prefix + 'images.txt')
        cam = {}

        # --- Node ---
        cam['imread'] = imread = pipe.add_process(
            name=prefix + 'imread', type='frame_list_input',
            config={
                'image_list_file': image_list_file,
                'frame_time': 0.03333333,
                'image_reader:type': 'ocv',
            })
        # ------------

        # --- Node ---
        cam['detect'] = detect = pipe.add_process(name=prefix + 'detect',
                                                  type='camtrawl_detect_fish',
                                                  config={})
        detect.iports.connect({
            'image': imread.oports['image'],
            # 'image_file_name': imread.oports['image_file_name'],
        })
        # ------------
        return cam

    pipe = define_pipeline.Pipeline()
    cam1 = add_stereo_camera_branch(pipe, 'cam1_')
    cam2 = add_stereo_camera_branch(pipe, 'cam2_')

    # stereo_cameras = pipe.add_process(
    #     name='stereo_cameras', type='stereo_calibration_camera_reader',
    #     config={
    #         # 'cal_fpath': cal_fpath,
    #     })

    # ------
    pipe.add_process(name='measure',
                     type='camtrawl_measure',
                     config={
                         'cal_fpath': cal_fpath,
                         'output_fpath': './camtrawl_out.csv',
                     })
    pipe['measure'].iports.connect({
        # 'camera1': stereo_cameras.oports['camera1'],
        # 'camera2': stereo_cameras.oports['camera2'],
        'detected_object_set1': cam1['detect'].oports['detected_object_set'],
        'detected_object_set2': cam2['detect'].oports['detected_object_set'],
        'image_file_name1': cam1['imread'].oports['image_file_name'],
        'image_file_name2': cam2['imread'].oports['image_file_name'],
    })
    # ------

    pipe.config['_pipeline:_edge']['capacity'] = 1
    pipe.config['_scheduler']['type'] = 'pythread_per_process'

    # pipe.draw_graph('pipeline.png')
    # import ubelt as ub
    # ub.startfile('pipeline.png')

    print('  --- RUN PIPELINE ---')
    import ubelt as ub
    with ub.Timer('Running Pipeline'):
        pipe.run()

    return pipe
Example #22
def task_fit(taskname):
    """

    CommandLine:
        python -m clab.live.sseg_train task_fit --task=camvid --arch=segnet
        python -m clab.live.sseg_train task_fit --task=camvid --arch=unet
        python -m clab.live.sseg_train task_fit --task=camvid --arch=segnet --dry

        python -m clab.live.sseg_train task_fit --task=camvid --arch=unet --colorspace=RGB
        python -m clab.live.sseg_train task_fit --task=camvid --arch=unet --colorspace=LAB

        python -m clab.live.sseg_train task_fit --task=camvid --arch=segnet --colorspace=RGB
        python -m clab.live.sseg_train task_fit --task=camvid --arch=segnet --colorspace=LAB

        python -m clab.live.sseg_train task_fit --task=urban_mapper_3d --arch=segnet

        python -m clab.live.sseg_train task_fit --task=urban_mapper_3d --arch=unet --noaux
        python -m clab.live.sseg_train task_fit --task=urban_mapper_3d --arch=unet

        python -m clab.live.sseg_train task_fit --task=urban_mapper_3d --dry

        python -m clab.live.sseg_train task_fit --task=urban_mapper_3d --arch=unet --colorspace=RGB --all
        python -m clab.live.sseg_train task_fit --task=urban_mapper_3d --arch=unet --colorspace=RGB

        python -m clab.live.sseg_train task_fit --task=urban_mapper_3d --arch=unet --dry

    Script:
        >>> from clab.fit_harness import *
        >>> taskname = ub.argval('--task', default='camvid')
        >>> harn = task_fit(taskname)
        >>> #import utool as ut
        >>> #ut.exec_func_src(task_fit)
    """

    colorspace = ub.argval('--colorspace', default='RGB').upper()

    datasets = load_task_dataset(taskname, colorspace=colorspace)
    datasets['train'].augment = True

    # Make sure we use consistent normalization
    # TODO: give normalization a part of the hashid
    # TODO: save normalization type with the model
    center_inputs = datasets['train']._make_normalizer()
    datasets['test'].center_inputs = center_inputs
    datasets['vali'].center_inputs = center_inputs

    # Ensure normalization is the same for each dataset
    datasets['train'].augment = True

    # turn off aux layers
    if ub.argflag('--noaux'):
        for v in datasets.values():
            v.aux_keys = []

    arch = ub.argval('--arch', default='unet')
    batch_size = 6
    if arch == 'segnet':
        batch_size = 6

    n_classes = datasets['train'].n_classes
    n_channels = datasets['train'].n_channels
    class_weights = datasets['train'].class_weights()
    ignore_label = datasets['train'].ignore_label

    print('n_classes = {!r}'.format(n_classes))
    print('n_channels = {!r}'.format(n_channels))
    print('batch_size = {!r}'.format(batch_size))

    hyper = hyperparams.HyperParams(
        criterion=(criterions.CrossEntropyLoss2D, {
            'ignore_label': ignore_label,
            'weight': class_weights,
        }),
        optimizer=(torch.optim.SGD, {
            'weight_decay': .0005,
            'momentum': 0.9,
            'nesterov': True,
        }),
        # optimizer=(torch.optim.Adam, {
        #     'weight_decay': .0005,
        # }),
        # scheduler=('Constant', {}),
        scheduler=('Exponential', {
            'gamma': 0.99,
            'base_lr': 0.001,
            'stepsize': 2,
        }),
        other={
            'n_classes': n_classes,
            'n_channels': n_channels,
            'augment': datasets['train'].augment,
            'colorspace': datasets['train'].colorspace,
        })

    if arch == 'segnet':
        pretrained = 'vgg'
    else:
        pretrained = None

    train_dpath, test_dpath = directory_structure(
        datasets['train'].task.workdir,
        arch,
        datasets,
        pretrained=pretrained,
        train_hyper_id=hyper.hyper_id(),
        suffix='_' + hyper.other_id())

    def custom_metrics(harn, output, label):
        ignore_label = datasets['train'].ignore_label
        labels = datasets['train'].task.labels

        metrics_dict = metrics._sseg_metrics(output,
                                             label,
                                             labels=labels,
                                             ignore_label=ignore_label)
        return metrics_dict

    print('arch = {!r}'.format(arch))
    dry = ub.argflag('--dry')
    if dry:
        model = None
    elif arch == 'segnet':
        model = models.SegNet(in_channels=n_channels, n_classes=n_classes)
        model.init_he_normal()
        model.init_vgg16_params()
    elif arch == 'linknet':
        model = models.LinkNet(in_channels=n_channels, n_classes=n_classes)
    elif arch == 'unet':
        model = models.UNet(in_channels=n_channels, n_classes=n_classes)
        model.init_he_normal()
    elif arch == 'dummy':
        model = models.SSegDummy(in_channels=n_channels, n_classes=n_classes)
    else:
        raise ValueError('unknown arch')

    xpu = xpu_device.XPU.from_argv()
    harn = fit_harness.FitHarness(
        model=model,
        hyper=hyper,
        datasets=datasets,
        xpu=xpu,
        train_dpath=train_dpath,
        dry=dry,
        batch_size=batch_size,
    )
    harn.add_batch_metric_hook(custom_metrics)

    # HACK
    # im = datasets['train'][0][0]
    # w, h = im.shape[-2:]
    # single_output_shape = (n_classes, w, h)
    # harn.single_output_shape = single_output_shape
    # print('harn.single_output_shape = {!r}'.format(harn.single_output_shape))

    harn.run()
    return harn
Example #23
def bench_find_optimal_blocksize():
    r"""
    This function can help find the optimal blocksize for your use case.

    Notes:

        # Usage
        cd ~/code/ubelt/dev
        xdoctest bench_hash_file.py bench_find_optimal_blocksize \
            --dpath <PATH-TO-HDD-OR-SSD> \
            --size <INT-IN-MB> \
            --hash_algo <ALGO_NAME>

        # Benchmark on an HDD
        xdoctest bench_hash_file.py bench_find_optimal_blocksize \
            --size 500 \
            --dpath $HOME/raid/data/tmp \
            --hash_algo xx64

        # Benchmark on an SSD
        xdoctest bench_hash_file.py bench_find_optimal_blocksize \
            --size 500 \
            --dpath $HOME/.cache/ubelt/tmp \
            --hash_algo xx64


        # Test a small file
        xdoctest bench_hash_file.py bench_find_optimal_blocksize \
            --size 1 \
            --dpath $HOME/.cache/ubelt/tmp \
            --hash_algo xx64

        Throughout our tests on SSDs / HDDs with small and large files
        we are finding a chunksize of 2 ** 20 consistently working best with
        xx64.

        # Test with a slower hash algo
        xdoctest bench_hash_file.py bench_find_optimal_blocksize \
            --size 500 \
            --dpath $HOME/raid/data/tmp \
            --hash_algo sha1

        Even that shows 2 ** 20 working well.
    """
    import os
    import numpy as np
    import timerit

    dpath = ub.argval('--dpath', default=None)

    if dpath is None:
        # dpath = ub.ensuredir(ub.expandpath('$HOME/raid/data/tmp'))
        dpath = ub.ensure_app_cache_dir('ubelt/hash_test')
    else:
        ub.ensuredir(dpath)

    print('dpath = {!r}'.format(dpath))

    target_size = int(ub.argval('--size', default=600))
    hash_algo = ub.argval('--hash_algo', default='xx64')

    print('hash_algo = {!r}'.format(hash_algo))
    print('target_size = {!r}'.format(target_size))

    # Write a big file (~600 MB)
    MB = int(2 ** 20)
    size_pool = [target_size]
    rng = random.Random(0)
    # pool_size = max(target_size // 2, 1)
    # pool_size = max(1, target_size // 10)
    pool_size = 8
    part_pool = [_random_data(rng, MB) for _ in range(pool_size)]
    fpath = _write_random_file(dpath, part_pool, size_pool, rng)
    print('fpath = {!r}'.format(fpath))

    size_mb = os.stat(fpath).st_size / MB
    print('file size = {!r} MB'.format(size_mb))

    ti = timerit.Timerit(4, bestof=2, verbose=2)

    results = []

    # Find an optimal constant blocksize
    min_power = 16
    max_power = 24
    blocksize_candidates = [int(2 ** e) for e in range(min_power, max_power)]

    for blocksize in blocksize_candidates:
        for timer in ti.reset('constant blocksize=2 ** {} = {}'.format(np.log2(float(blocksize)), blocksize)):
            result = ub.hash_file(fpath, blocksize=blocksize, hasher=hash_algo)
            results.append(result)

    print('ti.rankings = {}'.format(ub.repr2(ti.rankings, nl=2, align=':')))
    assert ub.allsame(results)
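
Given the notes above, a caller would presumably just pin the winning
constant; a sketch, assuming the optional xxhash backend is installed:

import ubelt as ub

fpath = ub.__file__  # any existing file works for illustration
print(ub.hash_file(fpath, blocksize=2 ** 20, hasher='xx64'))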
Example #24
def setup_yolo_harness(bsize=16, workers=0):
    """
    CommandLine:
        python ~/code/netharn/examples/yolo_voc.py setup_yolo_harness

    Example:
        >>> # DISABLE_DOCTEST
        >>> harn = setup_yolo_harness()
        >>> harn.initialize()
    """

    xpu = nh.XPU.cast('argv')

    nice = ub.argval('--nice', default='Yolo2Baseline')
    batch_size = int(ub.argval('--batch_size', default=bsize))
    bstep = int(ub.argval('--bstep', default=4))
    workers = int(ub.argval('--workers', default=workers))
    decay = float(ub.argval('--decay', default=0.0005))
    lr = float(ub.argval('--lr', default=0.001))
    ovthresh = 0.5
    simulated_bsize = bstep * batch_size

    # We will divide the learning rate by the simulated batch size
    datasets = {
        'train': YoloVOCDataset(years=[2007, 2012], split='trainval'),
        'test': YoloVOCDataset(years=[2007], split='test'),
    }
    loaders = {
        key: dset.make_loader(batch_size=batch_size,
                              num_workers=workers,
                              shuffle=(key == 'train'),
                              pin_memory=True,
                              resize_rate=10 * bstep,
                              drop_last=True)
        for key, dset in datasets.items()
    }

    if workers > 0:
        import cv2
        cv2.setNumThreads(0)

    # assert simulated_bsize == 64, 'must be 64'

    # Pascal 2007 + 2012 trainval has 16551 images
    # Pascal 2007 test has 4952 images
    # In the original YOLO, one batch is 64 images, therefore:
    #
    # ONE EPOCH is 16551 / 64 = 258.609375 = 259 iterations.
    #
    # From the original YOLO VOC v2 config
    # https://github.com/pjreddie/darknet/blob/master/cfg/yolov2-voc.cfg
    #     learning_rate=0.001
    #     burn_in=1000
    #     max_batches = 80200
    #     policy=steps
    #     steps=40000,60000
    #     scales=.1,.1
    #
    # However, the LIGHTNET values are
    #   LR_STEPS = [250, 25000, 35000]
    #
    # The DARKNET STEPS ARE:
    #   DN_STEPS = 1000, 40000, 60000, 80200
    #
    # Based in this, the iter to batch conversion is
    #
    # Key lightnet batch numbers
    # >>> np.array([250, 25000, 30000, 35000, 45000]) / (16512 / 64)
    # array([0.9689,  96.899, 116.2790, 135.658, 174.4186])
    # -> Round
    # array([  1.,  97., 135.])
    # >>> np.array([1000, 40000, 60000, 80200]) / 258
    # array([  3.86683584, 154.67343363, 232.01015044, 310.12023443])
    # -> Round
    # array([4, 155, 232, 310])
    # array([  3.87596899, 155.03875969, 232.55813953, 310.85271318])
    if not ub.argflag('--eav'):
        lr_step_points = {
            # 0:   lr * 0.1 / simulated_bsize,  # burnin
            # 4:   lr * 1.0 / simulated_bsize,
            0: lr * 1.0 / simulated_bsize,
            154: lr * 1.0 / simulated_bsize,
            155: lr * 0.1 / simulated_bsize,
            232: lr * 0.1 / simulated_bsize,
            233: lr * 0.01 / simulated_bsize,
        }
        max_epoch = 311
        scheduler_ = (
            nh.schedulers.core.YOLOScheduler,
            {
                'points': lr_step_points,
                # 'interpolate': False,
                'interpolate': True,
                'burn_in': 0.96899225 if ub.argflag('--eav') else
                3.86683584,  # number of epochs to burn_in for. approx 1000 batches?
                'dset_size': len(datasets['train']),  # when drop_last=False
                # 'dset_size': (len(datasets['train']) // simulated_bsize) * simulated_bsize,  # make a multiple of batch_size because drop_last=True
                'batch_size': batch_size,
            })
    else:
        lr_step_points = {
            # dividing by batch size was one of those unpublished details
            0: lr * 0.1 / simulated_bsize,
            1: lr * 1.0 / simulated_bsize,
            96: lr * 1.0 / simulated_bsize,
            97: lr * 0.1 / simulated_bsize,
            135: lr * 0.1 / simulated_bsize,
            136: lr * 0.01 / simulated_bsize,
        }
        max_epoch = 176
        scheduler_ = (nh.schedulers.ListedLR, {
            'points': lr_step_points,
            'interpolate': False,
        })

    weights = ub.argval('--weights', default=None)
    if weights is None or weights == 'imagenet':
        weights = light_yolo.initial_imagenet_weights()
    elif weights == 'lightnet':
        weights = light_yolo.demo_voc_weights()
    else:
        print('weights = {!r}'.format(weights))

    # Anchors
    anchors = np.array([(1.3221, 1.73145), (3.19275, 4.00944),
                        (5.05587, 8.09892), (9.47112, 4.84053),
                        (11.2364, 10.0071)])

    from netharn.models.yolo2 import region_loss2
    # from netharn.models.yolo2 import light_region_loss

    hyper = nh.HyperParams(
        **{
            'nice': nice,
            'workdir': ub.truepath('~/work/voc_yolo2'),
            'datasets': datasets,

            # 'xpu': 'distributed(todo: fancy network stuff)',
            # 'xpu': 'cpu',
            # 'xpu': 'gpu:0,1,2,3',
            'xpu': xpu,

            # a single dict is applied to all dataset loaders
            'loaders': loaders,
            'model': (
                light_yolo.Yolo,
                {
                    'num_classes': datasets['train'].num_classes,
                    'anchors': anchors,
                    'conf_thresh': 0.001,
                    # 'conf_thresh': 0.1,  # make training a bit faster
                    'nms_thresh': 0.5 if not ub.argflag('--eav') else 0.4
                }),
            'criterion': (
                region_loss2.RegionLoss,
                {
                    'num_classes': datasets['train'].num_classes,
                    'anchors': anchors,
                    'reduction': 32,
                    'seen': 0,
                    'coord_scale': 1.0,
                    'noobject_scale': 1.0,
                    'object_scale': 5.0,
                    'class_scale': 1.0,
                    'thresh': 0.6,  # iou_thresh
                    # 'seen_thresh': 12800,
                }),

            # 'criterion': (light_region_loss.RegionLoss, {
            #     'num_classes': datasets['train'].num_classes,
            #     'anchors': anchors,
            #     'object_scale': 5.0,
            #     'noobject_scale': 1.0,

            #     # eav version originally had a random *2 in cls loss,
            #     # we removed, that but we can replicate it here.
            #     'class_scale': 1.0 if not ub.argflag('--eav') else 2.0,
            #     'coord_scale': 1.0,

            #     'thresh': 0.6,  # iou_thresh
            #     'seen_thresh': 12800,
            #     # 'small_boxes': not ub.argflag('--eav'),
            #     'small_boxes': True,
            #     'mse_factor': 0.5 if not ub.argflag('--eav') else 1.0,
            # }),
            'initializer': (nh.initializers.Pretrained, {
                'fpath': weights,
            }),
            'optimizer': (
                torch.optim.SGD,
                {
                    'lr': lr_step_points[0],
                    'momentum': 0.9,
                    'dampening': 0,
                    # multiplying by batch size was one of those unpublished details
                    'weight_decay': decay * simulated_bsize,
                }),
            'scheduler': scheduler_,
            'monitor': (nh.Monitor, {
                'minimize': ['loss'],
                'maximize': ['mAP'],
                'patience': max_epoch,
                'max_epoch': max_epoch,
            }),
            'augment': datasets['train'].augmenter,
            'dynamics': {
                # Controls how many batches to process before taking a step in the
                # gradient direction. Effectively simulates a batch_size that is
                # `bstep` times bigger.
                'batch_step': bstep,
            },
            'other': {
                # Other params are not used internally, so you are free to set any
                # extra params specific to your algorithm, and still have them
                # logged in the hyperparam structure. For YOLO this is `ovthresh`.
                'batch_size': batch_size,
                'nice': nice,
                'ovthresh': ovthresh,  # used in mAP computation
                'input_range': 'norm01',
            },
        })
    print('max_epoch = {!r}'.format(max_epoch))
    harn = YoloHarn(hyper=hyper)
    harn.config['prog_backend'] = 'progiter'
    harn.intervals['log_iter_train'] = None
    harn.intervals['log_iter_test'] = None
    harn.intervals['log_iter_vali'] = None
    harn.config['large_loss'] = 1000  # tell netharn when to check for divergence
    return harn
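
The 'dynamics' entry above simulates a larger batch size via gradient accumulation. A minimal standalone sketch of that idea (illustrative only, not netharn's internal loop; `accumulated_sgd_steps` is a hypothetical name):

import torch

def accumulated_sgd_steps(model, loss_fn, batches, optimizer, bstep=8):
    # Accumulate gradients over `bstep` minibatches before each optimizer
    # step, approximating a batch size of bstep * batch_size.
    optimizer.zero_grad()
    for i, (inputs, targets) in enumerate(batches):
        loss = loss_fn(model(inputs), targets)
        (loss / bstep).backward()  # scale so the sum matches one big batch
        if (i + 1) % bstep == 0:
            optimizer.step()
            optimizer.zero_grad()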
Example No. 26
def train():
    """
    Example:
        >>> train()
    """
    import random
    np.random.seed(1031726816 % 4294967295)
    torch.manual_seed(137852547 % 4294967295)
    random.seed(2497950049 % 4294967295)

    xpu = xpu_device.XPU.from_argv()
    print('Chosen xpu = {!r}'.format(xpu))

    cifar_num = 10

    if ub.argflag('--lab'):
        datasets = cifar_training_datasets(output_colorspace='LAB',
                                           norm_mode='independent',
                                           cifar_num=cifar_num)
    elif ub.argflag('--rgb'):
        datasets = cifar_training_datasets(output_colorspace='RGB',
                                           norm_mode='independent',
                                           cifar_num=cifar_num)
    elif ub.argflag('--rgb-dep'):
        datasets = cifar_training_datasets(output_colorspace='RGB',
                                           norm_mode='dependant',
                                           cifar_num=cifar_num)
    else:
        raise AssertionError('specify one of --rgb, --rgb-dep, or --lab')

    import netharn.models.densenet

    # batch_size = (128 // 3) * 3
    batch_size = 64

    # initializer_ = (initializers.KaimingNormal, {
    #     'nonlinearity': 'relu',
    # })

    lr = 0.1
    initializer_ = (initializers.LSUV, {})

    hyper = hyperparams.HyperParams(
        workdir=ub.ensuredir('train_cifar_work'),
        model=(
            netharn.models.densenet.DenseNet,
            {
                'cifar': True,
                'block_config': (32, 32, 32),  # 100 layer depth
                'num_classes': datasets['train'].n_classes,
                'drop_rate': float(ub.argval('--drop_rate', default=.2)),
                'groups': 1,
            }),
        optimizer=(
            torch.optim.SGD,
            {
                # 'weight_decay': .0005,
                'weight_decay': float(ub.argval('--weight_decay', default=.0005)),
                'momentum': 0.9,
                'nesterov': True,
                'lr': lr,  # 0.1, matching the scheduler's epoch-0 point
            }),
        scheduler=(nh.schedulers.ListedLR, {
            'points': {
                0: lr,
                150: lr * 0.1,
                250: lr * 0.01,
            },
            'interpolate': False
        }),
        monitor=(nh.Monitor, {
            'minimize': ['loss'],
            'maximize': ['mAP'],
            'patience': 314,
            'max_epoch': 314,
        }),
        initializer=initializer_,
        criterion=(torch.nn.CrossEntropyLoss, {}),
        # Specify anything else that is special about your hyperparams here
        # Especially if you make a custom_batch_runner
        augment=str(datasets['train'].augmenter),
        other=ub.dict_union(
            {
                # TODO: type of augmentation as a parameter dependency
                # 'augmenter': str(datasets['train'].augmenter),
                # 'augment': datasets['train'].augment,
                'batch_size': batch_size,
                'colorspace': datasets['train'].output_colorspace,
                'n_classes': datasets['train'].n_classes,
                # 'center_inputs': datasets['train'].center_inputs,
            },
            datasets['train'].center_inputs.__dict__),
    )
    # if ub.argflag('--rgb-indie'):
    #     hyper.other['norm'] = 'dependant'
    hyper.input_ids['train'] = datasets['train'].input_id

    # NOTE: this overrides the XPU chosen from argv above
    xpu = xpu_device.XPU.cast('auto')
    print('xpu = {}'.format(xpu))

    data_kw = {'batch_size': batch_size}
    if xpu.is_gpu():
        data_kw.update({'num_workers': 8, 'pin_memory': True})

    tags = ['train', 'vali', 'test']

    loaders = ub.odict()
    for tag in tags:
        dset = datasets[tag]
        shuffle = tag == 'train'
        data_kw_ = data_kw.copy()
        if tag != 'train':
            data_kw_['batch_size'] = max(batch_size // 4, 1)
        loader = torch.utils.data.DataLoader(dset, shuffle=shuffle, **data_kw_)
        loaders[tag] = loader

    harn = fit_harness.FitHarness(
        hyper=hyper,
        datasets=datasets,
        xpu=xpu,
        loaders=loaders,
    )
    # harn.monitor = early_stop.EarlyStop(patience=40)
    harn.monitor = monitor.Monitor(min_keys=['loss'],
                                   max_keys=['global_acc', 'class_acc'],
                                   patience=40)

    harn.initialize()
    harn.run()
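
For reference, the ListedLR points above with interpolate=False act as a plain step schedule. A rough torch-only sketch of the same schedule (an assumption about ListedLR's semantics, not its API) using MultiStepLR:

import torch

model = torch.nn.Linear(8, 2)
optim = torch.optim.SGD(model.parameters(), lr=0.1, momentum=0.9, nesterov=True)
# Drop the learning rate by 10x at epochs 150 and 250, as in the points dict
sched = torch.optim.lr_scheduler.MultiStepLR(optim, milestones=[150, 250], gamma=0.1)
for epoch in range(314):
    # ... one epoch of training with optim.step() per batch ...
    optim.step()
    sched.step()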
Example No. 27
def train_mnist():
    """
    CommandLine:
        python examples/mnist.py

        python ~/code/netharn/examples/mnist.py --gpu=2
        python ~/code/netharn/examples/mnist.py
    """
    root = os.path.expanduser('~/data/mnist/')

    # Define your dataset
    transform = torchvision.transforms.Compose([
        torchvision.transforms.ToTensor(),
        torchvision.transforms.Normalize((0.1307, ), (0.3081, ))
    ])

    learn_dset = torchvision.datasets.MNIST(root,
                                            transform=transform,
                                            train=True,
                                            download=True)

    test_dset = torchvision.datasets.MNIST(root,
                                           transform=transform,
                                           train=False,
                                           download=True)

    train_dset = learn_dset
    vali_dset = copy.copy(learn_dset)

    # split the learning dataset into training and validation
    # take a subset of data
    factor = .15
    n_vali = int(len(learn_dset) * factor)
    learn_idx = np.arange(len(learn_dset))

    rng = np.random.RandomState(0)
    rng.shuffle(learn_idx)

    reduction = int(ub.argval('--reduction', default=1))
    valid_idx = torch.LongTensor(learn_idx[:n_vali][::reduction])
    train_idx = torch.LongTensor(learn_idx[n_vali:][::reduction])

    def _torch_take(tensor, indices, axis):
        # numpy-take along an axis, then convert back to the same torch
        # tensor type as the original MNIST storage (e.g. torch.ByteTensor)
        TensorType = learn_dset.train_data.type()
        TensorType = getattr(torch, TensorType.split('.')[1])
        return TensorType(tensor.numpy().take(indices, axis=axis))

    vali_dset.train_data = _torch_take(learn_dset.train_data,
                                       valid_idx,
                                       axis=0)
    vali_dset.train_labels = _torch_take(learn_dset.train_labels,
                                         valid_idx,
                                         axis=0).long()

    train_dset.train_data = _torch_take(learn_dset.train_data,
                                        train_idx,
                                        axis=0)
    train_dset.train_labels = _torch_take(learn_dset.train_labels,
                                          train_idx,
                                          axis=0).long()

    datasets = {
        'train': train_dset,
        'vali': vali_dset,
        'test': test_dset,
    }

    # Give the training dataset an input_id
    datasets['train'].input_id = 'mnist_' + ub.hash_data(
        train_idx.numpy())[0:8]

    batch_size = 128
    n_classes = 10
    xpu = nh.XPU.from_argv(min_memory=300)

    if False:
        initializer = (nh.initializers.Pretrained, {
            'fpath': 'path/to/pretrained/weights.pt'
        })
    else:
        initializer = (nh.initializers.KaimingNormal, {})

    loaders = ub.odict()
    data_kw = {'batch_size': batch_size}
    if xpu.is_gpu():
        data_kw.update({'num_workers': 6, 'pin_memory': True})
    for tag in ['train', 'vali', 'test']:
        if tag not in datasets:
            continue
        dset = datasets[tag]
        shuffle = tag == 'train'
        data_kw_ = data_kw.copy()
        loader = torch.utils.data.DataLoader(dset, shuffle=shuffle, **data_kw_)
        loaders[tag] = loader

    # Workaround deadlocks with DataLoader
    import cv2
    cv2.setNumThreads(0)
    """
    # Here is the FitHarn magic.
    # This keeps track of your stuff
    """
    hyper = nh.hyperparams.HyperParams(
        nice='mnist',
        xpu=xpu,
        workdir=ub.truepath('~/work/mnist/'),
        datasets=datasets,
        loaders=loaders,
        model=(MnistNet, dict(n_channels=1, n_classes=n_classes)),
        # optimizer=torch.optim.Adam,
        optimizer=(torch.optim.SGD, {
            'lr': 0.01
        }),
        # FIXME: the ReduceLROnPleateau is broken with restarts
        scheduler='ReduceLROnPlateau',
        criterion=torch.nn.CrossEntropyLoss,
        initializer=initializer,
        monitor=(
            nh.Monitor,
            {
                # 'minimize': ['loss'],
                # 'maximize': ['acc'],
                'patience': 10,
                'max_epoch': 300,
                'smoothing': .4,
            }),
        other={
            # record any other information that will be used to compare
            # different training runs here
            'n_classes': n_classes,
        })

    harn = MnistHarn(hyper=hyper)

    # Set how often vali / test will be run
    harn.intervals.update({
        # 'vali': slice(5, None, 1),

        # Start testing after the 5th epoch and then test every 4 epochs
        'test': slice(5, None, 4),
    })

    reset = ub.argflag('--reset')
    harn.initialize(reset=reset)
    harn.run()
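
The index surgery on train_data/train_labels above predates torch.utils.data.Subset; a simpler sketch of the same split using Subset (the helper name here is illustrative):

import numpy as np
import torch.utils.data

def split_learn_set(learn_dset, vali_frac=0.15, seed=0):
    # Shuffle indices once, then carve off a validation fraction
    rng = np.random.RandomState(seed)
    idx = rng.permutation(len(learn_dset))
    n_vali = int(len(learn_dset) * vali_frac)
    train = torch.utils.data.Subset(learn_dset, idx[n_vali:].tolist())
    vali = torch.utils.data.Subset(learn_dset, idx[:n_vali].tolist())
    return train, vali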
Example No. 28
def check_inconsistency():
    import netharn as nh
    import numpy as np
    import torch
    import ubelt as ub
    from netharn.models.yolo2 import light_yolo
    from netharn.models.yolo2 import light_region_loss

    yolo_voc = ub.import_module_from_path(ub.truepath('~/code/netharn/examples/yolo_voc.py'))
    xpu = nh.XPU.cast('argv')

    nice = ub.argval('--nice', default='Yolo2Baseline')
    batch_size = 8
    bstep = 8
    workers = 0
    decay = 0.0005
    lr = 0.001
    ovthresh = 0.5
    simulated_bsize = bstep * batch_size

    # We will divide the learning rate by the simulated batch size
    datasets = {
        # 'train': yolo_voc.YoloVOCDataset(years=[2007, 2012], split='trainval'),
        'test': yolo_voc.YoloVOCDataset(years=[2007], split='test'),
    }
    loaders = {
        key: dset.make_loader(batch_size=batch_size, num_workers=workers,
                              shuffle=(key == 'train'), pin_memory=True,
                              resize_rate=10 * bstep, drop_last=True)
        for key, dset in datasets.items()
    }

    if workers > 0:
        import cv2
        cv2.setNumThreads(0)

    assert simulated_bsize == 64, 'must be 64'

    lr_step_points = {
        0:   0,  # Hack to see performance before any learning
        1:   0,
        2:   lr * 1.0 / simulated_bsize,
        3:   lr * 1.0 / simulated_bsize,
    }
    max_epoch = 3

    # Anchors
    anchors = np.array([(1.3221, 1.73145), (3.19275, 4.00944),
                        (5.05587, 8.09892), (9.47112, 4.84053),
                        (11.2364, 10.0071)])

    hyper = nh.HyperParams(**{
        'nice': nice,
        'workdir': ub.truepath('~/work/devcheck_yolo'),
        'datasets': datasets,
        'xpu': xpu,

        # a single dict is applied to all dataset loaders
        'loaders': loaders,

        'model': (light_yolo.Yolo, {
            # 'num_classes': datasets['train'].num_classes,
            'num_classes': 20,
            'anchors': anchors,
            # 'conf_thresh': 0.001,
            'conf_thresh': 0.1,  # make training a bit faster
            # nms_thresh=0.5 to reproduce original yolo
            # nms_thresh=0.4 to reproduce lightnet
            'nms_thresh': 0.5 if not ub.argflag('--eav') else 0.4
        }),

        'criterion': (light_region_loss.RegionLoss, {
            # 'num_classes': datasets['train'].num_classes,
            'num_classes': 20,
            'anchors': anchors,
            'object_scale': 5.0,
            'noobject_scale': 1.0,
            'class_scale': 1.0,
            'coord_scale': 1.0,
            'thresh': 0.6,  # iou_thresh
        }),

        'initializer': (nh.initializers.Pretrained, {
            # 'fpath': light_yolo.initial_imagenet_weights(),
            'fpath': light_yolo.demo_voc_weights(),
        }),

        'optimizer': (torch.optim.SGD, {
            'lr': lr_step_points[0],
            'momentum': 0.9,
            'dampening': 0,
            # multiplying by batch size was one of those unpublished details
            'weight_decay': decay * simulated_bsize,
        }),

        'scheduler': (nh.schedulers.core.YOLOScheduler, {
            'points': lr_step_points,
            'interpolate': True,
            'burn_in': 1,
            # 'dset_size': len(datasets['train']),  # when drop_last=False
            'dset_size': len(datasets['test']),  # when drop_last=False
            'batch_size': batch_size,
        }),

        'monitor': (nh.Monitor, {
            'minimize': ['loss'],
            'maximize': ['mAP'],
            'patience': max_epoch,
            'max_epoch': max_epoch,
        }),
        # 'augment': datasets['train'].augmenter,
        'dynamics': {'batch_step': bstep},
        'other': {
            'nice': nice,
            'ovthresh': ovthresh,
        },
    })
    print('max_epoch = {!r}'.format(max_epoch))
    harn = yolo_voc.YoloHarn(hyper=hyper)
    harn.config['use_tqdm'] = False
    harn.intervals['log_iter_train'] = None
    harn.intervals['log_iter_test'] = None
    harn.intervals['log_iter_vali'] = None

    harn.initialize()
    harn.run()
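
A small sketch of the batch-size scaling arithmetic used above (an interpretation of darknet's unpublished convention, not a netharn API): if the loss is summed over a simulated batch of size B rather than averaged, dividing lr by B and multiplying weight_decay by B keeps the effective per-example update lr * decay unchanged, since (lr / B) * (decay * B) == lr * decay.

B = 64                    # simulated_bsize = bstep * batch_size
lr_eff = 0.001 / B        # learning rate passed to SGD
decay_eff = 0.0005 * B    # weight decay passed to SGD
assert abs(lr_eff * decay_eff - 0.001 * 0.0005) < 1e-12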
Example No. 29
def _save_requested(fpath_, save_parts):
    raise NotImplementedError("haven't done this yet")
    # dpi = ub.argval('--dpi', type_=int, default=200)
    from os.path import expanduser, join, dirname
    from matplotlib import pyplot as plt
    dpi = 200
    fpath_ = expanduser(fpath_)
    print('Figure save was requested')
    # arg_dict = ut.get_arg_dict(prefix_list=['--', '-'],
    #                            type_hints={'t': list, 'a': list})
    arg_dict = {}
    # HACK
    arg_dict = {
        key: (val[0] if len(val) == 1 else '[' + ']['.join(val) + ']')
        if isinstance(val, list) else val
        for key, val in arg_dict.items()
    }
    fpath_ = fpath_.format(**arg_dict)
    fpath_ = fpath_.replace(' ', '').replace('\'', '').replace('"', '')
    # ub.argval takes no type_ argument; values come back as strings
    dpath = ub.argval('--dpath', default=None)
    if dpath is None:
        gotdpath = False
        dpath = '.'
    else:
        gotdpath = True

    fpath = join(dpath, fpath_)
    if not gotdpath:
        dpath = dirname(fpath_)
    print('dpath = %r' % (dpath,))

    fig = plt.gcf()
    fig.dpi = dpi

    fpath_strict = ub.expandpath(fpath)
    CLIP_WHITE = ub.argflag('--clipwhite')
    from netharn import util

    if save_parts:
        # TODO: call save_parts instead, but we still need to do the
        # special grouping.

        # Group axes that belong together
        atomic_axes = []
        seen_ = set([])
        for ax in fig.axes:
            div = _get_plotdat(ax, _DF2_DIVIDER_KEY, None)
            if div is not None:
                df2_div_axes = _get_plotdat_dict(ax).get('df2_div_axes', [])
                seen_.add(ax)
                seen_.update(set(df2_div_axes))
                atomic_axes.append([ax] + df2_div_axes)
                # TODO: pad these a bit
            else:
                if ax not in seen_:
                    atomic_axes.append([ax])
                    seen_.add(ax)

        hack_axes_group_row = ub.argflag('--grouprows')
        if hack_axes_group_row:
            groupid_list = []
            for axs in atomic_axes:
                for ax in axs:
                    groupid = ax.colNum
                groupid_list.append(groupid)

            groups = ub.group_items(atomic_axes, groupid_list)
            new_groups = list(map(ub.flatten, groups.values()))
            atomic_axes = new_groups
            #[[(ax.rowNum, ax.colNum) for ax in axs] for axs in atomic_axes]
            # save all rows of each column

        # NOTE: `save_parts` here shadows the boolean argument; this is
        # presumably meant to call a figure-splitting helper of the same name.
        subpath_list = save_parts(fig=fig, fpath=fpath_strict,
                                  grouped_axes=atomic_axes, dpi=dpi)
        absfpath_ = subpath_list[-1]

        if CLIP_WHITE:
            for subpath in subpath_list:
                # remove white borders
                util.clipwhite_ondisk(subpath, subpath)
    else:
        savekw = {}
        # savekw['transparent'] = fpath.endswith('.png') and not noalpha
        savekw['transparent'] = ub.argflag('--alpha')
        savekw['dpi'] = dpi
        savekw['edgecolor'] = 'none'
        savekw['bbox_inches'] = extract_axes_extents(fig, combine=True)  # replaces need for clipwhite
        absfpath_ = ub.expandpath(fpath)
        fig.savefig(absfpath_, **savekw)

        if CLIP_WHITE:
            # remove white borders
            fpath_in = fpath_out = absfpath_
            util.clipwhite_ondisk(fpath_in, fpath_out)

    if ub.argflag(('--diskshow', '--ds')):
        # show what we wrote
        ub.startfile(absfpath_)
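
A minimal standalone sketch of saving a single axes out of a figure, the core operation the save-parts path above builds on (backend-dependent; assumes an Agg-style canvas that provides get_renderer):

import matplotlib
matplotlib.use('Agg')
import matplotlib.pyplot as plt

fig, (ax1, ax2) = plt.subplots(1, 2)
ax1.plot([1, 2, 3])
ax2.plot([3, 2, 1])
fig.canvas.draw()  # make sure a renderer exists
# Convert the axes bbox from pixels to inches for savefig's bbox_inches
extent = ax1.get_tightbbox(fig.canvas.get_renderer()).transformed(
    fig.dpi_scale_trans.inverted())
fig.savefig('ax1_only.png', bbox_inches=extent, dpi=200)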
Example No. 30
def benchmark_hash_data():
    """
    CommandLine:
        python ~/code/ubelt/dev/bench_hash.py --convert=True --show
        python ~/code/ubelt/dev/bench_hash.py --convert=False --show
    """
    import ubelt as ub
    #ITEM = 'JUST A STRING' * 100
    ITEM = [0, 1, 'a', 'b', ['JUST A STRING'] * 4]
    HASHERS = ['sha1', 'sha512', 'xxh32', 'xxh64', 'blake3']
    scales = list(range(5, 13))
    results = ub.AutoDict()
    # Using json conversion is faster, or at least as fast, in most cases
    # xxhash is also significantly faster than sha512
    convert = ub.argval('--convert', default='True').lower() == 'true'
    print('convert = {!r}'.format(convert))
    ti = ub.Timerit(9, bestof=3, verbose=1, unit='ms')
    for s in ub.ProgIter(scales, desc='benchmark', verbose=3):
        N = 2**s
        print(' --- s={s}, N={N} --- '.format(s=s, N=N))
        data = [ITEM] * N
        for hasher in HASHERS:
            for timer in ti.reset(hasher):
                ub.hash_data(data, hasher=hasher, convert=convert)
            results[hasher].update({N: ti.mean()})
        col = {h: results[h][N] for h in HASHERS}
        sortx = ub.argsort(col)
        ranking = ub.dict_subset(col, sortx)
        print('walltime: ' + ub.repr2(ranking, precision=9, nl=0))
        best = next(iter(ranking))
        #pairs = list(ub.iter_window( 2))
        pairs = [(k, best) for k in ranking]
        ratios = [ranking[k1] / ranking[k2] for k1, k2 in pairs]
        nicekeys = ['{}/{}'.format(k1, k2) for k1, k2 in pairs]
        relratios = ub.odict(zip(nicekeys, ratios))
        print('speedup: ' + ub.repr2(relratios, precision=4, nl=0))
    # xdoc +REQUIRES(--show)
    # import pytest
    # pytest.skip()
    import pandas as pd
    df = pd.DataFrame.from_dict(results)
    df.columns.name = 'hasher'
    df.index.name = 'N'
    ratios = df.copy().drop(columns=df.columns)
    for k1, k2 in [('sha512', 'xxh32'), ('sha1', 'xxh32'), ('xxh64', 'xxh32')]:
        ratios['{}/{}'.format(k1, k2)] = df[k1] / df[k2]
    print()
    print('Seconds per iteration')
    print(df.to_string(float_format='%.9f'))
    print()
    print('Ratios of seconds')
    print(ratios.to_string(float_format='%.2f'))
    print()
    print('Average Ratio (over all N)')
    print('convert = {!r}'.format(convert))
    print(ratios.mean().sort_values())
    if ub.argflag('--show'):
        import kwplot
        kwplot.autompl()
        xdata = sorted(ub.peek(results.values()).keys())
        ydata = ub.map_vals(lambda d: [d[x] for x in xdata], results)
        kwplot.multi_plot(xdata,
                          ydata,
                          xlabel='N',
                          ylabel='seconds',
                          title='convert = {}'.format(convert))
        kwplot.show_if_requested()
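
Minimal usage of the function benchmarked above; note that the non-sha hashers are optional dependencies (the xxhash package for xxh32/xxh64, the blake3 package for blake3):

import ubelt as ub
data = [0, 1, 'a', 'b', ['JUST A STRING'] * 4]
# convert=True coerces the data to a json-serializable form before hashing
print(ub.hash_data(data, hasher='sha1', convert=True))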
Example No. 31
def setup_harness(workers=None):
    """
    CommandLine:
        python ~/code/clab/examples/yolo_voc.py setup_harness
        python ~/code/clab/examples/yolo_voc.py setup_harness --profile

    Example:
        >>> harn = setup_harness(workers=0)
        >>> harn.initialize()
        >>> harn.dry = True
        >>> # xdoc: +SKIP
        >>> harn.run()
    """
    workdir = ub.truepath('~/work/VOC2007')
    devkit_dpath = ub.truepath('~/data/VOC/VOCdevkit')
    YoloVOCDataset.ensure_voc_data()

    if ub.argflag('--2007'):
        dsetkw = {'years': [2007]}
    elif ub.argflag('--2012'):
        dsetkw = {'years': [2007, 2012]}
    else:
        dsetkw = {'years': [2007]}

    data_choice = ub.argval('--data', 'normal')

    if data_choice == 'combined':
        datasets = {
            'test': YoloVOCDataset(devkit_dpath, split='test', **dsetkw),
            'train': YoloVOCDataset(devkit_dpath, split='trainval', **dsetkw),
        }
    elif data_choice == 'notest':
        datasets = {
            'train': YoloVOCDataset(devkit_dpath, split='train', **dsetkw),
            'vali': YoloVOCDataset(devkit_dpath, split='val', **dsetkw),
        }
    elif data_choice == 'normal':
        datasets = {
            'train': YoloVOCDataset(devkit_dpath, split='train', **dsetkw),
            'vali': YoloVOCDataset(devkit_dpath, split='val', **dsetkw),
            'test': YoloVOCDataset(devkit_dpath, split='test', **dsetkw),
        }
    else:
        raise KeyError(data_choice)

    nice = ub.argval('--nice', default=None)

    pretrained_fpath = darknet.initial_weights()

    # NOTE: XPU implicitly supports DataParallel just pass --gpu=0,1,2,3
    xpu = xpu_device.XPU.cast('argv')
    print('xpu = {!r}'.format(xpu))

    ensure_ulimit()

    postproc_params = dict(
        conf_thresh=0.001,
        nms_thresh=0.5,
        ovthresh=0.5,
    )

    max_epoch = 160

    lr_step_points = {
        0: 0.001,
        60: 0.0001,
        90: 0.00001,
    }

    if ub.argflag('--warmup'):
        lr_step_points = {
            # warmup learning rate
            0:  0.0001,
            1:  0.0001,
            2:  0.0002,
            3:  0.0003,
            4:  0.0004,
            5:  0.0005,
            6:  0.0006,
            7:  0.0007,
            8:  0.0008,
            9:  0.0009,
            10: 0.0010,
            # cooldown learning rate
            60: 0.0001,
            90: 0.00001,
        }
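
    # The table above is a hand-rolled linear warmup followed by step decay.
    # A closed-form equivalent for reference (illustration only; `_warmup_lr`
    # is a hypothetical helper, not part of the ListedLR API):
    def _warmup_lr(epoch, base_lr=0.001, warmup=10):
        if epoch <= warmup:
            return base_lr * max(epoch, 1) / warmup
        if epoch < 60:
            return base_lr
        if epoch < 90:
            return base_lr * 0.1
        return base_lr * 0.01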

    batch_size = int(ub.argval('--batch_size', default=16))
    n_cpus = psutil.cpu_count(logical=True)
    # Only fall back to the CLI / cpu-count default if no value was passed in
    if workers is None:
        workers = int(ub.argval('--workers', default=int(n_cpus / 2)))

    print('Making loaders')
    loaders = make_loaders(datasets, batch_size=batch_size, workers=workers)

    """
    Reference:
        Original YOLO9000 hyperparameters are defined here:
        https://github.com/pjreddie/darknet/blob/master/cfg/yolo-voc.2.0.cfg

        https://github.com/longcw/yolo2-pytorch/issues/1#issuecomment-286410772

        Notes:
            jitter is a translation / crop parameter
            https://groups.google.com/forum/#!topic/darknet/A-JJeXprvJU

            thresh in 2.0.cfg is iou_thresh here
    """

    print('Making hyperparams')
    hyper = hyperparams.HyperParams(

        model=(darknet.Darknet19, {
            'num_classes': datasets['train'].num_classes,
            'anchors': datasets['train'].anchors
        }),

        criterion=(darknet_loss.DarknetLoss, {
            'anchors': datasets['train'].anchors,
            'object_scale': 5.0,
            'noobject_scale': 1.0,
            'class_scale': 1.0,
            'coord_scale': 1.0,
            'iou_thresh': 0.6,
            'reproduce_longcw': ub.argflag('--longcw'),
            'denom': ub.argval('--denom', default='num_boxes'),
        }),

        optimizer=(torch.optim.SGD, dict(
            lr=lr_step_points[0],
            momentum=0.9,
            weight_decay=0.0005
        )),

        # initializer=(nninit.KaimingNormal, {}),
        initializer=(nninit.Pretrained, {
            'fpath': pretrained_fpath,
        }),

        scheduler=(ListedLR, dict(
            step_points=lr_step_points
        )),

        other=ub.dict_union({
            'nice': str(nice),
            'batch_size': loaders['train'].batch_sampler.batch_size,
        }, postproc_params),
        centering=None,

        # centering=datasets['train'].centering,
        augment=datasets['train'].augmenter,
    )

    harn = fit_harness.FitHarness(
        hyper=hyper, xpu=xpu, loaders=loaders, max_iter=max_epoch,
        workdir=workdir,
    )
    harn.postproc_params = postproc_params
    harn.nice = nice
    harn.monitor = monitor.Monitor(min_keys=['loss'],
                                   # max_keys=['global_acc', 'class_acc'],
                                   patience=max_epoch)

    @harn.set_batch_runner
    def batch_runner(harn, inputs, labels):
        """
        Custom function to compute the output of a batch and its loss.

        Example:
            >>> import sys
            >>> sys.path.append('/home/joncrall/code/clab/examples')
            >>> from yolo_voc import *
            >>> harn = setup_harness(workers=0)
            >>> harn.initialize()
            >>> batch = harn._demo_batch(0, 'train')
            >>> inputs, labels = batch
            >>> criterion = harn.criterion
            >>> weights_fpath = darknet.demo_weights()
            >>> state_dict = torch.load(weights_fpath)['model_state_dict']
            >>> harn.model.module.load_state_dict(state_dict)
            >>> outputs, loss = harn._custom_run_batch(harn, inputs, labels)
        """
        # hack for data parallel
        # if harn.current_tag == 'train':
        outputs = harn.model(*inputs)
        # else:
        #     # Run test and validation on a single GPU
        #     outputs = harn.model.module(*inputs)

        # darknet criterion needs to know the input image shape
        inp_size = tuple(inputs[0].shape[-2:])

        aoff_pred, iou_pred, prob_pred = outputs
        gt_boxes, gt_classes, orig_size, indices, gt_weights = labels

        loss = harn.criterion(aoff_pred, iou_pred, prob_pred, gt_boxes,
                              gt_classes, gt_weights=gt_weights,
                              inp_size=inp_size, epoch=harn.epoch)
        return outputs, loss

    @harn.add_batch_metric_hook
    def custom_metrics(harn, output, labels):
        metrics_dict = ub.odict()
        criterion = harn.criterion
        metrics_dict['L_bbox'] = float(criterion.bbox_loss.data.cpu().numpy())
        metrics_dict['L_iou'] = float(criterion.iou_loss.data.cpu().numpy())
        metrics_dict['L_cls'] = float(criterion.cls_loss.data.cpu().numpy())
        return metrics_dict

    # Set as a harness attribute instead of using a closure
    harn.batch_confusions = []

    @harn.add_iter_callback
    def on_batch(harn, tag, loader, bx, inputs, labels, outputs, loss):
        """
        Custom hook to run on each batch (used to compute mAP on the fly)

        Example:
            >>> harn = setup_harness(workers=0)
            >>> harn.initialize()
            >>> batch = harn._demo_batch(0, 'train')
            >>> inputs, labels = batch
            >>> criterion = harn.criterion
            >>> loader = harn.loaders['train']
            >>> weights_fpath = darknet.demo_weights()
            >>> state_dict = torch.load(weights_fpath)['model_state_dict']
            >>> harn.model.module.load_state_dict(state_dict)
            >>> outputs, loss = harn._custom_run_batch(harn, inputs, labels)
            >>> tag = 'train'
            >>> bx = 0
            >>> on_batch(harn, tag, loader, bx, inputs, labels, outputs, loss)
        """
        # Accumulate relevant outputs to measure
        gt_boxes, gt_classes, orig_size, indices, gt_weights = labels
        # aoff_pred, iou_pred, prob_pred = outputs
        im_sizes = orig_size
        inp_size = inputs[0].shape[-2:][::-1]

        conf_thresh = harn.postproc_params['conf_thresh']
        nms_thresh = harn.postproc_params['nms_thresh']
        ovthresh = harn.postproc_params['ovthresh']

        postout = harn.model.module.postprocess(outputs, inp_size, im_sizes,
                                                conf_thresh, nms_thresh)
        # batch_pred_boxes, batch_pred_scores, batch_pred_cls_inds = postout
        # Compute: y_pred, y_true, and y_score for this batch
        batch_pred_boxes, batch_pred_scores, batch_pred_cls_inds = postout
        batch_true_boxes, batch_true_cls_inds = labels[0:2]
        batch_orig_sz, batch_img_inds = labels[2:4]

        y_batch = []
        for bx, index in enumerate(batch_img_inds.data.cpu().numpy().ravel()):
            pred_boxes  = batch_pred_boxes[bx]
            pred_scores = batch_pred_scores[bx]
            pred_cxs    = batch_pred_cls_inds[bx]

            # Group groundtruth boxes by class
            true_boxes_ = batch_true_boxes[bx].data.cpu().numpy()
            true_cxs = batch_true_cls_inds[bx].data.cpu().numpy()
            true_weights = gt_weights[bx].data.cpu().numpy()

            # Unnormalize the true bboxes back to orig coords
            orig_size = batch_orig_sz[bx]
            sx, sy = np.array(orig_size) / np.array(inp_size)
            true_boxes = true_boxes_  # fallback when there are no true boxes
            if len(true_boxes_):
                true_boxes = np.hstack([true_boxes_, true_weights[:, None]])
                true_boxes[:, 0:4:2] *= sx
                true_boxes[:, 1:4:2] *= sy

            y = voc.EvaluateVOC.image_confusions(true_boxes, true_cxs,
                                                 pred_boxes, pred_scores,
                                                 pred_cxs, ovthresh=ovthresh)
            y['gx'] = index
            y_batch.append(y)

        harn.batch_confusions.extend(y_batch)

    @harn.add_epoch_callback
    def on_epoch(harn, tag, loader):
        y = pd.concat(harn.batch_confusions)
        num_classes = len(loader.dataset.label_names)

        mean_ap, ap_list = voc.EvaluateVOC.compute_map(y, num_classes)

        harn.log_value(tag + ' epoch mAP', mean_ap, harn.epoch)
        # max_ap = np.nanmax(ap_list)
        # harn.log_value(tag + ' epoch max-AP', max_ap, harn.epoch)
        harn.batch_confusions.clear()

    return harn
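
The on_batch / on_epoch pair above implements a common evaluation pattern: accumulate per-image confusions during the epoch, score once at the end, then reset. A minimal shape of that pattern (`score_fn` stands in for something like voc.EvaluateVOC.compute_map):

class ConfusionAccumulator:
    """Collect per-batch rows, score once per epoch, then reset."""

    def __init__(self):
        self.rows = []

    def on_batch(self, y_batch):
        # Called once per batch with a list of per-image confusion rows
        self.rows.extend(y_batch)

    def on_epoch(self, score_fn):
        # Called once per epoch; scores everything seen so far and resets
        result = score_fn(self.rows)
        self.rows.clear()
        return result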