Example #1
def setup_sampler(config):
    workdir = nh.configure_workdir(config,
                                   workdir=join('~/work/siam-ibeis2',
                                                config['dbname']))

    # TODO: cleanup and hook into ibeis AI
    if config['dbname'] == 'ggr2':
        print('Creating torch CocoDataset')

        root = ub.expandpath('~/data/')
        print('root = {!r}'.format(root))

        train_dset = ndsampler.CocoDataset(
            data=join(root, 'ggr2-coco/annotations/instances_train2018.json'),
            img_root=join(root, 'ggr2-coco/images/train2018'),
        )
        train_dset.hashid = 'ggr2-coco-train2018'

        vali_dset = ndsampler.CocoDataset(
            data=join(root, 'ggr2-coco/annotations/instances_val2018.json'),
            img_root=join(root, 'ggr2-coco/images/val2018'),
        )
        vali_dset.hashid = 'ggr2-coco-val2018'

        print('Creating samplers')
        samplers = {
            'train': ndsampler.CocoSampler(train_dset, workdir=workdir),
            'vali': ndsampler.CocoSampler(vali_dset, workdir=workdir),
        }
    elif config['dbname'] == 'ggr2-revised':
        print('Creating torch CocoDataset')

        root = ub.expandpath('~/data/ggr2.coco.revised')
        print('root = {!r}'.format(root))

        train_dset = ndsampler.CocoDataset(
            data=join(root, 'annotations/instances_train2019.json'),
            img_root=join(root, 'images/train2019'),
        )
        train_dset.hashid = 'ggr2-coco-revised-train2019'

        vali_dset = ndsampler.CocoDataset(
            data=join(root, 'annotations/instances_val2019.json'),
            img_root=join(root, 'images/val2019'),
        )
        vali_dset.hashid = 'ggr2-coco-revised-val2019'

        print('Creating samplers')
        samplers = {
            'train': ndsampler.CocoSampler(train_dset, workdir=workdir),
            'vali': ndsampler.CocoSampler(vali_dset, workdir=workdir),
        }
    else:
        raise KeyError(config['dbname'])

    return samplers, workdir
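
A minimal usage sketch (hypothetical config; assumes netharn/ndsampler are installed and the ggr2 coco files exist under ~/data):

config = {'dbname': 'ggr2', 'workdir': None}
samplers, workdir = setup_sampler(config)
print(sorted(samplers.keys()))  # -> ['train', 'vali']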
Example #2
    def demo(WindowedSamplerDataset, key='habcam', **kwargs):
        import ndsampler
        if key == 'habcam':
            dset_fpath = ub.expandpath('~/data/noaa/Habcam_2015_g027250_a00102917_c0001_v2_vali.mscoco.json')
            workdir = ub.expandpath('~/work/bioharn')
            dset = ndsampler.CocoDataset(dset_fpath)
            sampler = ndsampler.CocoSampler(dset, workdir=workdir, backend=None)
        else:
            sampler = ndsampler.CocoSampler.demo(key)
        self = WindowedSamplerDataset(sampler, **kwargs)
        return self
Example #3
def test_rel_file_link():
    dpath = ub.ensure_app_cache_dir('ubelt', 'test_rel_file_link')
    ub.delete(dpath, verbose=2)
    ub.ensuredir(dpath, verbose=2)

    real_fpath = join(ub.ensuredir((dpath, 'dir1')), 'real')
    link_fpath = join(ub.ensuredir((dpath, 'dir2')), 'link')
    ub.touch(real_fpath)

    orig = os.getcwd()
    try:
        os.chdir(dpath)
        real_path = relpath(real_fpath, dpath)
        link_path = relpath(link_fpath, dpath)
        link = ub.symlink(real_path, link_path)
        import sys
        if sys.platform.startswith('win32') and isfile(link):
            # Note: if windows hard links the file there is no way we can
            # tell that it was a symlink. Just verify it exists.
            from ubelt import _win32_links
            assert _win32_links._win32_is_hardlinked(real_fpath, link_fpath)
        else:
            pointed = ub.util_links._readlink(link)
            resolved = os.path.realpath(
                ub.expandpath(join(dirname(link), pointed)))
            assert os.path.realpath(ub.expandpath(real_fpath)) == resolved
    except Exception:
        util_links._dirstats(dpath)
        util_links._dirstats(join(dpath, 'dir1'))
        util_links._dirstats(join(dpath, 'dir2'))
        print('TEST FAILED: test_rel_file_link')
        print('real_fpath = {!r}'.format(real_fpath))
        print('link_fpath = {!r}'.format(link_fpath))
        print('real_path = {!r}'.format(real_path))
        print('link_path = {!r}'.format(link_path))
        try:
            if 'link' in vars():
                print('link = {!r}'.format(link))
            if 'pointed' in vars():
                print('pointed = {!r}'.format(pointed))
            if 'resolved' in vars():
                print('resolved = {!r}'.format(resolved))
        except Exception:
            print('...rest of the names are not available')
        raise
    finally:
        util_links._dirstats(dpath)
        util_links._dirstats(join(dpath, 'dir1'))
        util_links._dirstats(join(dpath, 'dir2'))
        os.chdir(orig)
Example #4
def determine_code_dpath():
    """
    Returns a good place to put the code for the internal dependencies.

    Returns:
        PathLike: the directory where you want to store your code

    In order, the methods used for determining this are:
        * the `--codedir` command line flag (may be undocumented in the CLI)
        * the `--codedpath` command line flag (may be undocumented in the CLI)
        * the CODE_DPATH environment variable
        * the CODE_DIR environment variable
        * the directory above this script (e.g. if this is in ~/code/repo/super_setup.py then code dir resolves to ~/code)
        * the user's ~/code directory.
    """
    import os

    candidates = [
        ub.argval('--codedir', default=''),
        ub.argval('--codedpath', default=''),
        os.environ.get('CODE_DPATH', ''),
        os.environ.get('CODE_DIR', ''),
    ]
    valid = [c for c in candidates if c != '']
    if len(valid) > 0:
        code_dpath = valid[0]
    else:
        try:
            # This file should be in the top level of a repo, the directory from
            # this file should be the code directory.
            this_fpath = abspath(__file__)
            code_dpath = abspath(dirname(dirname(this_fpath)))
        except NameError:
            code_dpath = ub.expandpath('~/code')

    if not exists(code_dpath):
        code_dpath = ub.expandpath(code_dpath)

    # if CODE_DIR and not exists(CODE_DIR):
    #     import warnings
    #     warnings.warn('environment variable CODE_DIR={!r} was defined, but does not exist'.format(CODE_DIR))

    if not exists(code_dpath):
        raise Exception(
            ub.codeblock("""
            Please specify a correct code_dir using the CLI or ENV.
            code_dpath={!r} does not exist.
            """.format(code_dpath)))
    return code_dpath
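
A usage sketch showing the CODE_DPATH override documented above (the directory value is hypothetical):

import os
os.environ['CODE_DPATH'] = os.path.expanduser('~/code')
code_dpath = determine_code_dpath()
print('code_dpath = {!r}'.format(code_dpath))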
Example #5
def parse_mscoco():
    # Test that our implementation can handle the real mscoco data
    root = ub.expandpath('~/data/standard_datasets/mscoco/')

    fpath = join(root, 'annotations/instances_val2014.json')
    img_root = normpath(ub.ensuredir((root, 'images', 'val2014')))

    # fpath = join(root, 'annotations/stuff_val2017.json')
    # img_root = normpath(ub.ensuredir((root, 'images', 'val2017')))

    import ujson
    with open(fpath, 'rb') as file:
        dataset = ujson.load(file)

    import ndsampler
    dset = ndsampler.CocoDataset(dataset)
    dset.img_root = img_root

    gid_iter = iter(dset.imgs.keys())

    gid = ub.peek(gid_iter)

    for gid in ub.ProgIter(gid_iter):
        img = dset.imgs[gid]
        ub.grabdata(img['coco_url'], dpath=img_root, verbose=0)
        anns = [dset.anns[aid] for aid in dset.gid_to_aids[gid]]
        dset.show_image(gid=gid)

    ann = anns[0]

    segmentation = ann['segmentation']

    from PIL import Image
    gpath = join(dset.img_root, img['file_name'])
    with Image.open(gpath) as pil_img:
        np_img = np.array(pil_img)
Example #6
    def history(self):
        """
        If available, return the history of the model as well.
        """
        import netharn as nh
        if self.info is None:
            # TODO: check for train_info.json in a few different places
            fpath = self._rectify_fpath()
            snap_fpath = ub.expandpath(fpath)
            candidate_paths = [
                join(dirname(snap_fpath), 'train_info.json'),
                join(dirname(dirname(snap_fpath)), 'train_info.json'),
            ]
            info = None
            for info_fpath in candidate_paths:
                info_fpath = normpath(info_fpath)
                try:
                    # Info might be inside of a zipfile
                    info = nh.util.read_json(nh.util.zopen(info_fpath))
                    break
                except Exception:
                    pass
            if info is None:
                info = '__UNKNOWN__'
        else:
            info = self.info
        return info
Example #7
    def normalize(self):
        if self['pretrained'] in ['null', 'None']:
            self['pretrained'] = None

        if self['datasets'] == 'special:voc':
            self['train_dataset'] = ub.expandpath('~/data/VOC/voc-trainval.mscoco.json')
            self['vali_dataset'] = ub.expandpath('~/data/VOC/voc-test-2007.mscoco.json')

        key = self.get('pretrained', None) or self.get('init', None)
        if key == 'imagenet':
            self['pretrained'] = yolo2.initial_imagenet_weights()
        elif key == 'lightnet':
            self['pretrained'] = yolo2.demo_voc_weights()

        if self['pretrained'] is not None:
            self['init'] = 'pretrained'
Example #8
    def _search_param_in_docstr(docstr, param_str):
        """
        Search `docstr` for type(-s) of `param_str`.
        >>> _search_param_in_docstr(':type param: int', 'param')
        ['int']
        >>> _search_param_in_docstr('@type param: int', 'param')
        ['int']
        >>> _search_param_in_docstr(
        ...   ':type param: :class:`threading.Thread`', 'param')
        ['threading.Thread']
        >>> bool(_search_param_in_docstr('no document', 'param'))
        False
        >>> _search_param_in_docstr(':param int param: some description', 'param')
        ['int']
        """
        if DEBUG:
            open(ub.expandpath('~/jedi-test.txt'), 'a').write('search param\n')
        # look at #40 to see definitions of those params
        patterns = [
            re.compile(p % re.escape(param_str))
            for p in module.DOCSTRING_PARAM_PATTERNS
        ]
        for pattern in patterns:
            match = pattern.search(docstr)
            if match:
                return [module._strip_rst_role(match.group(1))]

        return (module._search_param_in_numpydocstr(docstr, param_str)
                or list(_search_param_in_googledocstr(docstr, param_str)))
Example #9
def ensure_voc_coco(dpath=None):
    """
    Download the Pascal VOC data and convert it to coco if it does not already exist.

    Args:
        dpath (str): download directory. Defaults to "~/data/VOC".

    Returns:
        Dict[str, str]: mapping from dataset tags to coco file paths.
            The original datasets have keys prefixed with underscores.
            The standard splits keys are train, vali, and test.
    """
    if dpath is None:
        dpath = ub.expandpath('~/data/VOC')

    paths = {
        '_train-2007': join(dpath, 'voc-train-2007.mscoco.json'),
        '_train-2012': join(dpath, 'voc-train-2012.mscoco.json'),
        '_val-2007': join(dpath, 'voc-val-2007.mscoco.json'),
        '_val-2012': join(dpath, 'voc-val-2012.mscoco.json'),
        'trainval': join(dpath, 'voc-trainval.mscoco.json'),
        'train': join(dpath, 'voc-train.mscoco.json'),
        'vali': join(dpath, 'voc-val.mscoco.json'),
        'test': join(dpath, 'voc-test-2007.mscoco.json'),
    }
    if not all(map(exists, paths.values())):
        ensure_voc_data(dpath=dpath)
        convert_voc_to_coco(dpath=dpath)

    return paths
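
Typical usage (a sketch; the first call downloads and converts several GB of VOC data):

paths = ensure_voc_coco(dpath=ub.expandpath('~/data/VOC'))
train_fpath = paths['train']
vali_fpath = paths['vali']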
Example #10
    def expand_module(path):
        # TODO: use ubelt util_import instead
        import ubelt as ub
        _debug = 0
        if _debug:
            import sys
            print('sys.base_exec_prefix = {!r}'.format(sys.base_exec_prefix))
            print('sys.base_prefix = {!r}'.format(sys.base_prefix))
            print('sys.exec_prefix = {!r}'.format(sys.exec_prefix))
            print('sys.executable = {!r}'.format(sys.executable))
            print('sys.implementation = {!r}'.format(sys.implementation))
            print('sys.prefix = {!r}'.format(sys.prefix))
            print('sys.version = {!r}'.format(sys.version))
            print('sys.path = {!r}'.format(sys.path))

        import sys
        extra_path = CONFIG.get('vimtk_sys_path')
        sys_path = sys.path + [ub.expandpath(p) for p in extra_path]
        print('expand path = {!r}'.format(path))
        modparts = path.split('.')
        for i in reversed(range(1, len(modparts) + 1)):
            candidate = '.'.join(modparts[0:i])
            print('candidate = {!r}'.format(candidate))
            path = ub.modname_to_modpath(candidate, sys_path=sys_path)
            if path is not None:
                break
        print('expanded modname-to-path = {!r}'.format(path))
        return path
Example #11
    def _search_return_in_numpydocstr(docstr):
        """
        Search `docstr` (in numpydoc format) for type(-s) of function returns.
        """
        if DEBUG:
            open(ub.expandpath('~/jedi-test.txt'),
                 'a').write('search return\n')
        try:
            doc = module._get_numpy_doc_string_cls()(docstr)
        except ImportError:
            return
        try:
            # This is a non-public API. If it ever changes we should be
            # prepared and return gracefully.
            returns = doc._parsed_data['Returns']
            returns += doc._parsed_data['Yields']
        except (KeyError, AttributeError):
            return
        for r_name, r_type, r_descr in returns:
            # Return names are optional; if omitted, the type is in the name
            if not r_type:
                r_type = r_name
            for type_ in module._expand_typestr(r_type):
                yield type_

        # Injected code:
        # Check for google style return hint
        for type_ in _search_return_in_googledocstr(docstr):
            yield type_
Example #12
def configure_workdir(config={}, **kw):
    config = _update_defaults(config, kw)
    if config['workdir'] is None:
        config['workdir'] = kw['workdir']
    workdir = config['workdir'] = ub.expandpath(config['workdir'])
    ub.ensuredir(workdir)
    return workdir
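
A minimal sketch of how this is typically called (the workdir value is hypothetical):

config = {'workdir': None}
workdir = configure_workdir(config, workdir='~/work/my_project')
print(workdir)  # the expanded path, guaranteed to exist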
Example #13
def main():
    try:
        script = __file__
    except NameError:
        # fallback for IPython hacking, where __file__ is not defined
        script = ub.expandpath(
            '~/code/ndsampler/dev/devcheck_python23_isect_index_cache.py')

    # py2 = ub.find_exe('python2')
    # py3 = ub.find_exe('python3')
    # ub.cmd([py2, script, 'load_regions'], shell=True)
    # ub.cmd([py3, script, 'save_regions'], shell=True)

    # Register scripts for activating python 2/3 virtual envs that have
    # ndsampler installed

    import getpass
    username = getpass.getuser()

    if username in ['joncrall', 'jon.crall']:
        # Hack for Jon's computer
        activate_cmds = {
            'python2': 'we py2.7',
            'python3': 'we venv3.6',
        }
    else:
        assert False, 'need to customize activation scripts for your machine'
        activate_cmds = {
            'python2': 'source ~/venv27/bin/activate',
            'python3': 'conda activate py36',
        }

    def run(py):
        bash_cmd = ' && '.join([
            'source $HOME/.bashrc',
            activate_cmds[py],
            'python {} access_cache'.format(script),
        ])
        sh_cmd = 'bash -c "{}"'.format(bash_cmd)
        info = ub.cmd(sh_cmd, shell=True, verbose=3)
        return info

    workdir = ub.ensure_app_cache_dir('ndsampler', 'tests', '23_regions')

    # Save in python3, load in python2
    print('\n\n--- SAVE Python3, LOAD Python2 ---')
    ub.delete(workdir, verbose=1)
    info = run('python3')  # NOQA
    assert info['ret'] == 0
    info = run('python2')  # NOQA
    assert info['ret'] == 0

    print('\n\n--- SAVE Python2, LOAD Python3 ---')
    ub.delete(workdir, verbose=1)  # Clear the cache
    info = run('python2')  # NOQA
    assert info['ret'] == 0
    info = run('python3')  # NOQA
    assert info['ret'] == 0
Example #14
def ensure_voc_data(dpath=None, force=False, years=[2007, 2012]):
    """
    Download the Pascal VOC data if it does not already exist.

    Example:
        >>> # xdoctest: +REQUIRES(--download)
        >>> devkit_dpath = ensure_voc_data()
    """
    if dpath is None:
        dpath = ub.expandpath('~/data/VOC')
    devkit_dpath = join(dpath, 'VOCdevkit')
    # if force or not exists(devkit_dpath):
    ub.ensuredir(dpath)

    def extract_tarfile(fpath, dpath='.'):
        # Old way
        # ub.cmd('tar xvf "{}" -C "{}"'.format(fpath, dpath), verbout=1)
        import tarfile
        with tarfile.open(fpath) as tar:
            tar.extractall(dpath)

    fpath1 = ub.grabdata(
        'http://host.robots.ox.ac.uk/pascal/VOC/voc2007/VOCdevkit_08-Jun-2007.tar',
        dpath=dpath)
    if force or not exists(join(dpath, 'VOCdevkit', 'VOCcode')):
        extract_tarfile(fpath1, dpath)

    if 2007 in years:
        # VOC 2007 train+validation data
        fpath2 = ub.grabdata(
            'http://host.robots.ox.ac.uk/pascal/VOC/voc2007/VOCtrainval_06-Nov-2007.tar',
            dpath=dpath)
        if force or not exists(
                join(dpath, 'VOCdevkit', 'VOC2007', 'ImageSets', 'Main',
                     'bird_trainval.txt')):
            extract_tarfile(fpath2, dpath)

        # VOC 2007 test data
        fpath3 = ub.grabdata(
            'http://host.robots.ox.ac.uk/pascal/VOC/voc2007/VOCtest_06-Nov-2007.tar',
            dpath=dpath)
        if force or not exists(
                join(dpath, 'VOCdevkit', 'VOC2007', 'ImageSets', 'Main',
                     'bird_test.txt')):
            extract_tarfile(fpath3, dpath)

    if 2012 in years:
        # VOC 2012 train+validation data
        fpath4 = ub.grabdata(
            'https://pjreddie.com/media/files/VOCtrainval_11-May-2012.tar',
            dpath=dpath)
        if force or not exists(
                join(dpath, 'VOCdevkit', 'VOC2012', 'ImageSets', 'Main',
                     'bird_trainval.txt')):
            extract_tarfile(fpath4, dpath)
    return devkit_dpath
Example #15
    def demo(Repo, ensure=True):
        repo = Repo(
            remote='https://github.com/Erotemic/ubelt.git',
            code_dpath=ub.ensuredir(ub.expandpath('~/tmp/demo-repos')),
        )
        if ensure:
            repo.ensure()
        return repo
Example #16
    def cast(self, value=None):
        if isinstance(value, six.string_types):
            paths1 = sorted(glob.glob(ub.expandpath(value)))
            paths2 = smartcast.smartcast(value)
            if paths1:
                value = paths1
            else:
                value = paths2
        return value
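
The core behavior can be reproduced standalone (a sketch of the idea, not the scriptconfig API; the glob pattern is hypothetical):

import glob
import ubelt as ub
# A glob-like string expands to a sorted list of matching paths
paths = sorted(glob.glob(ub.expandpath('~/code/*/setup.py')))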
Example #17
def setup_datasets(workdir=None):
    if workdir is None:
        workdir = ub.expandpath('~/data/mnist/')

    # Define your dataset
    transform = torchvision.transforms.Compose([
        torchvision.transforms.ToTensor(),
        # torchvision.transforms.Normalize((0.1307,), (0.3081,))
    ])

    learn_dset = nh.data.MNIST(workdir,
                               transform=transform,
                               train=True,
                               download=True)

    test_dset = nh.data.MNIST(workdir,
                              transform=transform,
                              train=False,
                              download=True)

    # split the learning dataset into training and validation
    # take a subset of data
    factor = .15
    n_vali = int(len(learn_dset) * factor)
    learn_idx = np.arange(len(learn_dset))

    rng = np.random.RandomState(0)
    rng.shuffle(learn_idx)

    reduction = int(ub.argval('--reduction', default=1))
    vali_idx = torch.LongTensor(learn_idx[:n_vali][::reduction])
    train_idx = torch.LongTensor(learn_idx[n_vali:][::reduction])

    train_dset = torch.utils.data.Subset(learn_dset, train_idx)
    vali_dset = torch.utils.data.Subset(learn_dset, vali_idx)

    datasets = {
        'train': train_dset,
        'vali': vali_dset,
        'test': test_dset,
    }
    if not ub.argflag('--test'):
        del datasets['test']
    for tag, dset in datasets.items():
        # Construct the PCCs (positive connected components)
        # These are groups of item indices which are positive matches
        if isinstance(dset, torch.utils.data.Subset):
            labels = dset.dataset.train_labels[dset.indices]
        else:
            labels = dset.labels
        unique_labels, groupxs = kwarray.group_indices(labels.numpy())
        dset.pccs = [xs.tolist() for xs in groupxs]

    # Give the training dataset an input_id
    datasets['train'].input_id = 'mnist_' + ub.hash_data(
        train_idx.numpy())[0:8]
    return datasets, workdir
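
Usage sketch (assumes torch, torchvision, netharn, and kwarray are installed; MNIST is downloaded on the first run):

datasets, workdir = setup_datasets()
print(ub.repr2(ub.map_vals(len, datasets), nl=0))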
Example #18
    def ensure_voc_data(VOCDataset,
                        dpath=None,
                        force=False,
                        years=[2007, 2012]):
        """
        Download the Pascal VOC 2007 data if it does not already exist.

        CommandLine:
            python -m netharn.data.voc VOCDataset.ensure_voc_data

        Example:
            >>> # SCRIPT
            >>> # xdoc: +REQUIRES(--voc)
            >>> from netharn.data.voc import *  # NOQA
            >>> VOCDataset.ensure_voc_data()
        """
        if dpath is None:
            dpath = ub.expandpath('~/data/VOC')
        devkit_dpath = join(dpath, 'VOCdevkit')
        # if force or not exists(devkit_dpath):
        ub.ensuredir(dpath)

        fpath1 = ub.grabdata(
            'http://host.robots.ox.ac.uk/pascal/VOC/voc2007/VOCdevkit_08-Jun-2007.tar',
            dpath=dpath)
        if force or not exists(join(dpath, 'VOCdevkit', 'VOCcode')):
            ub.cmd('tar xvf "{}" -C "{}"'.format(fpath1, dpath), verbout=1)

        if 2007 in years:
            # VOC 2007 train+validation data
            fpath2 = ub.grabdata(
                'http://host.robots.ox.ac.uk/pascal/VOC/voc2007/VOCtrainval_06-Nov-2007.tar',
                dpath=dpath)
            if force or not exists(
                    join(dpath, 'VOCdevkit', 'VOC2007', 'ImageSets', 'Main',
                         'bird_trainval.txt')):
                ub.cmd('tar xvf "{}" -C "{}"'.format(fpath2, dpath), verbout=1)

            # VOC 2007 test data
            fpath3 = ub.grabdata(
                'http://host.robots.ox.ac.uk/pascal/VOC/voc2007/VOCtest_06-Nov-2007.tar',
                dpath=dpath)
            if force or not exists(
                    join(dpath, 'VOCdevkit', 'VOC2007', 'ImageSets', 'Main',
                         'bird_test.txt')):
                ub.cmd('tar xvf "{}" -C "{}"'.format(fpath3, dpath), verbout=1)

        if 2012 in years:
            # VOC 2012 train+validation data
            fpath4 = ub.grabdata(
                'https://pjreddie.com/media/files/VOCtrainval_11-May-2012.tar',
                dpath=dpath)
            if force or not exists(
                    join(dpath, 'VOCdevkit', 'VOC2012', 'ImageSets', 'Main',
                         'bird_trainval.txt')):
                ub.cmd('tar xvf "{}" -C "{}"'.format(fpath4, dpath), verbout=1)
        return devkit_dpath
Example #19
def test_rel_dir_link():
    dpath = ub.ensure_app_cache_dir('ubelt', 'test_rel_dir_link')
    ub.delete(dpath, verbose=2)
    ub.ensuredir(dpath, verbose=2)

    real_dpath = join(ub.ensuredir((dpath, 'dir1')), 'real')
    link_dpath = join(ub.ensuredir((dpath, 'dir2')), 'link')
    ub.ensuredir(real_dpath)

    orig = os.getcwd()
    try:
        os.chdir(dpath)
        real_path = relpath(real_dpath, dpath)
        link_path = relpath(link_dpath, dpath)
        link = ub.symlink(real_path, link_path)
        # Note: on windows this is hacked.
        pointed = ub.util_links._readlink(link)
        resolved = os.path.realpath(ub.expandpath(join(dirname(link),
                                                       pointed)))
        assert os.path.realpath(ub.expandpath(real_dpath)) == resolved
    except Exception:
        util_links._dirstats(dpath)
        util_links._dirstats(join(dpath, 'dir1'))
        util_links._dirstats(join(dpath, 'dir2'))
        print('TEST FAILED: test_rel_dir_link')
        print('real_dpath = {!r}'.format(real_dpath))
        print('link_dpath = {!r}'.format(link_dpath))
        print('real_path = {!r}'.format(real_path))
        print('link_path = {!r}'.format(link_path))
        try:
            if 'link' in vars():
                print('link = {!r}'.format(link))
            if 'pointed' in vars():
                print('pointed = {!r}'.format(pointed))
            if 'resolved' in vars():
                print('resolved = {!r}'.format(resolved))
        except Exception:
            print('...rest of the names are not available')
        raise
    finally:
        util_links._dirstats(dpath)
        util_links._dirstats(join(dpath, 'dir1'))
        util_links._dirstats(join(dpath, 'dir2'))
        os.chdir(orig)
Example #20
def setup_datasets(workdir=None):
    if workdir is None:
        workdir = ub.expandpath('~/data/mnist/')

    # Define your dataset
    transform = torchvision.transforms.Compose([
        torchvision.transforms.ToTensor(),
        torchvision.transforms.Normalize((0.1307, ), (0.3081, ))
    ])

    learn_dset = torchvision.datasets.MNIST(workdir,
                                            transform=transform,
                                            train=True,
                                            download=True)

    test_dset = torchvision.datasets.MNIST(workdir,
                                           transform=transform,
                                           train=False,
                                           download=True)

    # split the learning dataset into training and validation
    # take a subset of data
    factor = .15
    n_vali = int(len(learn_dset) * factor)
    learn_idx = np.arange(len(learn_dset))

    rng = np.random.RandomState(0)
    rng.shuffle(learn_idx)

    reduction = int(ub.argval('--reduction', default=1))
    vali_idx = torch.LongTensor(learn_idx[:n_vali][::reduction])
    train_idx = torch.LongTensor(learn_idx[n_vali:][::reduction])

    train_dset = torch.utils.data.Subset(learn_dset, train_idx)
    vali_dset = torch.utils.data.Subset(learn_dset, vali_idx)

    classes = [
        'zero', 'one', 'two', 'three', 'four', 'five', 'six', 'seven', 'eight',
        'nine'
    ]

    datasets = {
        'train': train_dset,
        'vali': vali_dset,
        'test': test_dset,
    }
    for tag, dset in datasets.items():
        dset.classes = classes
        dset.num_classes = len(classes)

    # Give the training dataset an input_id
    datasets['train'].input_id = 'mnist_' + ub.hash_data(
        train_idx.numpy())[0:8]
    return datasets, workdir
Example #21
def count_ubelt_usage():
    import ubelt as ub
    import glob
    from os.path import join
    names = [
        'xdoctest',
        'netharn',
        'xdev',
        'xinspect',
        'ndsampler',
        'kwil',
        'kwarray',
        'kwimage',
        'kwplot',
        'scriptconfig',
    ]

    all_fpaths = []
    for name in names:
        repo_fpath = ub.expandpath(join('~/code', name))
        fpaths = glob.glob(join(repo_fpath, '**', '*.py'), recursive=True)
        for fpath in fpaths:
            all_fpaths.append((name, fpath))

    import re
    pat = re.compile(r'\bub\.(?P<attr>[a-zA-Z_][A-Za-z_0-9]*)\b')

    pkg_to_hist = ub.ddict(lambda: ub.ddict(int))
    for name, fpath in ub.ProgIter(all_fpaths):
        text = open(fpath, 'r').read()
        for match in pat.finditer(text):
            attr = match.groupdict()['attr']
            if attr in ub.__all__:
                pkg_to_hist[name][attr] += 1

    hist_iter = iter(pkg_to_hist.values())
    usage = next(hist_iter).copy()
    for other in hist_iter:
        for k, v in other.items():
            usage[k] += v
    for attr in ub.__all__:
        usage[attr] += 0

    for name in pkg_to_hist.keys():
        pkg_to_hist[name] = ub.odict(
            sorted(pkg_to_hist[name].items(), key=lambda t: t[1])[::-1])

    usage = ub.odict(sorted(usage.items(), key=lambda t: t[1])[::-1])

    print(ub.repr2(pkg_to_hist, nl=2))
    print(ub.repr2(usage, nl=1))
Example #22
def main():
    import argparse
    parser = argparse.ArgumentParser(prog='manage_runs',
                                     description=ub.codeblock('''
            Summarize, cleanup, and manage runs in a workdir produced by netharn
            '''))
    parser.add_argument(*('-w', '--workdir'),
                        type=str,
                        help='specify the workdir for your project',
                        default=None)
    parser.add_argument(*('-f', '--force'),
                        help='force changes (disable the default dry run)',
                        action='store_false',
                        dest='dry')
    # parser.add_argument(*('-n', '--dry'), help='dry run', action='store_true')
    parser.add_argument(*('--recent', ),
                        help='num recent to keep',
                        type=int,
                        default=100)
    parser.add_argument(*('--factor', ),
                        help='keep one every <factor> epochs',
                        type=int,
                        default=1)

    valid_modes = [
        'clean_runs',
        'clean_checkpoints',
        'clean_monitor',
        'summarize',
    ]

    parser.add_argument('--mode',
                        help='valid modes are {}'.format(
                            ', '.join(valid_modes)),
                        default='summarize')

    args, unknown = parser.parse_known_args()
    ns = args.__dict__.copy()
    print('ns = {!r}'.format(ns))

    mode = ns.pop('mode')
    if ns['workdir'] is not None:
        ns['workdir'] = ub.expandpath(ns['workdir'])

    if mode == 'clean_runs':
        _devcheck_remove_dead_runs(workdir=ns['workdir'], dry=ns['dry'])
    elif mode == 'clean_checkpoints':
        _devcheck_manage_checkpoints(**ns)
    elif mode == 'clean_monitor':
        _devcheck_manage_monitor(workdir=ns['workdir'], dry=ns['dry'])
    elif mode == 'summarize':
        _summarize_workdir(workdir=ns['workdir'])
    else:
        raise KeyError(mode)
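
A sketch simulating the CLI (argument values hypothetical):

import sys
sys.argv[1:] = ['--workdir', '~/work/my_project', '--mode', 'summarize']
main()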
Example #23
def test_pathlib():
    try:
        import pathlib
        base = pathlib.Path(ub.ensure_app_cache_dir('ubelt'))
        dpath = base.joinpath('test_pathlib_mkdir')

        # ensuredir
        ub.delete(dpath)
        assert not dpath.exists()
        got = ub.ensuredir(dpath)
        assert got.exists()

        # compressuser
        assert ub.compressuser(base) == '~/.cache/ubelt'

        assert ub.augpath(base, prefix='foo').endswith('fooubelt')

        ub.expandpath(base)

    except ImportError:
        import pytest
        pytest.skip('pathlib is not installed')
Example #24
def autogen_mkint_utils():
    import ubelt as ub

    # Uses the liberator module (the standalone port of the netharn closer)
    from liberator import closer

    closer = closer.Closer()

    from ubelt import util_import

    closer.add_dynamic(util_import.split_modpath)
    closer.add_dynamic(util_import.modpath_to_modname)
    closer.add_dynamic(util_import.modname_to_modpath)

    closer.expand(["ubelt"])
    text = closer.current_sourcecode()
    print(text)

    import redbaron

    new_baron = redbaron.RedBaron(text)
    new_names = [n.name for n in new_baron.node_list if n.type in ["class", "def"]]

    import mkinit
    from mkinit import util
    from mkinit.util import util_import  # NOQA

    old_baron = redbaron.RedBaron(open(mkinit.util.util_import.__file__, "r").read())

    old_names = [n.name for n in old_baron.node_list if n.type in ["class", "def"]]

    set(old_names) - set(new_names)
    set(new_names) - set(old_names)

    prefix = ub.codeblock(
        '''
        # -*- coding: utf-8 -*-
        """
        This file was autogenerated based on code in ubelt
        """
        from __future__ import print_function, division, absolute_import, unicode_literals
        '''
    )

    code = prefix + "\n" + text + "\n"
    print(code)

    fpath = ub.expandpath("~/code/mkinit/mkinit/util/util_import.py")

    open(fpath, "w").write(code)
Example #25
def demo():
    """
    CommandLine:

        # Correctly reports skipped (although a skipped-only test report
        # should probably be yellow)
        xdoctest -m dev/demo_issues.py demo_requires_skips_all_v1

        # Incorrectly reports success
        xdoctest -m dev/demo_issues.py demo_requires_skips_all_v2

        # Correctly reports success
        xdoctest -m dev/demo_issues.py demo_requires_skips_all_v2 --cliflag

        # Correctly reports success
        xdoctest -m dev/demo_issues.py demo_requires_skips_all_v1 --cliflag
    """

    # Programmatic reproduction (notice the first one also reports itself in
    # pytest mode, which is also wrong)
    import xdoctest
    xdoctest.doctest_callable(demo_requires_skips_all_v1)
    xdoctest.doctest_callable(demo_requires_skips_all_v2)

    import sys, ubelt
    sys.path.append(ubelt.expandpath('~/code/xdoctest/dev'))
    import demo_issues

    # Correctly reports skipped
    xdoctest.doctest_module(demo_issues,
                            command='demo_requires_skips_all_v1',
                            argv=[])

    # Incorrectly reports passed
    xdoctest.doctest_module(demo_issues,
                            command='demo_requires_skips_all_v2',
                            argv=[])

    # argv not respected?
    xdoctest.doctest_module(demo_issues,
                            command='demo_requires_skips_all_v1',
                            argv=['--cliflag'])

    # argv not respected?
    xdoctest.doctest_module(demo_issues,
                            command='demo_requires_skips_all_v2',
                            argv=['--cliflag'])
Example #26
    def expand_module_prefix(path):
        # TODO: we could parse the AST to figure out if the prefix is an alias
        # for a known module.
        # Check if the path certainly looks like it could be a chain of python
        # attribute accessors.
        if re.match(r'^[\w\d_.]*$', path):
            extra_path = CONFIG.get('vimtk_sys_path')
            sys_path = sys.path + [ub.expandpath(p) for p in extra_path]
            parts = path.split('.')
            for i in reversed(range(len(parts))):
                prefix = '.'.join(parts[:i])
                path = ub.modname_to_modpath(prefix, sys_path=sys_path)
                if path is not None:
                    print('expanded prefix = {!r}'.format(path))
                    return path
        print('expanded prefix = {!r}'.format(None))
        return None
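
The underlying resolution step can be exercised directly (a minimal sketch):

import ubelt as ub
# Resolve a dotted module name to the path of its source file
modpath = ub.modname_to_modpath('ubelt.util_path')
print('modpath = {!r}'.format(modpath))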
Example #27
    def __init__(repo, **kwargs):
        repo.name = kwargs.pop('name', None)
        repo.dpath = kwargs.pop('dpath', None)
        repo.code_dpath = kwargs.pop('code_dpath', None)
        repo.remotes = kwargs.pop('remotes', None)
        repo.remote = kwargs.pop('remote', None)
        repo.branch = kwargs.pop('branch', 'master')

        repo._logged_lines = []
        repo._logged_cmds = []

        if repo.remote is None:
            if repo.remotes is None:
                raise ValueError('must specify some remote')
            else:
                if len(repo.remotes) > 1:
                    raise ValueError('remotes are ambiguous, specify one')
                else:
                    repo.remote = ub.peek(repo.remotes)
        else:
            if repo.remotes is None:
                _default_remote = 'origin'
                repo.remotes = {_default_remote: repo.remote}
                repo.remote = _default_remote

        repo.url = repo.remotes[repo.remote]

        if repo.name is None:
            suffix = repo.url.split('/')[-1]
            repo.name = suffix.split('.git')[0]

        if repo.dpath is None:
            repo.dpath = join(repo.code_dpath, repo.name)

        repo.pkg_dpath = join(repo.dpath, repo.name)

        for path_attr in ['dpath', 'code_dpath']:
            path = getattr(repo, path_attr)
            if path is not None:
                setattr(repo, path_attr, ub.expandpath(path))

        repo.verbose = kwargs.pop('verbose', 3)
        if kwargs:
            raise ValueError('unknown kwargs = {}'.format(kwargs.keys()))

        repo._pygit = None
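
Construction mirrors the demo in Example #15 (the directory is hypothetical):

repo = Repo(
    remote='https://github.com/Erotemic/ubelt.git',
    code_dpath=ub.expandpath('~/tmp/demo-repos'),
)
print(repo.name)   # 'ubelt', parsed from the remote URL
print(repo.dpath)  # join(code_dpath, repo.name), expanded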
Example #28
def cmake_clean(dpath='.'):
    """
    """
    dpath = ub.expandpath(dpath)
    cmake_cache_fpath = join(dpath, 'CMakeCache.txt')
    if not exists(cmake_cache_fpath):
        raise Exception('This does not look like a cmake build directory. '
                        'No CMakeCache.txt exists')
    fpath_set = set(glob.glob(join(dpath, '*'))) - {cmake_cache_fpath}

    for fpath in list(fpath_set):
        if basename(fpath).startswith('_cmake_build_backup_'):
            fpath_set.remove(fpath)

    backup_dpath = ub.ensuredir(
        join(dpath, '_cmake_build_backup_' + ub.timestamp()))
    for fpath in ub.ProgIter(fpath_set, 'moving files'):
        shutil.move(fpath, backup_dpath)
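
A usage sketch (build directory hypothetical; files are moved into a backup subdirectory, not deleted):

cmake_clean(ub.expandpath('~/code/my_project/build'))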
Example #29
def _autogen_xdoctest_utils():
    import ubelt as ub

    # Uses netharn closer until it is ported to a standalone module
    import netharn as nh
    closer = nh.export.closer.Closer()

    from ubelt import util_import
    closer.add_dynamic(util_import.split_modpath)
    closer.add_dynamic(util_import.modpath_to_modname)
    closer.add_dynamic(util_import.modname_to_modpath)
    closer.add_dynamic(util_import.import_module_from_name)
    closer.add_dynamic(util_import.import_module_from_path)
    closer.add_dynamic(util_import._pkgutil_modname_to_modpath)
    closer.add_dynamic(util_import._importlib_import_modpath)
    closer.add_dynamic(util_import.is_modname_importable)

    closer.expand(['ubelt'])
    text = closer.current_sourcecode()
    print(text)

    import redbaron
    new_baron = redbaron.RedBaron(text)
    new_names = [n.name for n in new_baron.node_list if n.type in ['class', 'def']]

    import xdoctest
    old_baron = redbaron.RedBaron(open(xdoctest.utils.util_import.__file__, 'r').read())

    old_names = [n.name for n in old_baron.node_list if n.type in ['class', 'def']]

    set(old_names) - set(new_names)
    set(new_names) - set(old_names)

    prefix = ub.codeblock(
        '''
        # -*- coding: utf-8 -*-
        """
        This file was autogenerated based on code in ubelt
        """
        from __future__ import print_function, division, absolute_import, unicode_literals
        ''')

    fpath = ub.expandpath('~/code/xdoctest/xdoctest/utils/util_import.py')
    open(fpath, 'w').write(prefix + '\n' + text + '\n')
Example #30
def main():
    flake8_errors = [
        'E126',  # continuation line hanging-indent
        'E127',  # continuation line over-indented for visual indent
        'E201',  # whitespace after '('
        'E202',  # whitespace before ']'
        'E203',  # whitespace before ', '
        'E221',  # multiple spaces before operator  (TODO: I wish I could make an exception for the equals operator. Is there a way to do this?)
        'E222',  # multiple spaces after operator
        'E241',  # multiple spaces after ,
        'E265',  # block comment should start with "# "
        'E271',  # multiple spaces after keyword
        'E272',  # multiple spaces before keyword
        'E301',  # expected 1 blank line, found 0
        'E305',  # expected 2 blank lines after class / func
        'E306',  # expected 1 blank line before func
        #'E402',  # module import not at top
        'E501',  # line length > 79
        'W602',  # Old reraise syntax
        'E266',  # too many leading '#' for block comment
        'N801',  # class names should use the CapWords convention
        'N802',  # function name should be lowercase
        'N803',  # argument name should be lowercase
        'N805',  # first argument of a method should be named 'self'
        'N806',  # variable in function should be lowercase [N806]
        'N811',  # constant name imported as non constant
        'N813',  # camel case
        'W504',  # line break after binary operator
    ]
    flake8_args_list = [
        '--max-line-length 79',
        #'--max-line-length 100',
        '--ignore=' + ','.join(flake8_errors)
    ]
    flake8_args = ' '.join(flake8_args_list)

    import ubelt as ub
    import sys
    loc = ub.expandpath('~/code/xdoctest/xdoctest')
    command = 'flake8 ' + flake8_args + ' ' + loc
    print('command = {!r}'.format(command))
    info = ub.cmd(command, verbose=3)
    sys.exit(info['ret'])
Example #31
def write_default_ipython_profile():
    """
    CommandLine:
        python ~/local/init/init_ipython_config.py

        python -c "import xdev, ubelt; xdev.startfile(ubelt.truepath('~/.ipython/profile_default'))"
        python -c "import xdev, ubelt; xdev.editfile(ubelt.truepath('~/.ipython/profile_default/ipython_config.py'))"

    References:
        http://2sn.org/python/ipython_config.py
    """
    dpath = ub.expandpath('~/.ipython/profile_default')
    ub.ensuredir(dpath)
    ipy_config_fpath = join(dpath, 'ipython_config.py')
    ipy_config_text = ub.codeblock(
        r'''
        # STARTBLOCK
        import six
        c = get_config()  # NOQA
        c.InteractiveShellApp.exec_lines = []
        if six.PY2:
            future_line = (
                'from __future__ import absolute_import, division, print_function, with_statement, unicode_literals')
            c.InteractiveShellApp.exec_lines.append(future_line)
            # Fix sip versions
            try:
                import sip
                # http://stackoverflow.com/questions/21217399/pyqt4-qtcore-qvariant-object-instead-of-a-string
                sip.setapi('QVariant', 2)
                sip.setapi('QString', 2)
                sip.setapi('QTextStream', 2)
                sip.setapi('QTime', 2)
                sip.setapi('QUrl', 2)
                sip.setapi('QDate', 2)
                sip.setapi('QDateTime', 2)
                if hasattr(sip, 'setdestroyonexit'):
                    sip.setdestroyonexit(False)  # This prevents a crash on windows
            except ImportError as ex:
                pass
            except ValueError as ex:
                print('Warning: Value Error: %s' % str(ex))
                pass
        c.InteractiveShellApp.exec_lines.append('%load_ext autoreload')
        c.InteractiveShellApp.exec_lines.append('%autoreload 2')
        #c.InteractiveShellApp.exec_lines.append('%pylab qt4')
        c.InteractiveShellApp.exec_lines.append('import numpy as np')
        c.InteractiveShellApp.exec_lines.append('import ubelt as ub')
        c.InteractiveShellApp.exec_lines.append('import xdev')
        c.InteractiveShellApp.exec_lines.append('import pandas as pd')
        c.InteractiveShellApp.exec_lines.append('pd.options.display.max_columns = 40')
        c.InteractiveShellApp.exec_lines.append('pd.options.display.width = 160')
        c.InteractiveShellApp.exec_lines.append('pd.options.display.max_rows = 20')
        c.InteractiveShellApp.exec_lines.append('pd.options.display.float_format = lambda x: \'%.4f\' % (x,)')
        c.InteractiveShellApp.exec_lines.append('import networkx as nx')
        c.InteractiveShellApp.exec_lines.append('from os.path import *')
        c.InteractiveShellApp.exec_lines.append('from six.moves import cPickle as pickle')
        #c.InteractiveShellApp.exec_lines.append('if \'verbose\' not in vars():\\n    verbose = True')
        import ubelt as ub
        c.InteractiveShellApp.exec_lines.append(ub.codeblock(
            """
            class classproperty(property):
                def __get__(self, cls, owner):
                    return classmethod(self.fget).__get__(None, owner)()
            class vim(object):
                @classproperty
                def focus(cls):
                    from vimtk.cplat_ctrl import Window
                    Window.find('GVIM').focus()
                @classproperty
                def copy(cls):
                    import time
                    from vimtk.cplat_ctrl import Window
                    gvim_window = Window.find('GVIM')
                    gvim_window.focus()
                    import vimtk
                    import IPython
                    ipy = IPython.get_ipython()
                    lastline = ipy.history_manager.input_hist_parsed[-2]
                    vimtk.cplat.copy_text_to_clipboard(lastline)
                    from vimtk import xctrl
                    xctrl.XCtrl.do(
                        ('focus', 'GVIM'),
                        ('key', 'ctrl+v'),
                        ('focus', 'x-terminal-emulator.X-terminal-emulator')
                    )
            """
        ))
        #c.InteractiveShell.autoindent = True
        #c.InteractiveShell.colors = 'LightBG'
        #c.InteractiveShell.confirm_exit = False
        #c.InteractiveShell.deep_reload = True
        c.InteractiveShell.editor = 'gvim'
        #c.InteractiveShell.xmode = 'Context'
        # ENDBLOCK
        '''
    )
    with open(ipy_config_fpath, 'w') as file:
        file.write(ipy_config_text + '\n')