Esempio n. 1
0
    def setup_loggers(harn):
        """
        Attach a file logger and, if available, a tensorboard logger.

        Does nothing except emit a warning when ``harn.train_dpath`` is None.
        Otherwise, if ``harn.flog`` has not been set yet, a timestamped
        ``fitlog_<timestamp>.log`` file is created inside the training
        directory and ``fit.log`` is symlinked to it. Finally ``harn.tlog`` is
        created when the ``tensorboard_logger`` module-level name is truthy.
        """
        if harn.train_dpath is None:
            harn.warn('harn.train_dpath is None, cannot setup loggers')
            return

        if harn.flog is None:
            log_fname = 'fitlog_{}.log'.format(ub.timestamp())
            log_fpath = os.path.join(harn.train_dpath, log_fname)

            file_logger = logging.getLogger(harn.__class__.__name__)
            file_handler = logging.FileHandler(log_fpath, mode='w')
            file_handler.setFormatter(
                logging.Formatter('%(asctime)s : %(message)s'))

            # Records go only to our file, not to ancestor loggers' handlers.
            file_logger.propagate = False
            file_logger.setLevel(logging.DEBUG)
            file_logger.addHandler(file_handler)

            harn.flog = file_logger
            harn.debug('initialized file logger')

            # Keep a stable name pointing at the log of the latest run.
            link_fpath = os.path.join(harn.train_dpath, 'fit.log')
            ub.symlink(log_fpath, link_fpath, overwrite=True)

        if not tensorboard_logger:
            harn.log('Tensorboard is not available')
        else:
            train_base = os.path.dirname(harn.nice_dpath or harn.train_dpath)
            harn.log('dont forget to start: tensorboard --logdir ' +
                     train_base)
            harn.log('Initializing tensorboard')
            harn.tlog = tensorboard_logger.Logger(harn.train_dpath,
                                                  flush_secs=2)
Esempio n. 2
0
def on_pick(event, infr=None):
    """
    Print debugging information about the artist attached to a pick event.

    The plot data dictionary attached to ``event.artist`` is looked up; if it
    describes a node or an edge, the associated data is printed. A timestamp
    is always printed at the end.

    Args:
        event: object with an ``artist`` attribute
            (presumably a matplotlib pick event - verify against caller).
        infr: inference object providing ``visual_node_attrs``, ``graph``,
            ``pos_graph`` and ``repr_edge_data``. It is dereferenced whenever
            a node or edge is picked, so the ``None`` default only works for
            artists without node / edge plot data.
    """
    print('ON PICK: %r' % (event, ))
    artist = event.artist
    plotdat = util.mplutil._get_plotdat_dict(artist)

    if plotdat and 'node' in plotdat:
        node = plotdat['node']
        sorted_data = util.sort_dict(plotdat['node_data'].copy())
        # Take the visual subset first: delete_dict_keys may modify
        # ``sorted_data`` in place.
        visual_node_data = ub.dict_subset(sorted_data,
                                          infr.visual_node_attrs, None)
        node_data = util.delete_dict_keys(sorted_data,
                                          infr.visual_node_attrs)
        node_data['degree'] = infr.graph.degree(node)
        node_label = infr.pos_graph.node_label(node)

        print('visual_node_data: ' + ub.repr2(visual_node_data, nl=1))
        print('node_data: ' + ub.repr2(node_data, nl=1))
        util.cprint('node: ' + ub.repr2(node), 'blue')
        print('(pcc) node_label = %r' % (node_label, ))
        print('artist = %r' % (artist, ))
    elif plotdat and 'edge' in plotdat:
        sorted_data = util.sort_dict(plotdat['edge_data'].copy())
        print(infr.repr_edge_data(sorted_data))
        util.cprint('edge: ' + ub.repr2(plotdat['edge']), 'blue')
        print('artist = %r' % (artist, ))
    elif plotdat:
        # Plot data exists but is neither a node nor an edge.
        print('???: ' + ub.repr2(plotdat))

    print(ub.timestamp())
Esempio n. 3
0
 def dump_text(self):
     """
     Print the profile summary and write the full report to disk.

     The report (output text followed by the summary) is written twice: to
     ``profile_output.txt`` and to a timestamped
     ``profile_output.<timestamp>.txt``. If ``self.get_text()`` raises
     ``AttributeError`` a notice is printed and nothing is written.
     """
     print("Dumping Profile Information")
     try:
         output_text, summary_text = self.get_text()
     except AttributeError:
         print('profile is not on')
         return

     import ubelt as ub
     print(summary_text)
     report = output_text + '\n' + summary_text
     ub.writeto('profile_output.txt', report)
     ub.writeto('profile_output.%s.txt' % (ub.timestamp()), report)
Esempio n. 4
0
def cmake_clean(dpath='.'):
    """
    Move the contents of a cmake build directory into a fresh backup folder.

    Every entry matched by ``<dpath>/*`` is moved into a new
    ``_cmake_build_backup_<timestamp>`` subdirectory of ``dpath``, except for
    ``CMakeCache.txt`` itself and backup folders left by earlier calls.

    Args:
        dpath (str): path to the cmake build directory.

    Raises:
        AssertionError: if ``dpath`` does not contain a ``CMakeCache.txt``.
    """
    dpath = ub.truepath(dpath)
    cmake_cache_fpath = join(dpath, 'CMakeCache.txt')
    # Raise explicitly instead of using an ``assert`` statement: asserts are
    # stripped under ``python -O``, and without this guard the function would
    # happily relocate the contents of any directory. AssertionError is kept
    # so existing callers see the same exception type.
    if not exists(cmake_cache_fpath):
        raise AssertionError(
            'This does not look like a cmake build directory. '
            'No CMakeCache.txt exists: {}'.format(cmake_cache_fpath))

    backup_prefix = '_cmake_build_backup_'
    fpaths = [
        fpath for fpath in glob.glob(join(dpath, '*'))
        if fpath != cmake_cache_fpath and
        not basename(fpath).startswith(backup_prefix)
    ]

    backup_dpath = ub.ensuredir(join(dpath, backup_prefix + ub.timestamp()))
    for fpath in ub.ProgIter(fpaths, 'moving files'):
        shutil.move(fpath, backup_dpath)
Esempio n. 5
0
 def dump_text(self):
     """
     Print the profile summary and write the full report to disk.

     The report (output text followed by the summary) is written to
     ``profile_output<suffix>.txt`` and to a timestamped
     ``profile_output<suffix>.<timestamp>.txt``. The suffix is ``_<name>``
     when ``--profname`` yields a non-empty value via ``ub.argval``, and
     empty otherwise. If ``self.get_text()`` raises ``AttributeError`` a
     notice is printed and nothing is written.
     """
     print("Dumping Profile Information")
     try:
         output_text, summary_text = self.get_text()
     except AttributeError:
         print('profile is not on')
         return

     import ubelt as ub
     print(summary_text)

     profname = ub.argval('--profname', default='')
     suffix = '_' + profname if profname else profname

     report = output_text + '\n' + summary_text
     ub.writeto('profile_output{}.txt'.format(suffix), report)
     ub.writeto('profile_output{}.{}.txt'.format(suffix, ub.timestamp()),
                report)
Esempio n. 6
0
    def save(self, data, cfgstr=None):
        """
        Pickle ``data`` to the path given by ``self.get_fpath(cfgstr)``.

        A plain-text record of the save (timestamp, ``self.fname``, the
        condensed and full cfgstr, and ``self.meta``) is appended to an
        adjacent file with the ``.meta`` suffix. Nothing is written when the
        cacher is disabled.

        Args:
            data (object): object to pickle.
            cfgstr (str): configuration string; passed through
                ``self._rectify_cfgstr`` before use.

        Example:
            >>> from ubelt.util_cache import *  # NOQA
            >>> # Normal functioning
            >>> cfgstr = 'long-cfg' * 32
            >>> cacher = Cacher('test_enabled_save', cfgstr)
            >>> cacher.save('data')
            >>> assert exists(cacher.get_fpath()), 'should be enabled'
            >>> assert exists(cacher.get_fpath() + '.meta'), 'missing metadata'
            >>> # Setting the cacher as enabled=False turns it off
            >>> cacher2 = Cacher('test_disabled_save', 'params', enabled=False)
            >>> cacher2.save('data')
            >>> assert not exists(cacher2.get_fpath()), 'should be disabled'
        """
        import ubelt as ub
        if not self.enabled:
            return
        if self.verbose > 0:
            self.log('[cacher] ... {} cache save'.format(self.fname))

        cfgstr = self._rectify_cfgstr(cfgstr)
        condensed = self._condense_cfgstr(cfgstr)

        # The cache directory may not exist yet
        ub.ensuredir(self.dpath)

        data_fpath = self.get_fpath(cfgstr=cfgstr)
        meta_fpath = data_fpath + '.meta'

        # Append (never overwrite) a record that lets a reader relate the
        # condensed cfgstr back to the full one.
        with open(meta_fpath, 'a') as meta_file:
            # TODO: maybe append this in json format?
            meta_file.writelines([
                '\n\nsaving {}\n'.format(ub.timestamp()),
                self.fname + '\n',
                condensed + '\n',
                cfgstr + '\n',
                str(self.meta) + '\n',
            ])

        with open(data_fpath, 'wb') as data_file:
            # The pickle protocol is configured on the instance
            pickle.dump(data, data_file, protocol=self.protocol)
Esempio n. 7
0
 def renew(self, cfgstr=None, product=None):
     """
     Write a fresh certificate stating that the product was recomputed.

     The certificate holds the current timestamp and the rectified product
     list; when products are given, a hash of the product files is included
     as well. It is saved through ``self.cacher``.

     Args:
         cfgstr (str): forwarded to ``self.cacher.save``.
         product: product path(s); normalized by ``self._rectify_products``.

     Raises:
         IOError: if products are specified but not all of them exist.
     """
     products = self._rectify_products(product)
     certificate = dict(timestamp=ub.timestamp(), product=products)

     if products is not None:
         any_missing = any(not exists(fpath) for fpath in products)
         if any_missing:
             raise IOError(
                 'The stamped product must exist: {}'.format(products))
         file_hash = self._product_file_hash(products)
         certificate['product_file_hash'] = file_hash

     self.cacher.save(certificate, cfgstr=cfgstr)
Esempio n. 8
0
def cmake_clean(dpath='.'):
    """
    Move the contents of a cmake build directory into a fresh backup folder.

    Every entry matched by ``<dpath>/*`` is moved into a new
    ``_cmake_build_backup_<timestamp>`` subdirectory of ``dpath``, except for
    ``CMakeCache.txt`` itself and backup folders left by earlier calls.

    Args:
        dpath (str): path to the cmake build directory.

    Raises:
        Exception: if ``dpath`` does not contain a ``CMakeCache.txt``.
    """
    dpath = ub.expandpath(dpath)
    cmake_cache_fpath = join(dpath, 'CMakeCache.txt')
    if not exists(cmake_cache_fpath):
        raise Exception('This does not look like a cmake build directory. '
                        'No CMakeCache.txt exists')

    def _should_move(fpath):
        # Leave the cache file and any previous backups where they are
        if fpath == cmake_cache_fpath:
            return False
        return not basename(fpath).startswith('_cmake_build_backup_')

    fpath_set = set(filter(_should_move, glob.glob(join(dpath, '*'))))

    backup_dname = '_cmake_build_backup_' + ub.timestamp()
    backup_dpath = ub.ensuredir(join(dpath, backup_dname))
    for fpath in ub.ProgIter(fpath_set, 'moving files'):
        shutil.move(fpath, backup_dpath)
Esempio n. 9
0
def export_model_code(dpath, model, initkw=None):
    """
    Exports the class used to define a pytorch model as a new python module.

    Exports the minimum amount of code needed to make a self-contained Python
    module defining the pytorch model class. This exports the actual source
    code. The advantage of using this over pickle is that the original code can
    change arbitrarily because all dependencies on the original code are
    removed in the exported code.

    The exported module also defines ``get_initkw``, ``get_model_cls`` and
    ``make``, where ``make`` constructs the model with ``initkw``.

    Args:
        dpath (str): directory to dump the model
        model (tuple or type or object): class or class instance (e.g. torch.nn.Module)
        initkw (dict): keyword arguments used to construct the model.
            Despite the ``None`` default this is currently required.

    Returns:
        str: static_modpath: path to the saved model file.
            While you could put the output path in your PYTHONPATH, it is best
            to use `ub.import_module_from_path` to "load" the model instead.

    Raises:
        NotImplementedError: if ``initkw`` is None.

    CommandLine:
        xdoctest -m netharn.export.exporter export_model_code

    Example:
        >>> from torchvision.models import densenet
        >>> from os.path import basename
        >>> initkw = {'growth_rate': 16}
        >>> model = densenet.DenseNet(**initkw)
        >>> dpath = ub.ensure_app_cache_dir('netharn/tests')
        >>> static_modpath = export_model_code(dpath, model, initkw)
        >>> print('static_modpath = {!r}'.format(static_modpath))
        ...
        >>> print(basename(static_modpath))
        DenseNet_256629.py
        >>> # now the module can be loaded
        >>> module = ub.import_module_from_path(static_modpath)
        >>> loaded = module.make()
        >>> assert model.features.denseblock1.denselayer1.conv2.out_channels == 16
        >>> assert loaded.features.denseblock1.denselayer1.conv2.out_channels == 16
        >>> assert model is not loaded
    """
    model_class = model if isinstance(model, type) else model.__class__
    classname = model_class.__name__

    if initkw is None:
        raise NotImplementedError(
            'ERROR: The params passed to the model __init__ must be available')

    # First see if we can get away with a simple encoding of initkw
    try:
        # Do not use repr. The text produced is non-deterministic for
        # dictionaries. Instead, use ub.repr2, which is deterministic.
        init_text = ub.repr2(initkw, nl=1)
        # Check that the text is a self-contained python expression.
        # NOTE(security): this evaluates text derived from initkw; only pass
        # parameters that come from a trusted source.
        eval(init_text, {})
        init_code = ub.codeblock('initkw = {}').format(init_text)
    except Exception:
        # fallback to pickle
        warnings.warn('Initialization params might not be serialized '
                      'deterministically')
        init_bytes = repr(pickle.dumps(initkw, protocol=0))
        init_code = ub.codeblock('''
            import pickle
            initkw = pickle.loads({})
            ''').format(init_bytes)
    # Indent to sit inside the body of ``get_initkw`` in the template below;
    # the first line is stripped because the template supplies its indent.
    init_code = ub.indent(init_code).lstrip()
    # create a function to instantiate the class
    # NOTE: the template text feeds into the hash id, so it must not change.
    footer = '\n\n' + ub.codeblock('''
        __pt_export_version__ = '{__pt_export_version__}'


        def get_initkw():
            """ creates an instance of the model """
            {init_code}
            return initkw


        def get_model_cls():
            model_cls = {classname}
            return model_cls


        def make():
            """ creates an instance of the model """
            initkw = get_initkw()
            model_cls = get_model_cls()
            model = model_cls(**initkw)
            return model
        ''').format(classname=classname,
                    init_code=init_code,
                    __pt_export_version__=__pt_export_version__)

    # TODO: assert that the name "make" is not used in the model body

    body = closer.source_closure(model_class)

    body_footer = body + footer + '\n'
    # dont need to hash the header, because comments are removed anyway
    hashid = hash_code(body_footer)

    header = ub.codeblock('''
        """
        This module was autogenerated by netharn/export/exporter.py
        original_module={}
        classname={}
        timestamp={}
        hashid={}
        """
        ''').format(model_class.__module__, classname, ub.timestamp(), hashid)

    sourcecode = header + '\n' + body_footer

    # The file name carries a short hash prefix so that different versions of
    # the same class do not overwrite each other.
    static_modname = classname + '_' + hashid[0:6]
    static_modpath = join(dpath, static_modname + '.py')
    with open(static_modpath, 'w') as file_:
        file_.write(sourcecode)
    return static_modpath
Esempio n. 10
0
def timestamp_badmethod():
    """
    Check that ``ub.timestamp`` rejects an unknown ``method`` with ValueError.

    NOTE(review): the name has no ``test_`` prefix, so pytest's default
    collection rules will presumably not pick this function up.
    """
    unknown_method = 'not real'
    with pytest.raises(ValueError):
        ub.timestamp(method=unknown_method)
Esempio n. 11
0
def test_timestamp():
    """
    Check that ``ub.timestamp()`` starts with the expected layout:
    dash-separated digits, a ``T``, more digits, then a signed offset.
    """
    expected_pattern = r'\d+-\d+-\d+T\d+[\+\-]\d+'
    stamp = ub.timestamp()
    assert re.match(expected_pattern, stamp)
Esempio n. 12
0
def test_timestamp_value_error():
    """
    Check that ``ub.timestamp`` rejects an unknown ``method`` with ValueError.
    """
    unknown_method = 'bad-method'
    with pytest.raises(ValueError):
        ub.timestamp(method=unknown_method)
Esempio n. 13
0
def test_timestamp():
    """
    Check that ``ub.timestamp()`` starts with the expected layout:
    dash-separated digits, a ``T``, more digits, then a signed offset.
    """
    stamp = ub.timestamp()
    # Raw string: ``\d`` inside a normal string literal is an invalid escape
    # sequence. The sign class accepts ``-`` as well as ``+`` so the check
    # does not depend on the sign of the offset in the stamp.
    assert re.match(r'\d+-\d+-\d+T\d+[\+\-]\d+', stamp)
Esempio n. 14
0
    def initialize(harn):
        """
        Prepare the harness for a run.

        Calls ``harn.xpu.set_as_default()``, makes sure a training directory
        is set up, attaches the file logger and (if available) the
        tensorboard logger. Unless ``harn.dry`` is set, it then builds the
        model, initializer, criterion, optimizer and scheduler from
        ``harn.hyper``, tries to resume from the existing snapshots (newest
        entry of ``harn.prev_snapshots()`` first), and otherwise runs the
        initializer on the freshly built model.

        Raises:
            KeyError: if snapshots exist (and ``--reset`` was not given) but
                an optimizer param group has no ``'initial_lr'`` entry after
                the loading attempt.
        """
        # TODO: Initialize the classes and then have a different function move
        # everything to GPU
        harn.xpu.set_as_default()
        harn.debug('harn.xpu = {!r}'.format(harn.xpu))

        # presumably setup_dpath populates harn.train_dpath - verify
        if harn.train_dpath is None:
            harn.setup_dpath(harn.workdir)

        # Hard-coded switch; the file logger is created at most once.
        use_file_logger = True
        if use_file_logger and harn.flog is None:
            flog_fname = 'fitlog_{}.log'.format(ub.timestamp())
            flog_fpath = join(harn.train_dpath, flog_fname)
            flog = logging.getLogger(harn.__class__.__name__)
            formatter = logging.Formatter('%(asctime)s : %(message)s')
            handler = logging.FileHandler(flog_fpath, mode='w')
            handler.setFormatter(formatter)
            # Records go only to our file, not to ancestor loggers' handlers.
            flog.propagate = False
            flog.setLevel(logging.DEBUG)
            flog.addHandler(handler)
            harn.flog = flog
            harn.debug('initialized file logger')

        # ``tensorboard_logger`` is a module-level name; presumably falsy when
        # the optional package could not be imported - verify at file top.
        if tensorboard_logger:
            train_base = os.path.dirname(harn.nice_dpath or harn.train_dpath)
            harn.log('dont forget to start: tensorboard --logdir ' +
                     train_base)
            harn.log('Initializing tensorboard')
            harn.tlogger = tensorboard_logger.Logger(harn.train_dpath,
                                                     flush_secs=2)

        if harn.dry:
            # Dry run: nothing is constructed, only the optimizer is nulled.
            harn.log('Dry run of training harness. xpu={}'.format(harn.xpu))
            harn.optimizer = None
        else:
            prev_states = harn.prev_snapshots()

            model_name = harn.hyper.model_cls.__name__

            if harn.hyper.criterion_cls:
                harn.log('Criterion: {}'.format(
                    harn.hyper.criterion_cls.__name__))
            else:
                harn.log('Criterion: Custom')

            harn.log('Optimizer: {}'.format(harn.hyper.optimizer_cls.__name__))

            if harn.hyper.scheduler_cls:
                harn.log('Scheduler: {}'.format(
                    harn.hyper.scheduler_cls.__name__))
            else:
                harn.log('No Scheduler')

            harn.model = harn.hyper.make_model()
            harn.initializer = harn.hyper.make_initializer()

            harn.log('Mounting {} model on {}'.format(model_name, harn.xpu))
            harn.model = harn.xpu.mount(harn.model)

            n_params = number_of_parameters(harn.model)
            harn.log('Model has {!r} parameters'.format(n_params))

            # more than one criterion? Wrap it in a single criterion OR
            # specify a custom batch runner.
            if harn.hyper.criterion_cls:
                harn.criterion = harn.hyper.criterion_cls(
                    **harn.hyper.criterion_params)
                harn.log('Move {} model to {}'.format(harn.criterion,
                                                      harn.xpu))
                harn.criterion = harn.xpu.move(harn.criterion)
            else:
                pass

            # The optimizer is built after mounting, from the mounted model's
            # parameters.
            harn.log('Make optimizer')
            harn.optimizer = harn.hyper.make_optimizer(harn.model.parameters())

            if harn.hyper.scheduler_cls:
                harn.log('Make scheduler')
                harn.scheduler = harn.hyper.make_scheduler(harn.optimizer)

            # Stays True unless a snapshot is loaded successfully below.
            needs_init = True
            harn.log('There are {} existing snapshots'.format(
                len(prev_states)))
            if prev_states and not ub.argflag('--reset'):
                harn.log('Loading previous states')
                # Ignore corrupted snapshots
                # Walk from the last entry backwards and stop at the first
                # snapshot that loads without a RuntimeError.
                for load_path in reversed(prev_states):
                    try:
                        harn.load_snapshot(load_path)
                    except RuntimeError:
                        harn.log(
                            'Failed to load {}. Skiping.'.format(load_path))
                    else:
                        needs_init = False
                        break
                # NOTE(review): this check also runs when every snapshot
                # failed to load (needs_init is still True) - confirm that
                # raising in that case is intended.
                for i, group in enumerate(harn.optimizer.param_groups):
                    if 'initial_lr' not in group:
                        raise KeyError(
                            "param 'initial_lr' is not specified "
                            "in param_groups[{}] when resuming an optimizer".
                            format(i))

            if needs_init:
                harn.log('Initializing new model')
                if harn.initializer.__class__.__name__ == 'LSUV':
                    # harn.model = harn.xpu.mount(harn.model)
                    # set([p.is_cuda for p in harn.model.parameters()])

                    #hack LSUV needs a batch of data to run
                    with grad_context(False):
                        # import utool
                        # utool.embed()
                        # Only the first batch of the train loader is used.
                        loader = harn.loaders['train']
                        input, labels = next(iter(loader))
                        data = harn.xpu.variable(input)
                        harn.initializer(harn.model, data)
                else:
                    harn.initializer(harn.model)
                # NOTE(review): always true here, because this code sits in
                # the else-branch of ``if harn.dry``.
                if not harn.dry:
                    # Record the starting learning rate in each param group,
                    # the key that the resume path above requires.
                    for group in harn.optimizer.param_groups:
                        group.setdefault('initial_lr', group['lr'])

            harn.log(
                'Snapshots will save to harn.snapshot_dpath = {!r}'.format(
                    harn.snapshot_dpath))
Esempio n. 15
0
 def _debug(msg):
     """
     Append a timestamped, process-tagged line to the atomic debug log.

     The line is also printed when ``is_main`` is truthy. ``proc``,
     ``is_main`` and ``cache_gpath`` are taken from the enclosing scope.
     """
     prefix = '[{}, {}, {}] '.format(ub.timestamp(), proc, proc.pid)
     line = prefix + msg + '\n'
     if is_main:
         print(line)
     debug_fpath = cache_gpath + '.atomic.debug'
     with open(debug_fpath, 'a') as debug_file:
         debug_file.write(line)
Esempio n. 16
0
def _cog_cache_write(gpath, cache_gpath, config=None):
    """
    CommandLine:
        xdoctest -m ndsampler.abstract_frames _cog_cache_write

    Example:
        >>> # xdoctest: +REQUIRES(module:osgeo)
        >>> import ndsampler
        >>> from ndsampler.abstract_frames import *
        >>> import kwcoco
        >>> workdir = ub.ensure_app_cache_dir('ndsampler')
        >>> dset = kwcoco.CocoDataset.demo()
        >>> imgs = dset.images()
        >>> id_to_name = imgs.lookup('file_name', keepid=True)
        >>> id_to_path = {gid: join(dset.img_root, name)
        >>>               for gid, name in id_to_name.items()}
        >>> self = SimpleFrames(id_to_path, workdir=workdir)
        >>> image_id = ub.peek(id_to_name)
        >>> #gpath = self._lookup_gpath(image_id)

        #### EXIT
        # >>> hashid = self._lookup_hashid(image_id)
        # >>> cog_gname = '{}_{}.cog.tiff'.format(image_id, hashid)
        # >>> cache_gpath = cog_gpath = join(self.cache_dpath, cog_gname)
        # >>> _cog_cache_write(gpath, cache_gpath, {})
    """
    assert config is not None

    # FIXME: if gdal_translate is not installed (because libgdal exists, but
    # gdal-bin doesn't) then this seems to fail without throwing an error when
    # hack_use_cli=1.
    hack_use_cli = config.pop('hack_use_cli', False)

    if DEBUG_COG_ATOMIC_WRITE:
        import multiprocessing
        from multiprocessing import current_process
        from threading import current_thread
        is_main = (current_thread().name == 'MainThread' and
                   current_process().name == 'MainProcess')
        proc = multiprocessing.current_process()
        def _debug(msg):
            msg_ = '[{}, {}, {}] '.format(ub.timestamp(), proc, proc.pid) + msg + '\n'
            if is_main:
                print(msg_)
            with open(cache_gpath + '.atomic.debug', 'a') as f:
                f.write(msg_)
        _debug('attempts aquire'.format())

    if not hack_use_cli:
        # Load all the image data and dump it to cog format
        import kwimage
        if DEBUG_COG_ATOMIC_WRITE:
            _debug('reading data')
        raw_data = kwimage.imread(gpath)
        # raw_data = kwimage.atleast_3channels(raw_data, copy=False)
    # TODO: THERE HAS TO BE A CORRECT WAY TO DO THIS.
    # However, I'm not sure what it is. I extend my appologies to whoever is
    # maintaining this code. Note: mode MUST be 'w'

    with atomicwrites.atomic_write(cache_gpath + '.atomic', mode='w', overwrite=True) as file:
        if DEBUG_COG_ATOMIC_WRITE:
            _debug('begin')
            _debug('gpath = {}'.format(gpath))
            _debug('cache_gpath = {}'.format(cache_gpath))
        try:
            file.write('begin: {}\n'.format(ub.timestamp()))
            file.write('gpath = {}\n'.format(gpath))
            file.write('cache_gpath = {}\n'.format(cache_gpath))
            if not exists(cache_gpath):
                if not hack_use_cli:
                    util_gdal._imwrite_cloud_optimized_geotiff(
                        cache_gpath, raw_data, **config)
                else:
                    # The CLI is experimental and might make this pipeline
                    # faster by avoiding the initial read.
                    util_gdal._cli_convert_cloud_optimized_geotiff(
                        gpath, cache_gpath, **config)
                if DEBUG_COG_ATOMIC_WRITE:
                    _debug('finished write: {}\n'.format(ub.timestamp()))
            else:
                if DEBUG_COG_ATOMIC_WRITE:
                    _debug('ALREADY EXISTS did not write: {}\n'.format(ub.timestamp()))
            file.write('end: {}\n'.format(ub.timestamp()))
        except Exception as ex:
            file.write('FAILED DUE TO EXCEPTION: {}: {}\n'.format(ex, ub.timestamp()))
            if DEBUG_COG_ATOMIC_WRITE:
                _debug('FAILED DUE TO EXCEPTION: {}'.format(ex))
            raise
        finally:
            if DEBUG_COG_ATOMIC_WRITE:
                _debug('finally')

    if RUN_COG_CORRUPTION_CHECKS:
        # CHECK THAT THE DATA WAS WRITTEN CORRECTLY
        file = util_gdal.LazyGDalFrameFile(cache_gpath)
        is_valid = util_gdal.validate_nonzero_data(file)
        if not is_valid:
            if hack_use_cli:
                import kwimage
                raw_data = kwimage.imread(gpath)
            # The check may fail on zero images, so check that
            orig_sum = raw_data.sum()
            # cache_sum = file[:].sum()
            # if DEBUG:
            #     _debug('is_valid = {}'.format(is_valid))
            #     _debug('cache_sum = {}'.format(cache_sum))
            if orig_sum > 0:
                print('FAILED TO WRITE COG FILE')
                print('orig_sum = {!r}'.format(orig_sum))
                # print(kwimage.imread(cache_gpath).sum())
                if DEBUG_COG_ATOMIC_WRITE:
                    _debug('FAILED TO WRITE COG FILE')
                ub.delete(cache_gpath)
                raise CorruptCOG('FAILED TO WRITE COG FILE CORRECTLY')