Beispiel #1
0
def directory_structure(workdir,
                        arch,
                        datasets,
                        pretrained=None,
                        train_hyper_id=None,
                        suffix=''):
    """
    Create (on disk) the canonical training directory layout for one
    architecture / dataset combination and return the training dpath.

    Also writes a ``train_info.json`` manifest into the training directory.

    Example:
        from .torch.urban_train import *
        datasets = load_task_dataset('urban_mapper_3d')
        datasets['train']._make_normalizer()
        arch = 'foobar'
        workdir = datasets['train'].task.workdir
        ut.exec_funckw(directory_structure, globals())
    """
    arch_dpath = ub.ensuredir((workdir, 'arch', arch))
    train_base = ub.ensuredir((arch_dpath, 'train'))

    # Short identifier for the pretrained initialization state:
    # short strings pass through, longer ones are abbreviated by hash.
    if pretrained is None:
        train_init_id = 'None'
    elif len(pretrained) < 8:
        train_init_id = pretrained
    else:
        train_init_id = util.hash_data(pretrained)[:8]

    train_hyper_hashid = util.hash_data(train_hyper_id)[:8]

    input_id = datasets['train'].input_id
    train_id = '{}_{}_{}_{}'.format(input_id, arch,
                                    train_init_id, train_hyper_hashid) + suffix

    train_dpath = ub.ensuredir(
        (train_base, 'input_' + input_id,
         'solver_{}'.format(train_id)))

    # NOTE(review): the 'train_id' key records the *input* id, not the
    # composed train_id above — looks suspicious, but preserved as-is;
    # confirm against consumers of train_info.json before changing.
    train_info = {
        'arch': arch,
        'train_id': input_id,
        'train_hyper_id': train_hyper_id,
        'train_hyper_hashid': train_hyper_hashid,
        'colorspace': datasets['train'].colorspace,
    }
    if hasattr(datasets['train'], 'center_inputs'):
        # Hack in centering information
        train_info['hack_centers'] = [
            (xform.__class__.__name__, xform.__getstate__())
            for xform in datasets['train'].center_inputs.transforms
        ]
    util.write_json(join(train_dpath, 'train_info.json'), train_info)

    print('+=========')
    print('train_init_id = {!r}'.format(train_init_id))
    print('arch = {!r}'.format(arch))
    print('train_hyper_hashid = {!r}'.format(train_hyper_hashid))
    print('train_hyper_id = {!r}'.format(train_hyper_id))
    print('train_id = {!r}'.format(train_id))
    print('+=========')

    return train_dpath
Beispiel #2
0
    def _parts_id(hyper, parts, short=False, hashed=False):
        """
        Assemble a comma-separated id string from ``parts``.

        Each non-None value in ``parts`` is a ``(clsname, params)`` pair; the
        part contributes the class basename followed by a parameter string.
        The parameter string may be shortened or hashed, either globally
        (``short`` / ``hashed`` passed as ``True``) or per-key (passing an
        iterable of keys to either flag).

        NOTE: ``hyper`` is accepted but not used by this function.
        """
        tokens = []
        for key, value in parts.items():
            if value is None:
                continue
            clsname, params = value
            tokens.append(clsname.split('.')[-1])

            # Precedence of specifications (from lowest to highest):
            # SF=single flag, EF=explicit flag
            # SF-short, SF-hash, EF-short, EF-hash
            want_short = short is True
            want_hash = hashed is True
            if ub.iterable(short) and key in short:
                want_short, want_hash = True, False
            if ub.iterable(hashed) and key in hashed:
                want_short, want_hash = False, True

            if want_hash:
                param_str = util.hash_data(util.make_idstr(params))[0:6]
            elif want_short:
                param_str = util.make_short_idstr(params)
            else:
                param_str = util.make_idstr(params)

            if param_str:
                tokens.append(param_str)
        return ','.join(tokens)
Beispiel #3
0
 def _set_id_from_dependency(self, depends):
     """
     Set ``self.input_id`` from an arbitrary dependency representation.

     The caller must ensure ``depends`` is consistent across runs.
     """
     print('Preparing id for {} images'.format(self.tag))
     # the first 8 hash characters are enough to disambiguate input sets
     hashid = util.hash_data(depends)[:8]
     num = len(self)
     self.input_id = '{}-{}'.format(num, hashid)
     print(' * n_input = {}'.format(num))
     print(' * input_id = {}'.format(self.input_id))
Beispiel #4
0
def stitched_predictions(dataset, arches, xpu, arch_to_train_dpath, workdir,
                         _epochs, tag):
    """
    Produce (or load cached) stitched predictions for every model in arches.

    For each arch: loads its snapshot at the requested epoch, predicts over
    ``dataset`` (skipped when a cache stamp already exists for this input /
    model combination), and collects the paths of the stitched probability
    h5 files.

    Returns:
        dict: arch -> {'probs': [...], 'probs1': [...]} lists of h5 paths
    """
    # presumably this access triggers lazy construction of input_id — it is
    # kept before the print on purpose; TODO confirm
    dataset.inputs.input_id
    print('dataset.inputs.input_id = {!r}'.format(dataset.inputs.input_id))

    # Predict probabilities for each model in the ensemble
    arch_to_paths = {}
    for arch in arches:
        train_dpath = arch_to_train_dpath[arch]
        epoch = _epochs[arch]
        load_path = fit_harn2.get_snapshot(train_dpath, epoch=epoch)

        pharn = UrbanPredictHarness(dataset, xpu)
        dataset.center_inputs = pharn.load_normalize_center(train_dpath)

        pharn.test_dump_dpath = ub.ensuredir(
            (workdir, tag, dataset.inputs.input_id, arch,
             'epoch{}'.format(epoch)))

        stitched_dpath = join(pharn.test_dump_dpath, 'stitched')

        # the cache key depends on both the inputs and the exact model specs
        cfgstr = util.hash_data([
            dataset.inputs.input_id,
            util.hash_file(load_path),
        ])

        cacher = ub.Cacher('prediction_stamp', cfgstr=cfgstr,
                           dpath=stitched_dpath)
        if cacher.tryload() is None:
            # no stamp yet: actually predict the whole scene once
            pharn.load_snapshot(load_path)
            pharn.run()
            cacher.save(True)

        arch_to_paths[arch] = {
            subdir: glob.glob(join(stitched_dpath, subdir, '*.h5'))
            for subdir in ('probs', 'probs1')
        }
    return arch_to_paths
Beispiel #5
0
    def prepare_images(self, force=False):
        """
        If not already done, loads paths to images into memory and constructs a
        unique id for that set of im/gt images.

        If the paths are already set, then only the input-id is constructed.

        Args:
            force (bool): when True, recompute even if ``self.n_input``
                is already set.

        Side effects:
            sets ``self.n_input`` and ``self.input_id``
        """
        if self.n_input is not None and not force:
            return

        self.prepare_image_paths()
        print('Preparing {} images'.format(self.tag))

        if self.aux_paths:
            # new way: id depends on the full path mapping
            depends = sorted(self.paths.items())
        else:
            depends = []
            depends.append(self.im_paths)
            depends.append(self.gt_paths)
            if self.gt_paths:
                # HACK: We will assume image data depends only on the filename
                # HACK: be respectful of gt label changes (ignore aug)
                # stride>1 is faster but might break
                # stride=1 is the safest
                hashes = [
                    util.hash_file(p, stride=32)
                    for p in ub.ProgIter(self.gt_paths, label='hashing')
                    if 'aug' not in basename(p) and 'part' not in basename(p)
                ]
                label_hashid = util.hash_data(hashes)
                depends.append(label_hashid)
        n_im = None if self.im_paths is None else len(self.im_paths)
        n_gt = None if self.gt_paths is None else len(self.gt_paths)
        self.n_input = n_im or n_gt

        # CHANGED(review): was ``hashutil.hash_data`` — every other id
        # computation in this module goes through ``util.hash_data``
        # (see util.hash_file above), so use it here too for consistency.
        # Revert if ``hashutil`` is deliberately a different hasher.
        hashid = util.hash_data(depends)[:self.abbrev]
        self.input_id = '{}-{}'.format(self.n_input, hashid)

        print(' * n_images = {}'.format(n_im))
        print(' * n_groundtruth = {}'.format(n_gt))
        print(' * input_id = {}'.format(self.input_id))