Example #1
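    # Context assumed but not shown in this snippet: this is a method of a
    # training-harness class; `ub` is the ubelt package, `join`/`dirname`
    # come from os.path, and `util` is a project-local helper module.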
    def setup_dpath(self, short=True, hashed=True):
        train_info = self.train_info(short, hashed)

        train_dpath = ub.ensuredir(train_info['train_dpath'])
        train_info_fpath = join(train_dpath, 'train_info.json')

        util.write_json(train_info_fpath, train_info)

        # setup symlinks
        ub.ensuredir(dirname(train_info['link_dpath']))
        ub.symlink(train_info['train_dpath'], train_info['link_dpath'],
                   overwrite=True, verbose=3)

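        # Also symlink a human-readable "nice" directory if one was configured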
        if train_info['nice_dpath']:
            ub.ensuredir(dirname(train_info['nice_dpath']))
            ub.symlink(train_info['train_dpath'], train_info['nice_dpath'],
                       overwrite=True, verbose=3)

        print('+=========')
        # print('hyper_strid = {!r}'.format(params.hyper_id()))
        # print('train_init_id = {!r}'.format(train_info['input_id']))
        # print('arch = {!r}'.format(train_info['arch_id']))
        # print('train_hyper_hashid = {!r}'.format(train_info['train_hyper_hashid']))
        print('hyper = {}'.format(ub.repr2(train_info['hyper'], nl=3)))
        print('train_hyper_id_brief = {!r}'.format(train_info['train_hyper_id_brief']))
        print('train_id = {!r}'.format(train_info['train_id']))
        print('+=========')
        return train_info
Example #2
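# Assumed imports, not shown in the original snippet:
import ubelt as ub
from os.path import join
# `util` is a project-local helper module providing hash_data and write_json.
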
def directory_structure(workdir,
                        arch,
                        datasets,
                        pretrained=None,
                        train_hyper_id=None,
                        suffix=''):
    """
    from .torch.urban_train import *
    datasets = load_task_dataset('urban_mapper_3d')
    datasets['train']._make_normalizer()
    arch = 'foobar'
    workdir = datasets['train'].task.workdir
    ut.exec_funckw(directory_structure, globals())
    """
    arch_dpath = ub.ensuredir((workdir, 'arch', arch))
    train_base = ub.ensuredir((arch_dpath, 'train'))

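    # Derive a short identifier for the pretrained initialization: keep short
    # strings verbatim, otherwise use the first 8 characters of their hash.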
    if pretrained is None:
        train_init_id = 'None'
    elif len(pretrained) < 8:
        train_init_id = pretrained
    else:
        train_init_id = util.hash_data(pretrained)[:8]

    train_hyper_hashid = util.hash_data(train_hyper_id)[:8]

    train_id = '{}_{}_{}_{}'.format(datasets['train'].input_id, arch,
                                    train_init_id, train_hyper_hashid) + suffix

    train_dpath = ub.ensuredir(
        (train_base, 'input_' + datasets['train'].input_id,
         'solver_{}'.format(train_id)))

    train_info = {
        'arch': arch,
        'train_id': datasets['train'].input_id,
        'train_hyper_id': train_hyper_id,
        'train_hyper_hashid': train_hyper_hashid,
        'colorspace': datasets['train'].colorspace,
    }
    if hasattr(datasets['train'], 'center_inputs'):
        # Hack in centering information
        train_info['hack_centers'] = [
            (t.__class__.__name__, t.__getstate__())
            # ub.map_vals(str, t.__dict__)
            for t in datasets['train'].center_inputs.transforms
        ]
    util.write_json(join(train_dpath, 'train_info.json'), train_info)

    print('+=========')
    # print('hyper_strid = {!r}'.format(params.hyper_id()))
    print('train_init_id = {!r}'.format(train_init_id))
    print('arch = {!r}'.format(arch))
    print('train_hyper_hashid = {!r}'.format(train_hyper_hashid))
    print('train_hyper_id = {!r}'.format(train_hyper_id))
    print('train_id = {!r}'.format(train_id))
    print('+=========')

    return train_dpath
Example #3
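# Assumed imports, not shown in the original snippet:
import torch
import ubelt as ub
from os.path import join
# Project-local modules assumed: util, hyperparams, criterions, unet2, unet3,
# fit_harn2, metrics, and a module-level DEBUG flag.
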
def fit_networks(datasets, xpu):
    print('datasets = {}'.format(datasets))
    n_classes = datasets['train'].n_classes
    n_channels = datasets['train'].n_channels
    class_weights = datasets['train'].class_weights()
    ignore_label = datasets['train'].ignore_label

    print('n_classes = {!r}'.format(n_classes))
    print('n_channels = {!r}'.format(n_channels))

    arches = [
        'unet2',
        'dense_unet',
    ]

    arch_to_train_dpath = {}
    arch_to_best_epochs = {}

    for arch in arches:

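        # Hyperparameters shared across architectures: 2D cross-entropy loss,
        # SGD with Nesterov momentum, and an exponentially decaying LR.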
        hyper = hyperparams.HyperParams(
            criterion=(criterions.CrossEntropyLoss2D, {
                'ignore_label': ignore_label,
                # TODO: weight should be a FloatTensor
                'weight': class_weights,
            }),
            optimizer=(torch.optim.SGD, {
                # 'weight_decay': .0006,
                'weight_decay': .0005,
                'momentum': .9,
                'nesterov': True,
            }),
            scheduler=('Exponential', {
                'gamma': 0.99,
                'base_lr': 0.001,
                'stepsize': 2,
            }),
            other={
                'n_classes': n_classes,
                'n_channels': n_channels,
                'augment': datasets['train'].augment,
                'colorspace': datasets['train'].colorspace,
            }
        )

        train_dpath = ub.ensuredir((datasets['train'].task.workdir, 'train', arch))

        train_info = {
            'arch': arch,
            'train_id': datasets['train'].input_id,
            'train_hyper_id': hyper.hyper_id(),
            'colorspace': datasets['train'].colorspace,
            # Hack in centering information
            'hack_centers': [
                (t.__class__.__name__, t.__getstate__())
                for t in datasets['train'].center_inputs.transforms
            ]
        }
        util.write_json(join(train_dpath, 'train_info.json'), train_info)

        arch_to_train_dpath[arch] = train_dpath

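        # Instantiate the model; batch sizes differ per architecture
        # (presumably to fit in GPU memory).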
        if arch == 'unet2':
            batch_size = 14
            model = unet2.UNet2(n_alt_classes=3, in_channels=n_channels,
                                n_classes=n_classes, nonlinearity='leaky_relu')
        elif arch == 'dense_unet':
            batch_size = 6
            model = unet3.DenseUNet(n_alt_classes=3, in_channels=n_channels,
                                    n_classes=n_classes)
        else:
            raise KeyError('unknown arch: {!r}'.format(arch))

        dry = 0
        harn = fit_harn2.FitHarness(
            model=model, hyper=hyper, datasets=datasets, xpu=xpu,
            train_dpath=train_dpath, dry=dry,
            batch_size=batch_size,
        )
        harn.criterion2 = criterions.CrossEntropyLoss2D(
            weight=torch.FloatTensor([0.1, 1.0, 0.0]),
            ignore_label=2
        )
        if DEBUG:
            harn.config['max_iter'] = 30
        else:

            # Note: on aretha we can do 140 epochs in 7 days, so be careful
            # with how long we take to train.
            # With a reduction of 16, we can fit in a few more epochs.
            # Unet2 takes ~10 minutes to get through one epoch.

            # With num_workers=0, we see 374.00s/it = 6.23 min/it, which
            # comes down to 231 epochs per day.
            # harn.config['max_iter'] = 432  # 3 days max
            harn.config['max_iter'] = 200  # ~1 day max (if multiprocessing works)
        harn.early_stop.patience = 10

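        # Loss hook: the model produces two outputs; combine their losses
        # with a fixed 0.45 / 0.55 weighting.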
        def compute_loss(harn, outputs, labels):

            output1, output2 = outputs
            label1, label2 = labels

            # Compute the loss
            loss1 = harn.criterion(output1, label1)
            loss2 = harn.criterion2(output2, label2)
            loss = (.45 * loss1 + .55 * loss2)
            return loss

        harn.compute_loss = compute_loss

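        # Metric hook: compute segmentation metrics on the second output only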
        def custom_metrics(harn, output, label):
            ignore_label = datasets['train'].ignore_label
            labels = datasets['train'].task.labels

            metrics_dict = metrics._sseg_metrics(output[1], label[1],
                                                 labels=labels,
                                                 ignore_label=ignore_label)
            return metrics_dict

        harn.add_metric_hook(custom_metrics)

        harn.run()
        arch_to_best_epochs[arch] = harn.early_stop.best_epochs()

    # Select model and hyperparams
    print('arch_to_train_dpath = {}'.format(ub.repr2(arch_to_train_dpath, nl=1)))
    print('arch_to_best_epochs = {}'.format(ub.repr2(arch_to_best_epochs, nl=1)))
    return arch_to_train_dpath, arch_to_best_epochs