def setup_dpath(self, short=True, hashed=True):
    """
    Create the training directory, write `train_info.json` into it, and
    point the `link_dpath` (and optional `nice_dpath`) symlinks at it.
    """
    train_info = self.train_info(short, hashed)
    train_dpath = ub.ensuredir(train_info['train_dpath'])
    train_info_fpath = join(train_dpath, 'train_info.json')

    util.write_json(train_info_fpath, train_info)

    # setup symlinks
    ub.ensuredir(dirname(train_info['link_dpath']))
    ub.symlink(train_info['train_dpath'], train_info['link_dpath'],
               overwrite=True, verbose=3)

    if train_info['nice_dpath']:
        ub.ensuredir(dirname(train_info['nice_dpath']))
        ub.symlink(train_info['train_dpath'], train_info['nice_dpath'],
                   overwrite=True, verbose=3)

    print('+=========')
    # print('hyper_strid = {!r}'.format(params.hyper_id()))
    # print('train_init_id = {!r}'.format(train_info['input_id']))
    # print('arch = {!r}'.format(train_info['arch_id']))
    # print('train_hyper_hashid = {!r}'.format(train_info['train_hyper_hashid']))
    print('hyper = {}'.format(ub.repr2(train_info['hyper'], nl=3)))
    print('train_hyper_id_brief = {!r}'.format(train_info['train_hyper_id_brief']))
    print('train_id = {!r}'.format(train_info['train_id']))
    print('+=========')
    return train_info
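

# A minimal sketch of the on-disk layout that `setup_dpath` produces: a real
# training directory plus a stable symlink that points back at it. The demo
# function name and every path below are illustrative assumptions; only the
# `ubelt` calls mirror the method above.
def _demo_setup_dpath_layout():
    import tempfile
    import ubelt as ub
    from os.path import join, dirname
    base = tempfile.mkdtemp()
    # the real train dir, keyed by a (hypothetical) hashed solver id
    train_dpath = ub.ensuredir(join(base, 'fit', 'runs', 'solver_abc123'))
    # a human-navigable symlink that resolves to the train dir
    link_dpath = join(base, 'fit', 'link', 'solver_abc123')
    ub.ensuredir(dirname(link_dpath))
    ub.symlink(train_dpath, link_dpath, overwrite=True, verbose=1)
    return train_dpath, link_dpath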


def directory_structure(workdir, arch, datasets, pretrained=None,
                        train_hyper_id=None, suffix=''):
    """
    Build the `<workdir>/arch/<arch>/train/input_<id>/solver_<train_id>`
    directory for a run, where `train_id` encodes the dataset input id, the
    architecture, and hashes of the pretrained state and hyperparameters.

    Example usage:
        from .torch.urban_train import *
        datasets = load_task_dataset('urban_mapper_3d')
        datasets['train']._make_normalizer()
        arch = 'foobar'
        workdir = datasets['train'].task.workdir
        ut.exec_funckw(directory_structure, globals())
    """
    arch_dpath = ub.ensuredir((workdir, 'arch', arch))
    train_base = ub.ensuredir((arch_dpath, 'train'))

    # Compress long pretrained specs to an 8-character hash
    if pretrained is None:
        train_init_id = 'None'
    elif len(pretrained) < 8:
        train_init_id = pretrained
    else:
        train_init_id = util.hash_data(pretrained)[:8]

    train_hyper_hashid = util.hash_data(train_hyper_id)[:8]

    train_id = '{}_{}_{}_{}'.format(datasets['train'].input_id, arch,
                                    train_init_id, train_hyper_hashid) + suffix

    train_dpath = ub.ensuredir((
        train_base, 'input_' + datasets['train'].input_id,
        'solver_{}'.format(train_id)))

    train_info = {
        'arch': arch,
        'input_id': datasets['train'].input_id,
        'train_id': train_id,
        'train_hyper_id': train_hyper_id,
        'train_hyper_hashid': train_hyper_hashid,
        'colorspace': datasets['train'].colorspace,
    }
    if hasattr(datasets['train'], 'center_inputs'):
        # Hack in centering information
        train_info['hack_centers'] = [
            (t.__class__.__name__, t.__getstate__())
            # ub.map_vals(str, t.__dict__)
            for t in datasets['train'].center_inputs.transforms
        ]
    util.write_json(join(train_dpath, 'train_info.json'), train_info)

    print('+=========')
    # print('hyper_strid = {!r}'.format(params.hyper_id()))
    print('train_init_id = {!r}'.format(train_init_id))
    print('arch = {!r}'.format(arch))
    print('train_hyper_hashid = {!r}'.format(train_hyper_hashid))
    print('train_hyper_id = {!r}'.format(train_hyper_id))
    print('train_id = {!r}'.format(train_id))
    print('+=========')
    return train_dpath
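

# A self-contained sketch of the naming scheme used by `directory_structure`,
# calling `ubelt.hash_data` directly (an assumption: the project-local
# `util.hash_data` is taken to behave the same way). Every input string below
# is made up for illustration.
def _demo_train_id_scheme():
    import ubelt as ub
    input_id = 'input123'
    arch = 'unet2'
    pretrained = 'path/to/pretrained_weights.pt'
    train_hyper_id = 'sgd,lr=0.001,momentum=0.9'
    # long identifiers are compressed to 8-character hashes
    train_init_id = ub.hash_data(pretrained)[:8]
    train_hyper_hashid = ub.hash_data(train_hyper_id)[:8]
    train_id = '{}_{}_{}_{}'.format(input_id, arch, train_init_id,
                                    train_hyper_hashid)
    return train_id  # e.g. 'input123_unet2_xxxxxxxx_yyyyyyyy'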


def fit_networks(datasets, xpu):
    """
    Train each candidate architecture with a shared hyperparameter template,
    recording where each run lives and which epochs scored best.
    """
    print('datasets = {}'.format(datasets))
    n_classes = datasets['train'].n_classes
    n_channels = datasets['train'].n_channels
    class_weights = datasets['train'].class_weights()
    ignore_label = datasets['train'].ignore_label

    print('n_classes = {!r}'.format(n_classes))
    print('n_channels = {!r}'.format(n_channels))

    arches = [
        'unet2',
        'dense_unet',
    ]

    arch_to_train_dpath = {}
    arch_to_best_epochs = {}

    for arch in arches:
        hyper = hyperparams.HyperParams(
            criterion=(criterions.CrossEntropyLoss2D, {
                'ignore_label': ignore_label,
                # TODO: weight should be a FloatTensor
                'weight': class_weights,
            }),
            optimizer=(torch.optim.SGD, {
                # 'weight_decay': .0006,
                'weight_decay': .0005,
                'momentum': .9,
                'nesterov': True,
            }),
            scheduler=('Exponential', {
                'gamma': 0.99,
                'base_lr': 0.001,
                'stepsize': 2,
            }),
            other={
                'n_classes': n_classes,
                'n_channels': n_channels,
                'augment': datasets['train'].augment,
                'colorspace': datasets['train'].colorspace,
            }
        )

        train_dpath = ub.ensuredir((datasets['train'].task.workdir,
                                    'train', arch))
        train_info = {
            'arch': arch,
            'train_id': datasets['train'].input_id,
            'train_hyper_id': hyper.hyper_id(),
            'colorspace': datasets['train'].colorspace,
            # Hack in centering information
            'hack_centers': [
                (t.__class__.__name__, t.__getstate__())
                for t in datasets['train'].center_inputs.transforms
            ]
        }
        util.write_json(join(train_dpath, 'train_info.json'), train_info)

        arch_to_train_dpath[arch] = train_dpath

        if arch == 'unet2':
            batch_size = 14
            model = unet2.UNet2(n_alt_classes=3, in_channels=n_channels,
                                n_classes=n_classes,
                                nonlinearity='leaky_relu')
        elif arch == 'dense_unet':
            batch_size = 6
            model = unet3.DenseUNet(n_alt_classes=3, in_channels=n_channels,
                                    n_classes=n_classes)

        dry = 0
        harn = fit_harn2.FitHarness(
            model=model, hyper=hyper, datasets=datasets, xpu=xpu,
            train_dpath=train_dpath, dry=dry,
            batch_size=batch_size,
        )
        harn.criterion2 = criterions.CrossEntropyLoss2D(
            weight=torch.FloatTensor([0.1, 1.0, 0.0]),
            ignore_label=2
        )

        if DEBUG:
            harn.config['max_iter'] = 30
        else:
            # Note: on aretha we can do 140 epochs in 7 days, so be careful
            # with how long training takes. With a reduction of 16 we can
            # afford a few more epochs. Unet2 takes ~10 minutes to get
            # through one epoch; with num_workers=0 we measured
            # 374.00s/it = 6.23 min/it, which comes to ~231 epochs per day.
            # harn.config['max_iter'] = 432  # 3 days max
            harn.config['max_iter'] = 200  # ~1 day max (if multiprocessing works)
            harn.early_stop.patience = 10

        def compute_loss(harn, outputs, labels):
            output1, output2 = outputs
            label1, label2 = labels

            # Compute a weighted blend of the two per-head losses
            loss1 = harn.criterion(output1, label1)
            loss2 = harn.criterion2(output2, label2)
            loss = (.45 * loss1 + .55 * loss2)
            return loss

        harn.compute_loss = compute_loss

        def custom_metrics(harn, output, label):
            ignore_label = datasets['train'].ignore_label
            labels = datasets['train'].task.labels

            metrics_dict = metrics._sseg_metrics(output[1], label[1],
                                                 labels=labels,
                                                 ignore_label=ignore_label)
            return metrics_dict

        harn.add_metric_hook(custom_metrics)

        harn.run()
        arch_to_best_epochs[arch] = harn.early_stop.best_epochs()

    # Select model and hyperparams
    print('arch_to_train_dpath = {}'.format(ub.repr2(arch_to_train_dpath, nl=1)))
    print('arch_to_best_epochs = {}'.format(ub.repr2(arch_to_best_epochs, nl=1)))

    return arch_to_train_dpath, arch_to_best_epochs
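

# A minimal sketch of the weighted two-head loss wired into the harness
# above, with `torch.nn.CrossEntropyLoss` standing in for the project-local
# `criterions.CrossEntropyLoss2D` (an assumption; the real class also accepts
# an `ignore_label`). Shapes and labels are made up for illustration.
def _demo_two_head_loss():
    import torch
    criterion1 = torch.nn.CrossEntropyLoss()
    criterion2 = torch.nn.CrossEntropyLoss(
        weight=torch.FloatTensor([0.1, 1.0, 0.0]))
    output1 = torch.randn(4, 3, requires_grad=True)  # head-1 logits, batch of 4
    output2 = torch.randn(4, 3, requires_grad=True)  # head-2 logits
    label1 = torch.tensor([0, 1, 2, 1])
    label2 = torch.tensor([0, 1, 1, 2])
    # same 45/55 blend as `compute_loss` above
    loss = (0.45 * criterion1(output1, label1) +
            0.55 * criterion2(output2, label2))
    loss.backward()
    return loss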