def preprocess(task, force=False):
    """
    Precompute full-resolution inputs and scale-1 part crops for `task`.

    Results are written under ``<task.workdir>/data`` and registered on
    ``task.input_modes``. Pass ``force=True`` to clear and regenerate.
    """
    # NOTE(review): this function's name appears to shadow the `preprocess`
    # module used on the next lines (`preprocess.Preprocessor`) — confirm the
    # module is actually in scope under this name at call time.
    task.prepare_fullres_inputs()
    data_dpath = ub.ensuredir((task.workdir, 'data'))
    prep = preprocess.Preprocessor(data_dpath)
    prep.part_config['overlap'] = .75
    prep.ignore_label = task.ignore_label
    fullres = task.fullres
    # task.input_modes['lowres'] = prep.make_lowres(fullres, clear=force)
    task.input_modes['part-scale1'] = prep.make_parts(
        fullres, scale=1, clear=force)
def load_testing_dataset(test_data_path, workdir):
    """
    Load a test dataset that has no groundtruth.

    Builds scale-1 part crops under ``<workdir>/data_test`` and returns an
    `UrbanDataset` with ``with_gt=False`` and dump-safe input names.
    """
    task = UrbanMapper3D(root=test_data_path, workdir=workdir, boundary=True)

    fullres_inputs = task.load_fullres_inputs('.')
    if DEBUG:
        # Only keep a handful of images when debugging
        fullres_inputs = fullres_inputs.take([0, 1, 2, 3, 4, 5],
                                             with_dump=True)

    prep = preprocess.Preprocessor(ub.ensuredir((task.workdir, 'data_test')))
    prep.part_config['overlap'] = 0 if DEBUG else .75
    prep.ignore_label = task.ignore_label
    part_inputs = prep.make_parts(fullres_inputs, scale=1, clear=0)

    dataset = UrbanDataset(part_inputs, task)
    dataset.inputs.make_dumpsafe_names()
    dataset.with_gt = False
    return dataset
def load_training_datasets(train_data_path, workdir):
    """
    Loads a dataset with groundtruth and splits it into train / validation

    Returns:
        tuple: (datasets, vali_dataset2) where `datasets` maps 'train' and
            'vali' to part-crop datasets (vali has 0 overlap, for loss
            validation), and `vali_dataset2` uses 75% overlap for stitched
            prediction. All three share the train normalizer.
    """
    task = UrbanMapper3D(root=train_data_path, workdir=workdir, boundary=True)

    fullres = task.load_fullres_inputs('.')
    if DEBUG:
        fullres = fullres.take(range(10), with_dump=True)
    fullres = task.create_boundary_groundtruth(fullres)
    del fullres.paths['gti']

    # Deterministic shuffled split (seeded RNG) so reruns agree.
    rng = np.random.RandomState(0)
    idxs = np.arange(len(fullres))
    rng.shuffle(idxs)

    vali_frac = .15
    # FIX: guard n_vali >= 1. Previously `int(len * .15)` could be 0 for
    # small datasets, making idxs[0:-0] an EMPTY train split and idxs[-0:]
    # the entire dataset as validation.
    n_vali = max(1, int(len(idxs) * vali_frac))
    train_idx = idxs[0:-n_vali]
    vali_idx = idxs[-n_vali:]

    print('DEBUG = {!r}'.format(DEBUG))
    if DEBUG:
        train_idx = train_idx[0:5]
        vali_idx = vali_idx[0:5]

    train_fullres_inputs = fullres.take(train_idx, with_dump=True)
    vali_fullres_inputs = fullres.take(vali_idx, with_dump=True)

    prep = preprocess.Preprocessor(ub.ensuredir((task.workdir, 'data_train1')))
    # prep.part_config['overlap'] = 0 if DEBUG else .75
    prep.part_config['overlap'] = .75
    prep.ignore_label = task.ignore_label
    train_part_inputs = prep.make_parts(train_fullres_inputs, scale=1, clear=0)

    prep = preprocess.Preprocessor(ub.ensuredir((task.workdir, 'data_vali1')))
    prep.part_config['overlap'] = 0 if DEBUG else .75
    prep.ignore_label = task.ignore_label
    vali_part_inputs2 = prep.make_parts(vali_fullres_inputs, scale=1, clear=0)

    if DEBUG:
        vali_part_inputs = vali_part_inputs2
    else:
        # Make two versions of vali, one with 75% overlap for stiched
        # prediction and another with 0 overlap, for loss validation
        prep = preprocess.Preprocessor(
            ub.ensuredir((task.workdir, 'data_vali2')))
        prep.part_config['overlap'] = 0
        prep.ignore_label = task.ignore_label
        vali_part_inputs = prep.make_parts(vali_fullres_inputs, scale=1,
                                           clear=0)

    train_dataset = UrbanDataset(train_part_inputs, task)
    vali_dataset = UrbanDataset(vali_part_inputs, task)
    vali_dataset2 = UrbanDataset(vali_part_inputs2, task)

    # Shrink epochs by a factor of 16 for more frequent progress
    train_dataset.epoch_shrink = 16

    print('* len(train_dataset) = {}'.format(len(train_dataset)))
    print('* len(vali_dataset) = {}'.format(len(vali_dataset)))
    print('* len(vali_dataset2) = {}'.format(len(vali_dataset2)))

    datasets = {
        'train': train_dataset,
        'vali': vali_dataset,
    }

    # All splits share the normalizer fit on the training data only.
    datasets['train'].center_inputs = datasets['train']._make_normalizer()
    datasets['vali'].center_inputs = datasets['train'].center_inputs
    vali_dataset2.center_inputs = datasets['train'].center_inputs

    datasets['train'].augment = True
    return datasets, vali_dataset2