def predict(run_path, model, options, generator, split, save_probs=False):
    """Write a prediction image for every sample of a split and zip them.

    Each sample of the split is run through the model. The resulting
    probabilities are converted to an RGB image that is saved under
    <run_path>/<split>_predictions; once all samples are processed that
    directory is zipped to <run_path>/<split>_predictions.zip and removed.
    When save_probs is set, the probability images get the same treatment
    under <run_path>/<split>_probs.

    # Arguments
        run_path: the path to the files for a run
        model: a Keras model that has been trained
        options: RunOptions object that specifies the run
        generator: a Generator object to generate the test data
        split: name of the split eg. validation
        save_probs: if True, also save and zip the probability images
    """
    dataset = generator.dataset

    if save_probs:
        probs_dir = join(run_path, '{}_probs'.format(split))
        _makedirs(probs_dir)
    preds_dir = join(run_path, '{}_predictions'.format(split))
    _makedirs(preds_dir)

    samples = generator.make_split_generator(
        split, target_size=options.eval_target_size, batch_size=1,
        shuffle=False, augment_methods=None, normalize=True, only_xy=False)

    def run_model(tile):
        return predict_x(tile, model)

    for sample_ind, batch in enumerate(samples):
        file_ind = batch.file_inds[0]
        print('Processing {}'.format(file_ind))

        def output_path(directory):
            return join(
                directory, generator.dataset.get_output_file_name(file_ind))

        # The generator yields batches of one; drop the batch axis.
        image = np.squeeze(batch.x, axis=0)
        y_probs = make_prediction_img(
            image, options.target_size[0], run_model)

        if save_probs:
            save_img(y_probs, output_path(probs_dir))

        y_preds = dataset.one_hot_to_rgb_batch(y_probs)
        save_img(y_preds, output_path(preds_dir))

        # Stop once the requested number of samples has been processed.
        limit = options.nb_eval_samples
        if limit is not None and sample_ind == limit - 1:
            break

    if save_probs:
        zip_dir(probs_dir, join(run_path, '{}_probs.zip'.format(split)))
        rmtree(probs_dir)

    zip_dir(preds_dir, join(run_path, '{}_predictions.zip'.format(split)))
    rmtree(preds_dir)
def preprocess(datasets_path):
    """Repair the truncated Potsdam depth image and write channel stats.

    One normalized DSM image is missing a column (its second dimension is
    5999). If it has not been fixed already, it is overwritten with a
    6000x6000 uint8 image that holds the first channel of the original and
    leaves the last column at zero. Channel statistics are then written to
    the processed Potsdam directory.

    # Arguments
        datasets_path: directory that contains the datasets
    """
    class Options():
        def __init__(self):
            self.active_input_inds = [0, 1, 2, 3, 4]
            self.train_ratio = 0.8
            self.cross_validation = None

    raw_dir = join(datasets_path, POTSDAM)
    processed_dir = join(datasets_path, PROCESSED_POTSDAM)
    _makedirs(processed_dir)

    dsm_path = join(
        raw_dir,
        '1_DSM_normalisation/dsm_potsdam_03_13_normalized_lastools.jpg')
    dsm = load_img(dsm_path)
    is_truncated = dsm.shape[1] == 5999
    if is_truncated:
        padded = np.zeros((6000, 6000), dtype=np.uint8)
        padded[:, :-1] = dsm[:, :, 0]
        save_img(padded, dsm_path)

    stats_generator = PotsdamImageFileGenerator(datasets_path, Options())
    stats_generator.write_channel_stats(processed_dir)
def plot_predictions(run_path, options, generator):
    """Gather validation tags and plot the samples that were mispredicted.

    Predicted tags are read from <run_path>/validation_preds.csv and the
    true tags from generator.tag_store. For each validation sample whose
    predicted tag array differs from the true one, plot_prediction is called
    with a path under <run_path>/validation_plots (limited to
    options.nb_eval_plot_samples plots when that option is not None).
    Iteration stops after options.nb_eval_samples samples when that option
    is not None.

    # Arguments
        run_path: the path to the files for a run
        options: options object providing active_tags, batch_size,
            nb_eval_samples and nb_eval_plot_samples
        generator: a Generator object with a tag_store

    # Returns
        A (y_true, y_pred) tuple; each is the concatenation along axis 0 of
        the per-sample tag arrays.
    """
    pred_tags_path = join(run_path, 'validation_preds.csv')
    plots_dir = join(run_path, 'validation_plots')
    _makedirs(plots_dir)

    pred_tag_store = TagStore(
        tags_path=pred_tags_path, active_tags=options.active_tags)

    batches = generator.make_split_generator(
        VALIDATION, target_size=None, batch_size=options.batch_size,
        shuffle=False, augment_methods=None, normalize=True, only_xy=False)

    def iter_samples():
        # Flatten the batches into a stream of (file_ind, all_x) samples.
        for batch in batches:
            for row in range(batch.x.shape[0]):
                yield batch.file_inds[row], batch.all_x[row, :, :, :]

    true_rows = []
    pred_rows = []
    nb_seen = 0
    nb_plotted = 0

    for file_ind, all_x in iter_samples():
        truth = generator.tag_store.get_tag_array([file_ind])
        true_rows.append(truth)
        guess = pred_tag_store.get_tag_array([file_ind])
        pred_rows.append(guess)

        may_plot = (options.nb_eval_plot_samples is None or
                    nb_plotted < options.nb_eval_plot_samples)
        if may_plot and not np.array_equal(truth, guess):
            nb_plotted += 1
            plot_path = join(plots_dir, '{}_debug.png'.format(file_ind))
            plot_prediction(
                generator, all_x, truth[0, :], guess[0, :], plot_path)

        nb_seen += 1
        if (options.nb_eval_samples is not None and
                nb_seen >= options.nb_eval_samples):
            break

    y_true = np.concatenate(true_rows, axis=0)
    y_pred = np.concatenate(pred_rows, axis=0)

    if options.nb_eval_samples is not None:
        y_true = y_true[0:options.nb_eval_samples, :]
        y_pred = y_pred[0:options.nb_eval_samples, :]

    return y_true, y_pred
def make_callbacks(self):
    """Build the list of callbacks used while training.

    Always included, in this order: a checkpoint of the weights written to
    <run_path>/model.h5 with period=1, a best-only checkpoint of the weights
    written to <run_path>/best_model.h5, and a CSV logger appending to
    self.log_path. A callback that calls self.sync_results() at the end of
    each epoch is always appended last.

    Optional callbacks, driven by self.options:
        delta_model_checkpoint: if not None, a DeltaModelCheckpoint writing
            to <run_path>/delta_model_checkpoints/model_<epoch>.h5
        patience: if truthy, a ReduceLROnPlateau with that patience
        lr_schedule: if truthy, a LearningRateScheduler that follows the
            (epoch_thresh, lr) pairs of the schedule
        lr_epoch_decay: if truthy, a LearningRateScheduler returning
            init_lr / (1 + lr_epoch_decay * epoch)
        cyclic_lr: if not None, a CyclicLR built from base_lr, max_lr,
            step_size and cycle_mode

    # Returns
        The list of callback objects.
    """
    model_checkpoint = ModelCheckpoint(
        filepath=join(self.run_path, 'model.h5'), period=1,
        save_weights_only=True)

    best_model_checkpoint = ModelCheckpoint(
        filepath=join(self.run_path, 'best_model.h5'), save_best_only=True,
        save_weights_only=True)

    logger = CSVLogger(self.log_path, append=True)
    callbacks = [model_checkpoint, best_model_checkpoint, logger]

    # TODO hasattr
    if self.options.delta_model_checkpoint is not None:
        exp_path = join(self.run_path, 'delta_model_checkpoints')
        _makedirs(exp_path)
        callback = DeltaModelCheckpoint(
            join(exp_path, 'model_{epoch:0>4}.h5'),
            acc_delta=self.options.delta_model_checkpoint)
        callbacks.append(callback)

    if self.options.patience:
        callback = ReduceLROnPlateau(
            verbose=1, epsilon=0.001, patience=self.options.patience)
        callbacks.append(callback)

    if self.options.lr_schedule:
        def get_scheduled_lr(epoch):
            # Walk the (epoch_thresh, lr) pairs and keep the rate of the
            # last threshold that has been reached. The early break
            # presumes the pairs are sorted by ascending threshold.
            curr_lr = None
            for epoch_thresh, lr in self.options.lr_schedule:
                if epoch < epoch_thresh:
                    # Before the first threshold no rate has been selected
                    # yet; fall back to the first scheduled rate instead of
                    # failing on an unbound variable.
                    if curr_lr is None:
                        curr_lr = lr
                    break
                curr_lr = lr
            return curr_lr

        callbacks.append(LearningRateScheduler(get_scheduled_lr))

    if self.options.lr_epoch_decay:
        def get_decayed_lr(epoch):
            decay_factor = 1 / (1.0 + self.options.lr_epoch_decay * epoch)
            return self.options.init_lr * decay_factor

        callbacks.append(LearningRateScheduler(get_decayed_lr))

    if self.options.cyclic_lr is not None:
        callback = CyclicLR(
            base_lr=self.options.base_lr,
            max_lr=self.options.max_lr,
            step_size=self.options.step_size,
            mode=self.options.cycle_mode)
        callbacks.append(callback)

    # Sync the results at the end of every epoch.
    callback = LambdaCallback(
        on_epoch_end=lambda epoch, logs: self.sync_results())
    callbacks.append(callback)

    return callbacks
def save_to_dir(self, experiments, path):
    """Write every experiment to <path>/experiments/<index>.json.

    Each experiment is first passed to self.parse_experiment, then dumped
    as JSON with sorted keys and an indent of 4 into a file named after its
    position in the list.

    # Arguments
        experiments: sequence of JSON-serializable experiments
        path: directory under which the 'experiments' directory is created

    # Raises
        ValueError: if self.has_unique_run_names(experiments) is false
    """
    names_are_unique = self.has_unique_run_names(experiments)
    if not names_are_unique:
        raise ValueError('Each run_name needs to be unique.')

    for position, experiment in enumerate(experiments):
        self.parse_experiment(experiment)
        serialized = json.dumps(experiment, sort_keys=True, indent=4)

        file_name = '{}.json'.format(position)
        target_path = join(path, 'experiments', file_name)
        _makedirs(dirname(target_path))

        with open(target_path, 'w') as target_file:
            target_file.write(serialized)
def preprocess(datasets_path):
    """Convert every Potsdam sample to a numpy file and write channel stats.

    For each of the TRAIN, VALIDATION and TEST splits, every sample's
    unnormalized input x is saved with save_numpy_array under the processed
    Potsdam directory as '<ind0>_<ind1>'. When the batch carries labels, the
    label array and the label mask are concatenated to x along axis 2
    before saving. Channel statistics are written afterwards.

    # Arguments
        datasets_path: directory that contains the datasets
    """
    class Options():
        def __init__(self):
            self.active_input_inds = [0, 1, 2, 3, 4]
            self.train_ratio = 0.8
            self.cross_validation = None

    processed_dir = join(datasets_path, PROCESSED_POTSDAM)
    _makedirs(processed_dir)

    options = Options()
    generator = PotsdamImageFileGenerator(datasets_path, options)
    dataset = generator.dataset

    for split in (TRAIN, VALIDATION, TEST):
        samples = generator.make_split_generator(
            split, batch_size=1, shuffle=False, augment_methods=None,
            normalize=False, only_xy=False)

        for batch in samples:
            print('.')
            file_ind = batch.file_inds[0]

            # Batches hold a single sample; drop the batch axis.
            x = np.squeeze(batch.x, axis=0)
            parts = [x]

            has_labels = batch.y is not None
            if has_labels:
                y = np.squeeze(batch.y, axis=0)
                y = dataset.one_hot_to_label_batch(y)
                y_mask = np.squeeze(batch.y_mask, axis=0)
                parts += [y, y_mask]
            stacked = np.concatenate(parts, axis=2)

            first_ind, second_ind = file_ind
            file_name = '{}_{}'.format(first_ind, second_ind)
            save_numpy_array(join(processed_dir, file_name), stacked)

            # Free memory
            parts = stacked = None
            x = y = y_mask = None
            batch.all_x = batch.x = batch.y = batch.y_mask = None

    stats_generator = PotsdamNumpyFileGenerator(datasets_path, options)
    stats_generator.write_channel_stats(processed_dir)
def preprocess(datasets_path):
    """Write the channel statistics of the Vaihingen image files.

    Creates the processed Vaihingen directory and lets a
    VaihingenImageFileGenerator, configured with four active input
    channels, write its channel stats there.

    # Arguments
        datasets_path: directory that contains the datasets
    """
    class Options():
        def __init__(self):
            self.active_input_inds = [0, 1, 2, 3]
            self.train_ratio = 0.8
            self.cross_validation = None

    processed_dir = join(datasets_path, PROCESSED_VAIHINGEN)
    _makedirs(processed_dir)

    stats_generator = VaihingenImageFileGenerator(datasets_path, Options())
    stats_generator.write_channel_stats(processed_dir)
def setup_run(self):
    """Prepare the run directory and record the options of the run.

    When the run directory does not exist yet, results are first synced
    with self.sync_results(download=True). The directory is then created
    through _makedirs and the options are written to
    <run_path>/options.json.
    """
    run_dir_exists = isdir(self.run_path)
    if not run_dir_exists:
        self.sync_results(download=True)

    _makedirs(self.run_path)

    save_json(self.options.__dict__, join(self.run_path, 'options.json'))
def preprocess(datasets_path):
    """Preprocess the Planet Kaggle dataset and write its channel stats.

    Runs PlanetKaggleFileGenerator.preprocess first, then creates the
    Planet Kaggle directory and lets a PlanetKaggleJpgFileGenerator,
    configured with three active input channels, write its channel stats
    there.

    # Arguments
        datasets_path: directory that contains the datasets
    """
    PlanetKaggleFileGenerator.preprocess(datasets_path)

    class Options():
        def __init__(self):
            self.active_input_inds = [0, 1, 2]
            self.train_ratio = 0.8
            self.cross_validation = None
            self.active_tags_prob = None
            self.active_tags = None

    processed_dir = join(datasets_path, PLANET_KAGGLE)
    _makedirs(processed_dir)

    stats_generator = PlanetKaggleJpgFileGenerator(datasets_path, Options())
    stats_generator.write_channel_stats(processed_dir)
def make_video(x, y, all_x, models, videos_path, video_ind, options,
               generator):
    """Plot one prediction frame per model and encode the frames as a video.

    For every model, a prediction image for x is built and plotted next to
    y into <videos_path>/<video_ind>/frame_NNNN.png. The frames are then
    handed to the external 'avconv' program, which is asked to write
    <videos_path>/<video_ind>.mp4.

    # Arguments
        x: input of a single sample, passed to make_prediction_img
        y: ground truth image passed to plot_prediction
        all_x: all input channels of the sample, passed to plot_prediction
        models: sequence of models; one frame is produced per model
        videos_path: directory that receives the frames and the video
        video_ind: index used to name the frame directory and the video
        options: options object providing target_size
        generator: a Generator object whose dataset converts one-hot
            predictions to RGB
    """
    frames_dir = join(videos_path, str(video_ind))
    _makedirs(frames_dir)

    def rgb_predictor(model):
        # Build the per-tile prediction function for one model.
        def predict_rgb(tile):
            return generator.dataset.one_hot_to_rgb_batch(
                predict_x(tile, model))
        return predict_rgb

    for frame_ind, model in enumerate(models):
        y_pred = make_prediction_img(
            x, options.target_size[0], rgb_predictor(model))
        print(video_ind)
        print(frame_ind)

        frame_name = 'frame_{:0>4}.png'.format(frame_ind)
        plot_prediction(
            generator, all_x, y, y_pred, join(frames_dir, frame_name))

    frames_pattern = join(frames_dir, 'frame_%04d.png')
    output_path = join(videos_path, '{}.mp4'.format(video_ind))
    # The scale filter truncates width and height down to even values.
    command = [
        'avconv',
        '-r', '2',
        '-i', frames_pattern,
        '-vf', 'scale=trunc(in_w/2)*2:trunc(in_h/2)*2',
        output_path,
    ]
    call(command)
def plot_generator(self, dataset_name, generator_name, split):
    """Plot a few augmented samples drawn from a split of a generator.

    Two shuffled batches of four samples are drawn from the generator
    selected by dataset_name and generator_name, and each sample is handed
    to generator.plot_sample with the path
    <results_path>/gen_samples/<dataset_name>/<generator_name>/<split>/
    <batch_ind>_<sample_ind>.pdf.

    # Arguments
        dataset_name: name of the dataset to draw samples from
        generator_name: name of the generator; TIFF uses four input
            channels, anything else three
        split: name of the split to draw samples from
    """
    nb_batches = 2
    batch_size = 4

    class Options():
        def __init__(self):
            self.dataset_name = dataset_name
            self.generator_name = generator_name
            self.active_input_inds = (
                [0, 1, 2, 3] if generator_name == TIFF else [0, 1, 2])
            self.train_ratio = 0.8
            self.cross_validation = None
            self.augment_methods = [HFLIP, VFLIP, ROTATE, TRANSLATE]
            self.active_tags = None
            self.active_tags_prob = None

    options = Options()
    generator = self.get_data_generator(options)

    samples_dir = join(
        self.results_path, 'gen_samples', dataset_name, generator_name,
        split)
    _makedirs(samples_dir)

    batches = generator.make_split_generator(
        split, batch_size=batch_size, shuffle=True,
        augment_methods=options.augment_methods, normalize=True,
        only_xy=False)

    for batch_ind in range(nb_batches):
        batch = next(batches)
        for sample_ind in range(batch_size):
            file_name = '{}_{}.pdf'.format(batch_ind, sample_ind)
            generator.plot_sample(
                join(samples_dir, file_name),
                batch.all_x[sample_ind, :],
                batch.y[sample_ind, :],
                batch.file_inds[sample_ind])
def make_videos(run_path, options, generator):
    """Make a video per validation sample showing predictions over time.

    One model is built for each '*.h5' weights file found in
    <run_path>/delta_model_checkpoints (in sorted file-name order). For
    each validation sample, make_video is called with all of these models,
    writing its output under <run_path>/videos. If the checkpoints
    directory does not exist, a message is printed and nothing is made.

    # Arguments
        run_path: the path to the files for a run
        options: options object providing eval_target_size and nb_videos;
            nb_videos limits the number of videos, None means no limit
        generator: a Generator object to generate the validation data
    """
    model_factory = SemsegModelFactory()
    videos_path = join(run_path, 'videos')
    _makedirs(videos_path)

    checkpoints_path = join(run_path, 'delta_model_checkpoints')
    if not isdir(checkpoints_path):
        print('Cannot make videos without delta_model_checkpoints.')
        return

    model_paths = sorted(glob.glob(join(checkpoints_path, '*.h5')))
    models = []
    for model_path in model_paths:
        model = model_factory.make_model(options, generator)
        model.load_weights(model_path, by_name=True)
        models.append(model)

    split_gen = generator.make_split_generator(
        VALIDATION, target_size=options.eval_target_size, batch_size=1,
        shuffle=False, augment_methods=None, normalize=True, only_xy=False)

    for video_ind, batch in enumerate(split_gen):
        # Batches hold a single sample; drop the batch axis.
        x = np.squeeze(batch.x, axis=0)
        y = np.squeeze(batch.y, axis=0)
        display_y = generator.dataset.one_hot_to_rgb_batch(y)
        all_x = np.squeeze(batch.all_x, axis=0)

        make_video(x, display_y, all_x, models, videos_path, video_ind,
                   options, generator)

        # Guard against nb_videos being None, which would otherwise raise
        # a TypeError on the subtraction after the first video was made.
        if (options.nb_videos is not None and
                video_ind == options.nb_videos - 1):
            break
def validation_eval(run_path, model, options, generator):
    """Score the model on the validation split via a confusion matrix.

    A prediction is made for each validation image and added to a running
    confusion matrix, so the scores can be computed without holding the
    whole dataset in memory. The first options.nb_eval_plot_samples
    predictions (none when that option is None) are also plotted next to
    the ground truth under <run_path>/validation_eval. After the loop the
    scores are computed from the confusion matrix and saved.

    # Arguments
        run_path: the path to the files for a run
        model: a Keras model that has been trained
        options: RunOptions object that specifies the run
        generator: a Generator object to generate the test data
    """
    dataset = generator.dataset
    label_names = dataset.label_names
    nb_labels = dataset.nb_labels

    samples = generator.make_split_generator(
        VALIDATION, target_size=options.eval_target_size, batch_size=1,
        shuffle=False, augment_methods=None, normalize=True, only_xy=False)

    confusion_mat = np.zeros((nb_labels, nb_labels))
    plots_dir = join(run_path, 'validation_eval')
    _makedirs(plots_dir)

    def predict_rgb(tile):
        return dataset.one_hot_to_rgb_batch(predict_x(tile, model))

    for sample_index, batch in enumerate(samples):
        file_ind = batch.file_inds[0]
        print('Processing {}'.format(file_ind))

        # Batches hold a single sample; drop the batch axis.
        x = np.squeeze(batch.x, axis=0)
        all_x = np.squeeze(batch.all_x, axis=0)
        y = np.squeeze(batch.y, axis=0)
        y_mask = np.squeeze(batch.y_mask, axis=0)

        display_pred = make_prediction_img(
            x, options.target_size[0], predict_rgb)
        display_y = dataset.one_hot_to_rgb_batch(y)
        label_y = dataset.one_hot_to_label_batch(y)
        label_pred = dataset.rgb_to_label_batch(display_pred)

        confusion_mat += compute_confusion_mat(
            label_y, y_mask, label_pred, dataset.nb_labels)

        plot_limit = options.nb_eval_plot_samples
        if plot_limit is not None and sample_index < plot_limit:
            plot_path = join(plots_dir, '{}.png'.format(sample_index))
            plot_prediction(generator, all_x, display_y, display_pred,
                            plot_path, is_debug=True)

        sample_limit = options.nb_eval_samples
        if sample_limit is not None and sample_index == sample_limit - 1:
            break

    scores = Scores()
    scores.compute_scores(label_names, confusion_mat)
    save_scores(scores, run_path)