Example #1
File: predict.py  Project: rahulsingh24/ML
def predict(run_path, model, options, generator, split, save_probs=False):
    """Generate predictions for split data.

    For each image in a split, create a prediction image as a .tif file, and
    then bundle the files into a zip archive. Do the same for the predicted
    probability images when save_probs is True.

    # Arguments
        run_path: the path to the files for a run
        model: a Keras model that has been trained
        options: RunOptions object that specifies the run
        generator: a Generator object to generate the test data
        split: name of the split, e.g. validation
        save_probs: if True, also save and zip the predicted probability
            images
    """
    dataset = generator.dataset
    if save_probs:
        probs_path = join(run_path, '{}_probs'.format(split))
        _makedirs(probs_path)
    predictions_path = join(run_path, '{}_predictions'.format(split))
    _makedirs(predictions_path)

    split_gen = generator.make_split_generator(
        split,
        target_size=options.eval_target_size,
        batch_size=1,
        shuffle=False,
        augment_methods=None,
        normalize=True,
        only_xy=False)

    for sample_ind, batch in enumerate(split_gen):
        file_ind = batch.file_inds[0]
        print('Processing {}'.format(file_ind))

        x = np.squeeze(batch.x, axis=0)

        y_probs = make_prediction_img(x, options.target_size[0],
                                      lambda x: predict_x(x, model))

        if save_probs:
            probs_file_path = join(
                probs_path, generator.dataset.get_output_file_name(file_ind))
            save_img(y_probs, probs_file_path)

        y_preds = dataset.one_hot_to_rgb_batch(y_probs)
        prediction_file_path = join(
            predictions_path, generator.dataset.get_output_file_name(file_ind))
        save_img(y_preds, prediction_file_path)

        if (options.nb_eval_samples is not None
                and sample_ind == options.nb_eval_samples - 1):
            break

    if save_probs:
        zip_path = join(run_path, '{}_probs.zip'.format(split))
        zip_dir(probs_path, zip_path)
        rmtree(probs_path)

    zip_path = join(run_path, '{}_predictions.zip'.format(split))
    zip_dir(predictions_path, zip_path)
    rmtree(predictions_path)
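
The zip-then-delete step at the end of predict relies on the project's zip_dir helper; below is a minimal stand-alone sketch of the same pattern using only the standard library, with a hypothetical directory path.

import shutil

def zip_and_remove(dir_path, zip_base):
    # shutil.make_archive appends '.zip' to the base name it is given.
    shutil.make_archive(zip_base, 'zip', dir_path)
    shutil.rmtree(dir_path)

# e.g. zip_and_remove('/tmp/run/validation_predictions',
#                     '/tmp/run/validation_predictions')
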
Example #2
File: potsdam.py  Project: rahulsingh24/ML
    def preprocess(datasets_path):
        # Fix the depth image that is missing a column if it hasn't been
        # fixed already.
        data_path = join(datasets_path, POTSDAM)
        proc_data_path = join(datasets_path, PROCESSED_POTSDAM)
        _makedirs(proc_data_path)

        file_path = join(
            data_path,
            '1_DSM_normalisation/dsm_potsdam_03_13_normalized_lastools.jpg')

        im = load_img(file_path)
        if im.shape[1] == 5999:
            im_fix = np.zeros((6000, 6000), dtype=np.uint8)
            im_fix[:, 0:-1] = im[:, :, 0]
            save_img(im_fix, file_path)

        class Options():
            def __init__(self):
                self.active_input_inds = [0, 1, 2, 3, 4]
                self.train_ratio = 0.8
                self.cross_validation = None

        options = Options()
        PotsdamImageFileGenerator(datasets_path,
                                  options).write_channel_stats(proc_data_path)
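
A self-contained sketch of the shape fix above: the normalized DSM image is one column short (6000 x 5999), so its first channel is copied into a 6000 x 6000 array and the missing last column is left as zeros. The array here is synthetic; in the original, im comes from load_img.

import numpy as np

im = np.zeros((6000, 5999, 1), dtype=np.uint8)  # stand-in for the loaded image
im_fix = np.zeros((6000, 6000), dtype=np.uint8)
im_fix[:, 0:-1] = im[:, :, 0]                   # last column stays zero
assert im_fix.shape == (6000, 6000)
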
Example #3
def plot_predictions(run_path, options, generator):
    validation_pred_path = join(run_path, 'validation_preds.csv')

    validation_plot_path = join(run_path, 'validation_plots')
    _makedirs(validation_plot_path)

    validation_pred_tag_store = TagStore(tags_path=validation_pred_path,
                                         active_tags=options.active_tags)
    split_gen = generator.make_split_generator(VALIDATION,
                                               target_size=None,
                                               batch_size=options.batch_size,
                                               shuffle=False,
                                               augment_methods=None,
                                               normalize=True,
                                               only_xy=False)

    sample_count = 0
    plot_sample_count = 0
    y_trues = []
    y_preds = []
    for batch_ind, batch in enumerate(split_gen):
        for sample_ind in range(batch.x.shape[0]):
            file_ind = batch.file_inds[sample_ind]
            all_x = batch.all_x[sample_ind, :, :, :]

            y_true = generator.tag_store.get_tag_array([file_ind])
            y_trues.append(y_true)
            y_pred = validation_pred_tag_store.get_tag_array([file_ind])
            y_preds.append(y_pred)

            if (options.nb_eval_plot_samples is None
                    or plot_sample_count < options.nb_eval_plot_samples):
                is_mistake = not np.array_equal(y_true, y_pred)
                if is_mistake:
                    plot_sample_count += 1
                    plot_path = join(validation_plot_path,
                                     '{}_debug.png'.format(file_ind))
                    plot_prediction(generator, all_x, y_true[0, :],
                                    y_pred[0, :], plot_path)

            sample_count += 1

            if (options.nb_eval_samples is not None
                    and sample_count >= options.nb_eval_samples):
                break

        if (options.nb_eval_samples is not None
                and sample_count >= options.nb_eval_samples):
            break

    y_true = np.concatenate(y_trues, axis=0)
    y_pred = np.concatenate(y_preds, axis=0)
    if options.nb_eval_samples is not None:
        y_true = y_true[0:options.nb_eval_samples, :]
        y_pred = y_pred[0:options.nb_eval_samples, :]

    return y_true, y_pred
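
The per-sample mistake check above plots a sample only when the predicted tag vector differs from the ground truth. A tiny NumPy illustration of that check, with made-up tag vectors:

import numpy as np

y_true = np.array([[1, 0, 1]])
y_pred = np.array([[1, 1, 1]])
is_mistake = not np.array_equal(y_true, y_pred)
print(is_mistake)  # True, so this sample would get a debug plot
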
Example #4
    def make_callbacks(self):
        model_checkpoint = ModelCheckpoint(
            filepath=join(self.run_path, 'model.h5'), period=1,
            save_weights_only=True)

        best_model_checkpoint = ModelCheckpoint(
            filepath=join(self.run_path, 'best_model.h5'), save_best_only=True,
            save_weights_only=True)
        logger = CSVLogger(self.log_path, append=True)
        callbacks = [model_checkpoint, best_model_checkpoint, logger]

        # TODO hasattr
        if self.options.delta_model_checkpoint is not None:
            exp_path = join(self.run_path, 'delta_model_checkpoints')
            _makedirs(exp_path)
            callback = DeltaModelCheckpoint(
                join(exp_path, 'model_{epoch:0>4}.h5'),
                acc_delta=self.options.delta_model_checkpoint)
            callbacks.append(callback)

        if self.options.patience:
            callback = ReduceLROnPlateau(
                verbose=1, epsilon=0.001, patience=self.options.patience)
            callbacks.append(callback)

        if self.options.lr_schedule:
            def get_lr(epoch):
                for epoch_thresh, lr in self.options.lr_schedule:
                    if epoch >= epoch_thresh:
                        curr_lr = lr
                    else:
                        break
                return curr_lr
            callback = LearningRateScheduler(get_lr)
            callbacks.append(callback)

        if self.options.lr_epoch_decay:
            def get_lr(epoch):
                decay_factor = 1 / (1.0 + self.options.lr_epoch_decay * epoch)
                return self.options.init_lr * decay_factor
            callback = LearningRateScheduler(get_lr)
            callbacks.append(callback)

        if self.options.cyclic_lr is not None:
            callback = CyclicLR(base_lr=self.options.base_lr,
                                max_lr=self.options.max_lr,
                                step_size=self.options.step_size,
                                mode=self.options.cycle_mode)
            callbacks.append(callback)

        callback = LambdaCallback(
            on_epoch_end=lambda epoch, logs: self.sync_results())
        callbacks.append(callback)

        return callbacks
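
The lr_schedule branch defines a step schedule keyed by epoch thresholds. The stand-alone sketch below uses a hypothetical schedule and shows the same lookup, initializing curr_lr from the first entry so it is defined even before the first threshold is reached.

lr_schedule = [(0, 1e-3), (50, 1e-4), (80, 1e-5)]  # hypothetical (epoch, lr) pairs

def get_lr(epoch):
    curr_lr = lr_schedule[0][1]
    for epoch_thresh, lr in lr_schedule:
        if epoch >= epoch_thresh:
            curr_lr = lr
        else:
            break
    return curr_lr

assert get_lr(10) == 1e-3 and get_lr(60) == 1e-4 and get_lr(90) == 1e-5
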
Example #5
    def save_to_dir(self, experiments, path):
        if not self.has_unique_run_names(experiments):
            raise ValueError('Each run_name needs to be unique.')

        for exp_ind, exp in enumerate(experiments):
            self.parse_experiment(exp)

            json_str = json.dumps(exp, sort_keys=True, indent=4)
            exp_path = join(path, 'experiments', '{}.json'.format(exp_ind))
            _makedirs(dirname(exp_path))
            with open(exp_path, 'w') as exp_file:
                exp_file.write(json_str)
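
A minimal stand-alone sketch of the per-experiment dump above, using only the standard library; the experiment dict and output path are made up for illustration.

import json
import os
from os.path import dirname, join

exp = {'run_name': 'exp_0', 'batch_size': 8}  # hypothetical experiment config
exp_path = join('/tmp', 'experiments', '0.json')
os.makedirs(dirname(exp_path), exist_ok=True)
with open(exp_path, 'w') as exp_file:
    exp_file.write(json.dumps(exp, sort_keys=True, indent=4))
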
Example #6
File: potsdam.py  Project: rahulsingh24/ML
    def preprocess(datasets_path):
        proc_data_path = join(datasets_path, PROCESSED_POTSDAM)
        _makedirs(proc_data_path)

        class Options():
            def __init__(self):
                self.active_input_inds = [0, 1, 2, 3, 4]
                self.train_ratio = 0.8
                self.cross_validation = None

        options = Options()
        generator = PotsdamImageFileGenerator(datasets_path, options)
        dataset = generator.dataset

        def _preprocess(split):
            gen = generator.make_split_generator(split,
                                                 batch_size=1,
                                                 shuffle=False,
                                                 augment_methods=None,
                                                 normalize=False,
                                                 only_xy=False)

            for batch in gen:
                print('.')
                file_ind = batch.file_inds[0]
                x = np.squeeze(batch.x, axis=0)
                channels = [x]

                if batch.y is not None:
                    y = np.squeeze(batch.y, axis=0)
                    y = dataset.one_hot_to_label_batch(y)
                    y_mask = np.squeeze(batch.y_mask, axis=0)
                    channels.extend([y, y_mask])
                channels = np.concatenate(channels, axis=2)

                ind0, ind1 = file_ind
                file_name = '{}_{}'.format(ind0, ind1)
                save_numpy_array(join(proc_data_path, file_name), channels)

                # Free memory
                channels = None
                batch.all_x = None
                batch.x = x = None
                batch.y = y = None
                batch.y_mask = y_mask = None

        _preprocess(TRAIN)
        _preprocess(VALIDATION)
        _preprocess(TEST)

        PotsdamNumpyFileGenerator(datasets_path,
                                  options).write_channel_stats(proc_data_path)
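
The channel-stacking step in _preprocess concatenates x, y, and y_mask along the channel axis before writing a single array per tile. A NumPy sketch of that step, assuming save_numpy_array is roughly a wrapper around np.save; the shapes here are illustrative, not the project's actual tile sizes.

import numpy as np

x = np.zeros((256, 256, 5), dtype=np.float32)       # input channels
y = np.zeros((256, 256, 1), dtype=np.float32)       # label image
y_mask = np.zeros((256, 256, 1), dtype=np.float32)  # validity mask
channels = np.concatenate([x, y, y_mask], axis=2)
assert channels.shape == (256, 256, 7)
np.save('/tmp/2_10.npy', channels)
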
Example #7
    def preprocess(datasets_path):
        proc_data_path = join(datasets_path, PROCESSED_VAIHINGEN)
        _makedirs(proc_data_path)

        class Options():
            def __init__(self):
                self.active_input_inds = [0, 1, 2, 3]
                self.train_ratio = 0.8
                self.cross_validation = None

        options = Options()
        VaihingenImageFileGenerator(
            datasets_path, options).write_channel_stats(proc_data_path)
Example #8
    def setup_run(self):
        """Setup path for the results of a run.

        Creates directory if doesn't exist, downloads results from cloud, and
        write the options to <run_path>/options.json
        """
        if not isdir(self.run_path):
            self.sync_results(download=True)

        _makedirs(self.run_path)

        options_path = join(self.run_path, 'options.json')
        save_json(self.options.__dict__, options_path)
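
save_json here is a project helper; a hedged sketch of the same idea, serializing the __dict__ of a simple options object (the names below are illustrative, not the project's API):

import json

class RunOptions(object):
    def __init__(self):
        self.batch_size = 8
        self.nb_eval_samples = None

def save_json(obj, path):
    with open(path, 'w') as json_file:
        json.dump(obj, json_file, sort_keys=True, indent=4)

save_json(RunOptions().__dict__, '/tmp/options.json')
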
Example #9
    def preprocess(datasets_path):
        PlanetKaggleFileGenerator.preprocess(datasets_path)

        proc_data_path = join(datasets_path, PLANET_KAGGLE)
        _makedirs(proc_data_path)

        class Options():
            def __init__(self):
                self.active_input_inds = [0, 1, 2]
                self.train_ratio = 0.8
                self.cross_validation = None
                self.active_tags_prob = None
                self.active_tags = None

        options = Options()
        PlanetKaggleJpgFileGenerator(
            datasets_path, options).write_channel_stats(proc_data_path)
Example #10
def make_video(x, y, all_x, models, videos_path, video_ind, options,
               generator):
    video_path = join(videos_path, str(video_ind))
    _makedirs(video_path)

    for frame_ind, model in enumerate(models):
        y_pred = make_prediction_img(
            x, options.target_size[0],
            lambda x: generator.dataset.one_hot_to_rgb_batch(
                predict_x(x, model)))
        print(video_ind)
        print(frame_ind)
        frame_path = join(video_path, 'frame_{:0>4}.png'.format(frame_ind))
        plot_prediction(generator, all_x, y, y_pred, frame_path)

    frames_path = join(video_path, 'frame_%04d.png')
    video_path = join(videos_path, '{}.mp4'.format(video_ind))
    call([
        'avconv', '-r', '2', '-i', frames_path, '-vf',
        'scale=trunc(in_w/2)*2:trunc(in_h/2)*2', video_path
    ])
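
The frames written above use zero-padded names ('frame_{:0>4}.png') so they match the frame_%04d.png input pattern passed to avconv; on systems without avconv, ffmpeg accepts the same -r/-i/-vf arguments used here. A small sketch of the correspondence, with a hypothetical frames directory:

from os.path import join
from subprocess import call

video_dir = '/tmp/videos/0'  # hypothetical
for frame_ind in range(3):
    print(join(video_dir, 'frame_{:0>4}.png'.format(frame_ind)))
# -> .../frame_0000.png, .../frame_0001.png, .../frame_0002.png
call(['ffmpeg', '-r', '2', '-i', join(video_dir, 'frame_%04d.png'),
      '-vf', 'scale=trunc(in_w/2)*2:trunc(in_h/2)*2', '/tmp/videos/0.mp4'])
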
Example #11
    def plot_generator(self, dataset_name, generator_name, split):
        nb_batches = 2
        batch_size = 4

        class Options():
            def __init__(self):
                self.dataset_name = dataset_name
                self.generator_name = generator_name
                self.active_input_inds = [0, 1, 2]
                if generator_name == TIFF:
                    self.active_input_inds = [0, 1, 2, 3]
                self.train_ratio = 0.8
                self.cross_validation = None
                self.augment_methods = [HFLIP, VFLIP, ROTATE, TRANSLATE]
                self.active_tags = None
                self.active_tags_prob = None

        options = Options()
        generator = self.get_data_generator(options)

        viz_path = join(self.results_path, 'gen_samples', dataset_name,
                        generator_name, split)
        _makedirs(viz_path)

        gen = generator.make_split_generator(
            split,
            batch_size=batch_size,
            shuffle=True,
            augment_methods=options.augment_methods,
            normalize=True,
            only_xy=False)

        for batch_ind in range(nb_batches):
            batch = next(gen)
            for sample_ind in range(batch_size):
                file_path = join(viz_path,
                                 '{}_{}.pdf'.format(batch_ind, sample_ind))
                generator.plot_sample(file_path, batch.all_x[sample_ind, :],
                                      batch.y[sample_ind, :],
                                      batch.file_inds[sample_ind])
Example #12
def make_videos(run_path, options, generator):
    model_factory = SemsegModelFactory()
    videos_path = join(run_path, 'videos')
    _makedirs(videos_path)

    checkpoints_path = join(run_path, 'delta_model_checkpoints')
    if not isdir(checkpoints_path):
        print('Cannot make videos without delta_model_checkpoints.')
        return

    model_paths = glob.glob(join(checkpoints_path, '*.h5'))
    model_paths.sort()
    models = []
    for model_path in model_paths:
        model = model_factory.make_model(options, generator)
        model.load_weights(model_path, by_name=True)
        models.append(model)

    split_gen = generator.make_split_generator(
        VALIDATION,
        target_size=options.eval_target_size,
        batch_size=1,
        shuffle=False,
        augment_methods=None,
        normalize=True,
        only_xy=False)

    for video_ind, batch in enumerate(split_gen):
        x = np.squeeze(batch.x, axis=0)
        y = np.squeeze(batch.y, axis=0)
        display_y = generator.dataset.one_hot_to_rgb_batch(y)
        all_x = np.squeeze(batch.all_x, axis=0)

        make_video(x, display_y, all_x, models, videos_path, video_ind,
                   options, generator)

        if video_ind == options.nb_videos - 1:
            break
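
Checkpoint discovery above globs the .h5 files and sorts them; because DeltaModelCheckpoint writes zero-padded names (model_0001.h5, model_0002.h5, ...), a lexical sort also orders them by epoch. A minimal sketch with a hypothetical directory:

import glob
from os.path import join

checkpoints_path = '/tmp/run/delta_model_checkpoints'  # hypothetical
model_paths = glob.glob(join(checkpoints_path, '*.h5'))
model_paths.sort()  # zero-padded epoch numbers make this an epoch ordering
print(model_paths)
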
Example #13
def validation_eval(run_path, model, options, generator):
    """Evaluate model on validation data.

    For each validation image, make a prediction, plot the prediction along
    with the ground truth, and increment a confusion matrix. After all
    validation images have been processed, compute and save scores based on the
    confusion matrix. This allows us to compute scores for datasets that cannot
    fit into memory.

    # Arguments
        run_path: the path to the files for a run
        model: a Keras model that has been trained
        options: RunOptions object that specifies the run
        generator: a Generator object to generate the test data
    """
    dataset = generator.dataset
    label_names = dataset.label_names

    validation_gen = generator.make_split_generator(
        VALIDATION,
        target_size=options.eval_target_size,
        batch_size=1,
        shuffle=False,
        augment_methods=None,
        normalize=True,
        only_xy=False)

    confusion_mat = np.zeros((dataset.nb_labels, dataset.nb_labels))
    predictions_path = join(run_path, 'validation_eval')
    _makedirs(predictions_path)

    for sample_index, batch in enumerate(validation_gen):
        file_ind = batch.file_inds[0]
        print('Processing {}'.format(file_ind))

        x = np.squeeze(batch.x, axis=0)
        all_x = np.squeeze(batch.all_x, axis=0)
        y = np.squeeze(batch.y, axis=0)
        y_mask = np.squeeze(batch.y_mask, axis=0)

        display_pred = make_prediction_img(
            x, options.target_size[0],
            lambda x: dataset.one_hot_to_rgb_batch(predict_x(x, model)))
        display_y = dataset.one_hot_to_rgb_batch(y)

        label_y = dataset.one_hot_to_label_batch(y)
        label_pred = dataset.rgb_to_label_batch(display_pred)

        confusion_mat += compute_confusion_mat(label_y, y_mask, label_pred,
                                               dataset.nb_labels)

        if (options.nb_eval_plot_samples is not None
                and sample_index < options.nb_eval_plot_samples):
            file_path = '{}.png'.format(sample_index)
            file_path = join(predictions_path, file_path)
            plot_prediction(generator,
                            all_x,
                            display_y,
                            display_pred,
                            file_path,
                            is_debug=True)

        if (options.nb_eval_samples is not None
                and sample_index == options.nb_eval_samples - 1):
            break

    scores = Scores()
    scores.compute_scores(label_names, confusion_mat)
    save_scores(scores, run_path)
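
compute_confusion_mat is a project helper; the sketch below is one plausible NumPy implementation of the accumulation it performs (counting (true, predicted) label pairs over unmasked pixels), written only to make the bookkeeping above concrete.

import numpy as np

def compute_confusion_mat(label_y, y_mask, label_pred, nb_labels):
    # Count (true, predicted) pairs, ignoring pixels where the mask is zero.
    keep = y_mask.ravel().astype(bool)
    true_flat = label_y.ravel()[keep].astype(int)
    pred_flat = label_pred.ravel()[keep].astype(int)
    conf = np.zeros((nb_labels, nb_labels))
    np.add.at(conf, (true_flat, pred_flat), 1)
    return conf

confusion_mat = np.zeros((3, 3))
label_y = np.array([[0, 1], [2, 2]])
label_pred = np.array([[0, 1], [1, 2]])
y_mask = np.ones((2, 2))
confusion_mat += compute_confusion_mat(label_y, y_mask, label_pred, 3)
print(confusion_mat)
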