# Example No. 1
class NNet:
    """Wrapper around a U-Net-style segmentation network.

    Builds (or loads) one of three architectures, wires up the training /
    validation data generators and the test set, and exposes helpers for
    training, evaluation, prediction and submission-file creation.
    """

    def __init__(self,
                 val_split=.0,
                 model_to_load='None',
                 net_type='u_xception',
                 load_weights='None',
                 data_paths=None):
        """Create or load the model and prepare train/validation/test data.

        Parameters
        ----------
        val_split : float
            Fraction of training data held out for validation
            (``0`` disables the held-out split).
        model_to_load : str or None
            Path to a saved Keras model. ``None`` — or the legacy string
            sentinel ``'None'`` — builds a fresh model instead.
        net_type : str
            One of ``'u_xception'``, ``'u_resnet50v2'``, ``'unet'``.
        load_weights : str or None
            Path to a ``.npy`` weights file produced by :meth:`save_model`;
            ``None``/``'None'`` skips weight loading. Only used when a
            fresh model is built.
        data_paths : dict or None
            Optional mapping with keys ``'image_path'``,
            ``'groundtruth_path'``, ``'additional_images_path'``,
            ``'additional_masks_path'`` and ``'test_data_path'``.
            Generator defaults are used when omitted.
        """
        assert net_type in [
            'u_xception', 'u_resnet50v2', 'unet'
        ], "net_type must be one of ['u_xception', 'u_resnet50v2', 'unet']"
        self.net_type = net_type
        print('creating model: {}'.format(net_type))

        # Accept both a real None and the legacy string sentinel 'None'
        # so existing callers keep working.
        if model_to_load in (None, 'None'):
            # Dispatch table instead of an if/elif chain; net_type was
            # validated above so the lookup cannot fail.
            builders = {
                'u_xception': u_xception_net,
                'u_resnet50v2': u_resnet50v2_net,
                'unet': u_net,
            }
            self.model = builders[net_type]()
            print('created model: {}'.format(self.model.name))

            if load_weights not in (None, 'None'):
                # Weights are stored via np.save, hence allow_pickle.
                weights = np.load(load_weights, allow_pickle=True)
                self.model.set_weights(weights)
                print('loaded weights: {}'.format(load_weights))
        else:
            # Custom losses/metrics must be registered for deserialization.
            self.model = load_model(model_to_load,
                                    custom_objects={
                                        'soft_dice_loss': soft_dice_loss,
                                        'dice_coef': dice_coef,
                                        'iou_coef': iou_coef
                                    })
            print('loaded model: {}'.format(model_to_load))
            print('model name: {}'.format(self.model.name))

        self.data = None
        self.valid_set = None
        self.val_split = val_split
        self.load_data(val_split=val_split, paths=data_paths)

        if data_paths is None:
            self.test_data_gen = TestData()
        else:
            self.test_data_gen = TestData(data_paths['test_data_path'])
        self.test_images = self.test_data_gen.get_test_data()
        # Scale pixels to [0, 1], matching what the training generator feeds.
        self.preprocessed_test_images = preprocess_test_images(
            self.test_images) / 255
        self.test_images_predictions = None

    def load_data(self, val_split=.0, paths=None):
        """(Re)create the training data generator and the validation set.

        With ``val_split == 0`` the whole training set doubles as the
        "validation" set (one full batch drawn from the generator);
        otherwise a proper held-out split is returned by the generator.
        """
        self.val_split = val_split
        if paths is None:
            self.data = DataGenerator(val_split=val_split)
        else:
            self.data = DataGenerator(
                val_split=val_split,
                image_path=paths['image_path'],
                groundtruth_path=paths['groundtruth_path'],
                additional_images_path=paths['additional_images_path'],
                additional_masks_path=paths['additional_masks_path'])
        if self.val_split != .0:
            self.valid_set = self.data.return_validation_set()
        else:
            self.valid_set = next(self.data.generator(len(self.data.images)))

    def train(self,
              loss='bce',
              epochs=100,
              l_rate=.0001,
              batch_size=8,
              train_on='competition_data',
              verb=1):
        """Compile and fit the model.

        Parameters
        ----------
        loss : str
            ``'bce'`` (binary cross-entropy) or ``'dice'`` (soft dice loss).
        epochs : int
            Number of training epochs.
        l_rate : float
            Adam learning rate.
        batch_size : int
            Generator batch size; also fixes ``steps_per_epoch``.
        train_on : str
            ``'competition_data'`` (main generator, optional validation) or
            ``'google_data'`` (the additional-images generator).
        verb : int
            Keras verbosity level.
        """
        assert loss in ['bce', 'dice'], "loss must be one of ['bce', 'dice']"
        assert train_on in [
            'competition_data', 'google_data'
        ], "train_on must be one of ['competition_data', 'google_data']"
        # Capitalised `Adam` exists in both standalone Keras and tf.keras;
        # the lowercase `adam` alias is standalone-Keras-only.
        optimizer = keras.optimizers.Adam(l_rate)
        metrics = ['acc', iou_coef, dice_coef]
        loss_fn = 'binary_crossentropy' if loss == 'bce' else soft_dice_loss
        self.model.compile(optimizer=optimizer, loss=loss_fn, metrics=metrics)

        if train_on == 'competition_data':
            steps = len(self.data.images) // batch_size

            if self.val_split != .0:
                self.model.fit_generator(
                    generator=self.data.generator(batch_size),
                    validation_data=self.valid_set,
                    epochs=epochs,
                    steps_per_epoch=steps,
                    callbacks=create_callbacks(loss, with_val=True),
                    verbose=verb)
            else:
                self.model.fit_generator(
                    generator=self.data.generator(batch_size),
                    epochs=epochs,
                    steps_per_epoch=steps,
                    callbacks=create_callbacks(loss),
                    verbose=verb)
        else:
            steps = len(self.data.additional_images) // batch_size
            self.model.fit_generator(
                generator=self.data.additional_generator(batch_size),
                epochs=epochs,
                steps_per_epoch=steps,
                callbacks=create_callbacks(loss),
                verbose=verb)

    def check_outputs(self):
        """Plot predictions on one 30-image batch for a quick sanity check."""
        plt.figure(figsize=(15, 15))
        gen = self.data.generator(30)
        batch = next(gen)
        display_predictions(batch[0], self.model.predict(batch[0]), batch[1])

    def evaluate_model(self):
        """Print label-level accuracy of the model on the validation set."""
        labelizer = Labelizer()
        # NOTE(review): assumes 400x400 model outputs — confirm against the
        # network definitions if the input size ever changes.
        val_predictions = self.model.predict(self.valid_set[0]).reshape(
            -1,
            400,
            400,
        )
        predictions_labs = labelizer.make_submission(val_predictions)[0]
        groundtruths = labelizer.make_submission(self.valid_set[1])[0]
        print(accuracy_score(groundtruths, predictions_labs))

    def save_model(self, path=None):
        """Save the model weights as a ``.npy`` file (default name from net_type)."""
        if path is None:
            path = "model-{}.npy".format(self.net_type)
        weights = self.model.get_weights()
        np.save(path, weights)

    def predict_test_data(self):
        """Predict on the preprocessed test images, merge per-image crops
        with ``mode='max'``, cache and return the merged predictions."""
        predictions = self.model.predict(self.preprocessed_test_images)
        self.test_images_predictions = merge_predictions(predictions.reshape(
            -1,
            400,
            400,
        ),
                                                         mode='max')
        return self.test_images_predictions

    def display_test_predictions(self,
                                 submission_path,
                                 samples_number=5,
                                 figure_size=(15, 15)):
        """Plot test images, their cached predictions and the rendering of an
        existing submission file for ``samples_number`` samples."""
        plt.figure(figsize=figure_size)
        display_predictions(self.test_images,
                            self.test_images_predictions,
                            submission_outputs=submission_outputs(
                                submission_path, self.test_data_gen.numbers),
                            samples=samples_number)

    def create_submission_file(self, path='submission.csv', treshold=.25):
        """Write a CSV submission from the cached test predictions.

        ``treshold`` (sic — name kept for backward compatibility) is the
        binarization threshold passed to the Labelizer. Returns the
        DataFrame that was written.
        """
        labelizer = Labelizer(treshold)
        submission = labelizer.make_submission(self.test_images_predictions,
                                               self.test_data_gen.numbers)
        submission_df = pd.DataFrame({
            'id': submission[1],
            'prediction': submission[0]
        })
        submission_df.to_csv(path, index=False)
        return submission_df