Example #1
def load_training_rows(transform_dates=True,
                       transform_categorical_features=False,
                       add_timeseries_features=True):
    """

    """
    data_rows = load.load_training_data()
    historical_data = load.load_historical_training_data()
    features = load.load_features()
    timeseries_features = load.TIMESERIES_FEATURES

    data_rows, features = transformations(add_timeseries_features, data_rows,
                                          features, historical_data,
                                          timeseries_features,
                                          transform_categorical_features,
                                          transform_dates)

    label_rows = load.load_training_labels()

    return data_rows, features, label_rows
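
# Hypothetical usage (not part of the original example): load the training data
# with categorical features transformed as well.
data_rows, features, label_rows = load_training_rows(
    transform_dates=True,
    transform_categorical_features=True,
    add_timeseries_features=True)
print('Loaded %d rows with %d features' % (len(data_rows), len(features)))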
Example #2
def load_data_portion(denominator=0, offset=0):
    """Load a portion of the data.

    :param denominator: The number of partitions to divide the data into e.g. 3 (thirds)
    :param offset: The partition to load e.g. 2 (the second third)
    :return: The preprocessed (X, y) pair for the selected portion of the data.
    """
    training_rows = load.load_training_data()
    training_labels = load.load_training_labels()
    features = load.load_features()

    number_of_rows = min(len(training_rows), len(training_labels))
    portion_length = (number_of_rows // denominator) if denominator else number_of_rows
    slice_start = offset * portion_length
    slice_end = slice_start + portion_length
    print('Returning %s samples' % portion_length)

    X, y = preprocessing.labelled_training_data(
        training_rows[slice_start:slice_end], training_labels[slice_start:slice_end],
        features, load.LABEL_NAME)
    return X, y
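
# Hypothetical usage (not part of the original example): split the data into
# thirds and load one of them. Note that the slice start is computed as
# offset * portion_length, so offsets are zero-based.
X, y = load_data_portion(denominator=3, offset=1)  # the second third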
Example #3
                  (j, self.evaluate_test(test_x, test_y), len(test_x)))

    def evaluate_test(self, test_data_x, test_data_y):
        result = [(np.argmax(self.feed_forward(x)), y)
                  for x, y in zip(test_data_x, test_data_y)]
        return sum((int(x == y) for x, y in result))

    def show_img(self, x, y):
        print(y)
        x = np.reshape(x, (28, 28))
        plt.imshow(x)
        plt.show()


if __name__ == '__main__':
    start_time = timeit.default_timer()
    x, y = load.load_training_data()
    test_x, test_y = load.load_test_data()
    net = NeuralNetwork([784, 30, 10])
    print('before training')
    print(net.feed_forward(x[0]))
    print("epch=before training %r/%r" %
          (net.evaluate_test(test_x, test_y), len(test_x)))
    net.update_weights(x, y, 3, 30, test_x, test_y, 10)
    print('after training')
    print(net.feed_forward(x[0]))
    print(y[0])
    stop_time = timeit.default_timer()
    progress_time = stop_time - start_time
    print('Time=', progress_time)
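    # Hedged addition (not in the original script): evaluate_test returns the
    # number of correctly classified samples, so test accuracy follows directly.
    correct = net.evaluate_test(test_x, test_y)
    print('test accuracy: %.2f%%' % (100.0 * correct / len(test_x)))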
Example #4
print("Loading test set...")

# Load images
test_imgs = load.load_test_set()

# Resize test images to half their original size
resized_test_imgs = transformation.imgs_resize(test_imgs, int(test_imgs[0].shape[0]/2), int(test_imgs[0].shape[0]/2))

# Make patches out of the testset images
test_patches, overlap_image, nPatches = patch.make_patch_and_flatten(resized_test_imgs, PATCH_SIZE, OVERLAP)

# ================== LOAD VALIDATION SET ==================
print("Loading validation set...")

# Load validation set (used to determine the best threshold to discriminate foreground from background)
val_patches, val_gt_patches = load.load_training_data(load.PROVIDED_DATA_DIR)
val_gt_patches = np.expand_dims(val_gt_patches,axis=3)

# Resize validation images and groundtruth
resized_val_imgs = transformation.imgs_resize(val_patches, PATCH_SIZE, PATCH_SIZE)
resized_val_gts = transformation.groundtruth_resize(val_gt_patches.astype(float), PATCH_SIZE, PATCH_SIZE, RESIZE_THRESHOLD)

# Make sure that groundtruths are filled with only 0's and 1's
resized_val_gts = (resized_val_gts > 0.5).astype(int)

# ================== LOAD MODEL ==================
print("Loading model " + OUTPUT_NAME)

input_img = Input((PATCH_SIZE, PATCH_SIZE, IMG_NUM_DIM), name='img')
model = get_unet_200(input_img, num_classes=NUM_CLASSES, n_filters=16, dropout=0.6, batchnorm=True)
model.load_weights(submission.MODELS_OUTPUT_DIR + OUTPUT_NAME + '.h5')
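
# Hedged continuation (assumed, not part of the original snippet): predict on the
# flattened patches and rebuild full-size probability maps, mirroring Example #5.
predictions_test = model.predict(test_patches, verbose=1)
predictions = patch.reconstruct_from_flatten(
    np.squeeze(predictions_test), overlap_image, nPatches, OVERLAP)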
Example #5
def main(argv=None):

    # Fix RNG for reproducibility
    np.random.seed(1)

    # ================== LOAD ORIGINAL TRAINING DATA ==================
    print("Loading training data...")

    # Load original training dataset
    imgs, gt_imgs = load.load_training_data(load.PROVIDED_DATA_DIR)

    # Rotate images and groundtruth
    rot_imgs = transformation.imgs_rotate(imgs, ROT_ANGLE)
    rot_gt_imgs = transformation.imgs_rotate(gt_imgs, ROT_ANGLE)

    # Resize everything
    resized_imgs = transformation.imgs_resize(imgs, RESIZE_HEIGHT,
                                              RESIZE_WIDTH)
    resized_rot_imgs = transformation.imgs_resize(rot_imgs, RESIZE_HEIGHT,
                                                  RESIZE_WIDTH)
    resized_gt_imgs = transformation.groundtruth_resize(
        (gt_imgs).astype(float), RESIZE_HEIGHT, RESIZE_WIDTH,
        THRESHOLD_GT_RESIZE)
    resized_rot_gt_imgs = transformation.groundtruth_resize(
        (rot_gt_imgs).astype(float), RESIZE_HEIGHT, RESIZE_WIDTH,
        THRESHOLD_GT_RESIZE)

    # Concatenate normal images with rotated images
    all_train_imgs = np.append(resized_imgs, resized_rot_imgs, axis=0)
    all_train_gts = np.append(resized_gt_imgs, resized_rot_gt_imgs, axis=0)
    all_train_gts = np.expand_dims(all_train_gts, axis=3)

    # ================== LOAD VALIDATION DATA ==================
    print("Loading validation data...")

    # Load validation images
    val_imgs, val_gt_imgs = load.load_training_data(load.VALIDATION_DATA_DIR)

    # Rotate validation images and groundtruth
    rot_val_imgs = transformation.imgs_rotate(val_imgs, ROT_ANGLE)
    rot_val_gt_imgs = transformation.imgs_rotate(val_gt_imgs, ROT_ANGLE)

    # Resize everything
    resized_val_imgs = transformation.imgs_resize(val_imgs, RESIZE_HEIGHT,
                                                  RESIZE_WIDTH)
    resized_rot_val_imgs = transformation.imgs_resize(rot_val_imgs,
                                                      RESIZE_HEIGHT,
                                                      RESIZE_WIDTH)
    resized_val_gt_imgs = transformation.groundtruth_resize(
        val_gt_imgs.astype(float), RESIZE_HEIGHT, RESIZE_WIDTH,
        THRESHOLD_GT_RESIZE)
    resized_rot_val_gt_imgs = transformation.groundtruth_resize(
        rot_val_gt_imgs.astype(float), RESIZE_HEIGHT, RESIZE_WIDTH,
        THRESHOLD_GT_RESIZE)

    # Concatenate normal validation images with rotated validation images
    all_val_imgs = np.append(resized_val_imgs, resized_rot_val_imgs, axis=0)
    all_val_gts = np.append(resized_val_gt_imgs,
                            resized_rot_val_gt_imgs,
                            axis=0)
    all_val_gts = np.expand_dims(all_val_gts, axis=3)

    # ================== LOAD ADDITIONAL TRAINING DATA ==================
    print("Loading additional training data...")

    # Load additional training dataset
    add_imgs, add_gt_imgs, _, _ = load.load_training_data_and_patch(
        load.ADDITIONAL_DATA_DIR,
        PATCH_SIZE,
        random_selection=True,
        proportion=PROP_ADD)
    add_gt_imgs = np.expand_dims(add_gt_imgs, axis=3)
    all_imgs = np.append(all_train_imgs, add_imgs, axis=0)
    all_gts = np.append(all_train_gts, add_gt_imgs, axis=0)

    # Make sure that groundtruths are filled with only 0's and 1's
    all_gts = (all_gts > 0.5).astype(int)
    all_val_gts = (all_val_gts > 0.5).astype(int)

    # ================== CREATE MODEL ==================
    print("Creating the model...")

    # Maximal number of epochs
    EPOCHS = 80

    # Number of classes on which to build the model
    NUM_CLASSES = 1

    # Size of a batch for the model
    BATCH_SIZE = 100

    # Amount of dropout for the model
    DROPOUT = 0.6

    # Set up the model
    input_img = Input((RESIZE_HEIGHT, RESIZE_WIDTH, IMG_NUM_DIM), name='img')
    model = get_unet_200(input_img,
                         num_classes=NUM_CLASSES,
                         n_filters=16,
                         dropout=DROPOUT,
                         batchnorm=True)
    model.compile(optimizer=Adam(),
                  loss="binary_crossentropy",
                  metrics=[f1_score])
    model.summary()

    # Set up callbacks
    callbacks = [
        EarlyStopping(patience=10, verbose=1),
        ReduceLROnPlateau(factor=0.1, patience=3, min_lr=0.00001, verbose=1),
        ModelCheckpoint(submission.MODELS_OUTPUT_DIR + OUTPUT_NAME + '.h5',
                        verbose=1,
                        save_best_only=True,
                        save_weights_only=True)
    ]

    # ================== TRAIN THE MODEL ==================
    print("Training the model...")
    _ = model.fit(all_imgs,
                  all_gts,
                  batch_size=BATCH_SIZE,
                  epochs=EPOCHS,
                  callbacks=callbacks,
                  verbose=1,
                  validation_data=(all_val_imgs, all_val_gts))

    # ================== SAVE THE MODEL ==================
    print("Training complete. Saving model's history to " + OUTPUT_NAME)
    submission.save_training_history(OUTPUT_NAME, model.history)
def main(argv=None):

    # Fix RNG for reproducibility
    np.random.seed(1)

    # Load images
    print("Loading test set...")
    test_imgs = load.load_test_set()

    # ================== unet_patch_120_rot ==================

    # Make patches out of the testset images
    print("Making patches for model " + OUTPUT_NAME_120)
    test_patches, overlap_test_image, n_test_patches = patch.make_patch_and_flatten(
        test_imgs, PATCH_SIZE_120, OVERLAP_120)

    # Load validation set (used to determine the best threshold to discriminate foreground from background)
    val_imgs, val_gts = load.load_training_data(load.PROVIDED_DATA_DIR)
    # Make sure that groundtruths are filled with only 0's and 1's
    val_gts = (val_gts > 0.5).astype(int)

    # Make patches out of the validation images
    val_patches, overlap_val_image, n_val_patches = patch.make_patch_and_flatten(
        val_imgs, PATCH_SIZE_120, OVERLAP_120)

    # Load first model
    print("Loading model " + OUTPUT_NAME_120)
    input_img_120 = Input((PATCH_SIZE_120, PATCH_SIZE_120, IMG_NUM_DIM),
                          name='img')
    model = get_unet_120(input_img_120,
                         num_classes=NUM_CLASSES,
                         n_filters=16,
                         dropout=0.4,
                         batchnorm=True)
    model.load_weights(submission.MODELS_OUTPUT_DIR + OUTPUT_NAME_120 + '.h5')

    # Make predictions
    print("Making predictions for model " + OUTPUT_NAME_120)
    predictions_test = model.predict(test_patches, verbose=1)
    predictions_val = model.predict(val_patches, verbose=1)

    # Reconstruct predictions and resize
    predictions_val_120 = patch.reconstruct_from_flatten(
        np.squeeze(predictions_val), overlap_val_image, n_val_patches,
        OVERLAP_120)
    predictions_val_120 = transformation.imgs_resize(predictions_val_120,
                                                     PATCH_SIZE_200,
                                                     PATCH_SIZE_200)
    predictions_120 = patch.reconstruct_from_flatten(
        np.squeeze(predictions_test), overlap_test_image, n_test_patches,
        OVERLAP_120)
    predictions_120 = transformation.imgs_resize(predictions_120,
                                                 int(test_imgs.shape[1] / 2),
                                                 int(test_imgs.shape[1] / 2))

    # ================== unet_patch_200_rot ==================

    # Make patches out of the testset images
    print("Making patches for model " + OUTPUT_NAME_200)
    resized_test_imgs = transformation.imgs_resize(test_imgs,
                                                   int(test_imgs.shape[1] / 2),
                                                   int(test_imgs.shape[1] / 2))
    test_patches, overlap_test_image, n_test_patches = patch.make_patch_and_flatten(
        resized_test_imgs, PATCH_SIZE_200, OVERLAP_200)

    # Resize images and groundtruths
    resized_val_imgs = transformation.imgs_resize(val_imgs, PATCH_SIZE_200,
                                                  PATCH_SIZE_200)
    resized_val_gts = transformation.groundtruth_resize(
        (val_gts).astype(float), PATCH_SIZE_200, PATCH_SIZE_200,
        RESIZE_THRESHOLD)

    # Load second model
    print("Loading model " + OUTPUT_NAME_200)
    input_img_200 = Input((PATCH_SIZE_200, PATCH_SIZE_200, IMG_NUM_DIM),
                          name='img')
    model = get_unet_200(input_img_200,
                         num_classes=NUM_CLASSES,
                         n_filters=16,
                         dropout=0.6,
                         batchnorm=True)
    model.load_weights(submission.MODELS_OUTPUT_DIR + OUTPUT_NAME_200 + '.h5')

    # Make predictions
    print("Making predictions for model " + OUTPUT_NAME_200)
    predictions_test = model.predict(test_patches, verbose=1)
    predictions_val_200 = model.predict(resized_val_imgs, verbose=1)

    # Reconstruct predictions
    predictions_val_200 = np.squeeze(predictions_val_200)
    predictions_200 = patch.reconstruct_from_flatten(
        np.squeeze(predictions_test), overlap_test_image, n_test_patches,
        OVERLAP_200)

    # ================== FIND BEST THRESHOLD  ==================
    print("Looking for best convex combination of both models...")
    THRESHOLD_INC = 0.05
    RATIO_INC = 0.05
    threshold = np.arange(0.1, round(1 + THRESHOLD_INC, 2), THRESHOLD_INC)
    ratio = np.arange(0, round(1 + RATIO_INC, 2), RATIO_INC)

    best_overall_score = 0
    for r in ratio:

        # Reinitialize the best score and best threshold
        best_score = 0
        best_thr = 0

        # Take convex combination of both predictions
        predictions_val = (r * predictions_val_120 +
                           (1 - r) * predictions_val_200)

        for thr in threshold:
            # Compute predictions on validation set and compute F1 score
            predictions_val_bin = (predictions_val > thr).astype(int)
            score = f1_custom(resized_val_gts, predictions_val_bin)

            if score > best_score:  # We got our best score yet
                best_score = score
                best_thr = thr

        if best_score > best_overall_score:  # Best overall combination so far
            best_overall_score = best_score
            best_overall_thr = best_thr
            best_ratio = r

    print("Best score: " + str(round(best_overall_score, 4)) +
          ", with best ratio: " + str(round(best_ratio, 2)) +
          ", and best threshold: " + str(round(best_overall_thr, 2)))

    # ================== GENERATE SUBMISSION FILE ==================
    COMBINED_OUTPUT = "combined_model"
    print("Creating submission file " + COMBINED_OUTPUT)

    predictions = (best_ratio * predictions_120 +
                   (1 - best_ratio) * predictions_200)
    predictions = (predictions > best_overall_thr).astype(int)

    FOREGROUND_THRESHOLD = 0.25
    submission.predictions_to_submission(predictions,
                                         COMBINED_OUTPUT + ".csv",
                                         FOREGROUND_THRESHOLD,
                                         patch_size_submission=8)