def load_training_rows(transform_dates=True, transform_categorical_features=False, add_timeseries_features=True):
    """Load the training rows, run them through ``transformations`` and load the labels.

    :param transform_dates: Forwarded unchanged to ``transformations``.
    :param transform_categorical_features: Forwarded unchanged to ``transformations``.
    :param add_timeseries_features: Forwarded unchanged to ``transformations``.
    :return: A ``(data_rows, features, label_rows)`` tuple, where ``data_rows`` and
        ``features`` are whatever ``transformations`` returns and ``label_rows`` is the
        result of ``load.load_training_labels()``.
    """
    # Gather every raw input first; the loaders are called in a fixed order.
    raw_rows = load.load_training_data()
    history_rows = load.load_historical_training_data()
    raw_features = load.load_features()
    ts_features = load.TIMESERIES_FEATURES

    # ``transformations`` takes its arguments positionally, so the order below matters.
    transformed = transformations(
        add_timeseries_features,
        raw_rows,
        raw_features,
        history_rows,
        ts_features,
        transform_categorical_features,
        transform_dates,
    )
    rows, feature_names = transformed

    labels = load.load_training_labels()
    return rows, feature_names, labels
def load_data_portion(denominator=0, offset=0):
    """Load a portion of the data.

    The first ``min(len(rows), len(labels))`` samples are divided into
    ``denominator`` partitions of equal length and the partition at index
    ``offset`` is returned. Rows left over when the sample count is not a
    multiple of ``denominator`` belong to no partition.

    :param denominator: The number of partitions to divide the data into
        e.g. 3 (thirds). A falsy value (the default ``0``) treats the whole
        data set as a single partition.
    :param offset: The zero-based index of the partition to load
        e.g. 1 (the second third)
    :return: The ``(X, y)`` pair produced by
        ``preprocessing.labelled_training_data`` for the selected slice.
    """
    training_rows = load.load_training_data()
    training_labels = load.load_training_labels()
    features = load.load_features()

    number_of_rows = min(len(training_rows), len(training_labels))
    # Floor division keeps the slice bounds integral on Python 3 as well;
    # on Python 2 it is identical to the ``/`` on ints used previously.
    portion_length = (number_of_rows // denominator) if denominator else number_of_rows
    slice_start = offset * portion_length
    slice_end = slice_start + portion_length

    # Parenthesised single-argument print behaves the same on Python 2 and 3.
    print('Returning %s samples' % portion_length)

    X, y = preprocessing.labelled_training_data(
        training_rows[slice_start:slice_end],
        training_labels[slice_start:slice_end],
        features,
        load.LABEL_NAME)
    return X, y
(j, self.evaluate_test(test_x, test_y), len(test_x))) def evaluate_test(self, test_data_x, test_data_y): result = [(np.argmax(self.feed_forward(x)), y) for x, y in zip(test_data_x, test_data_y)] return sum((int(x == y) for x, y in result)) def show_img(self, x, y): print(y) x = np.reshape(x, (28, 28)) p = plt.imshow(x, shape=(28, 28)) plt.show(p) if __name__ == '__main__': start_time = timeit.default_timer() x, y = load.load_training_data() test_x, test_y = load.load_test_data() net = NeuralNetwork([784, 30, 10]) print('before training') print(net.feed_forward(x[0])) print("epch=before training %r/%r" % (net.evaluate_test(test_x, test_y), len(test_x))) net.update_weights(x, y, 3, 30, test_x, test_y, 10) print('after training') print(net.feed_forward(x[0])) print(y[0]) stop_time = timeit.default_timer() progress_time = stop_time - start_time print('Time=', progress_time)
print("Loading test set...") # Load images test_imgs = load.load_test_set() # Resize test images to half their original size resized_test_imgs = transformation.imgs_resize(test_imgs, int(test_imgs[0].shape[0]/2), int(test_imgs[0].shape[0]/2)) # Make patches out of the testset images test_patches, overlap_image, nPatches = patch.make_patch_and_flatten(resized_test_imgs, PATCH_SIZE, OVERLAP) # ================== LOAD VALIDATION SET ================== print("Loading validation set...") # Load validation set (used to determine the best threshold to discriminate foreground from backgound) val_patches, val_gt_patches = load.load_training_data(load.PROVIDED_DATA_DIR) val_gt_patches = np.expand_dims(val_gt_patches,axis=3) # Resize validation images and groundtruth resized_val_imgs = transformation.imgs_resize(val_patches, PATCH_SIZE, PATCH_SIZE) resized_val_gts = transformation.groundtruth_resize(val_gt_patches.astype(float), PATCH_SIZE, PATCH_SIZE, RESIZE_THRESHOLD) # Make sure that groundtruths are filled with only 0's and 1's resized_val_gts = (resized_val_gts > 0.5).astype(int) # ================== LOAD MODEL ================== print("Loading model " + OUTPUT_NAME) input_img = Input((PATCH_SIZE, PATCH_SIZE, IMG_NUM_DIM), name='img') model = get_unet_200(input_img, num_classes=NUM_CLASSES, n_filters=16, dropout=0.6, batchnorm=True) model.load_weights(submission.MODELS_OUTPUT_DIR + OUTPUT_NAME + '.h5')
def _load_and_augment(data_dir):
    """Load an image/groundtruth set and return it resized together with its rotated copy.

    :param data_dir: Directory passed to ``load.load_training_data``.
    :return: ``(all_imgs, all_gts)``: the resized originals followed by the
        resized rotated copies, concatenated along axis 0. The groundtruths
        additionally get a new axis inserted at index 3.
    """
    imgs, gt_imgs = load.load_training_data(data_dir)

    # Rotate images and groundtruth
    rot_imgs = transformation.imgs_rotate(imgs, ROT_ANGLE)
    rot_gt_imgs = transformation.imgs_rotate(gt_imgs, ROT_ANGLE)

    # Resize everything
    resized_imgs = transformation.imgs_resize(imgs, RESIZE_HEIGHT, RESIZE_WIDTH)
    resized_rot_imgs = transformation.imgs_resize(rot_imgs, RESIZE_HEIGHT, RESIZE_WIDTH)
    resized_gt_imgs = transformation.groundtruth_resize(
        gt_imgs.astype(float), RESIZE_HEIGHT, RESIZE_WIDTH, THRESHOLD_GT_RESIZE)
    resized_rot_gt_imgs = transformation.groundtruth_resize(
        rot_gt_imgs.astype(float), RESIZE_HEIGHT, RESIZE_WIDTH, THRESHOLD_GT_RESIZE)

    # Concatenate normal images with rotated images
    all_imgs = np.append(resized_imgs, resized_rot_imgs, axis=0)
    all_gts = np.append(resized_gt_imgs, resized_rot_gt_imgs, axis=0)
    all_gts = np.expand_dims(all_gts, axis=3)
    return all_imgs, all_gts


def main(argv=None):
    """Train the U-Net on the provided, rotated and additional data and save its history.

    Steps: load and augment the provided training set, do the same for the
    validation set, append patches from the additional data set, binarize all
    groundtruths, build and compile the model, fit it with early stopping,
    learning-rate reduction and best-weights checkpointing, then hand the
    training history to ``submission.save_training_history``.

    :param argv: Unused; kept for interface compatibility.
    """
    # Fix RNG for reproducibility
    np.random.seed(1)

    # ================== LOAD ORIGINAL TRAINING DATA ==================
    print("Loading training data...")
    all_train_imgs, all_train_gts = _load_and_augment(load.PROVIDED_DATA_DIR)

    # ================== LOAD VALIDATION DATA ==================
    print("Loading validation data...")
    all_val_imgs, all_val_gts = _load_and_augment(load.VALIDATION_DATA_DIR)

    # ================== LOAD ADDITIONAL TRAINING DATA ==================
    print("Loading additional training data...")
    # Load additional training dataset
    add_imgs, add_gt_imgs, _, _ = load.load_training_data_and_patch(
        load.ADDITIONAL_DATA_DIR, PATCH_SIZE, random_selection=True, proportion=PROP_ADD)
    add_gt_imgs = np.expand_dims(add_gt_imgs, axis=3)

    all_imgs = np.append(all_train_imgs, add_imgs, axis=0)
    all_gts = np.append(all_train_gts, add_gt_imgs, axis=0)

    # Make sure that groundtruths are filled with only 0's and 1's
    all_gts = (all_gts > 0.5).astype(int)
    all_val_gts = (all_val_gts > 0.5).astype(int)

    # ================== CREATE MODEL ==================
    print("Creating the model...")
    # Maximal number of epochs
    EPOCHS = 80
    # Number of classes on which to build the model
    NUM_CLASSES = 1
    # Size of a batch for the model
    BATCH_SIZE = 100
    # Amount of dropout for the model
    DROPOUT = 0.6

    # Set up the model
    input_img = Input((RESIZE_HEIGHT, RESIZE_WIDTH, IMG_NUM_DIM), name='img')
    model = get_unet_200(input_img, num_classes=NUM_CLASSES, n_filters=16, dropout=DROPOUT, batchnorm=True)
    model.compile(optimizer=Adam(), loss="binary_crossentropy", metrics=[f1_score])
    model.summary()

    # Set up callbacks
    callbacks = [
        EarlyStopping(patience=10, verbose=1),
        ReduceLROnPlateau(factor=0.1, patience=3, min_lr=0.00001, verbose=1),
        # Only the best weights seen so far are written to "<OUTPUT_NAME>.h5"
        ModelCheckpoint(submission.MODELS_OUTPUT_DIR + OUTPUT_NAME + '.h5',
                        verbose=1, save_best_only=True, save_weights_only=True),
    ]

    # ================== TRAIN THE MODEL ==================
    print("Training the model...")
    model.fit(all_imgs, all_gts,
              batch_size=BATCH_SIZE,
              epochs=EPOCHS,
              callbacks=callbacks,
              verbose=1,
              validation_data=(all_val_imgs, all_val_gts))

    # ================== SAVE THE MODEL ==================
    print("Training complete. Saving model's history to " + OUTPUT_NAME)
    submission.save_training_history(OUTPUT_NAME, model.history)
def main(argv=None):
    """Combine the predictions of two U-Nets and write a submission file.

    Both models predict on the test set and on the provided (validation) set.
    A grid search over the mixing ratio ``r`` and the binarization threshold
    picks the convex combination ``r * pred_120 + (1 - r) * pred_200`` with
    the highest ``f1_custom`` score on the validation set; that combination is
    then applied to the test predictions and written to ``combined_model.csv``.

    :param argv: Unused; kept for interface compatibility.
    """
    # Fix RNG for reproducibility
    np.random.seed(1)

    # Load images
    print("Loading test set...")
    test_imgs = load.load_test_set()
    # NOTE(review): shape[1] is used for both target dimensions, so the test
    # images are presumably square -- confirm.
    half_size = int(test_imgs.shape[1] / 2)

    # ================== unet_patch_120_rot ==================
    # Make patches out of the test-set images
    print("Making patches for model " + OUTPUT_NAME_120)
    test_patches, overlap_test_image, n_test_patches = patch.make_patch_and_flatten(
        test_imgs, PATCH_SIZE_120, OVERLAP_120)

    # Load validation set (used to determine the best threshold to discriminate
    # foreground from background)
    val_imgs, val_gts = load.load_training_data(load.PROVIDED_DATA_DIR)
    # Make sure that groundtruths are filled with only 0's and 1's
    val_gts = (val_gts > 0.5).astype(int)
    # Make patches out of the validation images
    val_patches, overlap_val_image, n_val_patches = patch.make_patch_and_flatten(
        val_imgs, PATCH_SIZE_120, OVERLAP_120)

    # Load first model
    print("Loading model " + OUTPUT_NAME_120)
    input_img_120 = Input((PATCH_SIZE_120, PATCH_SIZE_120, IMG_NUM_DIM), name='img')
    model = get_unet_120(input_img_120, num_classes=NUM_CLASSES, n_filters=16, dropout=0.4, batchnorm=True)
    model.load_weights(submission.MODELS_OUTPUT_DIR + OUTPUT_NAME_120 + '.h5')

    # Make predictions
    print("Making predictions for model " + OUTPUT_NAME_120)
    predictions_test = model.predict(test_patches, verbose=1)
    predictions_val = model.predict(val_patches, verbose=1)

    # Reconstruct predictions and resize them to the resolution used by the
    # second model so both prediction sets can be mixed element-wise
    predictions_val_120 = patch.reconstruct_from_flatten(
        np.squeeze(predictions_val), overlap_val_image, n_val_patches, OVERLAP_120)
    predictions_val_120 = transformation.imgs_resize(predictions_val_120, PATCH_SIZE_200, PATCH_SIZE_200)
    predictions_120 = patch.reconstruct_from_flatten(
        np.squeeze(predictions_test), overlap_test_image, n_test_patches, OVERLAP_120)
    predictions_120 = transformation.imgs_resize(predictions_120, half_size, half_size)

    # ================== unet_patch_200_rot ==================
    # Make patches out of the (half-size) test-set images
    print("Making patches for model " + OUTPUT_NAME_200)
    resized_test_imgs = transformation.imgs_resize(test_imgs, half_size, half_size)
    test_patches, overlap_test_image, n_test_patches = patch.make_patch_and_flatten(
        resized_test_imgs, PATCH_SIZE_200, OVERLAP_200)

    # Resize images and groundtruths
    resized_val_imgs = transformation.imgs_resize(val_imgs, PATCH_SIZE_200, PATCH_SIZE_200)
    resized_val_gts = transformation.groundtruth_resize(
        val_gts.astype(float), PATCH_SIZE_200, PATCH_SIZE_200, RESIZE_THRESHOLD)

    # Load second model
    print("Loading model " + OUTPUT_NAME_200)
    input_img_200 = Input((PATCH_SIZE_200, PATCH_SIZE_200, IMG_NUM_DIM), name='img')
    model = get_unet_200(input_img_200, num_classes=NUM_CLASSES, n_filters=16, dropout=0.6, batchnorm=True)
    model.load_weights(submission.MODELS_OUTPUT_DIR + OUTPUT_NAME_200 + '.h5')

    # Make predictions
    print("Making predictions for model " + OUTPUT_NAME_200)
    predictions_test = model.predict(test_patches, verbose=1)
    predictions_val_200 = model.predict(resized_val_imgs, verbose=1)

    # Reconstruct predictions
    predictions_val_200 = np.squeeze(predictions_val_200)
    predictions_200 = patch.reconstruct_from_flatten(
        np.squeeze(predictions_test), overlap_test_image, n_test_patches, OVERLAP_200)

    # ================== FIND BEST THRESHOLD ==================
    print("Looking for best convex combination of both models...")
    THRESHOLD_INC = 0.05
    RATIO_INC = 0.05
    threshold = np.arange(0.1, round(1 + THRESHOLD_INC, 2), THRESHOLD_INC)
    ratio = np.arange(0, round(1 + RATIO_INC, 2), RATIO_INC)

    best_overall_score = 0
    # Start from the first candidates so the names are always bound, even if
    # no combination ever scores above 0.
    best_ratio = ratio[0]
    best_overall_thr = threshold[0]
    for r in ratio:
        # Take convex combination of both predictions
        predictions_val = r * predictions_val_120 + (1 - r) * predictions_val_200
        for thr in threshold:
            # Compute predictions on validation set and compute F1 score
            predictions_val_bin = (predictions_val > thr).astype(int)
            score = f1_custom(resized_val_gts, predictions_val_bin)
            # Strict comparison: on ties the earliest (ratio, threshold) wins
            if score > best_overall_score:
                best_overall_score = score
                best_overall_thr = thr
                best_ratio = r

    print("Best score: " + str(round(best_overall_score, 4)) + ", with best ratio: " +
          str(round(best_ratio, 2)) + ", and best threshold: " + str(round(best_overall_thr, 2)))

    # ================== GENERATE SUBMISSION FILE ==================
    COMBINED_OUTPUT = "combined_model"
    print("Creating submission file " + COMBINED_OUTPUT)
    predictions = best_ratio * predictions_120 + (1 - best_ratio) * predictions_200
    predictions = (predictions > best_overall_thr).astype(int)
    FOREGROUND_THRESHOLD = 0.25
    submission.predictions_to_submission(predictions, COMBINED_OUTPUT + ".csv",
                                         FOREGROUND_THRESHOLD, patch_size_submission=8)