# Early stopping on validation loss; `checkpoint` and `reduceLROnPlat` are
# defined earlier in the script (outside this section).
early = EarlyStopping(
    monitor="val_loss", mode="min", patience=10
)  # probably needs to be more patient, but kaggle time is limited
callbacks_list = [checkpoint, early, reduceLROnPlat]

# Train only when no weight file exists yet at `weight_path`.
if not os.path.exists(weight_path):
    # Keras documents `steps_per_epoch` as an integer, but `train_size / 100`
    # is a float under Python 3 true division.  Use ceiling division so the
    # step count is a whole number and a trailing partial batch still counts.
    # NOTE(review): 100 is presumably the batch size of `train_gen` -- confirm.
    steps_per_epoch = int(-(-train_size // 100))
    bone_age_model.fit_generator(train_gen,
                                 steps_per_epoch=steps_per_epoch,
                                 validation_data=(valid_X, valid_Y),
                                 epochs=10,
                                 callbacks=callbacks_list,
                                 verbose=1)
# Whether or not training ran, evaluate with the weights stored on disk.
bone_age_model.load_weights(weight_path)
print("Training complete !!!\n")
print("Evaluating model on test data\n")
print("Preparing testing dataset")
# Pull the whole test set as a single batch (batch_size == test_size).
test_X, test_Y = next(
    datagen(test_df,
            path_col='path',
            y_col='zscore',
            gender_col='male',
            batch_size=test_size,
            target_size=IMG_SIZE,
            color_mode='rgb',
            seed=8309))
print("Data prepared")
# The model is trained on the 'zscore' column; rescale predictions and
# targets with mu/sigma before computing the error.
pred_Y = mu + sigma * bone_age_model.predict(
    x=test_X, batch_size=25, verbose=1)
test_Y_months = mu + sigma * test_Y
print("Mean absolute error on test data: " +
      str(sk_mae(test_Y_months, pred_Y)))
# Example #2
0
def main():
    """Train (or reload) the gender-aware VGG bone-age model and evaluate it.

    The function runs the whole pipeline in order:

    1. Load the metadata and split it into train / validation / test frames.
    2. Build the training generator and materialise the entire validation
       set as one batch.
    3. Fit the model unless a weight file already exists at ``weight_path``,
       then load the weights stored in that file.
    4. Log the mean absolute error on the test set (after rescaling the
       z-scored targets with ``mu`` and ``sigma``) and save two figures.

    Side effects: sets ``CUDA_VISIBLE_DEVICES``; writes the weight file, a
    pickled training history (only when training ran) and two PNG images
    to the current working directory.  Returns ``None``.
    """
    os.environ["CUDA_VISIBLE_DEVICES"] = str(0)
    start = time.time()
    RAND_SEED = 2408
    IMG_SIZE = (224, 224)
    # Single source of truth for the training batch size: it feeds both the
    # generator and the steps-per-epoch computation, so they cannot drift.
    BATCH_SIZE = 32
    logger = get_logger()
    logger.info("=== Start bone age prediction ===")

    # Load metadata from csv
    df, mu, sigma = load_data_from_dataframe(logger)

    # Split into training testing and validation datasets
    train_df, valid_df, test_df = split_dataset(df, logger, seed=RAND_SEED)
    train_size = train_df.shape[0]
    valid_size = valid_df.shape[0]
    test_size = test_df.shape[0]
    logger.info("Training images:   {}".format(train_size))
    logger.info("Validation images: {}".format(valid_size))
    logger.info("Testing images:    {}".format(test_size))

    img_gen = get_img_data_gen()
    train_gen = flow_from_dataframe(img_gen,
                                    train_df,
                                    path_col='path',
                                    y_col='zscore',
                                    gender_col='male',
                                    batch_size=BATCH_SIZE,
                                    seed=RAND_SEED,
                                    img_size=IMG_SIZE)

    logger.info("Preparing validation data...")
    # Get the validation data: one batch that spans the whole validation set
    valid_gen = flow_from_dataframe(img_gen,
                                    valid_df,
                                    path_col='path',
                                    y_col='zscore',
                                    gender_col='male',
                                    batch_size=valid_size,
                                    seed=RAND_SEED,
                                    img_size=IMG_SIZE)
    valid_X, valid_Y = next(valid_gen)
    # valid_X is indexed as [0] for the image batch (and [1] for gender
    # further below); take the shape of a single image from it.
    IMG_SHAPE = valid_X[0][0, :, :, :].shape
    logger.info("Image shape: "+str(IMG_SHAPE))
    logger.info("Data preproation done")

    # Model definition
    bone_age_model = get_bone_gender_age_vgg_model(IMG_SHAPE,
                                                   logger,
                                                   mu,
                                                   sigma)

    logger.info("=== Star training model ===")
    # Model Callbacks
    epochs = 20
    weight_path = "bone_age_weights_untrainable_VGG16_gender" +\
                  "_{}_epochs_relu_less_dropout_dense.best.hdf5".format(epochs)
    checkpoint = ModelCheckpoint(weight_path,
                                 monitor='val_loss',
                                 verbose=1,
                                 save_best_only=True,
                                 mode='min',
                                 save_weights_only=True)

    # NOTE(review): newer Keras releases rename `epsilon` to `min_delta`;
    # confirm against the installed Keras version before upgrading.
    reduceLROnPlat = ReduceLROnPlateau(monitor='val_loss',
                                       factor=0.8,
                                       patience=10,
                                       verbose=1,
                                       mode='auto',
                                       epsilon=0.0001,
                                       cooldown=5,
                                       min_lr=0.0001)

    early = EarlyStopping(monitor="val_loss", mode="min", patience=10)
    callbacks_list = [checkpoint, early, reduceLROnPlat]
    if not os.path.exists(weight_path):
        # Keras documents `steps_per_epoch` as an integer; plain division
        # yields a float on Python 3.  Round up so the final partial batch
        # is still visited once per epoch.
        steps_per_epoch = int(np.ceil(train_size / float(BATCH_SIZE)))
        history = \
            bone_age_model.fit_generator(train_gen,
                                         steps_per_epoch=steps_per_epoch,
                                         validation_data=(valid_X, valid_Y),
                                         epochs=epochs,
                                         callbacks=callbacks_list,
                                         verbose=1)
        with open('history_gender_vgg16_freeze_epoch_{}.p'.format(epochs), 'wb') as f:
            pickle.dump(history.history, f)
    # Always evaluate with the checkpointed weights, whether they were just
    # written by this run or were already on disk.
    bone_age_model.load_weights(weight_path)
    logger.info("Training complete !!!\n")

    # Evaluate model on test dataset
    logger.info("Evaluating model on test data ...\n")
    logger.info("Preparing testing dataset...")
    test_gen = flow_from_dataframe(img_gen,
                                   test_df,
                                   path_col='path',
                                   y_col='zscore',
                                   gender_col='male',
                                   batch_size=test_size,
                                   seed=8309,
                                   img_size=IMG_SIZE)
    test_X, test_Y = next(test_gen)
    logger.info("Data prepared !!!")

    # Undo the z-score normalisation on both predictions and targets.
    pred_Y = mu+sigma*bone_age_model.predict(x=test_X,
                                             batch_size=25,
                                             verbose=1)
    test_Y_months = mu+sigma*test_Y
    logger.info("Mean absolute error on test data: "
                + str(sk_mae(test_Y_months, pred_Y)))

    # Scatter of predicted vs. actual age, with the identity line for reference
    fig, ax1 = plt.subplots(1, 1, figsize=(6, 6))
    ax1.plot(test_Y_months, pred_Y, 'r.', label='predictions')
    ax1.plot(test_Y_months, test_Y_months, 'b-', label='actual')
    ax1.legend()
    ax1.set_xlabel('Actual Age (Months)')
    ax1.set_ylabel('Predicted Age (Months)')
    plt.savefig('prediction_gender_epoch_{}.png'.format(epochs))

    # Pick 8 samples evenly spaced across the sorted target values
    ord_idx = np.argsort(test_Y)
    ord_idx = ord_idx[np.linspace(0, len(ord_idx)-1, num=8).astype(int)]
    fig, m_axs = plt.subplots(2, 4, figsize=(16, 32))
    for (idx, c_ax) in zip(ord_idx, m_axs.flatten()):
        c_ax.imshow(test_X[0][idx, :, :, 0], cmap='bone')
        # predict() presumably returns shape (N, 1), so pred_Y[idx] is a
        # 1-element array; convert it explicitly because implicit
        # array-to-scalar conversion in %-formatting is deprecated in
        # recent NumPy releases.
        predicted_age = float(np.squeeze(pred_Y[idx]))
        title = 'Age: %2.1f\nPredicted Age: %2.1f\nGender: ' % (test_Y_months[idx], predicted_age)
        if test_X[1][idx] == 0:
            title += "Female\n"
        else:
            title += "Male\n"
        c_ax.set_title(title)
        c_ax.axis('off')
    plt.savefig('visulize_xray.png')
    # Done
    total_sec = time.time() - start
    logger.info("Total run took {} (Hours:Min:Sec)".format(str(datetime.timedelta(
        seconds=total_sec))))
    logger.info("done!")
# Callbacks: learning-rate reduction and early stopping, both driven by the
# validation loss.  `checkpoint` is defined earlier in the script.
reduceLROnPlat = ReduceLROnPlateau(
    monitor='val_loss', mode='auto',
    factor=0.8, min_lr=0.0001,
    patience=10, cooldown=5,
    epsilon=0.0001,
    verbose=1,
)
early = EarlyStopping(patience=10, mode="min", monitor="val_loss")
callbacks_list = [checkpoint, early, reduceLROnPlat]

# Reuse saved weights when the file is present; otherwise train from the
# generator for a fixed number of steps per epoch.
if os.path.exists(weight_path):
    model.load_weights(weight_path)
else:
    model.fit_generator(
        train_gen,
        validation_data=(valid_X, valid_Y),
        steps_per_epoch=500,
        epochs=10,
        callbacks=callbacks_list,
        verbose=1,
    )

# Draw the whole test set as a single batch (batch_size == test_size).
test_X, test_Y = next(
    flow_from_dataframe(
        test_df,
        batch_size=test_size,
        path_col='path',
        y_col='Sign',
        target_size=IMG_SIZE,
        color_mode='rgb',
    )
)

pred_Y = model.predict(test_X, batch_size=25, verbose=1)
test_mae = str(sk_mae(test_Y, pred_Y))
print("Mean absolute error on test data: " + test_mae)