# Stop training once val_loss has gone 10 epochs without improving.
# probably needs to be more patient, but kaggle time is limited
# NOTE(review): with epochs=10 below, a patience of 10 can presumably never
# trigger before training ends on its own - confirm this is intended.
early = EarlyStopping(monitor="val_loss", mode="min", patience=10)
callbacks_list = [checkpoint, early, reduceLROnPlat]

# Train only when no weight file from a previous run is present; otherwise
# the stored weights are reused as-is.
if not os.path.exists(weight_path):
    bone_age_model.fit_generator(
        train_gen,
        # Keras expects a whole number of batches per epoch;
        # (n + 99) // 100 is ceil(n / 100) computed with integers only.
        steps_per_epoch=(train_size + 99) // 100,
        validation_data=(valid_X, valid_Y),
        epochs=10,
        callbacks=callbacks_list,
        verbose=1)

# Evaluate with whatever is stored at weight_path (freshly trained or cached).
bone_age_model.load_weights(weight_path)
print("Training complete !!!\n")

print("Evaluating model on test data\n")
print("Preparing testing dataset")
# Pull the entire test split as a single batch.
test_gen = datagen(test_df,
                   path_col='path',
                   y_col='zscore',
                   gender_col='male',
                   batch_size=test_size,
                   target_size=IMG_SIZE,
                   color_mode='rgb',
                   seed=8309)
test_X, test_Y = next(test_gen)
print("Data prepared")

# Targets are z-scores (y_col='zscore'); mu + sigma * z undoes the
# standardisation for both the predictions and the ground truth.
pred_Y = mu + sigma * bone_age_model.predict(x=test_X,
                                             batch_size=25,
                                             verbose=1)
test_Y_months = mu + sigma * test_Y
print("Mean absolute error on test data: " + str(sk_mae(test_Y_months, pred_Y)))
def _save_prediction_scatter(actual_months, predicted_months, epochs):
    """Save a predicted-vs-actual age plot to 'prediction_gender_epoch_<epochs>.png'.

    Predictions are drawn as red dots; the actual values are plotted against
    themselves as a blue line so that a perfect model would sit on it.
    """
    fig, ax1 = plt.subplots(1, 1, figsize=(6, 6))
    ax1.plot(actual_months, predicted_months, 'r.', label='predictions')
    ax1.plot(actual_months, actual_months, 'b-', label='actual')
    ax1.legend()
    ax1.set_xlabel('Actual Age (Months)')
    ax1.set_ylabel('Predicted Age (Months)')
    fig.savefig('prediction_gender_epoch_{}.png'.format(epochs))
    # Release the figure so it does not stay alive for the rest of the run.
    plt.close(fig)


def _save_sample_grid(test_X, test_Y, test_Y_months, pred_Y):
    """Save a 2x4 grid of test images with actual/predicted age and gender.

    The eight samples are picked at evenly spaced positions of the test set
    sorted by target value, so the grid spans the lowest to the highest target.
    test_X is indexed as [0] for the image batch and [1] for the gender flag.
    """
    ord_idx = np.argsort(test_Y)
    ord_idx = ord_idx[np.linspace(0, len(ord_idx) - 1, num=8).astype(int)]
    fig, m_axs = plt.subplots(2, 4, figsize=(16, 32))
    for (idx, c_ax) in zip(ord_idx, m_axs.flatten()):
        # Only the first channel is displayed.
        c_ax.imshow(test_X[0][idx, :, :, 0], cmap='bone')
        title = 'Age: %2.1f\nPredicted Age: %2.1f\nGender: ' % (
            test_Y_months[idx], pred_Y[idx])
        # A gender flag of 0 is labelled female, anything else male.
        if test_X[1][idx] == 0:
            title += "Female\n"
        else:
            title += "Male\n"
        c_ax.set_title(title)
        c_ax.axis('off')
    fig.savefig('visulize_xray.png')
    plt.close(fig)


def main():
    """Train (or reload) the bone-age model and evaluate it on the test split.

    Steps:
      1. Load the metadata and split it into train/validation/test frames.
      2. Build the model via get_bone_gender_age_vgg_model.
      3. If no weight file exists yet, train with checkpointing, early
         stopping and learning-rate reduction, and pickle the training
         history.
      4. Load the weights stored at the checkpoint path, report the mean
         absolute error on the test split and save two diagnostic plots.

    Side effects: sets CUDA_VISIBLE_DEVICES, and writes the weight file, the
    history pickle and two PNG files into the current working directory.
    """
    os.environ["CUDA_VISIBLE_DEVICES"] = str(0)
    start = time.time()
    RAND_SEED = 2408
    IMG_SIZE = (224, 224)
    # Single source of truth for the training batch size: it is used both by
    # the training generator and to derive the number of steps per epoch.
    BATCH_SIZE = 32
    logger = get_logger()
    logger.info("=== Start bone age prediction ===")

    # Load metadata from csv
    df, mu, sigma = load_data_from_dataframe(logger)

    # Split into training testing and validation datasets
    train_df, valid_df, test_df = split_dataset(df, logger, seed=RAND_SEED)
    train_size = train_df.shape[0]
    valid_size = valid_df.shape[0]
    test_size = test_df.shape[0]
    logger.info("Training images: {}".format(train_size))
    logger.info("Validation images: {}".format(valid_size))
    logger.info("Testing images: {}".format(test_size))

    img_gen = get_img_data_gen()
    train_gen = flow_from_dataframe(img_gen, train_df,
                                    path_col='path',
                                    y_col='zscore',
                                    gender_col='male',
                                    batch_size=BATCH_SIZE,
                                    seed=RAND_SEED,
                                    img_size=IMG_SIZE)

    logger.info("Preparing validation data...")
    # Get the validation data: one batch holding the whole validation split.
    valid_gen = flow_from_dataframe(img_gen, valid_df,
                                    path_col='path',
                                    y_col='zscore',
                                    gender_col='male',
                                    batch_size=valid_size,
                                    seed=RAND_SEED,
                                    img_size=IMG_SIZE)
    valid_X, valid_Y = next(valid_gen)
    # valid_X[0] is the image batch; dropping the batch axis of its first
    # element gives the per-image input shape.
    IMG_SHAPE = valid_X[0][0, :, :, :].shape
    logger.info("Image shape: "+str(IMG_SHAPE))
    logger.info("Data preproation done")

    # Model definition
    bone_age_model = get_bone_gender_age_vgg_model(IMG_SHAPE, logger, mu, sigma)

    logger.info("=== Star training model ===")
    # Model Callbacks
    epochs = 20
    weight_path = ("bone_age_weights_untrainable_VGG16_gender" +
                   "_{}_epochs_relu_less_dropout_dense.best.hdf5".format(epochs))
    checkpoint = ModelCheckpoint(weight_path, monitor='val_loss', verbose=1,
                                 save_best_only=True, mode='min',
                                 save_weights_only=True)
    # NOTE(review): this uses the same patience as EarlyStopping below, so the
    # learning-rate reduction would presumably fire in the very epoch training
    # stops - consider a smaller patience here. Newer Keras releases also name
    # the `epsilon` argument `min_delta`; confirm against the installed version.
    reduceLROnPlat = ReduceLROnPlateau(monitor='val_loss', factor=0.8,
                                       patience=10, verbose=1, mode='auto',
                                       epsilon=0.0001, cooldown=5,
                                       min_lr=0.0001)
    early = EarlyStopping(monitor="val_loss", mode="min", patience=10)
    callbacks_list = [checkpoint, early, reduceLROnPlat]

    # Train only when no weight file from a previous run is present.
    if not os.path.exists(weight_path):
        # Keras expects a whole number of batches per epoch; round up so the
        # final partial batch is still visited.
        steps_per_epoch = int(np.ceil(train_size / float(BATCH_SIZE)))
        history = bone_age_model.fit_generator(
            train_gen,
            steps_per_epoch=steps_per_epoch,
            validation_data=(valid_X, valid_Y),
            epochs=epochs,
            callbacks=callbacks_list,
            verbose=1)
        # `history` only exists when training actually ran, so it is pickled
        # inside this branch.
        history_path = 'history_gender_vgg16_freeze_epoch_{}.p'.format(epochs)
        with open(history_path, 'wb') as f:
            pickle.dump(history.history, f)

    # Evaluate with the weights stored at weight_path: the checkpoint keeps
    # only the best val_loss weights (save_best_only=True).
    bone_age_model.load_weights(weight_path)
    logger.info("Training complete !!!\n")

    # Evaluate model on test dataset
    logger.info("Evaluating model on test data ...\n")
    logger.info("Preparing testing dataset...")
    test_gen = flow_from_dataframe(img_gen, test_df,
                                   path_col='path',
                                   y_col='zscore',
                                   gender_col='male',
                                   batch_size=test_size,
                                   seed=8309,
                                   img_size=IMG_SIZE)
    test_X, test_Y = next(test_gen)
    logger.info("Data prepared !!!")

    # Targets are z-scores (y_col='zscore'); mu + sigma * z undoes the
    # standardisation for both the predictions and the ground truth.
    pred_Y = mu+sigma*bone_age_model.predict(x=test_X, batch_size=25, verbose=1)
    test_Y_months = mu+sigma*test_Y
    logger.info("Mean absolute error on test data: "
                + str(sk_mae(test_Y_months, pred_Y)))

    _save_prediction_scatter(test_Y_months, pred_Y, epochs)
    _save_sample_grid(test_X, test_Y, test_Y_months, pred_Y)

    # Done
    total_sec = time.time() - start
    logger.info("Total run took {} (Hours:Min:Sec)".format(
        str(datetime.timedelta(seconds=total_sec))))
    logger.info("done!")
# Scale the learning rate by 0.8 when val_loss has plateaued for 10 epochs.
# NOTE(review): with epochs=10 below, neither this callback nor the early
# stopping (both patience=10) can presumably fire - confirm this is intended.
reduceLROnPlat = ReduceLROnPlateau(
    monitor='val_loss',
    factor=0.8,
    patience=10,
    verbose=1,
    mode='auto',
    epsilon=0.0001,
    cooldown=5,
    min_lr=0.0001,
)
early = EarlyStopping(monitor="val_loss", mode="min", patience=10)
callbacks_list = [checkpoint, early, reduceLROnPlat]

# Reuse stored weights when they exist; otherwise train from scratch.
if os.path.exists(weight_path):
    model.load_weights(weight_path)
else:
    model.fit_generator(
        train_gen,
        steps_per_epoch=500,
        validation_data=(valid_X, valid_Y),
        epochs=10,
        callbacks=callbacks_list,
        verbose=1,
    )

# Draw the whole test split as one batch and score the predictions.
test_batches = flow_from_dataframe(
    test_df,
    path_col='path',
    y_col='Sign',
    target_size=IMG_SIZE,
    color_mode='rgb',
    batch_size=test_size,
)
test_X, test_Y = next(test_batches)
pred_Y = model.predict(test_X, batch_size=25, verbose=1)
mae_on_test = sk_mae(test_Y, pred_Y)
print("Mean absolute error on test data: " + str(mae_on_test))