def submission():
    """
    Generate submission file for the trained models.

    Loads the best systole/diastole weights ('weights_systole_best.hdf5',
    'weights_diastole_best.hdf5'), predicts on the validation data, turns the
    predictions into CDFs with a cumulative sum over the last axis,
    accumulates them per study and writes 'submission.csv'.

    The output follows the row order of
    '/data/heart/sample_submission_test.csv': the header row is copied
    verbatim and every following row keeps its '<study>_<target>' id in the
    first column. Studies without accumulated results are reported with a
    'Miss ...' message and written with the id column only.
    """
    print('Loading and compiling models...')
    model_systole = get_vgg_model()
    model_diastole = get_vgg_model()

    print('Loading models weights...')
    model_systole.load_weights('weights_systole_best.hdf5')
    model_diastole.load_weights('weights_diastole_best.hdf5')

    print('Loading validation data...')
    X, ids = load_validation_data()

    print('Pre-processing images...')
    X = preprocess(X)

    batch_size = 32
    print('Predicting on validation data...')
    pred_systole = model_systole.predict(X, batch_size=batch_size, verbose=1)
    pred_diastole = model_diastole.predict(X, batch_size=batch_size, verbose=1)

    # real predictions to CDF
    cdf_pred_systole = pred_systole.cumsum(axis=-1)
    cdf_pred_diastole = pred_diastole.cumsum(axis=-1)

    print('Accumulating results...')
    sub_systole = accumulate_study_results(ids, cdf_pred_systole)
    sub_diastole = accumulate_study_results(ids, cdf_pred_diastole)

    # write to submission file
    print('Writing submission to file...')
    # Context managers close both files even if a malformed row raises
    # part-way through the loop.
    with open('/data/heart/sample_submission_test.csv') as sample_file, \
            open('submission.csv', 'w') as out_file:
        reader = csv.reader(sample_file)
        writer = csv.writer(out_file, lineterminator='\n')

        # next(reader) instead of reader.next(): works on Python 2.6+ and 3.
        writer.writerow(next(reader))

        for line in reader:
            idx = line[0]
            key, target = idx.split('_')
            key = int(key)
            out = [idx]

            # Check membership in the dict that is actually read, so a study
            # missing from only one of the two cannot raise KeyError.
            results = sub_diastole if target == 'Diastole' else sub_systole
            if key in results:
                out.extend(list(results[key][0]))
            else:
                print('Miss {0}'.format(idx))

            writer.writerow(out)

    print('Done.')
def train(train_prefix_dir="/data/heart"): """ Training systole and diastole models. """ print('Loading and compiling models...') model_systole = get_vgg_model() model_diastole = get_vgg_model() print('Loading training data...') X, y = load_train_data(train_prefix_dir) print('Pre-processing images...') X = preprocess(X) # split to training and test X_train, y_train, X_test, y_test = split_data(X, y, split_ratio=0.2) nb_iter = 200 epochs_per_iter = 1 batch_size = 32 calc_crps = 1 # calculate CRPS every n-th iteration (set to 0 if CRPS estimation is not needed) # remember min val. losses (best iterations), used as sigmas for submission min_val_loss_systole = sys.float_info.max min_val_loss_diastole = sys.float_info.max print('-'*50) print('Training...') print('-'*50) # Create Image Augmentation datagen = ImageDataGenerator( featurewise_center=False, # set input mean to 0 over the dataset samplewise_center=False, # set each sample mean to 0 featurewise_std_normalization=False, # divide inputs by std of the dataset samplewise_std_normalization=False, # divide each input by its std zca_whitening=False, # apply ZCA whitening rotation_range=15, # randomly rotate images in the range (degrees, 0 to 180) width_shift_range=0.1, # randomly shift images horizontally (fraction of total width) height_shift_range=0.1, # randomly shift images vertically (fraction of total height) horizontal_flip=True, # randomly flip images vertical_flip=True) # randomly flip images # compute quantities required for featurewise normalization # (std, mean, and principal components if ZCA whitening is applied) datagen.fit(X_train) # Create model checkpointers for systole and diastole systole_checkpointer_best = ModelCheckpoint(filepath="weights_systole_best.hdf5", verbose=1, save_best_only=True) diastole_checkpointer_best = ModelCheckpoint(filepath="weights_diastole_best.hdf5", verbose=1, save_best_only=True) systole_checkpointer = ModelCheckpoint(filepath="weights_systole.hdf5", verbose=1, 
save_best_only=False) diastole_checkpointer = ModelCheckpoint(filepath="weights_diastole.hdf5", verbose=1, save_best_only=False) # Create 600-dimentional y cdfs from observations y_syst_train = np.array([(i < np.arange(600)) for i in y_train[:, 0]], dtype=np.uint8) y_syst_test = np.array([(i < np.arange(600)) for i in y_test[:, 0]], dtype=np.uint8) y_diast_train = np.array([(i < np.arange(600)) for i in y_train[:, 1]], dtype=np.uint8) y_diast_test = np.array([(i < np.arange(600)) for i in y_test[:, 1]], dtype=np.uint8) print('Fitting Systole Shapes') hist_systole = model_systole.fit_generator(datagen.flow(X_train, y_syst_train, batch_size=batch_size), samples_per_epoch=X_train.shape[0], nb_epoch=nb_iter, show_accuracy=False, validation_data=(X_test, y_syst_test), callbacks=[systole_checkpointer, systole_checkpointer_best], nb_worker=1) print('Fitting Diastole Shapes') hist_diastole = model_diastole.fit_generator(datagen.flow(X_train, y_diast_train, batch_size=batch_size), samples_per_epoch=X_train.shape[0], nb_epoch=nb_iter, show_accuracy=False, validation_data=(X_test, y_diast_test), callbacks=[diastole_checkpointer, diastole_checkpointer_best], nb_worker=1) loss_systole = hist_systole.history['loss'][-1] loss_diastole = hist_diastole.history['loss'][-1] val_loss_systole = hist_systole.history['val_loss'][-1] val_loss_diastole = hist_diastole.history['val_loss'][-1] if calc_crps > 0: print('Evaluating CRPS...') pred_systole = model_systole.predict(X_train, batch_size=batch_size, verbose=1) pred_diastole = model_diastole.predict(X_train, batch_size=batch_size, verbose=1) val_pred_systole = model_systole.predict(X_test, batch_size=batch_size, verbose=1) val_pred_diastole = model_diastole.predict(X_test, batch_size=batch_size, verbose=1) # CDF for train and test data (actually a step function) cdf_train = real_to_cdf(np.concatenate((y_train[:, 0], y_train[:, 1]))) cdf_test = real_to_cdf(np.concatenate((y_test[:, 0], y_test[:, 1]))) # CDF for predicted data 
cdf_pred_systole = real_to_cdf(pred_systole, loss_systole) cdf_pred_diastole = real_to_cdf(pred_diastole, loss_diastole) cdf_val_pred_systole = real_to_cdf(val_pred_systole, val_loss_systole) cdf_val_pred_diastole = real_to_cdf(val_pred_diastole, val_loss_diastole) # evaluate CRPS on training data crps_train = crps(cdf_train, np.concatenate((cdf_pred_systole, cdf_pred_diastole))) print('CRPS(train) = {0}'.format(crps_train)) # evaluate CRPS on test data crps_test = crps(cdf_test, np.concatenate((cdf_val_pred_systole, cdf_val_pred_diastole))) print('CRPS(test) = {0}'.format(crps_test)) # save best (lowest) val losses in file (to be later used for generating submission) with open('val_loss.txt', mode='w+') as f: f.write(str(min(hist_systole.history['val_loss']))) f.write('\n') f.write(str(min(hist_diastole.history['val_loss']))) """