def submission():
    """
    Generate submission file for the trained models.

    Loads the systole and diastole weights, predicts on the validation
    data, turns the real-valued predictions into CDFs (the two values stored
    in 'val_loss.txt' are used as sigmas) and writes 'submission.csv' with
    one row per row of 'data/sample_submission_validate.csv'.
    """
    print('Loading and compiling models...')
    model_systole = get_model()
    model_diastole = get_model()

    print('Loading models weights...')
    model_systole.load_weights('weights_systole23.hdf5')
    model_diastole.load_weights('weights_diastole50.hdf5')

    # load val losses to use as sigmas for CDF
    # (first line: systole, second line: diastole)
    with open('val_loss.txt', mode='r') as f:
        val_loss_systole = float(f.readline())
        val_loss_diastole = float(f.readline())

    print('Loading validation data...')
    X, ids = load_validation_data()

    # NOTE: the pre-processing step is disabled in this version; X is used
    # exactly as returned by load_validation_data().

    batch_size = 32
    print('Predicting on validation data...')
    pred_systole = model_systole.predict(X, batch_size=batch_size, verbose=1)
    pred_diastole = model_diastole.predict(X, batch_size=batch_size, verbose=1)

    # real predictions to CDF
    cdf_pred_systole = real_to_cdf(pred_systole, val_loss_systole)
    cdf_pred_diastole = real_to_cdf(pred_diastole, val_loss_diastole)

    print('Accumulating results...')
    sub_systole = accumulate_study_results(ids, cdf_pred_systole)
    sub_diastole = accumulate_study_results(ids, cdf_pred_diastole)

    # write to submission file
    print('Writing submission to file...')
    # Context managers close both files even if a row fails to parse.
    with open('data/sample_submission_validate.csv') as sample_file, \
            open('submission.csv', 'w') as out_file:
        fi = csv.reader(sample_file)
        fo = csv.writer(out_file, lineterminator='\n')
        # copy the header row; next() works on both Python 2 and 3
        fo.writerow(next(fi))
        for line in fi:
            idx = line[0]
            # ids have the form '<study key>_<target>'
            key, target = idx.split('_')
            key = int(key)
            out = [idx]
            if key in sub_systole:
                if target == 'Diastole':
                    out.extend(list(sub_diastole[key][0]))
                else:
                    out.extend(list(sub_systole[key][0]))
            else:
                # no prediction for this study: the row keeps only its id
                print('Miss {0}'.format(idx))
            fo.writerow(out)

    print('Done.')
def train():
    """
    Train the systole and diastole models.

    Each of the `nb_iter` rounds re-augments the training images (rotation,
    then shift), fits both models for `epochs_per_iter` epoch(s), optionally
    prints CRPS for the training and held-out data, and saves weights. The
    lowest validation losses seen so far are rewritten to 'val_loss.txt'
    at the end of every round.
    """
    banner = '-'*50

    print('Loading and compiling models...')
    systole_net = get_model()
    diastole_net = get_model()

    print('Loading training data...')
    X, y = load_train_data()

    print('Pre-processing images...')
    X = preprocess(X)

    # hold out part of the data for validation
    X_train, y_train, X_test, y_test = split_data(X, y, split_ratio=0.2)

    nb_iter = 200
    epochs_per_iter = 1
    batch_size = 32
    # evaluate CRPS every n-th round; 0 switches the evaluation off
    calc_crps = 1

    # keyword arguments shared by the fit() and predict() calls below
    fit_options = dict(shuffle=True, nb_epoch=epochs_per_iter,
                       batch_size=batch_size)
    predict_options = dict(batch_size=batch_size, verbose=1)

    # lowest validation losses so far (used as sigmas for the submission)
    best_val_systole = sys.float_info.max
    best_val_diastole = sys.float_info.max

    print(banner)
    print('Training...')
    print(banner)

    for round_idx in range(nb_iter):
        print(banner)
        print('Iteration {0}/{1}'.format(round_idx + 1, nb_iter))
        print(banner)

        print('Augmenting images - rotations')
        augmented = rotation_augmentation(X_train, 15)
        print('Augmenting images - shifts')
        augmented = shift_augmentation(augmented, 0.1, 0.1)

        # target column 0 is used for systole, column 1 for diastole
        print('Fitting systole model...')
        hist_systole = systole_net.fit(
            augmented, y_train[:, 0],
            validation_data=(X_test, y_test[:, 0]),
            **fit_options)

        print('Fitting diastole model...')
        hist_diastole = diastole_net.fit(
            augmented, y_train[:, 1],
            validation_data=(X_test, y_test[:, 1]),
            **fit_options)

        # last-epoch losses double as sigmas for the predicted CDFs
        # (loss function values, RMSE)
        loss_systole = hist_systole.history['loss'][-1]
        loss_diastole = hist_diastole.history['loss'][-1]
        val_loss_systole = hist_systole.history['val_loss'][-1]
        val_loss_diastole = hist_diastole.history['val_loss'][-1]

        crps_due = calc_crps > 0 and round_idx % calc_crps == 0
        if crps_due:
            print('Evaluating CRPS...')
            train_pred_sys = systole_net.predict(X_train, **predict_options)
            train_pred_dia = diastole_net.predict(X_train, **predict_options)
            test_pred_sys = systole_net.predict(X_test, **predict_options)
            test_pred_dia = diastole_net.predict(X_test, **predict_options)

            # reference CDFs (step functions): systole targets, then diastole
            cdf_train = real_to_cdf(
                np.concatenate((y_train[:, 0], y_train[:, 1])))
            cdf_test = real_to_cdf(
                np.concatenate((y_test[:, 0], y_test[:, 1])))

            # CDFs of the predictions
            cdf_train_sys = real_to_cdf(train_pred_sys, loss_systole)
            cdf_train_dia = real_to_cdf(train_pred_dia, loss_diastole)
            cdf_test_sys = real_to_cdf(test_pred_sys, val_loss_systole)
            cdf_test_dia = real_to_cdf(test_pred_dia, val_loss_diastole)

            crps_train = crps(
                cdf_train, np.concatenate((cdf_train_sys, cdf_train_dia)))
            print('CRPS(train) = {0}'.format(crps_train))

            crps_test = crps(
                cdf_test, np.concatenate((cdf_test_sys, cdf_test_dia)))
            print('CRPS(test) = {0}'.format(crps_test))

        print('Saving weights...')
        # latest weights, overwritten every round
        systole_net.save_weights('weights_systole.hdf5', overwrite=True)
        diastole_net.save_weights('weights_diastole.hdf5', overwrite=True)

        # separate copy whenever a model reaches a new lowest val. loss
        if val_loss_systole < best_val_systole:
            best_val_systole = val_loss_systole
            systole_net.save_weights('weights_systole_best.hdf5',
                                     overwrite=True)
        if val_loss_diastole < best_val_diastole:
            best_val_diastole = val_loss_diastole
            diastole_net.save_weights('weights_diastole_best.hdf5',
                                      overwrite=True)

        # persist the best val. losses (read back when building a submission)
        with open('val_loss.txt', mode='w+') as f:
            f.write(str(best_val_systole) + '\n' + str(best_val_diastole))
def train(data_prefix, prefix, seed, run):
    """
    Training systole and diastole models.

    Args:
        data_prefix: forwarded to load_train_data().
        prefix: string prepended to the name of every file written here
            (weight checkpoints and 'val_loss.txt').
        seed: forwarded to load_train_data().
        run: selects what is fitted: 0 = both models, 1 = systole only,
            2 = diastole only. CRPS is evaluated only when run == 0.

    Writes `prefix + 'val_loss.txt'` containing the lowest validation loss
    of each fitted model (systole first). With run == 2 the file holds only
    the diastole value, on its first line.
    """
    fit_systole = run == 0 or run == 1
    fit_diastole = run == 0 or run == 2

    print('Loading training data...')
    X, y = load_train_data(data_prefix, seed)

    print('Loading and compiling models...')
    model_systole = get_model()
    model_diastole = get_model()

    # split to training and test
    X_train, y_train, X_test, y_test = split_data(X, y, split_ratio=0.2)

    nb_iter = 200
    batch_size = 32
    calc_crps = 1  # set to 0 if CRPS estimation is not needed

    print('-' * 50)
    print('Training...')
    print('-' * 50)

    datagen = ImageDataGenerator(
        featurewise_center=False,  # set input mean to 0 over the dataset
        samplewise_center=False,  # set each sample mean to 0
        featurewise_std_normalization=False,  # divide inputs by dataset std
        samplewise_std_normalization=False,  # divide each input by its std
        zca_whitening=False,  # apply ZCA whitening
        rotation_range=15,  # random rotations, in degrees
        width_shift_range=0.1,  # horizontal shift (fraction of width)
        height_shift_range=0.1,  # vertical shift (fraction of height)
        horizontal_flip=True,  # randomly flip images
        vertical_flip=True)  # randomly flip images

    # compute quantities required for featurewise normalization
    # (std, mean, and principal components if ZCA whitening is applied)
    datagen.fit(X_train)

    # Per model: one checkpoint written every epoch and one that only keeps
    # the best epoch.
    systole_checkpointer_best = ModelCheckpoint(
        filepath=prefix + "weights_systole_best.hdf5",
        verbose=1, save_best_only=True)
    diastole_checkpointer_best = ModelCheckpoint(
        filepath=prefix + "weights_diastole_best.hdf5",
        verbose=1, save_best_only=True)
    systole_checkpointer = ModelCheckpoint(
        filepath=prefix + "weights_systole.hdf5",
        verbose=1, save_best_only=False)
    diastole_checkpointer = ModelCheckpoint(
        filepath=prefix + "weights_diastole.hdf5",
        verbose=1, save_best_only=False)

    # NOTE(review): both models are fitted on target column 2, while the
    # CRPS evaluation below compares against columns 0 and 1 - confirm that
    # this is intended.
    if fit_systole:
        print('Fitting Systole Shapes')
        hist_systole = model_systole.fit_generator(
            datagen.flow(X_train, y_train[:, 2], batch_size=batch_size),
            samples_per_epoch=X_train.shape[0],
            nb_epoch=nb_iter,
            show_accuracy=False,
            validation_data=(X_test, y_test[:, 2]),
            callbacks=[systole_checkpointer, systole_checkpointer_best],
            nb_worker=4)

    if fit_diastole:
        print('Fitting Diastole Shapes')
        hist_diastole = model_diastole.fit_generator(
            datagen.flow(X_train, y_train[:, 2], batch_size=batch_size),
            samples_per_epoch=X_train.shape[0],
            nb_epoch=nb_iter,
            show_accuracy=False,
            validation_data=(X_test, y_test[:, 2]),
            callbacks=[diastole_checkpointer, diastole_checkpointer_best],
            nb_worker=4)

    # last-epoch losses, used as sigmas for the predicted CDFs
    if fit_systole:
        loss_systole = hist_systole.history['loss'][-1]
        val_loss_systole = hist_systole.history['val_loss'][-1]
    if fit_diastole:
        loss_diastole = hist_diastole.history['loss'][-1]
        val_loss_diastole = hist_diastole.history['val_loss'][-1]

    # CRPS needs predictions from both models, hence run == 0 only
    if calc_crps > 0 and run == 0:
        print('Evaluating CRPS...')
        pred_systole = model_systole.predict(
            X_train, batch_size=batch_size, verbose=1)
        val_pred_systole = model_systole.predict(
            X_test, batch_size=batch_size, verbose=1)
        pred_diastole = model_diastole.predict(
            X_train, batch_size=batch_size, verbose=1)
        val_pred_diastole = model_diastole.predict(
            X_test, batch_size=batch_size, verbose=1)

        # CDF for train and test data (actually a step function)
        cdf_train = real_to_cdf(
            np.concatenate((y_train[:, 0], y_train[:, 1])))
        cdf_test = real_to_cdf(
            np.concatenate((y_test[:, 0], y_test[:, 1])))

        # CDF for predicted data
        cdf_pred_systole = real_to_cdf(pred_systole, loss_systole)
        cdf_val_pred_systole = real_to_cdf(val_pred_systole,
                                           val_loss_systole)
        cdf_pred_diastole = real_to_cdf(pred_diastole, loss_diastole)
        cdf_val_pred_diastole = real_to_cdf(val_pred_diastole,
                                            val_loss_diastole)

        # evaluate CRPS on training data
        crps_train = crps(
            cdf_train,
            np.concatenate((cdf_pred_systole, cdf_pred_diastole)))
        print('CRPS(train) = {0}'.format(crps_train))

        # evaluate CRPS on test data
        crps_test = crps(
            cdf_test,
            np.concatenate((cdf_val_pred_systole, cdf_val_pred_diastole)))
        print('CRPS(test) = {0}'.format(crps_test))

    # save best (lowest) val losses in file
    # (to be later used for generating submission)
    with open(prefix + 'val_loss.txt', mode='w+') as f:
        if fit_systole:
            f.write(str(min(hist_systole.history['val_loss'])))
            f.write('\n')
        if fit_diastole:
            f.write(str(min(hist_diastole.history['val_loss'])))
def train():
    """
    Train the systole and diastole models, reporting progress via logging.

    Each of the `nb_iter` rounds re-augments the training images (rotation,
    then shift), fits both models for `epochs_per_iter` epoch(s), optionally
    logs CRPS for the training and held-out data, and saves weights under
    '../models/weights/'. The lowest validation losses seen so far are
    rewritten to './logs/val_loss.txt' at the end of every round.
    """
    banner = '-'*50

    logging.info('Loading and compiling models...')
    systole_net = get_model()
    diastole_net = get_model()

    logging.info('Loading training data...')
    X, y = load_train_data()

    logging.info('Pre-processing images...')
    X = preprocess(X)

    # hold out part of the data for validation
    X_train, y_train, X_test, y_test = split_data(X, y, split_ratio=0.15)

    nb_iter = 600
    epochs_per_iter = 1
    batch_size = 32
    # evaluate CRPS every n-th round; 0 switches the evaluation off
    calc_crps = 1

    # keyword arguments shared by the fit() and predict() calls below
    fit_options = dict(shuffle=True, nb_epoch=epochs_per_iter,
                       batch_size=batch_size)
    predict_options = dict(batch_size=batch_size, verbose=1)

    # lowest validation losses so far (used as sigmas for the submission)
    best_val_systole = sys.float_info.max
    best_val_diastole = sys.float_info.max

    logging.info(banner)
    logging.info('Training...')
    logging.info(banner)

    for round_idx in range(nb_iter):
        logging.info(banner)
        logging.info('Iteration {0}/{1}'.format(round_idx + 1, nb_iter))
        logging.info(banner)

        logging.info('Augmenting images - rotations')
        augmented = rotation_augmentation(X_train, 20)
        logging.info('Augmenting images - shifts')
        augmented = shift_augmentation(augmented, 0.1, 0.1)

        # target column 0 is used for systole, column 1 for diastole
        logging.info('Fitting systole model...')
        hist_systole = systole_net.fit(
            augmented, y_train[:, 0],
            validation_data=(X_test, y_test[:, 0]),
            **fit_options)

        logging.info('Fitting diastole model...')
        hist_diastole = diastole_net.fit(
            augmented, y_train[:, 1],
            validation_data=(X_test, y_test[:, 1]),
            **fit_options)

        # last-epoch losses double as sigmas for the predicted CDFs
        # (loss function values, RMSE)
        loss_systole = hist_systole.history['loss'][-1]
        loss_diastole = hist_diastole.history['loss'][-1]
        val_loss_systole = hist_systole.history['val_loss'][-1]
        val_loss_diastole = hist_diastole.history['val_loss'][-1]

        crps_due = calc_crps > 0 and round_idx % calc_crps == 0
        if crps_due:
            logging.info('Evaluating CRPS...')
            train_pred_sys = systole_net.predict(X_train, **predict_options)
            train_pred_dia = diastole_net.predict(X_train, **predict_options)
            test_pred_sys = systole_net.predict(X_test, **predict_options)
            test_pred_dia = diastole_net.predict(X_test, **predict_options)

            # reference CDFs (step functions): systole targets, then diastole
            cdf_train = real_to_cdf(
                np.concatenate((y_train[:, 0], y_train[:, 1])))
            cdf_test = real_to_cdf(
                np.concatenate((y_test[:, 0], y_test[:, 1])))

            # CDFs of the predictions
            cdf_train_sys = real_to_cdf(train_pred_sys, loss_systole)
            cdf_train_dia = real_to_cdf(train_pred_dia, loss_diastole)
            cdf_test_sys = real_to_cdf(test_pred_sys, val_loss_systole)
            cdf_test_dia = real_to_cdf(test_pred_dia, val_loss_diastole)

            crps_train = crps(
                cdf_train, np.concatenate((cdf_train_sys, cdf_train_dia)))
            logging.info('CRPS(train) = {0}'.format(crps_train))

            crps_test = crps(
                cdf_test, np.concatenate((cdf_test_sys, cdf_test_dia)))
            logging.info('CRPS(test) = {0}'.format(crps_test))

        logging.info('Saving weights...')
        # latest weights, overwritten every round
        systole_net.save_weights('../models/weights/weights_systole.hdf5',
                                 overwrite=True)
        diastole_net.save_weights('../models/weights/weights_diastole.hdf5',
                                  overwrite=True)

        # separate copy whenever a model reaches a new lowest val. loss
        if val_loss_systole < best_val_systole:
            best_val_systole = val_loss_systole
            systole_net.save_weights(
                '../models/weights/weights_systole_best.hdf5',
                overwrite=True)
        if val_loss_diastole < best_val_diastole:
            best_val_diastole = val_loss_diastole
            diastole_net.save_weights(
                '../models/weights/weights_diastole_best.hdf5',
                overwrite=True)

        # persist the best val. losses (read back when building a submission)
        with open('./logs/val_loss.txt', mode='w+') as f:
            f.write(str(best_val_systole) + '\n' + str(best_val_diastole))
# Restore the best weights saved earlier, when the files are present.
systole_weights_path = '/data/run2/weights_systole_best.hdf5'
diastole_weights_path = '/data/run2/weights_diastole_best.hdf5'
if os.path.isfile(systole_weights_path):
    print('loading weights')
    model_systole.load_weights(systole_weights_path)
if os.path.isfile(diastole_weights_path):
    model_diastole.load_weights(diastole_weights_path)

print('Loading training data...')
X, y, metadata = load_train_data()
print(metadata[0, :].shape, metadata[0].shape, metadata[0:1, :].shape)

# Only samples 40..49 are predicted and exported.
picked = slice(40, 50)
systole_inputs = {'input1': X[picked, :, :, :],
                  'input2': metadata[picked, :],
                  'output': y[picked, 0]}
pred_systole = model_systole.predict(systole_inputs)['output']
diastole_inputs = {'input1': X[picked, :, :, :],
                   'input2': metadata[picked, :],
                   'output': y[picked, 1]}
pred_diastole = model_diastole.predict(diastole_inputs)['output']

# Reference CDF (a step function): systole targets followed by diastole.
cdf_train = real_to_cdf(np.concatenate((y[picked, 0], y[picked, 1])))

# Predicted CDFs; the sigmas are hard-coded constants.
cdf_pred_systole = real_to_cdf(pred_systole, 27.7407048805)
cdf_pred_diastole = real_to_cdf(pred_diastole, 38.5512729527)

# Dump the three arrays to .npy files.
for npy_name, array in (('systolecdf.npy', cdf_pred_systole),
                        ('diastolecdf.npy', cdf_pred_diastole),
                        ('gt.npy', cdf_train)):
    np.save(npy_name, array)
def build_submission(config):
    """
    Build the submission CSV described by `config`.

    Attributes read from `config`:
        systole_weights, diastole_weights: weight files loaded into the
            two models.
        val_loss_systole, val_loss_diastole: text files whose first line is
            parsed as a float and used as sigma for the respective CDF.
        submission: path of the CSV file to write.

    One output row is written per row of
    '/data/sample_submission_validate.csv'.
    """
    model_systole = get_model()
    model_diastole = get_model()

    print('Loading models weights...')
    model_systole.load_weights(config.systole_weights)
    model_diastole.load_weights(config.diastole_weights)

    # load val losses to use as sigmas for CDF
    with open(config.val_loss_systole, 'r') as f:
        val_loss_systole = float(f.readline())
    with open(config.val_loss_diastole, 'r') as f:
        val_loss_diastole = float(f.readline())

    print('Loading validation data...')
    X, ids, mult = load_validation_data()

    batch_size = 32
    print('Predicting on validation data...')
    pred_normed_systole = model_systole.predict(
        X, batch_size=batch_size, verbose=1)
    pred_normed_diastole = model_diastole.predict(
        X, batch_size=batch_size, verbose=1)

    print('Normed_systole:', pred_normed_systole.shape)
    print('Normed_diastole:', pred_normed_diastole.shape)
    print('mult:', mult.shape)

    # Scale the first output column by `mult`.
    # NOTE(review): presumably this undoes a per-sample normalisation done
    # by the data loader - confirm against load_validation_data().
    pred_systole = pred_normed_systole[:, 0] * mult
    pred_diastole = pred_normed_diastole[:, 0] * mult

    print('systole:', pred_systole.shape)
    print('diastole:', pred_diastole.shape)

    # real predictions to CDF
    cdf_pred_systole = real_to_cdf(pred_systole, val_loss_systole)
    cdf_pred_diastole = real_to_cdf(pred_diastole, val_loss_diastole)

    print('Accumulating results...')
    sub_systole = accumulate_study_results(ids, cdf_pred_systole)
    sub_diastole = accumulate_study_results(ids, cdf_pred_diastole)

    # write to submission file
    print('Writing submission to file...')
    # Context managers close both files even if a row fails to parse.
    with open('/data/sample_submission_validate.csv') as sample_file, \
            open(config.submission, 'w') as out_file:
        fi = csv.reader(sample_file)
        fo = csv.writer(out_file, lineterminator='\n')
        fo.writerow(next(fi))  # copy the header row
        for line in fi:
            idx = line[0]
            # ids have the form '<study key>_<target>'
            key, target = idx.split('_')
            key = int(key)
            out = [idx]
            if key in sub_systole:
                if target == 'Diastole':
                    out.extend(list(sub_diastole[key][0]))
                else:
                    out.extend(list(sub_systole[key][0]))
            else:
                # no prediction for this study: the row keeps only its id
                print('Miss {0}'.format(idx))
            fo.writerow(out)

    print('Done.')
def train():
    """
    Training systole and diastole models.

    Both models are fed dictionaries with the keys 'input1' (images),
    'input2' (metadata) and 'output' (target). Best weights found under
    /data/run2 are loaded before training starts. Every iteration fits each
    model for `epochs_per_iter` epoch(s), saves the weights of a model when
    its validation loss reaches a new minimum, rewrites
    /data/run2/val_loss.txt and appends one tab-separated line to
    /data/run2/loss.txt (iteration, losses, val losses, CRPS values).
    """
    def named_data(images, meta, target):
        # data dictionary keyed by the names the models are called with
        return {'input1': images, 'input2': meta, 'output': target}

    print('Loading and compiling models...')
    model_systole = get_model()
    model_diastole = get_model()

    # import best model if it exists
    if os.path.isfile('/data/run2/weights_systole_best.hdf5'):
        print('loading weights')
        model_systole.load_weights('/data/run2/weights_systole_best.hdf5')

    if os.path.isfile('/data/run2/weights_diastole_best.hdf5'):
        model_diastole.load_weights('/data/run2/weights_diastole_best.hdf5')

    print('Loading training data...')
    X, y, metadata = load_train_data()

    # NOTE: the pre-processing step is disabled in this version.

    # split to training and test
    (X_train, y_train, X_test, y_test,
     metadata_train, metadata_test) = split_data(
        X, y, metadata, split_ratio=0.2)

    nb_iter = 200
    epochs_per_iter = 1
    batch_size = 8
    # calculate CRPS every n-th iteration
    # (set to 0 if CRPS estimation is not needed)
    calc_crps = 5

    # remember min val. losses (best iterations), used as sigmas for
    # submission
    # NOTE(review): these start at float max even when weights were loaded
    # above, so the first iteration always overwrites the "best" files.
    min_val_loss_systole = sys.float_info.max
    min_val_loss_diastole = sys.float_info.max

    # The CRPS values are logged every iteration but refreshed only every
    # calc_crps-th one. Start from NaN so that logging also works when CRPS
    # evaluation is switched off (calc_crps = 0).
    crps_train = float('nan')
    crps_test = float('nan')

    print('-' * 50)
    print('Training...')
    print('-' * 50)

    for i in range(0, nb_iter):
        print('-' * 50)
        print('Iteration {0}/{1}'.format(i + 1, nb_iter))
        print('-' * 50)

        # Augmentation (rotations / shifts) is disabled in this version;
        # the models are fitted on the unmodified training images.
        X_train_aug = X_train

        print('Fitting systole model...')
        hist_systole = model_systole.fit(
            named_data(X_train_aug, metadata_train, y_train[:, 0]),
            shuffle=True,
            nb_epoch=epochs_per_iter,
            batch_size=batch_size,
            validation_data=named_data(X_test, metadata_test, y_test[:, 0]))

        print('Fitting diastole model...')
        hist_diastole = model_diastole.fit(
            named_data(X_train_aug, metadata_train, y_train[:, 1]),
            shuffle=True,
            nb_epoch=epochs_per_iter,
            batch_size=batch_size,
            validation_data=named_data(X_test, metadata_test, y_test[:, 1]))

        # sigmas for predicted data, actually loss function values (RMSE)
        loss_systole = hist_systole.history['loss'][-1]
        loss_diastole = hist_diastole.history['loss'][-1]
        val_loss_systole = hist_systole.history['val_loss'][-1]
        val_loss_diastole = hist_diastole.history['val_loss'][-1]

        if calc_crps > 0 and i % calc_crps == 0:
            print('Evaluating CRPS...')
            pred_systole = model_systole.predict(
                named_data(X_train, metadata_train, y_train[:, 0]),
                batch_size=batch_size, verbose=1)['output']
            pred_diastole = model_diastole.predict(
                named_data(X_train, metadata_train, y_train[:, 1]),
                batch_size=batch_size, verbose=1)['output']
            val_pred_systole = model_systole.predict(
                named_data(X_test, metadata_test, y_test[:, 0]),
                batch_size=batch_size, verbose=1)['output']
            val_pred_diastole = model_diastole.predict(
                named_data(X_test, metadata_test, y_test[:, 1]),
                batch_size=batch_size, verbose=1)['output']

            # CDF for train and test data (actually a step function)
            cdf_train = real_to_cdf(
                np.concatenate((y_train[:, 0], y_train[:, 1])))
            cdf_test = real_to_cdf(
                np.concatenate((y_test[:, 0], y_test[:, 1])))

            # CDF for predicted data
            cdf_pred_systole = real_to_cdf(pred_systole, loss_systole)
            cdf_pred_diastole = real_to_cdf(pred_diastole, loss_diastole)
            cdf_val_pred_systole = real_to_cdf(val_pred_systole,
                                               val_loss_systole)
            cdf_val_pred_diastole = real_to_cdf(val_pred_diastole,
                                                val_loss_diastole)

            # evaluate CRPS on training data
            crps_train = crps(
                cdf_train,
                np.concatenate((cdf_pred_systole, cdf_pred_diastole)))
            print('CRPS(train) = {0}'.format(crps_train))

            # evaluate CRPS on test data
            crps_test = crps(
                cdf_test,
                np.concatenate((cdf_val_pred_systole,
                                cdf_val_pred_diastole)))
            print('CRPS(test) = {0}'.format(crps_test))

        # for best (lowest) val losses, save weights
        if val_loss_systole < min_val_loss_systole:
            min_val_loss_systole = val_loss_systole
            model_systole.save_weights(
                '/data/run2/weights_systole_best.hdf5', overwrite=True)

        if val_loss_diastole < min_val_loss_diastole:
            min_val_loss_diastole = val_loss_diastole
            model_diastole.save_weights(
                '/data/run2/weights_diastole_best.hdf5', overwrite=True)

        # save best (lowest) val losses in file
        # (to be later used for generating submission)
        with open('/data/run2/val_loss.txt', mode='w+') as f:
            f.write(str(min_val_loss_systole))
            f.write('\n')
            f.write(str(min_val_loss_diastole))

        # append this iteration's numbers to the running log; the CRPS
        # columns repeat the most recently computed values
        with open("/data/run2/loss.txt", "a+") as myfile:
            myfile.write('\t'.join(
                (str(i + 1), str(loss_systole), str(loss_diastole),
                 str(val_loss_systole), str(val_loss_diastole),
                 str(crps_train), str(crps_test))))
            myfile.write('\n')
def c3d_submission():
    """
    Generate the submission file 'c3d_channel_15.csv' with the C3D models.

    Model weights are set from 'sys_best_c3d.pkl' and 'dia_best_c3d.pkl',
    sigmas for the CDFs are read from 'val_loss.txt' (first line systole,
    second line diastole), and one row is written per row of
    'data/sample_submission_validate.csv'.
    """
    print('Loading and compiling models...')
    model_systole = c3d_sys()
    model_diastole = c3d_dia()

    print('Loading models weights...')
    set_weights(model_systole, 'sys_best_c3d.pkl')
    set_weights(model_diastole, 'dia_best_c3d.pkl')

    # load val losses to use as sigmas for CDF
    with open('val_loss.txt', mode='r') as f:
        val_loss_systole = float(f.readline())
        val_loss_diastole = float(f.readline())

    print('Loading validation data...')
    X, ids = load_validation_data()

    # NOTE: the message is still printed, but the pre-processing call itself
    # is disabled in this version.
    print('Pre-processing images...')

    print('Predicting on validation data...')
    # a new axis is inserted at position 1 before the data is fed in
    pred_systole = model_systole.predict(X[:, np.newaxis, :, :, :])
    pred_diastole = model_diastole.predict(X[:, np.newaxis, :, :, :])

    # Average the first half of the predictions with the second half.
    # Floor division keeps the slice bounds integral on Python 3 as well.
    # NOTE(review): presumably X stacks two variants of every sample one
    # after the other - confirm against load_validation_data().
    half_systole = len(pred_systole) // 2
    pred_systole = (pred_systole[:half_systole]
                    + pred_systole[half_systole:]) * 0.5
    half_diastole = len(pred_diastole) // 2
    pred_diastole = (pred_diastole[:half_diastole]
                     + pred_diastole[half_diastole:]) * 0.5

    # real predictions to CDF
    cdf_pred_systole = real_to_cdf(pred_systole, val_loss_systole)
    cdf_pred_diastole = real_to_cdf(pred_diastole, val_loss_diastole)

    print('Accumulating results...')
    sub_systole = accumulate_study_results(ids, cdf_pred_systole)
    sub_diastole = accumulate_study_results(ids, cdf_pred_diastole)

    # write to submission file
    print('Writing submission to file...')
    # Context managers close both files even if a row fails to parse.
    with open('data/sample_submission_validate.csv') as sample_file, \
            open('c3d_channel_15.csv', "w+") as out_file:
        fi = csv.reader(sample_file)
        fo = csv.writer(out_file, lineterminator='\n')
        # copy the header row; next() works on both Python 2 and 3
        fo.writerow(next(fi))
        for line in fi:
            idx = line[0]
            # ids have the form '<study key>_<target>'
            key, target = idx.split('_')
            key = int(key)
            out = [idx]
            if key in sub_systole:
                if target == 'Diastole':
                    out.extend(list(sub_diastole[key][0]))
                else:
                    out.extend(list(sub_systole[key][0]))
            else:
                # no prediction for this study: the row keeps only its id
                print('Miss {0}'.format(idx))
            fo.writerow(out)

    print('Done.')
def train(data_prefix, prefix, seed, run):
    """
    Training systole and diastole models.

    Args:
        data_prefix: forwarded to load_train_data().
        prefix: string prepended to the name of every file written here
            (weight checkpoints and 'val_loss.txt').
        seed: forwarded to load_train_data().
        run: selects what is fitted: 0 = both models, 1 = systole only,
            2 = diastole only. CRPS is evaluated only when run == 0.

    Writes `prefix + 'val_loss.txt'` containing the lowest validation loss
    of each fitted model (systole first). With run == 2 the file holds only
    the diastole value, on its first line.
    """
    fit_systole = run == 0 or run == 1
    fit_diastole = run == 0 or run == 2

    print('Loading training data...')
    X, y = load_train_data(data_prefix, seed)

    print('Loading and compiling models...')
    model_systole = get_model()
    model_diastole = get_model()

    # split to training and test
    X_train, y_train, X_test, y_test = split_data(X, y, split_ratio=0.2)

    nb_iter = 200
    batch_size = 32
    calc_crps = 1  # set to 0 if CRPS estimation is not needed

    print('-'*50)
    print('Training...')
    print('-'*50)

    datagen = ImageDataGenerator(
        featurewise_center=False,  # set input mean to 0 over the dataset
        samplewise_center=False,  # set each sample mean to 0
        featurewise_std_normalization=False,  # divide inputs by dataset std
        samplewise_std_normalization=False,  # divide each input by its std
        zca_whitening=False,  # apply ZCA whitening
        rotation_range=15,  # random rotations, in degrees
        width_shift_range=0.1,  # horizontal shift (fraction of width)
        height_shift_range=0.1,  # vertical shift (fraction of height)
        horizontal_flip=True,  # randomly flip images
        vertical_flip=True)  # randomly flip images

    # compute quantities required for featurewise normalization
    # (std, mean, and principal components if ZCA whitening is applied)
    datagen.fit(X_train)

    # Per model: one checkpoint written every epoch and one that only keeps
    # the best epoch.
    systole_checkpointer_best = ModelCheckpoint(
        filepath=prefix + "weights_systole_best.hdf5",
        verbose=1, save_best_only=True)
    diastole_checkpointer_best = ModelCheckpoint(
        filepath=prefix + "weights_diastole_best.hdf5",
        verbose=1, save_best_only=True)
    systole_checkpointer = ModelCheckpoint(
        filepath=prefix + "weights_systole.hdf5",
        verbose=1, save_best_only=False)
    diastole_checkpointer = ModelCheckpoint(
        filepath=prefix + "weights_diastole.hdf5",
        verbose=1, save_best_only=False)

    # NOTE(review): both models are fitted on target column 2, while the
    # CRPS evaluation below compares against columns 0 and 1 - confirm that
    # this is intended.
    if fit_systole:
        print('Fitting Systole Shapes')
        hist_systole = model_systole.fit_generator(
            datagen.flow(X_train, y_train[:, 2], batch_size=batch_size),
            samples_per_epoch=X_train.shape[0],
            nb_epoch=nb_iter,
            show_accuracy=False,
            validation_data=(X_test, y_test[:, 2]),
            callbacks=[systole_checkpointer, systole_checkpointer_best],
            nb_worker=4)

    if fit_diastole:
        print('Fitting Diastole Shapes')
        hist_diastole = model_diastole.fit_generator(
            datagen.flow(X_train, y_train[:, 2], batch_size=batch_size),
            samples_per_epoch=X_train.shape[0],
            nb_epoch=nb_iter,
            show_accuracy=False,
            validation_data=(X_test, y_test[:, 2]),
            callbacks=[diastole_checkpointer, diastole_checkpointer_best],
            nb_worker=4)

    # last-epoch losses, used as sigmas for the predicted CDFs
    if fit_systole:
        loss_systole = hist_systole.history['loss'][-1]
        val_loss_systole = hist_systole.history['val_loss'][-1]
    if fit_diastole:
        loss_diastole = hist_diastole.history['loss'][-1]
        val_loss_diastole = hist_diastole.history['val_loss'][-1]

    # CRPS needs predictions from both models, hence run == 0 only
    if calc_crps > 0 and run == 0:
        print('Evaluating CRPS...')
        pred_systole = model_systole.predict(
            X_train, batch_size=batch_size, verbose=1)
        val_pred_systole = model_systole.predict(
            X_test, batch_size=batch_size, verbose=1)
        pred_diastole = model_diastole.predict(
            X_train, batch_size=batch_size, verbose=1)
        val_pred_diastole = model_diastole.predict(
            X_test, batch_size=batch_size, verbose=1)

        # CDF for train and test data (actually a step function)
        cdf_train = real_to_cdf(
            np.concatenate((y_train[:, 0], y_train[:, 1])))
        cdf_test = real_to_cdf(
            np.concatenate((y_test[:, 0], y_test[:, 1])))

        # CDF for predicted data
        cdf_pred_systole = real_to_cdf(pred_systole, loss_systole)
        cdf_val_pred_systole = real_to_cdf(val_pred_systole,
                                           val_loss_systole)
        cdf_pred_diastole = real_to_cdf(pred_diastole, loss_diastole)
        cdf_val_pred_diastole = real_to_cdf(val_pred_diastole,
                                            val_loss_diastole)

        # evaluate CRPS on training data
        crps_train = crps(
            cdf_train,
            np.concatenate((cdf_pred_systole, cdf_pred_diastole)))
        print('CRPS(train) = {0}'.format(crps_train))

        # evaluate CRPS on test data
        crps_test = crps(
            cdf_test,
            np.concatenate((cdf_val_pred_systole, cdf_val_pred_diastole)))
        print('CRPS(test) = {0}'.format(crps_test))

    # save best (lowest) val losses in file
    # (to be later used for generating submission)
    with open(prefix + 'val_loss.txt', mode='w+') as f:
        if fit_systole:
            f.write(str(min(hist_systole.history['val_loss'])))
            f.write('\n')
        if fit_diastole:
            f.write(str(min(hist_diastole.history['val_loss'])))
def train():
    """
    Train the systole and diastole models.

    Each of the `nb_iter` rounds re-augments the training images (rotation,
    then shift), fits both models for `epochs_per_iter` epoch(s), optionally
    prints CRPS for the training and held-out data, and saves weights. The
    lowest validation losses seen so far are rewritten to 'val_loss.txt'
    at the end of every round.
    """
    separator = '-'*50

    print('Loading and compiling models...')
    net_sys = get_model()
    net_dia = get_model()

    print('Loading training data...')
    X, y = load_train_data()

    print('Pre-processing images...')
    X = preprocess(X)

    # hold out part of the data for validation
    X_train, y_train, X_test, y_test = split_data(X, y, split_ratio=0.2)

    nb_iter = 200
    epochs_per_iter = 1
    batch_size = 32
    # CRPS is evaluated every calc_crps-th round; values <= 0 disable it
    calc_crps = 1

    # keyword arguments shared by the fit() and predict() calls below
    fit_kwargs = dict(shuffle=True, nb_epoch=epochs_per_iter,
                      batch_size=batch_size)
    predict_kwargs = dict(batch_size=batch_size, verbose=1)

    # lowest validation losses observed so far
    lowest_val_sys = sys.float_info.max
    lowest_val_dia = sys.float_info.max

    print(separator)
    print('Training...')
    print(separator)

    for step in range(nb_iter):
        print(separator)
        print('Iteration {0}/{1}'.format(step + 1, nb_iter))
        print(separator)

        print('Augmenting images - rotations')
        X_aug = rotation_augmentation(X_train, 15)
        print('Augmenting images - shifts')
        X_aug = shift_augmentation(X_aug, 0.1, 0.1)

        # target column 0 is used for systole, column 1 for diastole
        print('Fitting systole model...')
        hist_systole = net_sys.fit(
            X_aug, y_train[:, 0],
            validation_data=(X_test, y_test[:, 0]),
            **fit_kwargs)

        print('Fitting diastole model...')
        hist_diastole = net_dia.fit(
            X_aug, y_train[:, 1],
            validation_data=(X_test, y_test[:, 1]),
            **fit_kwargs)

        # last-epoch losses; passed to real_to_cdf() with the predictions
        loss_systole = hist_systole.history['loss'][-1]
        loss_diastole = hist_diastole.history['loss'][-1]
        val_loss_systole = hist_systole.history['val_loss'][-1]
        val_loss_diastole = hist_diastole.history['val_loss'][-1]

        evaluate_now = calc_crps > 0 and step % calc_crps == 0
        if evaluate_now:
            print('Evaluating CRPS...')
            p_train_sys = net_sys.predict(X_train, **predict_kwargs)
            p_train_dia = net_dia.predict(X_train, **predict_kwargs)
            p_test_sys = net_sys.predict(X_test, **predict_kwargs)
            p_test_dia = net_dia.predict(X_test, **predict_kwargs)

            # reference CDFs: systole targets followed by diastole targets
            cdf_train = real_to_cdf(
                np.concatenate((y_train[:, 0], y_train[:, 1])))
            cdf_test = real_to_cdf(
                np.concatenate((y_test[:, 0], y_test[:, 1])))

            # CDFs of the predictions
            cdf_p_train_sys = real_to_cdf(p_train_sys, loss_systole)
            cdf_p_train_dia = real_to_cdf(p_train_dia, loss_diastole)
            cdf_p_test_sys = real_to_cdf(p_test_sys, val_loss_systole)
            cdf_p_test_dia = real_to_cdf(p_test_dia, val_loss_diastole)

            crps_train = crps(
                cdf_train,
                np.concatenate((cdf_p_train_sys, cdf_p_train_dia)))
            print('CRPS(train) = {0}'.format(crps_train))

            crps_test = crps(
                cdf_test,
                np.concatenate((cdf_p_test_sys, cdf_p_test_dia)))
            print('CRPS(test) = {0}'.format(crps_test))

        print('Saving weights...')
        # latest weights, overwritten every round
        net_sys.save_weights('weights_systole.hdf5', overwrite=True)
        net_dia.save_weights('weights_diastole.hdf5', overwrite=True)

        # separate copy whenever a model reaches a new lowest val. loss
        if val_loss_systole < lowest_val_sys:
            lowest_val_sys = val_loss_systole
            net_sys.save_weights('weights_systole_best.hdf5', overwrite=True)
        if val_loss_diastole < lowest_val_dia:
            lowest_val_dia = val_loss_diastole
            net_dia.save_weights('weights_diastole_best.hdf5',
                                 overwrite=True)

        # persist the lowest val. losses, one per line (systole first)
        with open('val_loss.txt', mode='w+') as f:
            f.write(str(lowest_val_sys) + '\n' + str(lowest_val_dia))
def train():
    """
    Training systole and diastole models.

    Both models take two inputs, fed as dicts keyed ``'input1'`` (images),
    ``'input2'`` (metadata) and ``'output'`` (target). Existing best weights
    under ``/data/run2`` are loaded before training when present.

    Each iteration fits both models for ``epochs_per_iter`` epochs, evaluates
    CRPS every ``calc_crps``-th iteration, saves the weights whenever the
    validation loss improves, rewrites ``/data/run2/val_loss.txt`` with the
    best validation losses and appends one tab-separated row to
    ``/data/run2/loss.txt``:
    iteration, loss_systole, loss_diastole, val_loss_systole,
    val_loss_diastole, crps_train, crps_test.
    The two CRPS columns are ``nan`` on iterations where CRPS was not
    evaluated.
    """
    print('Loading and compiling models...')
    model_systole = get_model()
    model_diastole = get_model()

    # import best model if it exists
    # NOTE(review): the best-loss trackers below still start at float max, so
    # the first iteration always overwrites the loaded "best" weights, even
    # if it is worse than the run that produced them - confirm this is
    # intended.
    if os.path.isfile('/data/run2/weights_systole_best.hdf5'):
        print('loading weights')
        model_systole.load_weights('/data/run2/weights_systole_best.hdf5')
    if os.path.isfile('/data/run2/weights_diastole_best.hdf5'):
        model_diastole.load_weights('/data/run2/weights_diastole_best.hdf5')

    print('Loading training data...')
    X, y, metadata = load_train_data()

    # split to training and test
    X_train, y_train, X_test, y_test, metadata_train, metadata_test = \
        split_data(X, y, metadata, split_ratio=0.2)

    nb_iter = 200
    epochs_per_iter = 1
    batch_size = 8
    calc_crps = 5  # calculate CRPS every n-th iteration (set to 0 if CRPS estimation is not needed)

    # remember min val. losses (best iterations), used as sigmas for submission
    min_val_loss_systole = sys.float_info.max
    min_val_loss_diastole = sys.float_info.max

    print('-'*50)
    print('Training...')
    print('-'*50)

    for i in range(nb_iter):
        print('-'*50)
        print('Iteration {0}/{1}'.format(i + 1, nb_iter))
        print('-'*50)

        # Reset per iteration so that loss.txt never repeats a CRPS value
        # from an earlier iteration and never hits an undefined name when
        # CRPS evaluation is disabled.
        crps_train = float('nan')
        crps_test = float('nan')

        # augmentation is disabled in this variant
        X_train_aug = X_train

        print('Fitting systole model...')
        hist_systole = model_systole.fit(
            {'input1': X_train_aug, 'input2': metadata_train,
             'output': y_train[:, 0]},
            shuffle=True,
            nb_epoch=epochs_per_iter,
            batch_size=batch_size,
            validation_data={'input1': X_test, 'input2': metadata_test,
                             'output': y_test[:, 0]})

        print('Fitting diastole model...')
        hist_diastole = model_diastole.fit(
            {'input1': X_train_aug, 'input2': metadata_train,
             'output': y_train[:, 1]},
            shuffle=True,
            nb_epoch=epochs_per_iter,
            batch_size=batch_size,
            validation_data={'input1': X_test, 'input2': metadata_test,
                             'output': y_test[:, 1]})

        # sigmas for predicted data, actually loss function values (RMSE)
        loss_systole = hist_systole.history['loss'][-1]
        loss_diastole = hist_diastole.history['loss'][-1]
        val_loss_systole = hist_systole.history['val_loss'][-1]
        val_loss_diastole = hist_diastole.history['val_loss'][-1]

        if calc_crps > 0 and i % calc_crps == 0:
            print('Evaluating CRPS...')
            pred_systole = model_systole.predict(
                {'input1': X_train, 'input2': metadata_train,
                 'output': y_train[:, 0]},
                batch_size=batch_size, verbose=1)['output']
            pred_diastole = model_diastole.predict(
                {'input1': X_train, 'input2': metadata_train,
                 'output': y_train[:, 1]},
                batch_size=batch_size, verbose=1)['output']
            val_pred_systole = model_systole.predict(
                {'input1': X_test, 'input2': metadata_test,
                 'output': y_test[:, 0]},
                batch_size=batch_size, verbose=1)['output']
            val_pred_diastole = model_diastole.predict(
                {'input1': X_test, 'input2': metadata_test,
                 'output': y_test[:, 1]},
                batch_size=batch_size, verbose=1)['output']

            # CDF for train and test data (actually a step function)
            cdf_train = real_to_cdf(np.concatenate((y_train[:, 0], y_train[:, 1])))
            cdf_test = real_to_cdf(np.concatenate((y_test[:, 0], y_test[:, 1])))

            # CDF for predicted data
            cdf_pred_systole = real_to_cdf(pred_systole, loss_systole)
            cdf_pred_diastole = real_to_cdf(pred_diastole, loss_diastole)
            cdf_val_pred_systole = real_to_cdf(val_pred_systole, val_loss_systole)
            cdf_val_pred_diastole = real_to_cdf(val_pred_diastole, val_loss_diastole)

            # evaluate CRPS on training data
            crps_train = crps(cdf_train, np.concatenate((cdf_pred_systole, cdf_pred_diastole)))
            print('CRPS(train) = {0}'.format(crps_train))

            # evaluate CRPS on test data
            crps_test = crps(cdf_test, np.concatenate((cdf_val_pred_systole, cdf_val_pred_diastole)))
            print('CRPS(test) = {0}'.format(crps_test))

        # for best (lowest) val losses, save weights
        if val_loss_systole < min_val_loss_systole:
            min_val_loss_systole = val_loss_systole
            model_systole.save_weights('/data/run2/weights_systole_best.hdf5', overwrite=True)

        if val_loss_diastole < min_val_loss_diastole:
            min_val_loss_diastole = val_loss_diastole
            model_diastole.save_weights('/data/run2/weights_diastole_best.hdf5', overwrite=True)

        # save best (lowest) val losses in file (to be later used for generating submission)
        with open('/data/run2/val_loss.txt', mode='w+') as f:
            f.write(str(min_val_loss_systole))
            f.write('\n')
            f.write(str(min_val_loss_diastole))

        # append this iteration's metrics to the running log
        with open("/data/run2/loss.txt", "a+") as myfile:
            myfile.write('\t'.join((str(i+1), str(loss_systole), str(loss_diastole),
                                    str(val_loss_systole), str(val_loss_diastole),
                                    str(crps_train), str(crps_test))))
            myfile.write('\n')
def train():
    """
    Training systole and diastole models.

    Builds two ``VGG_16_112`` models from previously saved best-weight files,
    then trains them batch by batch on images produced by an
    ``ImageDataGenerator`` (feature-wise centering/std normalisation, random
    rotations, shifts and horizontal flips). After every epoch it optionally
    evaluates CRPS, saves the latest weights, saves the best weights when the
    validation loss improves, and rewrites ``val_loss.txt`` with the lowest
    validation losses seen so far.
    """
    print('Loading and compiling models...')
    # presumably the path argument makes VGG_16_112 load those weights - verify
    model_systole = VGG_16_112('vgg16_weights_112/weights_systole_best.hdf5')
    model_diastole =VGG_16_112('vgg16_weights_112/weights_diastole_best.hdf5')
    #model_systole = VGG_16_112()
    #model_diastole = VGG_16_112()

    print('Loading training data...')
    X, y = load_train_data()

    # pre-processing is disabled in this variant
    # print('Pre-processing images...')
    # X = preprocess(X)

    # split to training and test; the training images are named X_train_aug
    # here even though augmentation happens later, inside datagen.flow
    X_train_aug, y_train, X_test, y_test = split_data(X, y, split_ratio=0.2)
    #X_train_aug = X_train

    nb_epoch = 100
    epochs_per_iter = 1  # only referenced by the retired fit()-based path
    batch_size = 32  # used for predict() only; flow() is called without a batch size
    calc_crps = 1  # calculate CRPS every n-th iteration (set to 0 if CRPS estimation is not needed)

    # remember min val. losses (best iterations), used as sigmas for submission
    min_val_loss_systole = sys.float_info.max
    min_val_loss_diastole = sys.float_info.max

    datagen = ImageDataGenerator(
        featurewise_center=True,  # set input mean to 0 over the dataset
        samplewise_center=False,  # set each sample mean to 0
        featurewise_std_normalization=True,  # divide inputs by std of the dataset
        samplewise_std_normalization=False,  # divide each input by its std
        zca_whitening=False,  # apply ZCA whitening
        rotation_range=90,  # randomly rotate images in the range (degrees, 0 to 180)
        width_shift_range=0.1,  # randomly shift images horizontally (fraction of total width)
        height_shift_range=0.1,  # randomly shift images vertically (fraction of total height)
        horizontal_flip=True,  # randomly flip images
        vertical_flip=False)  # randomly flip images

    # compute the dataset statistics needed by the feature-wise options
    datagen.fit(X_train_aug)

    for i in range(nb_epoch):
        print('-'*40)
        print('Epoch', i)
        print('-'*40)

        print("Training systole...")
        # batch train with realtime data augmentation
        # NOTE(review): these loops assume flow() stops after one pass over
        # the data; if the installed Keras yields batches forever they never
        # terminate - confirm against the Keras version in use.
        loss_systole = val_loss_systole = 0
        progbar = generic_utils.Progbar(X_train_aug.shape[0])
        for X_batch, Y_batch in datagen.flow(X_train_aug, y_train[:,0]):
            loss_systole = model_systole.train_on_batch(X_batch, Y_batch)
            # keep the last element of the returned value as the batch loss
            loss_systole = loss_systole[-1]
            progbar.add(X_batch.shape[0], values=[("train loss", loss_systole)])

        print("Testing systole...")
        # test time!
        # NOTE(review): validation batches also go through datagen, so they
        # receive the same random augmentation as training batches, and
        # val_loss_systole ends up holding only the LAST batch's loss (not an
        # epoch average). That value drives best-weight selection and the
        # sigma below.
        progbar = generic_utils.Progbar(X_test.shape[0])
        for X_batch, Y_batch in datagen.flow(X_test, y_test[:,0]):
            val_loss_systole = model_systole.test_on_batch(X_batch, Y_batch)
            val_loss_systole = val_loss_systole[-1]
            progbar.add(X_batch.shape[0], values=[("test loss", val_loss_systole)])

        print("Training diastole...")
        loss_diastole = val_loss_diastole = 0
        progbar = generic_utils.Progbar(X_train_aug.shape[0])
        for X_batch, Y_batch in datagen.flow(X_train_aug, y_train[:,1]):
            loss_diastole = model_diastole.train_on_batch(X_batch, Y_batch)
            loss_diastole = loss_diastole[-1]
            progbar.add(X_batch.shape[0], values=[("train loss", loss_diastole)])

        print("Testing diastole...")
        # test time!
        progbar = generic_utils.Progbar(X_test.shape[0])
        for X_batch, Y_batch in datagen.flow(X_test, y_test[:,1]):
            val_loss_diastole = model_diastole.test_on_batch(X_batch, Y_batch)
            val_loss_diastole = val_loss_diastole[-1]
            progbar.add(X_batch.shape[0], values=[("test loss", val_loss_diastole)])

        # The earlier model.fit(...)-based training, which read the losses
        # from hist.history['loss'] / ['val_loss'], was replaced by the
        # manual batch loops above.

        if calc_crps > 0 and i % calc_crps == 0:
            print('Evaluating CRPS...')
            # NOTE(review): predict() receives the raw arrays, while training
            # used batches transformed by datagen - confirm the mismatch is
            # intended.
            pred_systole = model_systole.predict(X_train_aug, batch_size=batch_size, verbose=1)
            pred_diastole = model_diastole.predict(X_train_aug, batch_size=batch_size, verbose=1)
            val_pred_systole = model_systole.predict(X_test, batch_size=batch_size, verbose=1)
            val_pred_diastole = model_diastole.predict(X_test, batch_size=batch_size, verbose=1)

            # CDF for train and test data (actually a step function)
            cdf_train = real_to_cdf(np.concatenate((y_train[:, 0], y_train[:, 1])))
            cdf_test = real_to_cdf(np.concatenate((y_test[:, 0], y_test[:, 1])))

            # CDF for predicted data, using the last-batch losses as sigmas
            cdf_pred_systole = real_to_cdf(pred_systole, loss_systole)
            cdf_pred_diastole = real_to_cdf(pred_diastole, loss_diastole)
            cdf_val_pred_systole = real_to_cdf(val_pred_systole, val_loss_systole)
            cdf_val_pred_diastole = real_to_cdf(val_pred_diastole, val_loss_diastole)

            # evaluate CRPS on training data
            crps_train = crps(cdf_train, np.concatenate((cdf_pred_systole, cdf_pred_diastole)))
            print('CRPS(train) = {0}'.format(crps_train))

            # evaluate CRPS on test data
            crps_test = crps(cdf_test, np.concatenate((cdf_val_pred_systole, cdf_val_pred_diastole)))
            print('CRPS(test) = {0}'.format(crps_test))

        print('Saving weights...')
        # save weights so they can be loaded later
        model_systole.save_weights('vgg16_weights_112/weights_systole.hdf5', overwrite=True)
        model_diastole.save_weights('vgg16_weights_112/weights_diastole.hdf5', overwrite=True)

        # for best (lowest) val losses, save weights
        if val_loss_systole < min_val_loss_systole:
            min_val_loss_systole = val_loss_systole
            model_systole.save_weights('vgg16_weights_112/weights_systole_best.hdf5', overwrite=True)

        if val_loss_diastole < min_val_loss_diastole:
            min_val_loss_diastole = val_loss_diastole
            model_diastole.save_weights('vgg16_weights_112/weights_diastole_best.hdf5', overwrite=True)

        # save best (lowest) val losses in file (to be later used for generating submission)
        with open('val_loss.txt', mode='w+') as f:
            f.write(str(min_val_loss_systole))
            f.write('\n')
            f.write(str(min_val_loss_diastole))
def train():
    """
    Training systole and diastole models.

    Each iteration re-augments the training images (rotation, then shift),
    fits both models for ``epochs_per_iter`` epochs, optionally evaluates
    CRPS, and then:

    * saves the latest and, on improvement, the best weights under ``MODELS``;
    * rewrites ``MODELS + 'val_loss.txt'`` with the lowest validation losses;
    * appends one tab-separated row to ``STATS + 'RMSE_CRPS.txt'`` (columns:
      train_RMSE_d, train_RMSE_s, test_RMSE_d, test_RMSE_s, train_crps,
      test_crps) and calls ``write_images(STATS)``;
    * every 100th iteration copies the best weights and ``val_loss.txt`` into
      a fresh submission folder and runs ``submission.py`` on it.

    The CRPS columns are ``nan`` on iterations where CRPS was not evaluated.
    """
    # local import: only needed for launching the submission script
    import subprocess

    print('Loading and compiling models...')
    model_systole = get_model(img_size)
    model_diastole = get_model(img_size)

    print('Loading training data...')
    X, y = load_train_data()

    print('Pre-processing images...')
    X = preprocess(X)

    # split to training and test
    X_train, y_train, X_test, y_test = split_data(X, y, split_ratio=0.2)

    nb_iter = 300
    epochs_per_iter = 1
    batch_size = 64
    calc_crps = 1  # calculate CRPS every n-th iteration (set to 0 if CRPS estimation is not needed)

    # remember min val. losses (best iterations), used as sigmas for submission
    min_val_loss_systole = sys.float_info.max
    min_val_loss_diastole = sys.float_info.max

    if not os.path.exists(STATS):
        os.makedirs(STATS)

    # start a fresh stats file containing only the header row
    with open(STATS + 'RMSE_CRPS.txt', 'w') as f:
        names = ['train_RMSE_d', 'train_RMSE_s', 'test_RMSE_d', 'test_RMSE_s',
                 'train_crps', 'test_crps']
        f.write('\t'.join([str(name) for name in names]) + '\n')

    print('-'*50)
    print('Training...')
    print('-'*50)

    for i in range(nb_iter):
        print('-'*50)
        print('Iteration {0}/{1}'.format(i + 1, nb_iter))
        print('-'*50)

        # Reset per iteration so the stats row stays well-defined when CRPS
        # evaluation is skipped (calc_crps != 1) or disabled (calc_crps == 0).
        crps_train = float('nan')
        crps_test = float('nan')

        print('Augmenting images - rotations')
        X_train_aug = rotation_augmentation(X_train, 15)
        print('Augmenting images - shifts')
        X_train_aug = shift_augmentation(X_train_aug, 0.1, 0.1)

        print('Fitting systole model...')
        hist_systole = model_systole.fit(X_train_aug, y_train[:, 0], shuffle=True,
                                         nb_epoch=epochs_per_iter,
                                         batch_size=batch_size,
                                         validation_data=(X_test, y_test[:, 0]))

        print('Fitting diastole model...')
        hist_diastole = model_diastole.fit(X_train_aug, y_train[:, 1], shuffle=True,
                                           nb_epoch=epochs_per_iter,
                                           batch_size=batch_size,
                                           validation_data=(X_test, y_test[:, 1]))

        # sigmas for predicted data, actually loss function values (RMSE)
        loss_systole = hist_systole.history['loss'][-1]
        loss_diastole = hist_diastole.history['loss'][-1]
        val_loss_systole = hist_systole.history['val_loss'][-1]
        val_loss_diastole = hist_diastole.history['val_loss'][-1]

        if calc_crps > 0 and i % calc_crps == 0:
            print('Evaluating CRPS...')
            pred_systole = model_systole.predict(X_train, batch_size=batch_size, verbose=1)
            pred_diastole = model_diastole.predict(X_train, batch_size=batch_size, verbose=1)
            val_pred_systole = model_systole.predict(X_test, batch_size=batch_size, verbose=1)
            val_pred_diastole = model_diastole.predict(X_test, batch_size=batch_size, verbose=1)

            # CDF for train and test data (actually a step function)
            cdf_train = real_to_cdf(np.concatenate((y_train[:, 0], y_train[:, 1])))
            cdf_test = real_to_cdf(np.concatenate((y_test[:, 0], y_test[:, 1])))

            # CDF for predicted data
            cdf_pred_systole = real_to_cdf(pred_systole, loss_systole)
            cdf_pred_diastole = real_to_cdf(pred_diastole, loss_diastole)
            cdf_val_pred_systole = real_to_cdf(val_pred_systole, val_loss_systole)
            cdf_val_pred_diastole = real_to_cdf(val_pred_diastole, val_loss_diastole)

            # evaluate CRPS on training data
            crps_train = crps(cdf_train, np.concatenate((cdf_pred_systole, cdf_pred_diastole)))
            print('CRPS(train) = {0}'.format(crps_train))

            # evaluate CRPS on test data
            crps_test = crps(cdf_test, np.concatenate((cdf_val_pred_systole, cdf_val_pred_diastole)))
            print('CRPS(test) = {0}'.format(crps_test))

        print('Saving weights...')
        # save weights so they can be loaded later
        model_systole.save_weights(MODELS + 'weights_systole.hdf5', overwrite=True)
        model_diastole.save_weights(MODELS + 'weights_diastole.hdf5', overwrite=True)

        # for best (lowest) val losses, save weights
        if val_loss_systole < min_val_loss_systole:
            min_val_loss_systole = val_loss_systole
            model_systole.save_weights(MODELS + 'weights_systole_best.hdf5', overwrite=True)

        if val_loss_diastole < min_val_loss_diastole:
            min_val_loss_diastole = val_loss_diastole
            model_diastole.save_weights(MODELS + 'weights_diastole_best.hdf5', overwrite=True)

        # save best (lowest) val losses in file (to be later used for generating submission)
        with open(MODELS + 'val_loss.txt', mode='w+') as f:
            f.write(str(min_val_loss_systole))
            f.write('\n')
            f.write(str(min_val_loss_diastole))

        # one stats row per iteration, in the same column order as the header
        with open(STATS + 'RMSE_CRPS.txt', 'a') as f:
            rmse_values = [loss_diastole, loss_systole, val_loss_diastole, val_loss_systole]
            crps_values = [crps_train, crps_test]
            f.write('\t'.join([str(val) for val in rmse_values + crps_values]) + '\n')

        print('Saving stats images...')
        write_images(STATS)

        # every 100th iteration: snapshot the best model and build a submission
        if i != 0 and (i + 1) % 100 == 0:
            print('Submitting learned model....')
            SUBMISSION_FOLDER = (SUBMISSION + preproc_type + "/" + model_name + "/" +
                                 get_name() + "_ITERS" + str(i + 1) + "/")
            if not os.path.exists(SUBMISSION_FOLDER):
                os.makedirs(SUBMISSION_FOLDER)
            copyfile(MODELS + 'weights_systole_best.hdf5',
                     SUBMISSION_FOLDER + 'weights_systole_best.hdf5')
            copyfile(MODELS + 'weights_diastole_best.hdf5',
                     SUBMISSION_FOLDER + 'weights_diastole_best.hdf5')
            copyfile(MODELS + 'val_loss.txt', SUBMISSION_FOLDER + 'val_loss.txt')
            # Argument list instead of a shell string, so folder names
            # containing spaces or shell metacharacters are passed intact.
            subprocess.call(['python', 'submission.py',
                             preproc_type, model_name, SUBMISSION_FOLDER])
import pickle

import pandas as pd

from train import extract_features
from utils import real_to_cdf


def _load_pickled_model(path):
    """Load a pickled model from *path*, closing the file afterwards."""
    # NOTE: pickle can execute arbitrary code while loading; only use model
    # files produced by a trusted training run.
    # Binary mode is required for pickle data on Python 3.
    with open(path, 'rb') as handle:
        return pickle.load(handle)


if __name__ == '__main__':
    # Build submission.csv: two rows per study (Diastole, then Systole),
    # each an Id followed by 600 CDF values P0..P599.
    metadata = pd.read_csv('data/metadata_validate.csv')
    features = extract_features(metadata).set_index('Id').sort_index()

    diastole_model = _load_pickled_model('diastole.pkl')
    systole_model = _load_pickled_model('systole.pkl')

    diastole = diastole_model.predict(features)
    systole = systole_model.predict(features)

    # a tiny sigma is passed so the point predictions become near-step CDFs
    systole_cdf = real_to_cdf(systole, sigma=1e-10)
    diastole_cdf = real_to_cdf(diastole, sigma=1e-10)

    columns = ['Id'] + ['P%d' % i for i in range(600)]

    # Collect all rows first and build the frame once; growing a DataFrame
    # row by row with .loc re-allocates on every insert.
    rows = []
    for pos, study_id in enumerate(features.index):
        rows.append(['%d_Diastole' % study_id] + diastole_cdf[pos, :].tolist())
        rows.append(['%d_Systole' % study_id] + systole_cdf[pos, :].tolist())

    submission = pd.DataFrame(rows, columns=columns)
    submission.to_csv('submission.csv', index=False)
def build_submission(config):
    """
    Generate the submission file for the trained models.

    Reads these attributes of ``config``:

    * ``systole_weights`` / ``diastole_weights`` - weight files to load;
    * ``val_loss_systole`` / ``val_loss_diastole`` - text files whose first
      line is the validation loss used as sigma for the CDF;
    * ``submission`` - path of the CSV file to write.

    Row ids and the header are taken from
    ``/data/sample_submission_validate.csv``. Ids with no accumulated
    prediction are reported with ``Miss <id>`` and written with the id only.
    """
    model_systole = get_model()
    model_diastole = get_model()

    print('Loading models weights...')
    model_systole.load_weights(config.systole_weights)
    model_diastole.load_weights(config.diastole_weights)

    # load val losses to use as sigmas for CDF
    with open(config.val_loss_systole, 'r') as f:
        val_loss_systole = float(f.readline())

    with open(config.val_loss_diastole, 'r') as f:
        val_loss_diastole = float(f.readline())

    print('Loading validation data...')
    X, ids, mult = load_validation_data()

    batch_size = 32
    print('Predicting on validation data...')
    pred_normed_systole = model_systole.predict(X, batch_size=batch_size, verbose=1)
    pred_normed_diastole = model_diastole.predict(X, batch_size=batch_size, verbose=1)

    print('Normed_systole:', pred_normed_systole.shape)
    print('Normed_diastole:', pred_normed_diastole.shape)
    print('mult:', mult.shape)

    # scale the first output column of each prediction by ``mult``
    pred_systole = pred_normed_systole[:, 0] * mult
    pred_diastole = pred_normed_diastole[:, 0] * mult

    print('systole:', pred_systole.shape)
    print('diastole:', pred_diastole.shape)

    # real predictions to CDF
    cdf_pred_systole = real_to_cdf(pred_systole, val_loss_systole)
    cdf_pred_diastole = real_to_cdf(pred_diastole, val_loss_diastole)

    print('Accumulating results...')
    sub_systole = accumulate_study_results(ids, cdf_pred_systole)
    sub_diastole = accumulate_study_results(ids, cdf_pred_diastole)

    # write to submission file
    print('Writing submission to file...')
    # Both files are managed by ``with`` so they are closed (and the output
    # flushed) even if a row fails to parse halfway through.
    with open('/data/sample_submission_validate.csv') as template_file, \
            open(config.submission, 'w') as out_file:
        fi = csv.reader(template_file)
        fo = csv.writer(out_file, lineterminator='\n')

        # copy the header row unchanged
        fo.writerow(next(fi))
        for line in fi:
            idx = line[0]
            key, target = idx.split('_')
            key = int(key)
            out = [idx]
            if key in sub_systole:
                if target == 'Diastole':
                    out.extend(list(sub_diastole[key][0]))
                else:
                    out.extend(list(sub_systole[key][0]))
            else:
                print('Miss {0}'.format(idx))
            fo.writerow(out)

    print('Done.')
def train():
    """
    Training systole and diastole models.

    Each iteration re-augments the training images (rotation, then shift) and
    fits both models for ``epochs_per_iter`` epochs, with a ``CSVLogger``
    appending to ``training_systole.log`` / ``training_diastole.log``.
    The training losses of all iterations so far are written to
    ``sysloss_history.txt`` / ``dialoss_history.txt``.
    It then optionally evaluates CRPS (printing the raw predictions), saves
    the latest weights, saves the best weights when the validation loss
    improves, and rewrites ``val_loss.txt`` with the lowest validation
    losses seen.
    """
    print('Loading and compiling models...')
    model_systole = get_model()
    model_diastole = get_model()

    print('Loading training data...')
    X, y = load_train_data()

    print('Pre-processing images...')
    X = preprocess(X)

    # split to training and test
    X_train, y_train, X_test, y_test = split_data(X, y, split_ratio=0.2)

    # number of iterations was previously 200
    nb_iter = 250
    epochs_per_iter = 1
    # batch size was 32; reduced to 12 for test #3 because the run was
    # processed on a laptop
    batch_size = 12
    calc_crps = 1  # calculate CRPS every n-th iteration (set to 0 if CRPS estimation is not needed)

    # remember min val. losses (best iterations), used as sigmas for submission
    min_val_loss_systole = sys.float_info.max
    min_val_loss_diastole = sys.float_info.max

    # Training losses accumulated over ALL iterations. Saving only the
    # current fit's history would leave a single value in the history files,
    # because each fit runs for epochs_per_iter epochs.
    dialoss_history = []
    sysloss_history = []

    print('-' * 50)
    print('Training...')
    print('-' * 50)

    for i in range(nb_iter):
        print('-' * 50)
        print('Iteration {0}/{1}'.format(i + 1, nb_iter))
        print('-' * 50)

        print('Augmenting images - rotations')
        X_train_aug = rotation_augmentation(X_train, 15)
        print('Augmenting images - shifts')
        X_train_aug = shift_augmentation(X_train_aug, 0.1, 0.1)

        csv_logger_diastole = CSVLogger('training_diastole.log', append=True, separator=';')
        csv_logger_systole = CSVLogger('training_systole.log', append=True, separator=';')

        print('Fitting systole model...')
        hist_systole = model_systole.fit(X_train_aug, y_train[:, 0], shuffle=True,
                                         nb_epoch=epochs_per_iter,
                                         batch_size=batch_size,
                                         validation_data=(X_test, y_test[:, 0]),
                                         callbacks=[csv_logger_systole])

        print('Fitting diastole model...')
        hist_diastole = model_diastole.fit(X_train_aug, y_train[:, 1], shuffle=True,
                                           nb_epoch=epochs_per_iter,
                                           batch_size=batch_size,
                                           validation_data=(X_test, y_test[:, 1]),
                                           callbacks=[csv_logger_diastole])

        # extend the running histories and rewrite the files in full
        dialoss_history.extend(hist_diastole.history["loss"])
        sysloss_history.extend(hist_systole.history["loss"])
        np.savetxt("dialoss_history.txt", np.array(dialoss_history), delimiter=",")
        np.savetxt("sysloss_history.txt", np.array(sysloss_history), delimiter=",")

        # sigmas for predicted data, actually loss function values (RMSE)
        loss_systole = hist_systole.history['loss'][-1]
        loss_diastole = hist_diastole.history['loss'][-1]
        val_loss_systole = hist_systole.history['val_loss'][-1]
        val_loss_diastole = hist_diastole.history['val_loss'][-1]

        if calc_crps > 0 and i % calc_crps == 0:
            print('Evaluating CRPS...')
            pred_systole = model_systole.predict(X_train, batch_size=batch_size, verbose=1)
            pred_diastole = model_diastole.predict(X_train, batch_size=batch_size, verbose=1)
            val_pred_systole = model_systole.predict(X_test, batch_size=batch_size, verbose=1)
            val_pred_diastole = model_diastole.predict(X_test, batch_size=batch_size, verbose=1)

            # the values above are the results; dump them for inspection
            print("Pred_diastole:")
            print(pred_diastole)
            print("Pred_systole:")
            print(pred_systole)
            print("Val_pred_sysyole:")
            print(val_pred_systole)
            print("Val_pred_diastole:")
            print(val_pred_diastole)

            # CDF for train and test data (actually a step function)
            cdf_train = real_to_cdf(np.concatenate((y_train[:, 0], y_train[:, 1])))
            cdf_test = real_to_cdf(np.concatenate((y_test[:, 0], y_test[:, 1])))

            # CDF for predicted data
            cdf_pred_systole = real_to_cdf(pred_systole, loss_systole)
            cdf_pred_diastole = real_to_cdf(pred_diastole, loss_diastole)
            cdf_val_pred_systole = real_to_cdf(val_pred_systole, val_loss_systole)
            cdf_val_pred_diastole = real_to_cdf(val_pred_diastole, val_loss_diastole)

            # evaluate CRPS on training data
            crps_train = crps(cdf_train,
                              np.concatenate((cdf_pred_systole, cdf_pred_diastole)))
            print('CRPS(train) = {0}'.format(crps_train))

            # evaluate CRPS on test data
            crps_test = crps(cdf_test,
                             np.concatenate((cdf_val_pred_systole, cdf_val_pred_diastole)))
            print('CRPS(test) = {0}'.format(crps_test))

        print('Saving weights...')
        # save weights so they can be loaded later
        model_systole.save_weights('weights_systole.hdf5', overwrite=True)
        model_diastole.save_weights('weights_diastole.hdf5', overwrite=True)

        # for best (lowest) val losses, save weights
        if val_loss_systole < min_val_loss_systole:
            min_val_loss_systole = val_loss_systole
            model_systole.save_weights('weights_systole_best.hdf5', overwrite=True)

        if val_loss_diastole < min_val_loss_diastole:
            min_val_loss_diastole = val_loss_diastole
            model_diastole.save_weights('weights_diastole_best.hdf5', overwrite=True)

        # save best (lowest) val losses in file (to be later used for generating submission)
        with open('val_loss.txt', mode='w+') as f:
            f.write(str(min_val_loss_systole))
            f.write('\n')
            f.write(str(min_val_loss_diastole))