def create_loo_train_test_set(data_src, data_stem_sm, data_stem_lg, train_ids, test_id):
    """Build a leave-one-out train/test split at two patch sizes.

    For every id in ``train_ids`` the data set named ``<stem><id>`` is loaded
    from ``data_src`` and the per-id arrays are concatenated. Small patches are
    read with ``createShapeData.get_int_paired_format_flattened``; large
    patches are read with ``createShapeData.get_int_paired_format``.

    Args:
        data_src: Location handed unchanged to the ``createShapeData`` loaders.
        data_stem_sm: Name prefix of the small-patch data sets.
        data_stem_lg: Name prefix of the large-patch data sets.
        train_ids: Iterable of ids whose data sets form the training split.
        test_id: Id of the held-out data set.

    Returns:
        Tuple ``(x_tr_sm, x_test_sm, y_tr_sm, y_test_sm, x_tr_all_lg,
        x_test_lg)``. Labels are only returned for the small patches; the
        large-patch labels are loaded by the reader but not returned.

    Raises:
        ValueError: Propagated from ``np.concatenate`` when ``train_ids`` is
            empty.
    """
    # The ids are walked twice (small patches, then large patches). Materialise
    # them once so a one-shot iterator does not leave the second pass empty.
    train_ids = list(train_ids)

    # get smaller patches first
    sm_pairs = [
        createShapeData.get_int_paired_format_flattened(data_src, data_stem_sm + str(tid))
        for tid in train_ids
    ]
    x_tr_sm = np.concatenate([x_part for x_part, _ in sm_pairs])
    y_tr_sm = np.concatenate([y_part for _, y_part in sm_pairs])

    x_test_sm, y_test_sm = createShapeData.get_int_paired_format_flattened(
        data_src, data_stem_sm + str(test_id))

    # get larger patches next; only the inputs are kept, so the labels are
    # dropped straight away instead of being accumulated and then discarded.
    lg_inputs = []
    for tid in train_ids:
        x_part, _ = createShapeData.get_int_paired_format(data_src, data_stem_lg + str(tid))
        lg_inputs.append(x_part)
    x_tr_all_lg = np.concatenate(lg_inputs)

    x_test_lg, _ = createShapeData.get_int_paired_format(
        data_src, data_stem_lg + str(test_id))

    return x_tr_sm, x_test_sm, y_tr_sm, y_test_sm, x_tr_all_lg, x_test_lg
def create_loo_train_test_set(src, data_stem, train_ids, test_id):
    """Assemble a leave-one-out split from per-id data sets.

    Each training id ``tid`` is loaded from the data set ``data_stem + str(tid)``
    via ``createShapeData.get_int_paired_format`` and the results are
    concatenated; the data set for ``test_id`` becomes the test split.

    Args:
        src: Location handed unchanged to the loader.
        data_stem: Name prefix shared by all data sets.
        train_ids: Ids that make up the training split.
        test_id: Id of the held-out data set.

    Returns:
        Tuple ``(x_tr_all, x_test, y_tr_all, y_test)``.
    """
    loaded = [
        createShapeData.get_int_paired_format(src, data_stem + str(tid))
        for tid in train_ids
    ]
    x_tr_all = np.concatenate([inputs for inputs, _ in loaded])
    y_tr_all = np.concatenate([labels for _, labels in loaded])

    held_out_name = data_stem + str(test_id)
    x_test, y_test = createShapeData.get_int_paired_format(src, held_out_name)

    return x_tr_all, x_test, y_tr_all, y_test
def train_on_dsea_data(model, to_freeze=False, no_to_freeze=2):
    """Fine-tune ``model`` on the DSEA patch pairs, save it and print its AUC.

    The DSEA pairs are loaded, split 75/25 into train/test, the model is
    recompiled with ``contrastive_loss`` and ``RMSprop`` and fitted with early
    stopping on the validation loss. The fitted model is written to disk and
    an AUC figure for the held-out split is printed.

    Args:
        model: Model to fine-tune; it is recompiled and modified in place.
        to_freeze: When true, mark the first ``no_to_freeze`` sub-layers of
            ``model.layers[2]`` as non-trainable before compiling. Defaults to
            ``False``, which matches the previous hard-coded behaviour.
        no_to_freeze: Number of leading sub-layers to freeze.

    Returns:
        None.
    """
    # first freeze if necessary
    if to_freeze:
        # freeze the feature generation layers - no need to train these.
        # assumes model.layers[2] is the shared base network - TODO confirm
        for i in range(no_to_freeze):
            layer = model.layers[2].layers[i]
            # The attribute is lowercase `trainable`; the previous
            # `.Trainable` only created an unrelated attribute and froze
            # nothing.
            layer.trainable = False
            print('frozen: ' + str(layer))

    # load dsea patch data
    dsea_src = '/home/nripesh/Dropbox/research_matlab/feature_tracking/generating_train_data_forNNet/' \
               'dsea_data_based_train_patches/'
    data_dsea_name = 'dsea_data_patch_pairs_augm_size_9'
    save_dsea_name = 'dsea_trf_and_augm_match_model_k3.h5'

    x_d, y_d = createShapeData.get_int_paired_format(dsea_src, data_dsea_name)
    x_train_d, x_test_d, y_train_d, y_test_d = train_test_split(x_d, y_d, test_size=.25)

    # compile and train again (compiling after the freeze makes it take effect)
    nb_epoch_dsea = 15
    opt_func = RMSprop()
    model.compile(loss=contrastive_loss, optimizer=opt_func)
    model.fit([x_train_d[:, 0], x_train_d[:, 1]], y_train_d, validation_split=.25,
              batch_size=32, verbose=2, nb_epoch=nb_epoch_dsea,
              callbacks=[EarlyStopping(monitor='val_loss', patience=2)])
    model.save('/home/nripesh/PycharmProjects/Siamese/real_data/' + save_dsea_name)

    # Score the held-out split only; the former training-set prediction was
    # never used.
    pred_ts = model.predict([x_test_d[:, 0], x_test_d[:, 1]])

    # get auc scores
    # NOTE(review): if this is sklearn.metrics.roc_curve it returns
    # (fpr, tpr, thresholds), so the two names below are bound in swapped
    # order and the printed value is 1 - AUC of the raw predictions. That may
    # be deliberate if the model outputs a distance (smaller = match) -
    # confirm before changing; the computation is left as it was.
    tpr, fpr, _ = roc_curve(y_test_d, pred_ts)
    roc_auc = auc(fpr, tpr)
    print('AUC score: ' + str(roc_auc))
def train_from_leuven_data():
    """Train a siamese model on the Leuven data sets and return it.

    Loads the data sets ``x_data_intensity_comb_<id>`` for the training ids,
    builds a two-branch network that shares one ``create_cnn_network``
    instance, trains it with ``contrastive_loss``, saves the model, evaluates
    it on the test data set and appends a one-line AUC summary to
    ``auc_scores_summary_transfer_learning.txt`` (also printed).

    Returns:
        The trained model.
    """
    src = '/home/nripesh/Dropbox/research_matlab/feature_tracking/generating_train_data_forNNet/'
    data_stem = 'x_data_intensity_comb_'
    save_name = 'leuven_model_to_transfer_k3.h5'
    tr_epoch = 5

    train_ids = [1, 2, 3, 4, 5]
    # NOTE(review): test id 2 is also one of the training ids, so the score
    # below is measured on data the model was trained on - confirm intended.
    test_id = 2

    loaded = [
        createShapeData.get_int_paired_format(src, data_stem + str(tid))
        for tid in train_ids
    ]
    x_tr_all = np.concatenate([x_part for x_part, _ in loaded])
    y_tr_all = np.concatenate([y_part for _, y_part in loaded])

    # test data
    test_name = data_stem + str(test_id)
    x_test, y_test = createShapeData.get_int_paired_format(src, test_name)

    # Both inputs go through the same base_network instance, so the two
    # branches share their weights.
    input_dim = x_tr_all.shape[2:]
    input_a = Input(shape=input_dim)
    input_b = Input(shape=input_dim)
    base_network = create_cnn_network(input_dim)
    processed_a = base_network(input_a)
    processed_b = base_network(input_b)
    distance = Lambda(euclidean_distance, output_shape=eucl_dist_output_shape)(
        [processed_a, processed_b])
    model_tr = Model(input=[input_a, input_b], output=distance)

    # train
    opt_func = RMSprop()
    model_tr.compile(loss=contrastive_loss, optimizer=opt_func)
    model_tr.fit([x_tr_all[:, 0], x_tr_all[:, 1]], y_tr_all, validation_split=.30,
                 batch_size=128, verbose=2, nb_epoch=tr_epoch,
                 callbacks=[EarlyStopping(monitor='val_loss', patience=2)])
    model_tr.save('/home/nripesh/PycharmProjects/Siamese/real_data/' + save_name)

    # test
    pred_ts = model_tr.predict([x_test[:, 0], x_test[:, 1]])

    # get auc scores
    # NOTE(review): if this is sklearn.metrics.roc_curve it returns
    # (fpr, tpr, thresholds), so the names below are bound in swapped order
    # and the reported value is 1 - AUC of the raw predictions. That may be
    # deliberate if the model outputs a distance (smaller = match) - confirm
    # before changing; the computation is left as it was.
    tpr, fpr, _ = roc_curve(y_test, pred_ts)
    roc_auc = auc(fpr, tpr)

    # Build the summary once and reuse it for the log file and the console.
    summary = ("endo, trained on: " + str(train_ids) + ", tested on: " + str(test_id)
               + ", auc: " + str(roc_auc) + "\n")
    # The context manager closes the file even if the write fails.
    with open('auc_scores_summary_transfer_learning.txt', 'a') as target:
        target.write(summary)
    print(summary)

    return model_tr
# activation='relu')) # # seq.add(MaxPooling3D(pool_size=(2, 2, 2), dim_ordering='th')) # downsample # seq.add(Dropout(.25)) # dense layers seq.add(Flatten()) seq.add(Dense(100, activation='relu')) seq.add(Dropout(0.2)) seq.add(Dense(50, activation='relu')) return seq # load data src = '/home/nripesh/Dropbox/research_matlab/feature_tracking/matconvnet-1.0-beta21/cardiac_data/' data_name = 'x_data_intensity_mixed' x, y = createShapeData.get_int_paired_format(src, data_name) x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=.25) # because we re-use the same instance `base_network`, # the weights of the network # will be shared across the two branches input_dim = x_train.shape[2:] input_a = Input(shape=input_dim) input_b = Input(shape=input_dim) base_network = create_cnn_network(input_dim) processed_a = base_network(input_a) processed_b = base_network(input_b) distance = Lambda(euclidean_distance, output_shape=eucl_dist_output_shape)( [processed_a, processed_b])
# train_name = 'x_data_intensity_endo_1_2' # test_name = 'x_data_intensity_endo_3' # x_train, y_train = createShapeData.get_int_paired_format(src, train_name) # x_test, y_test = createShapeData.get_int_paired_format(src, test_name) # model = train_model(x_train, y_train, 12) # run_test(model, x_test, y_test, 1, 2, 3) # # # load 1 and 3 and test on 2 # train_name = 'x_data_intensity_endo_1_3' # test_name = 'x_data_intensity_endo_2' # x_train, y_train = createShapeData.get_int_paired_format(src, train_name) # x_test, y_test = createShapeData.get_int_paired_format(src, test_name) # model = train_model(x_train, y_train, 12) # run_test(model, x_test, y_test, 1, 3, 2) # load 2 and 3 and test on 1 train_name = 'x_data_intensity_endo_2_3' test_name = 'x_data_intensity_endo_1' x_train, y_train = createShapeData.get_int_paired_format(src, train_name) x_test, y_test = createShapeData.get_int_paired_format(src, test_name) model = train_model(x_train, y_train, 12) run_test(model, x_test, y_test, 2, 3, 1) # final model, train on all group train_name = 'x_data_intensity_endo_all' x_train, y_train = createShapeData.get_int_paired_format(src, train_name) x_test, y_test = createShapeData.get_int_paired_format(src, test_name) model = train_model(x_train, y_train, 12) print("endo trained on: all data") # run_test(model, x_test, y_test, 2, 3, 1)