def test(model_dir, data_dir, results_subdir, random_seed, resolution):
    np.random.seed(random_seed)
    tf.set_random_seed(np.random.randint(1 << 31))
    session_conf = tf.ConfigProto(intra_op_parallelism_threads=1,
                                  inter_op_parallelism_threads=1)
    sess = tf.Session(graph=tf.get_default_graph(), config=session_conf)
    set_session(sess)

    # parser config
    config_file = model_dir + "/config.ini"
    print("Config File Path:", config_file, flush=True)
    assert os.path.isfile(config_file)
    cp = ConfigParser()
    cp.read(config_file)

    # default config
    image_dimension = cp["TRAIN"].getint("image_dimension")
    batch_size = cp["TEST"].getint("batch_size")
    use_best_weights = cp["TEST"].getboolean("use_best_weights")

    print("** DenseNet input resolution:", image_dimension, flush=True)
    print("** GAN image resolution:", resolution, flush=True)

    log2_record = int(np.log2(resolution))
    record_file_ending = "*" + str(log2_record) + ".tfrecords"
    print("** Resolution ", resolution, " corresponds to ", record_file_ending,
          " TFRecord file.", flush=True)

    output_dir = os.path.join(
        results_subdir,
        "classification_results_res_" + str(2**log2_record) + "/test")
    print("Output Directory:", output_dir, flush=True)
    if not os.path.isdir(output_dir):
        os.makedirs(output_dir)

    if use_best_weights:
        print("** Using BEST weights", flush=True)
        model_weights_path = os.path.join(
            results_subdir,
            "classification_results_res_" + str(2**log2_record) +
            "/train/best_weights.h5")
    else:
        print("** Using LAST weights", flush=True)
        model_weights_path = os.path.join(
            results_subdir,
            "classification_results_res_" + str(2**log2_record) +
            "/train/weights.h5")

    # get test sample count
    shutil.copy(results_subdir[:-4] + "/test/test.csv", output_dir)
    tfrecord_dir_te = os.path.join(data_dir, "test")
    class_names = get_class_names(output_dir, "test")

    test_counts, _ = get_sample_counts(output_dir, "test", class_names)

    # get indices (all of csv file for validation)
    print("** test counts:", test_counts, flush=True)

    # compute steps
    test_steps = int(np.floor(test_counts / batch_size))
    print("** test_steps:", test_steps, flush=True)

    # Get Model
    # ------------------------------------
    input_shape = (image_dimension, image_dimension, 3)
    img_input = Input(shape=input_shape)
    base_model = DenseNet121(include_top=False,
                             weights=None,
                             input_tensor=img_input,
                             input_shape=input_shape,
                             pooling="avg")
    x = base_model.output
    predictions = Dense(len(class_names),
                        activation="sigmoid",
                        name="predictions")(x)
    model = Model(inputs=img_input, outputs=predictions)

    print(" ** load model from:", model_weights_path, flush=True)
    model.load_weights(model_weights_path)
    # ------------------------------------

    print("** load test generator **", flush=True)
    test_seq = TFWrapper(tfrecord_dir=tfrecord_dir_te,
                         record_file_endings=record_file_ending,
                         batch_size=batch_size,
                         model_target_size=(image_dimension, image_dimension),
                         steps=None,
                         augment=False,
                         shuffle=False,
                         prefetch=True,
                         repeat=False)

    print("** make prediction **", flush=True)
    test_seq.initialise()  # make sure the iterator is (re-)initialised
    y_hat = model.predict_generator(test_seq, workers=0)
    test_seq.initialise()  # re-initialise before reading the ground truth
    y = test_seq.get_y_true()

    test_log_path = os.path.join(output_dir, "test.log")
    print("** write log to", test_log_path, flush=True)
    aurocs = []
    tpr_fpr_thr = []
    with open(test_log_path, "w") as f:
        for i in range(len(class_names)):
            # sklearn's roc_curve returns (fpr, tpr, thresholds);
            # store the columns as (TPR, FPR, THR) to match the save message below
            fpr, tpr, thr = roc_curve(y[:, i], y_hat[:, i])
            roc_rates = np.concatenate(
                (tpr.reshape(-1, 1), fpr.reshape(-1, 1), thr.reshape(-1, 1)),
                axis=1)
            tpr_fpr_thr.append(roc_rates)
            try:
                score = roc_auc_score(y[:, i], y_hat[:, i])
                if score < 0.5:
                    score = 1. - score
                aurocs.append(score)
            except ValueError:
                score = 0
            f.write(str(class_names[i]) + " : " + str(score) + "\n")

        mean_auroc = np.mean(aurocs)
        f.write("-------------------------\n")
        f.write("mean auroc: " + str(mean_auroc) + "\n")
        print("mean auroc:", mean_auroc, flush=True)

    roc_char = np.asarray(tpr_fpr_thr)
    np.save(output_dir + "/roc_char.npy", roc_char)
    print("Saved ROC data (TPR, FPR, THR) to:", output_dir + "/roc_char.npy",
          flush=True)
def nn(model_dir, data_dir, results_subdir, random_seed, resolution):
    np.random.seed(random_seed)
    tf.set_random_seed(np.random.randint(1 << 31))
    session_conf = tf.ConfigProto(intra_op_parallelism_threads=1,
                                  inter_op_parallelism_threads=1)
    sess = tf.Session(graph=tf.get_default_graph(), config=session_conf)
    set_session(sess)

    # parser config
    config_file = model_dir + "/config.ini"
    print("Config File Path:", config_file, flush=True)
    assert os.path.isfile(config_file)
    cp = ConfigParser()
    cp.read(config_file)

    output_dir = os.path.join(results_subdir, "classification_results/nn")
    train_outdir = os.path.join(results_subdir, "classification_results/train")
    print("Output Directory:", output_dir, flush=True)

    # default config
    image_dimension = cp["TRAIN"].getint("image_dimension")
    gan_resolution = resolution
    batch_size = cp["TEST"].getint("batch_size")
    use_best_weights = cp["TEST"].getboolean("use_best_weights")

    if use_best_weights:
        print("** Using BEST weights", flush=True)
        model_weights_path = os.path.join(
            results_subdir, "classification_results/nn/best_weights.h5")
    else:
        print("** Using LAST weights", flush=True)
        model_weights_path = os.path.join(
            results_subdir, "classification_results/nn/weights.h5")

    print("** DenseNet Input Resolution:", image_dimension, flush=True)
    print("** GAN Image Resolution:", gan_resolution, flush=True)

    tfrecord_dir_tr = os.path.join(data_dir, "train")
    tfrecord_dir_te = os.path.join(results_subdir, "inference/test")

    # Get class names
    class_names = get_class_names(train_outdir, "train")

    counts, _ = get_sample_counts(train_outdir, "train", class_names)

    # get indices (all of csv file for validation)
    print("** counts:", counts, flush=True)

    # compute steps
    train_steps = int(np.floor(counts / batch_size))
    print("** t_steps:", train_steps, flush=True)

    log2_record = int(np.log2(gan_resolution))
    record_file_ending = "*" + str(log2_record) + ".tfrecords"
    print("** resolution ", gan_resolution, " corresponds to ",
          record_file_ending, " TFRecord file.", flush=True)

    # Get Model
    # ------------------------------------
    input_shape = (image_dimension, image_dimension, 3)
    img_input = Input(shape=input_shape)
    base_model = DenseNet121(include_top=False,
                             weights=None,
                             input_tensor=img_input,
                             input_shape=input_shape,
                             pooling="avg")
    x = base_model.output
    predictions = Dense(len(class_names),
                        activation="sigmoid",
                        name="predictions")(x)
    model = Model(inputs=img_input, outputs=predictions)

    print(" ** load model from:", model_weights_path, flush=True)
    model.load_weights(model_weights_path)
    # ------------------------------------

    # Extract representation layer output:
    layer_name = 'avg_pool'
    intermediate_layer_model = Model(
        inputs=model.input, outputs=model.get_layer(layer_name).output)
    #intermediate_output = intermediate_layer_model(data)

    def renorm_and_save_npy(x, name):
        imagenet_mean = np.array([0.485, 0.456, 0.406])
        imagenet_std = np.array([0.229, 0.224, 0.225])
        x = x * imagenet_std + imagenet_mean
        save_path = output_dir + "/" + name + ".npy"
        np.save(save_path, x)
        print("** save npy images under: ", save_path, flush=True)

    def save_array(x, name):
        save_path = output_dir + "/" + name + ".npy"
        np.save(save_path, x)
        print("** save npy images under: ", save_path, flush=True)

    # Load test inference images
    test_bs = 200
    print("** load inference images, save random n=", test_bs, flush=True)
    test_seq = TFWrapper(tfrecord_dir=tfrecord_dir_te,
                         record_file_endings=record_file_ending,
                         batch_size=test_bs,
                         model_target_size=(image_dimension, image_dimension),
                         steps=None,
                         augment=False,
                         shuffle=False,
                         prefetch=True,
                         repeat=False)
    test_seq.initialise()
    x, x_orig, x_label = test_seq.__getitem__(0)
    renorm_and_save_npy(x, name="real_inf_224")
    renorm_and_save_npy(x_orig, name="real_inf_256")
    save_array(x_label, name="real_inf_label")

    print("** Compute inf latent rep **", flush=True)
    x_latrep = intermediate_layer_model.predict(x)
    print("** Latent Size: ", x_latrep.shape, flush=True)

    # Load train inference images
    print("** load train generator **", flush=True)
    train_seq = TFWrapper(tfrecord_dir=tfrecord_dir_tr,
                          record_file_endings=record_file_ending,
                          batch_size=batch_size,
                          model_target_size=(image_dimension, image_dimension),
                          steps=train_steps,
                          augment=False,
                          shuffle=False,
                          prefetch=True,
                          repeat=False)
    train_seq.initialise()
    print("** generator loaded **", flush=True)

    # Loop through training data and compute minimums
    H, H_orig = image_dimension, 256
    W, W_orig = image_dimension, 256
    D = 3
    BS = batch_size
    n = test_bs
    LS = x_latrep.shape[1]

    cur_nn_imgs = np.zeros((n, H, W, D))              # Current nn images
    cur_nn_imgs_orig = np.zeros((n, H_orig, W_orig, D))
    cur_nn_labels = np.zeros((n, x_label.shape[1]))
    cur_cos_min = np.ones((n, 1)) * 10000             # Current minimum cosine distance
    time_old = time.time()

    print("** Start nn determination **", flush=True)
    for i in range(0, train_steps):
        # Get batch images and lat. reps
        y, y_orig, y_label = train_seq.__getitem__(i)   # [BS,H,W,D]
        y_latrep = intermediate_layer_model.predict(y)  # [BS,LS]

        #y_reshaped = y.reshape([BS,1,H,W,D]) #Reshape for tiling [BS,1,H,W,D]
        #y_orig_reshaped = y_orig.reshape([BS,1,H_orig,W_orig,D])
        #y_label_reshaped = y_label.reshape([BS,1,x_label.shape[1]])

        y_tiled = np.tile(y, [1, n, 1, 1, 1])  # Tile: [BS,n,H,W,D]
        y_orig_tiled = np.tile(y_orig, [1, n, 1, 1, 1])
        y_label_tiled = np.tile(y_label, [1, n, 1])

        cosdis = np.ones((n, BS)) - cosine_similarity(x_latrep, y_latrep)  # [n,BS]
        argmin_cosdis = np.argmin(cosdis, axis=1)              # [n,1]
        min_cosdis = np.min(cosdis, axis=1).reshape(n, 1)      # [n,1]

        min_y = y_tiled[:, argmin_cosdis].reshape(
            n, H, W, D)  # [n,H,W,D]: Min. cosdis for each inf_img from batch
        min_y_orig = y_orig_tiled[:, argmin_cosdis].reshape(n, H_orig, W_orig, D)
        min_ylabel = y_label_tiled[:, argmin_cosdis].reshape((n, x_label.shape[1]))

        # Indices where the batch minimum cosine distance is smaller than the current one
        t = np.where(min_cosdis < cur_cos_min)
        cur_cos_min[t[0]] = min_cosdis[t[0]]      # Update current cosdis minima
        cur_nn_imgs[t[0]] = min_y[t[0]]           # Update current nn images
        cur_nn_imgs_orig[t[0]] = min_y_orig[t[0]]
        cur_nn_labels[t[0]] = min_ylabel[t[0]]

        if i % 100 == 0 and i > 0:
            time_new = time.time()
            print("Iteration ", i, "/", train_steps,
                  "took %.2f seconds" % (time_new - time_old))
            time_old = time_new
            print("Current mean cos-distance:", np.mean(cur_cos_min))

    print("** Loop Done **", flush=True)
    renorm_and_save_npy(cur_nn_imgs, name="nn_images_224")
    renorm_and_save_npy(cur_nn_imgs_orig, name="nn_images_256")
    save_array(cur_cos_min, name="cosdistance_minimum")
    save_array(cur_nn_labels, name="nn_labels")
def train(model_dir, results_subdir, random_seed, resolution):
    np.random.seed(random_seed)
    tf.set_random_seed(np.random.randint(1 << 31))
    session_conf = tf.ConfigProto(intra_op_parallelism_threads=1,
                                  inter_op_parallelism_threads=1)
    session_conf.gpu_options.allow_growth = True
    sess = tf.Session(graph=tf.get_default_graph(), config=session_conf)
    set_session(sess)

    # parser config
    config_file = model_dir + "/config.ini"
    print("Config File Path:", config_file, flush=True)
    assert os.path.isfile(config_file)
    cp = ConfigParser()
    cp.read(config_file)

    # default config
    base_model_name = cp["DEFAULT"].get("base_model_name")

    # train config
    path_model_base_weights = cp["TRAIN"].get("path_model_base_weights")
    use_trained_model_weights = cp["TRAIN"].getboolean("use_trained_model_weights")
    use_best_weights = cp["TRAIN"].getboolean("use_best_weights")
    output_weights_name = cp["TRAIN"].get("output_weights_name")
    epochs = cp["TRAIN"].getint("epochs")
    batch_size = cp["TRAIN"].getint("batch_size")
    initial_learning_rate = cp["TRAIN"].getfloat("initial_learning_rate")
    image_dimension = cp["TRAIN"].getint("image_dimension")
    patience_reduce_lr = cp["TRAIN"].getint("patience_reduce_lr")
    min_lr = cp["TRAIN"].getfloat("min_lr")
    positive_weights_multiply = cp["TRAIN"].getfloat("positive_weights_multiply")
    patience = cp["TRAIN"].getint("patience")
    samples_per_epoch = cp["TRAIN"].getint("samples_per_epoch")
    reduce_lr = cp["TRAIN"].getfloat("reduce_lr")

    print("** DenseNet input resolution:", image_dimension, flush=True)
    print("** GAN image resolution:", resolution, flush=True)
    print("** Patience epochs", patience, flush=True)
    print("** Samples per epoch:", samples_per_epoch, flush=True)

    log2_record = int(np.log2(resolution))
    record_file_ending = "*" + str(log2_record) + ".tfrecords"
    print("** Resolution ", resolution, " corresponds to ", record_file_ending,
          " TFRecord file.", flush=True)

    output_dir = os.path.join(
        results_subdir,
        "classification_results_res_" + str(2**log2_record) + "/train")
    print("Output Directory:", output_dir, flush=True)
    if not os.path.isdir(output_dir):
        os.makedirs(output_dir)

    # if previously trained weights are used, never re-split
    if use_trained_model_weights:
        print("** use trained model weights **", flush=True)
        training_stats_file = os.path.join(output_dir, ".training_stats.json")
        if os.path.isfile(training_stats_file):
            # TODO: add loading previous learning rate?
            training_stats = json.load(open(training_stats_file))
        else:
            training_stats = {}
    else:
        # start over
        training_stats = {}

    show_model_summary = cp["TRAIN"].getboolean("show_model_summary")
    running_flag_file = os.path.join(output_dir, ".training.lock")
    if os.path.isfile(running_flag_file):
        raise RuntimeError("A process is running in this directory!!!")
    else:
        open(running_flag_file, "a").close()

    try:
        print("backup config file to", output_dir, flush=True)
        shutil.copy(config_file,
                    os.path.join(output_dir, os.path.split(config_file)[1]))

        tfrecord_dir_tr = os.path.join(results_subdir[:-4], "train")
        tfrecord_dir_vl = os.path.join(results_subdir[:-4], "valid")

        shutil.copy(tfrecord_dir_tr + "/train.csv", output_dir)
        shutil.copy(tfrecord_dir_vl + "/valid.csv", output_dir)

        # Get class names
        class_names = get_class_names(output_dir, "train")

        # get train sample counts
        train_counts, train_pos_counts = get_sample_counts(
            output_dir, "train", class_names)
        valid_counts, _ = get_sample_counts(output_dir, "valid", class_names)

        print("Total Training Data:", train_counts, flush=True)
        print("Total Validation Data:", valid_counts, flush=True)

        train_steps = int(min(samples_per_epoch, train_counts) / batch_size)
        print("** train_steps:", train_steps, flush=True)
        validation_steps = int(np.floor(valid_counts / batch_size))
        print("** validation_steps:", validation_steps, flush=True)

        # compute class weights
        print("** compute class weights from training data **", flush=True)
        class_weights = get_class_weights(
            train_counts,
            train_pos_counts,
            multiply=positive_weights_multiply,
        )
        print("** class_weights **", flush=True)
        print(class_weights)

        print("** load model **", flush=True)
        if use_trained_model_weights:
            if use_best_weights:
                model_weights_file = os.path.join(
                    output_dir, "best_" + output_weights_name)
            else:
                model_weights_file = os.path.join(output_dir, output_weights_name)
        else:
            model_weights_file = None

        # Use downloaded weights
        if os.path.isfile(path_model_base_weights):
            base_weights = path_model_base_weights
            print("** Base weights will be loaded.", flush=True)
        else:
            base_weights = None
            print("** No Base weights.", flush=True)

        # Get Model
        # ------------------------------------
        input_shape = (image_dimension, image_dimension, 3)
        img_input = Input(shape=input_shape)
        base_model = DenseNet121(include_top=False,
                                 weights=base_weights,
                                 input_tensor=img_input,
                                 input_shape=input_shape,
                                 pooling="avg")
        x = base_model.output
        predictions = Dense(len(class_names),
                            activation="sigmoid",
                            name="predictions")(x)
        model = Model(inputs=img_input, outputs=predictions)

        if use_trained_model_weights and model_weights_file is not None:
            print("** load model weights_path:", model_weights_file, flush=True)
            model.load_weights(model_weights_file)
        # ------------------------------------

        if show_model_summary:
            model.summary()

        print("** create image generators", flush=True)
        train_seq = TFWrapper(tfrecord_dir=tfrecord_dir_tr,
                              record_file_endings=record_file_ending,
                              batch_size=batch_size,
                              model_target_size=(image_dimension, image_dimension),
                              steps=train_steps,
                              augment=True,
                              shuffle=True,
                              prefetch=True,
                              repeat=True)

        valid_seq = TFWrapper(tfrecord_dir=tfrecord_dir_vl,
                              record_file_endings=record_file_ending,
                              batch_size=batch_size,
                              model_target_size=(image_dimension, image_dimension),
                              steps=None,
                              augment=False,
                              shuffle=False,
                              prefetch=True,
                              repeat=True)

        # Initialise train and valid iterators
        print("** Initialise train and valid iterators", flush=True)
        train_seq.initialise()
        valid_seq.initialise()

        output_weights_path = os.path.join(output_dir, output_weights_name)
        print("** set output weights path to:", output_weights_path, flush=True)

        print("** SINGLE_gpu_model is used!", flush=True)
        model_train = model
        checkpoint = ModelCheckpoint(
            output_weights_path,
            save_weights_only=True,
            save_best_only=False,
            verbose=1,
        )

        print("** compile model with class weights **", flush=True)
        optimizer = Adam(lr=initial_learning_rate)
        model_train.compile(optimizer=optimizer, loss="binary_crossentropy")

        auroc = MultipleClassAUROC(sequence=valid_seq,
                                   class_names=class_names,
                                   weights_path=output_weights_path,
                                   stats=training_stats,
                                   early_stop_p=patience,
                                   learn_rate_p=patience_reduce_lr,
                                   learn_rate_f=reduce_lr,
                                   min_lr=min_lr,
                                   workers=0)
        callbacks = [
            checkpoint,
            TensorBoard(log_dir=os.path.join(output_dir, "logs"),
                        batch_size=batch_size),
            auroc,
        ]

        print("** start training **", flush=True)
        history = model_train.fit_generator(
            generator=train_seq,
            steps_per_epoch=train_steps,
            epochs=epochs,
            validation_data=valid_seq,
            validation_steps=validation_steps,
            callbacks=callbacks,
            class_weight=class_weights,
            workers=0,
            shuffle=False,
        )

        # dump history
        print("** dump history **", flush=True)
        with open(os.path.join(output_dir, "history.pkl"), "wb") as f:
            pickle.dump({
                "history": history.history,
                "auroc": auroc.aurocs,
            }, f)
        print("** done! **", flush=True)

    finally:
        os.remove(running_flag_file)
def cxpl(model_dir, data_dir, results_subdir, random_seed, resolution):
    np.random.seed(random_seed)
    tf.set_random_seed(np.random.randint(1 << 31))
    session_conf = tf.ConfigProto(intra_op_parallelism_threads=1,
                                  inter_op_parallelism_threads=1)
    sess = tf.Session(graph=tf.get_default_graph(), config=session_conf)
    set_session(sess)

    # parser config
    config_file = model_dir + "/config.ini"
    print("Config File Path:", config_file, flush=True)
    assert os.path.isfile(config_file)
    cp = ConfigParser()
    cp.read(config_file)

    output_dir = os.path.join(results_subdir, "classification_results/test")
    print("Output Directory:", output_dir, flush=True)
    if not os.path.isdir(output_dir):
        os.makedirs(output_dir)

    # default config
    image_dimension = cp["TRAIN"].getint("image_dimension")
    gan_resolution = resolution
    batch_size = cp["TEST"].getint("batch_size")
    use_best_weights = cp["TEST"].getboolean("use_best_weights")

    if use_best_weights:
        print("** Using BEST weights", flush=True)
        model_weights_path = os.path.join(
            results_subdir, "classification_results/train/best_weights.h5")
    else:
        print("** Using LAST weights", flush=True)
        model_weights_path = os.path.join(
            results_subdir, "classification_results/train/weights.h5")

    print("** DenseNet Input Resolution:", image_dimension, flush=True)
    print("** GAN Image Resolution:", gan_resolution, flush=True)

    # get test sample count
    test_dir = os.path.join(results_subdir, "inference/test")
    shutil.copy(test_dir + "/test.csv", output_dir)

    # Get class names
    class_names = get_class_names(output_dir, "test")

    tfrecord_dir_te = os.path.join(data_dir, "test")
    test_counts, _ = get_sample_counts(output_dir, "test", class_names)

    # get indices (all of csv file for validation)
    print("** test counts:", test_counts, flush=True)

    # compute steps
    test_steps = int(np.floor(test_counts / batch_size))
    print("** test_steps:", test_steps, flush=True)

    log2_record = int(np.log2(gan_resolution))
    record_file_ending = "*" + str(log2_record) + ".tfrecords"
    print("** resolution ", gan_resolution, " corresponds to ",
          record_file_ending, " TFRecord file.", flush=True)

    # Get Model
    # ------------------------------------
    input_shape = (image_dimension, image_dimension, 3)
    img_input = Input(shape=input_shape)
    base_model = DenseNet121(include_top=False,
                             weights=None,
                             input_tensor=img_input,
                             input_shape=input_shape,
                             pooling="avg")
    x = base_model.output
    predictions = Dense(len(class_names),
                        activation="sigmoid",
                        name="predictions")(x)
    model = Model(inputs=img_input, outputs=predictions)

    print(" ** load model from:", model_weights_path, flush=True)
    model.load_weights(model_weights_path)
    # ------------------------------------

    print("** load test generator **", flush=True)
    test_seq = TFWrapper(tfrecord_dir=tfrecord_dir_te,
                         record_file_endings=record_file_ending,
                         batch_size=batch_size,
                         model_target_size=(image_dimension, image_dimension),
                         steps=None,
                         augment=False,
                         shuffle=False,
                         prefetch=True,
                         repeat=False)

    print("** make prediction **", flush=True)
    test_seq.initialise()
    x_all, y_all = test_seq.get_all_test_data()
    print("X-Test Shape:", x_all.shape, flush=True)
    print("Y-Test Shape:", y_all.shape, flush=True)

    print("----------------------------------------", flush=True)
    print("Test Model AUROC", flush=True)
    y_pred = model.predict(x_all)
    current_auroc = []
    for i in range(len(class_names)):
        try:
            score = roc_auc_score(y_all[:, i], y_pred[:, i])
        except ValueError:
            score = 0
        current_auroc.append(score)
        print(i + 1, class_names[i], ": ", score, flush=True)
    mean_auroc = np.mean(current_auroc)
    print("Mean auroc: ", mean_auroc, flush=True)
    print("----------------------------------------", flush=True)

    downscale_factor = 8
    num_models_to_use = 3
    num_test_images = 100
    print("Number of Models to use:", num_models_to_use, flush=True)
    print("Number of Test images:", num_test_images, flush=True)

    x_tr, y_tr = x_all[num_test_images:], y_all[num_test_images:]
    x_te, y_te = x_all[0:num_test_images], y_all[0:num_test_images]
    downsample_factors = (downscale_factor, downscale_factor)
    print("Downsample Factors:", downsample_factors, flush=True)

    model_builder = UNetModelBuilder(downsample_factors,
                                     num_layers=2,
                                     num_units=8,
                                     activation="relu",
                                     p_dropout=0.0,
                                     verbose=0,
                                     batch_size=32,
                                     learning_rate=0.001)
    print("Model build done.", flush=True)

    masking_operation = ZeroMasking()
    loss = categorical_crossentropy

    explainer = CXPlain(model,
                        model_builder,
                        masking_operation,
                        loss,
                        num_models=num_models_to_use,
                        downsample_factors=downsample_factors,
                        flatten_for_explained_model=False)
    print("Explainer build done.", flush=True)
    explainer.fit(x_tr, y_tr)
    print("Explainer fit done.", flush=True)

    try:
        attr, conf = explainer.explain(x_te, confidence_level=0.80)
        np.save(output_dir + "/x_cxpl.npy", x_te)
        np.save(output_dir + "/y_cxpl.npy", y_te)
        np.save(output_dir + "/attr.npy", attr)
        np.save(output_dir + "/conf.npy", conf)
        print("Explainer explain done and saved.", flush=True)
    except Exception as ef:
        print(ef, flush=True)
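

# -----------------------------------------------------------------------------
# Hypothetical CLI entry point (not part of the original module): a minimal
# sketch showing how the four routines above might be dispatched from the
# command line. The flag names, defaults, and the choice of argparse are
# assumptions for illustration only, not the project's actual interface.
# -----------------------------------------------------------------------------
if __name__ == "__main__":
    import argparse

    parser = argparse.ArgumentParser(
        description="DenseNet121 classification utilities (train/test/nn/cxpl)")
    parser.add_argument("mode", choices=["train", "test", "nn", "cxpl"],
                        help="which routine to run")
    parser.add_argument("--model_dir", required=True,
                        help="directory containing config.ini")
    parser.add_argument("--data_dir", default=None,
                        help="TFRecord data directory (unused by 'train')")
    parser.add_argument("--results_subdir", required=True,
                        help="results directory for weights and outputs")
    parser.add_argument("--random_seed", type=int, default=1000)
    parser.add_argument("--resolution", type=int, default=256,
                        help="GAN image resolution (power of two)")
    args = parser.parse_args()

    if args.mode == "train":
        train(args.model_dir, args.results_subdir, args.random_seed, args.resolution)
    elif args.mode == "test":
        test(args.model_dir, args.data_dir, args.results_subdir,
             args.random_seed, args.resolution)
    elif args.mode == "nn":
        nn(args.model_dir, args.data_dir, args.results_subdir,
           args.random_seed, args.resolution)
    else:
        cxpl(args.model_dir, args.data_dir, args.results_subdir,
             args.random_seed, args.resolution)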