def main():
    """Split every .ndpi slide of the training set into tiles, then augment.

    Changes the working directory to the preprocessing directory, builds the
    output directories, lists the ``.ndpi`` files found there and, for each
    one, splits the image and its annotation mask into 128x128 tiles that are
    written under ``split/X``. Finally runs ``data_augmentation()`` with its
    default arguments.
    """
    # NOTE: switch between "data/test" and "data/train" here; "data/train"
    # is the value to use when building the training set.
    preprocess_dir = "data/train"
    os.chdir(preprocess_dir)
    print(os.getcwd())
    build_dirs(preprocess_dir)

    slide_names, _, _ = list_files_from_dir(extension=".ndpi")
    for index, slide_name in enumerate(slide_names):
        print("{}. ".format(index) + slide_name)

    tile_width = 128
    tile_height = 128
    print("Tile size: {}x{}".format(tile_height, tile_width))

    for slide_name in slide_names:
        print(slide_name)
        slide, annotations = call_ndpi_ndpa(slide_name)
        rectangle_split_ndpi_ndpa(
            ndp_image=slide,
            image_annotation_list=annotations,
            split_height=tile_height,
            split_width=tile_width,
            tohsv=False,
            path_ndpi="split/X",
            n_bkgnd_tiles=5000,
        )

    data_augmentation()
def main(clean=False):
    """Cut every .ndpi slide in ``data/test`` into large rectangular tiles.

    Each slide is split into rectangles whose sides are a quarter of the
    slide's level-0 width and height; the pieces are written under
    ``grandes_RGB``.

    Args:
        clean: when truthy, ``clean_split_files()`` is called once all slides
            have been processed.
    """
    os.chdir("data/test")

    slide_names, _, _ = list_files_from_dir(extension=".ndpi")
    for index, slide_name in enumerate(slide_names):
        print("{}. ".format(index) + slide_name)
    print(slide_names)

    for slide_name in slide_names:
        print(slide_name)
        # The annotation list is returned alongside the image but is not
        # needed for a plain (unlabelled) split.
        slide, _annotations = call_ndpi_ndpa(slide_name)
        quarter_width = floor(slide.width_lvl_0 / 4)
        quarter_height = floor(slide.height_lvl_0 / 4)
        rectangle_split_ndpi(
            ndp_image=slide,
            split_width=quarter_width,
            split_height=quarter_height,
            tohsv=False,
            path="grandes_RGB",
        )

    if clean:
        clean_split_files()
def main():
    """Run each listed model over a test tile set and save predictions as CSV.

    For every model file name in the hard-coded list, the model is loaded,
    predictions are generated for all ``.tif`` tiles of the test set, and one
    CSV per model is written with rows ``(file name, directory, prediction)``.
    The Keras session is cleared after each model.
    """
    # NOTE: remember, the model used must have been trained with the same
    # tile side as the one being used here for testing.
    test_set = "128px_x20_RGB_Box5_newPreprocessing_modeRangePreprocessing_partialsAre0_CD8"
    test_set_directory = "data/test/split/{}/X".format(test_set)

    file_list, dir_list, _ = list_files_from_dir(directory=test_set_directory,
                                                 extension=".tif")
    print("\n\nNumber of elements in file list: " + str(len(file_list)))

    # Map each tile file name to the directory it was listed under.
    ild = {file_list[idx]: folder for idx, folder in enumerate(dir_list)}

    tile_side = 128
    model_directory = "D:/felipe/epi_seg/src/models/"
    # To evaluate every model found in the directory instead, use:
    # model_list, _, _ = list_files_from_dir(directory=model_directory[:-1], extension=".h5")
    # NOTE: the explicit list below is for when only specific models are needed.
    model_list = [
        "20200117_InceptionV3_lossBCE_colorRGB_Box1-4_x20_modeRangePrep_partialsAre0_p16+CD8_optSGD_wDropout_128px_e40.h5",
        "20200117_InceptionV3_lossBCE_colorRGB_Box1-4_x20_modeRangePrep_partialsAre0_p16+CD8_optSGD_wDropout_128px_e10.h5",
    ]
    print("\nNumber models: " + str(len(model_list)) + "\n\n")

    # Metric functions the saved models reference by name.
    custom_metrics = {
        'precision_m': precision_m,
        'recall_m': recall_m,
        'f1_m': f1_m,
        'auc_m': auc_m,
    }

    for model_file in model_list:
        print("Model name: " + model_file)
        model = tf.keras.models.load_model(model_directory + model_file,
                                           custom_objects=custom_metrics)
        test_generator = ImageTestGenerator(list_IDs=file_list,
                                            image_label_directory=ild,
                                            source_directory=test_set_directory,
                                            tile_side=tile_side,
                                            batch_size=64)
        results = model.predict_generator(generator=test_generator,
                                          workers=8,
                                          use_multiprocessing=True,
                                          verbose=1)
        print("shape of results: {}".format(results.shape))

        flat_results = results.reshape(results.shape[0],)
        # model_file[:-3] drops the trailing ".h5" from the model file name.
        csv_path = "data/test/split_results/" + test_set + "/" + model_file[:-3] + ".csv"
        with open(csv_path, "w", newline='') as csv_file:
            csv.writer(csv_file).writerows(zip(file_list, dir_list, flat_results))

        K.clear_session()
def data_augmentation(directory="split/X/1", flip_imgs=True):
    """Write rotated (and optionally flipped) copies of every tile in a folder.

    For each ``.tif`` image listed in ``directory`` three extra images are
    saved next to it, corresponding to the 90, 180 and 270 degree rotations
    (file name prefixes ``90_``, ``180_``, ``270_``). When ``flip_imgs`` is
    true, a flipped version of the original and of each rotation is saved as
    well (prefixes ``f_``, ``90f_``, ``180f_``, ``270f_``).

    Every generated file name is recorded with label ``1`` and the resulting
    mapping is dumped as JSON to ``split/X/augmentations.txt`` (this path is
    fixed and does not follow ``directory``).

    Args:
        directory: folder holding the tiles to augment; generated images are
            written into the same folder.
        flip_imgs: whether to also generate the four flipped variants.
    """
    labels = {}
    print("Data augmentation commencing for images labelled 'epithelium'.")
    print("flip_imgs={}".format(flip_imgs))

    # The file list is taken once, before anything is written, so images
    # generated during this run are not themselves augmented.
    file_list, _, _ = list_files_from_dir(directory=directory, extension=".tif")

    for img_name in tqdm(file_list):
        img = imread(directory + "/" + img_name)

        img_90 = flip(transpose(img), 1)
        img_180 = flip(img, -1)
        img_270 = flip(transpose(img), 0)

        # (file name prefix, image) pairs, in the order they are saved.
        variants = [("90_", img_90), ("180_", img_180), ("270_", img_270)]
        if flip_imgs:
            variants.extend([
                ("f_", flip(img, 1)),
                ("90f_", flip(img_90, 1)),
                ("180f_", flip(img_180, 1)),
                ("270f_", flip(img_270, 1)),
            ])

        for prefix, variant in variants:
            labels[prefix + img_name] = 1
            # NOTE(review): the BGR->RGB conversion presumably matches what
            # save_np_as_image expects after an OpenCV-style imread; confirm.
            save_np_as_image(cvtColor(variant, COLOR_BGR2RGB),
                             directory + "/" + prefix + img_name)

    # Use a context manager so the labels file is flushed and closed
    # deterministically (the handle was previously left open).
    with open("split/X/augmentations.txt", "w") as labels_file:
        json.dump(labels, labels_file)
def main():
    """Train one model per active parameter set from pre-split, labelled tiles.

    The tiles must already be preprocessed and separated into folders
    according to their label. For each parameter set the matching tile
    folder is listed, split 60/40 into training and validation lists, a
    model is trained for 40 epochs through ``basic_dl_model`` and the
    training history is written next to the model as JSON.
    """
    # Layout of one parameter set, e.g.
    # ["stdDev", "0", "InceptionV3", True, "BCE", "RGB"]:
    #   [0] background filter --> "stdDev" or "modeRange"
    #   [1] "0" or "1" depending on "partialsAre"
    #   [2] base model --> one of "InceptionResNetV2", "InceptionV3",
    #       "ResNet50", "Xception", "basic" (which is the default)
    #   [3] whether conv_base is trainable or not (if not, transfer learning
    #       is applied)
    #   [4] loss function --> "BCE" (binary crossentropy) or "ST" (stability
    #       training) (TODO: this parameter is not implemented yet)
    #   [5] color model --> "RGB" or "HSV" (TODO: this parameter is not
    #       implemented yet)
    # In this function [4] and [5] only end up in the output names and the
    # console report.
    parameter_sets = [
        # ["modeRange", "1", "InceptionV3", True, "BCE", "RGB"],
        # ["stdDev", "1", "InceptionV3", True, "BCE", "RGB"],
        # ["modeRange", "0", "InceptionV3", True, "BCE", "RGB"],
        # ["stdDev", "0", "InceptionV3", True, "BCE", "RGB"],
        # ["modeRange", "0", "InceptionV3", False, "BCE", "RGB"],
        # ["modeRange", "1", "InceptionV3", False, "BCE", "RGB"],
        # ["modeRange", "1", "ResNet50", True, "BCE", "RGB"],
        # ["stdDev", "1", "ResNet50", True, "BCE", "RGB"],
        # ["modeRange", "0", "ResNet50", True, "BCE", "RGB"],
        # ["stdDev", "0", "ResNet50", True, "BCE", "RGB"],
        # ["modeRange", "1", "basic", True, "BCE", "RGB"],  # <--- got to 39 epochs!
        ["stdDev", "1", "basic", True, "BCE", "RGB"],
        # ["modeRange", "0", "basic", True, "BCE", "RGB"],
        # ["stdDev", "0", "basic", True, "BCE", "RGB"],
        # ["modeRange", "1", "InceptionResNetV2", True, "BCE", "RGB"],
        # ["stdDev", "1", "InceptionResNetV2", True, "BCE", "RGB"],
        # ["modeRange", "0", "InceptionResNetV2", True, "BCE", "RGB"],
        # ["stdDev", "0", "InceptionResNetV2", True, "BCE", "RGB"],
        ["modeRange", "1", "Xception", True, "BCE", "RGB"],
        ["stdDev", "1", "Xception", True, "BCE", "RGB"],
        ["modeRange", "0", "Xception", True, "BCE", "RGB"],
        ["stdDev", "0", "Xception", True, "BCE", "RGB"],
    ]

    tile_side = 128

    for (bg_filter, partials_label, base_name,
         base_trainable, loss_name, color_model) in parameter_sets:
        name_base = "".join([
            "models/", time.strftime("%Y%m%d"), "_", base_name,
            "_trainable", str(base_trainable),
            "_loss", loss_name,
            "_color", color_model,
            "_15pics_x20_", bg_filter,
            "Preprocessing_partialsAre", partials_label,
        ])
        # The two placeholders are left unfilled here and passed on as-is.
        model_name = name_base + "_{}px_epoch_{}.h5"

        bg_filter_dir = "128px_x20_RGB_15pics_{}Preprocessing_partialsAre{}/".format(
            bg_filter, partials_label)
        full_dir = "data/train/split/{}X".format(bg_filter_dir)
        file_list, dir_list, counts = list_files_from_dir(directory=full_dir,
                                                          extension=".tif")

        print("\n\nImage folder: " + bg_filter_dir)
        print("Image path: " + full_dir)
        print("Number of elements in file list: " + str(len(file_list)))
        print(str(counts) + "\n\n")
        print("Base model: " + base_name)
        print("Base model trainable: " + str(base_trainable))
        print("Loss function: " + str(loss_name))
        print("Color model: " + str(color_model))

        train_list, val_list, _ = train_validation_test_partition(
            file_list, prop=(0.6, 0.4, 0.0))

        # Map each tile file name to the directory it was listed under.
        ild = {file_list[idx]: folder for idx, folder in enumerate(dir_list)}

        saver = CustomSaver(model_name=model_name, tile_side=tile_side)

        training_generator = ImageGenerator2(list_IDs=train_list,
                                             image_label_directory=ild,
                                             source_directory=full_dir,
                                             tile_side=tile_side,
                                             batch_size=64)
        validation_generator = ImageGenerator2(list_IDs=val_list,
                                               image_label_directory=ild,
                                               source_directory=full_dir,
                                               tile_side=tile_side,
                                               batch_size=64)

        # Class 1 is weighted by the ratio of the "-1" and "0" counts to the
        # "1" count.
        non_positive_count = counts["-1"] + counts["0"]
        class_weight = {0: 1., 1: non_positive_count / counts["1"]}

        # epochs = [40]
        # for e in epochs:
        model, history = basic_dl_model(
            tile_side,
            saver=saver,
            model_name=model_name,
            training_generator=training_generator,
            validation_generator=validation_generator,
            class_weight=class_weight,
            epochs=40,
            base=base_name,
            trainable=base_trainable)
        # model.save(model_name.format(tile_side, e))

        history_name = name_base + "_" + str(tile_side) + "px.json"
        with open(history_name, 'w') as jsonfile:
            json.dump(history.history, jsonfile)

        K.clear_session()
def main():
    """Train one model per active parameter set from pre-split, labelled tiles.

    The tiles must already be preprocessed and separated into folders
    according to their label. For each parameter set the matching tile
    folder is listed, split 60/40 into training and validation lists, a
    model is trained for 40 epochs through ``basic_dl_model`` and the
    training history is written next to the model as JSON.
    """
    # Layout of one parameter set:
    # ["modeRange", "1", "InceptionV3", True, "BCE", "RGB", "p16", "15pics", "SGD"]
    # [     0        1         2          3     4      5      6       7        8  ]
    #   0: type of background filtering --> "stdDev" or "modeRange"
    #   1: labelling of partially annotated tiles --> "0" (non-epithelium)
    #      or "1" (epithelium)
    #   2: DL base architecture --> "InceptionResNetV2", "InceptionV3",
    #      "ResNet50", "Xception", "basic" (which is the default)
    #   3: whether the layers of the base architecture are trainable (no
    #      transfer learning) or not (transfer learning) --> True, False
    #   4: loss function --> "BCE" (binary crossentropy) or "ST" (stability
    #      training) (TODO: this parameter is not implemented yet)
    #   5: color model --> "RGB" or "HSV"
    #   6: IHC used --> "p16" or "p16+CD8"
    #   7: images used --> "15pics" or "Box1-4"
    #   8: optimizer --> "Adam" or "SGD" (TODO: this parameter is not
    #      implemented yet)
    # In this function 4 and 8 only end up in the output names and the
    # console report.
    parameter_sets = [
        # ["modeRange", "1", "InceptionV3", True, "BCE", "HSV", "p16", "15pics", "Adam"],
        # ["stdDev", "1", "InceptionV3", True, "BCE", "RGB", "p16", "15pics", "Adam"],
        # ["modeRange", "0", "InceptionV3", True, "BCE", "RGB", "p16+CD8", "Box1-4", "SGD"],
        ["modeRange", "0", "InceptionV3", True, "ST", "RGB", "p16", "15pics", "SGD"],
        # ["stdDev", "0", "InceptionV3", True, "BCE", "RGB", "p16", "15pics", "Adam"],
        # ["modeRange", "0", "InceptionV3", False, "BCE", "RGB", "p16", "15pics", "Adam"],
        # ["modeRange", "1", "InceptionV3", False, "BCE", "RGB", "p16", "15pics", "Adam"],
    ]

    tile_side = 128

    for (bg_filter, partials_label, base_name, base_trainable, loss_name,
         color_model, ihc, image_set, optimizer_name) in parameter_sets:
        name_base = "".join([
            "models/", time.strftime("%Y%m%d"), "_", base_name,
            "_loss", loss_name,
            "_color", color_model,
            "_", image_set,
            "_x20_", bg_filter,
            "Prep_partialsAre", partials_label,
            "_", ihc,
            "_opt", optimizer_name,
            "_wDropout",
        ])
        # The two placeholders are left unfilled here and passed on as-is.
        model_name = name_base + "_{}px_e{}.h5"

        bg_filter_dir = "128px_x20_RGB_{}_{}Preprocessing_partialsAre{}_{}/".format(
            image_set, bg_filter, partials_label, ihc)
        full_dir = "data/train/split/{}X".format(bg_filter_dir)
        file_list, dir_list, counts = list_files_from_dir(directory=full_dir,
                                                          extension=".tif")

        # The generators are told to use HSV only when the set asks for it.
        tohsv = color_model == "HSV"

        print("\n\nImage folder: " + bg_filter_dir)
        print("Image path: " + full_dir)
        print("Number of elements in file list: " + str(len(file_list)))
        print(str(counts) + "\n\n")
        print("Base model: " + base_name)
        print("Base model trainable: " + str(base_trainable))
        print("optimizer: " + optimizer_name)
        print("Loss function: " + str(loss_name))
        print("Color model: " + str(color_model) + "- tohsv=" + str(tohsv))

        train_list, val_list, _ = train_validation_test_partition(
            file_list, prop=(0.6, 0.4, 0.0))

        # Map each tile file name to the directory it was listed under.
        ild = {file_list[idx]: folder for idx, folder in enumerate(dir_list)}

        saver = CustomSaver(model_name=model_name, tile_side=tile_side)

        training_generator = ImageGenerator2(list_IDs=train_list,
                                             image_label_directory=ild,
                                             source_directory=full_dir,
                                             tile_side=tile_side,
                                             batch_size=64,
                                             hsv=tohsv)
        validation_generator = ImageGenerator2(list_IDs=val_list,
                                               image_label_directory=ild,
                                               source_directory=full_dir,
                                               tile_side=tile_side,
                                               batch_size=64,
                                               hsv=tohsv)

        # Class 1 is weighted by the ratio of the "-1" and "0" counts to the
        # "1" count.
        non_positive_count = counts["-1"] + counts["0"]
        class_weight = {0: 1., 1: non_positive_count / counts["1"]}

        # epochs = [40]
        # for e in epochs:
        model, history = basic_dl_model(
            tile_side,
            saver=saver,
            model_name=model_name,
            training_generator=training_generator,
            validation_generator=validation_generator,
            class_weight=class_weight,
            epochs=40,
            base=base_name,
            trainable=base_trainable)
        # model.save(model_name.format(tile_side, e))

        history_name = name_base + "_" + str(tile_side) + "px.json"
        with open(history_name, 'w') as jsonfile:
            json.dump(history.history, jsonfile)

        K.clear_session()
# Evaluate model on data # ndpi_file = "src/data/test/prueba2.ndpi" # ndp_image, image_annotation_list = call_ndpi_ndpa(ndpi_file) # val_X = np.array(ndp_image.read_region(location=(0, 0), # level=0, # size=(ndp_image.width_lvl_0, # ndp_image.height_lvl_0))) # val_X = cv2.cvtColor(val_X[:, :, :3], cv2.COLOR_RGB2BGR) tile_size = 128 resolution = "x20" image_dir = "src/data/test/grandes_RGB/{}".format(resolution) file_list, _, _ = list_files_from_dir(directory=image_dir, extension=".tif") # image_filename = "S04_2819_p16_RTU_ER1_20 - 2016-04-12 15.39.25_(30720,16384)_10240x8192.tif" for image_filename in file_list: print(image_filename) val_X = cv2.imread(image_dir + "/" + image_filename) val_X = normalize_image(val_X) n_ver = floor(val_X.shape[0] / tile_size) n_hor = floor(val_X.shape[1] / tile_size) val_X = convert_image_to_stack_of_tiles(val_X, tile_size, tile_size)