def main():
    """Load the configured model and start preparing the test data.

    Reads ``./config.ini``, resolves the number of test steps, selects the
    best or last weights file under ``output_dir`` and builds the model via
    ``ModelFactory``.

    NOTE(review): the function is cut off in the visible source right after
    the ``PatientInfo`` call; the trailing fragment is kept verbatim below and
    is not valid Python on its own. Restore the missing part from the
    original file before relying on this function.
    """
    # parser config
    config_file = "./config.ini"
    cp = ConfigParser()
    cp.read(config_file)

    # default config
    output_dir = cp["DEFAULT"].get("output_dir")
    base_model_name = cp["DEFAULT"].get("base_model_name")
    class_names = cp["DEFAULT"].get("class_names").split(",")
    image_source_dir = cp["DEFAULT"].get("image_source_dir")

    # train config
    image_dimension = cp["TRAIN"].getint("image_dimension")

    # test config
    batch_size = cp["TEST"].getint("batch_size")
    test_steps = cp["TEST"].get("test_steps")
    use_best_weights = cp["TEST"].getboolean("use_best_weights")

    # parse weights file path
    output_weights_name = cp["TRAIN"].get("output_weights_name")
    weights_path = os.path.join(output_dir, output_weights_name)
    best_weights_path = os.path.join(output_dir, f"best_{output_weights_name}")

    # get test sample count
    test_counts, _ = get_sample_counts(output_dir, "test", class_names)

    # compute steps: "auto" derives the step count from the sample count,
    # anything else must parse as an integer
    if test_steps == "auto":
        test_steps = int(test_counts / batch_size)
    else:
        try:
            test_steps = int(test_steps)
        except ValueError:
            raise ValueError(f""" test_steps: {test_steps} is invalid, please use 'auto' or integer. """)
    print(f"** test_steps: {test_steps} **")

    print("** load model **")
    if use_best_weights:
        print("** use best weights **")
        model_weights_path = best_weights_path
    else:
        print("** use last weights **")
        model_weights_path = weights_path
    model_factory = ModelFactory()
    print("** weight path is {} **".format(model_weights_path))
    model = model_factory.get_model(
        class_names,
        model_name=base_model_name,
        use_base_weights=False,
        weights_path=model_weights_path)

    print("** load test generator **")
    test_ids = PatientInfo('stage_1_test_images/', train=True)
    # NOTE(review): source is truncated here; the next line is the leftover
    # tail of a call whose beginning is missing.
        train=False, img_dir='stage_1_test_images/*' #image_source_dir+'*', #'stage_1_test_images/*',
def load_ori_model(config_file="./config.ini"):
    """Build the original CheXNet model described by ``config_file``.

    The DEFAULT, TRAIN and TEST sections are read, the number of test steps
    is resolved and printed, and either the "best" or the "last" weights file
    under ``output_dir`` is handed to ``ModelFactory.get_model``.

    Args:
        config_file: path of the INI file to read.

    Returns:
        The model object returned by ``ModelFactory.get_model``.

    Raises:
        ValueError: if ``test_steps`` is neither 'auto' nor an integer.
    """
    parser = ConfigParser()
    parser.read(config_file)

    # DEFAULT section
    defaults = parser["DEFAULT"]
    output_dir = defaults.get("output_dir")
    base_model_name = defaults.get("base_model_name")
    class_names = defaults.get("class_names").split(",")
    image_source_dir = defaults.get("image_source_dir")

    # TRAIN section (image size), then TEST section
    train_cfg = parser["TRAIN"]
    image_dimension = train_cfg.getint("image_dimension")
    test_cfg = parser["TEST"]
    batch_size = test_cfg.getint("batch_size")
    requested_steps = test_cfg.get("test_steps")
    use_best_weights = test_cfg.getboolean("use_best_weights")

    # candidate weights files
    output_weights_name = train_cfg.get("output_weights_name")
    last_weights = os.path.join(output_dir, output_weights_name)
    best_weights = os.path.join(output_dir, f"best_{output_weights_name}")

    # number of samples listed for the "test" split
    sample_total, _ = get_sample_counts(output_dir, "test", class_names)

    # resolve the step count
    if requested_steps != "auto":
        try:
            step_count = int(requested_steps)
        except ValueError:
            raise ValueError(
                f" test_steps: {requested_steps} is invalid, "
                "please use 'auto' or integer. "
            )
    else:
        step_count = int(sample_total / batch_size)
    print(f"** test_steps: {step_count} **")

    print("** load model **")
    if not use_best_weights:
        print("** use last weights **")
        chosen_weights = last_weights
    else:
        print("** use best weights **")
        chosen_weights = best_weights

    # load CheXNet model
    return ModelFactory().get_model(
        class_names,
        model_name=base_model_name,
        use_base_weights=False,
        weights_path=chosen_weights,
    )
def load_model():
    """Load the classifier configured in ``./config.ini`` using the CAM settings.

    Only the options needed to build the model are read: ``output_dir``,
    ``base_model_name`` and ``class_names`` from DEFAULT,
    ``output_weights_name`` from TRAIN and ``use_best_weights`` from CAM.
    Options that played no part in model loading are no longer read, so an
    unrelated malformed value (e.g. a non-integer ``image_dimension``) cannot
    make this function fail.

    Returns:
        tuple: ``(model, class_names)`` where ``model`` comes from
        ``ModelFactory.get_model`` and ``class_names`` is the list obtained by
        splitting the ``class_names`` option on commas.
    """
    # parser config
    config_file = "./config.ini"
    cp = ConfigParser()
    cp.read(config_file)

    # default config
    output_dir = cp["DEFAULT"].get("output_dir")
    base_model_name = cp["DEFAULT"].get("base_model_name")
    class_names = cp["DEFAULT"].get("class_names").split(",")

    # weights file name and CAM switch
    output_weights_name = cp["TRAIN"].get("output_weights_name")
    use_best_weights = cp["CAM"].getboolean("use_best_weights")

    print("** load model **")
    if use_best_weights:
        print("** use best weights **")
        model_weights_path = os.path.join(output_dir, f"best_{output_weights_name}")
    else:
        print("** use last weights **")
        model_weights_path = os.path.join(output_dir, output_weights_name)

    model_factory = ModelFactory()
    model = model_factory.get_model(
        class_names,
        model_name=base_model_name,
        use_base_weights=False,
        weights_path=model_weights_path,
    )
    return model, class_names
def main():
    """Create class activation maps (CAM) for every row of the bbox list file.

    Configuration comes from ``./config.ini``. The model is built with the
    best or last weights (CAM section switch), the bbox CSV is loaded into a
    DataFrame, and ``create_cam`` is invoked once per row, writing into
    ``<output_dir>/cam``.
    """
    parser = ConfigParser()
    parser.read("./config.ini")

    # DEFAULT + TRAIN options
    defaults = parser["DEFAULT"]
    output_dir = defaults.get("output_dir")
    base_model_name = defaults.get("base_model_name")
    class_names = defaults.get("class_names").split(",")
    image_source_dir = defaults.get("image_source_dir")
    train_cfg = parser["TRAIN"]
    image_dimension = train_cfg.getint("image_dimension")

    # candidate weights files
    output_weights_name = train_cfg.get("output_weights_name")
    last_weights = os.path.join(output_dir, output_weights_name)
    best_weights = os.path.join(output_dir, f"best_{output_weights_name}")

    # CAM options
    cam_cfg = parser["CAM"]
    bbox_list_file = cam_cfg.get("bbox_list_file")
    use_best_weights = cam_cfg.getboolean("use_best_weights")

    print("** load model **")
    if not use_best_weights:
        print("** use last weights **")
        chosen_weights = last_weights
    else:
        print("** use best weights **")
        chosen_weights = best_weights

    model = ModelFactory().get_model(
        class_names,
        model_name=base_model_name,
        use_base_weights=False,
        weights_path=chosen_weights,
    )

    print("read bbox list file")
    # the first row of the file is skipped and explicit column names assigned
    df_images = pd.read_csv(bbox_list_file, header=None, skiprows=1)
    df_images.columns = ["file_name", "label", "x", "y", "w", "h"]

    print("create a generator for loading transformed images")
    side = image_dimension
    cam_sequence = AugmentedImageSequence(
        dataset_csv_file=os.path.join(output_dir, "test.csv"),
        class_names=class_names,
        source_image_dir=image_source_dir,
        batch_size=1,
        target_size=(side, side),
        augmenter=None,
        steps=1,
        shuffle_on_epoch_end=False,
    )

    image_output_dir = os.path.join(output_dir, "cam")
    if not os.path.isdir(image_output_dir):
        os.makedirs(image_output_dir)

    def _render(row):
        # one CAM per bbox row
        return create_cam(
            df_g=row,
            output_dir=image_output_dir,
            image_source_dir=image_source_dir,
            model=model,
            generator=cam_sequence,
            class_names=class_names,
        )

    print("create CAM")
    df_images.apply(_render, axis=1)
# Module-level setup: read model settings, load the model once, create the app.
# NOTE(review): `cp` and `output_dir` are not defined in the visible part of
# this script; presumably they are set up just above -- confirm.
base_model_name = cp["DEFAULT"].get("base_model_name")
class_names = cp["DEFAULT"].get("class_names").split(",")
image_source_dir = cp["DEFAULT"].get("image_source_dir")

# parse weights file path
output_weights_name = cp["TRAIN"].get("output_weights_name")
print(output_weights_name)
# The weights file name is fixed to "best_weights.h5"; output_weights_name is
# only printed above and does not influence the path.
best_weights_path = os.path.join(output_dir, "best_weights.h5")
print("** load model **")
model_weights_path = best_weights_path
model_factory = ModelFactory()
model = model_factory.get_model(class_names, model_name=base_model_name, use_base_weights=False, weights_path=model_weights_path)
# NOTE(review): private Keras call; presumably builds the predict function up
# front so request handlers can call predict -- confirm.
model._make_predict_function()

app = Flask(__name__)


@app.route('/alive', methods=['GET'])
def alive():
    """Handle GET /alive by returning the fixed body 'Ok'."""
    return 'Ok'


@app.route('/ready', methods=['GET'])
def ready():
    """Handle GET /ready by returning the fixed body 'Ok'."""
    return 'Ok'
def main():
    """Evaluate the configured model on ``dev.csv`` and log per-class AUROC.

    Settings are read from ``./config.ini``. Predictions are produced with
    ``predict_generator``; for each class the ROC AUC is written to
    ``<output_dir>/test.log`` (0 is written when the score cannot be
    computed), followed by the mean over the classes that could be scored.

    Raises:
        ValueError: if ``test_steps`` is neither 'auto' nor an integer.
    """
    parser = ConfigParser()
    parser.read("./config.ini")

    # DEFAULT section
    defaults = parser["DEFAULT"]
    output_dir = defaults.get("output_dir")
    base_model_name = defaults.get("base_model_name")
    class_names = defaults.get("class_names").split(",")
    image_source_dir = defaults.get("image_source_dir")

    # TRAIN section (image size), then TEST section
    train_cfg = parser["TRAIN"]
    image_dimension = train_cfg.getint("image_dimension")
    test_cfg = parser["TEST"]
    batch_size = test_cfg.getint("batch_size")
    requested_steps = test_cfg.get("test_steps")
    use_best_weights = test_cfg.getboolean("use_best_weights")

    # candidate weights files
    output_weights_name = train_cfg.get("output_weights_name")
    last_weights = os.path.join(output_dir, output_weights_name)
    best_weights = os.path.join(output_dir, "best_{}".format(output_weights_name))

    # number of samples listed for the "test" split
    sample_total, _ = get_sample_counts(output_dir, "test", class_names)

    # resolve the step count
    if requested_steps != "auto":
        try:
            step_count = int(requested_steps)
        except ValueError:
            raise ValueError(
                " test_steps: {} is invalid, please use 'auto' or integer. ".format(
                    requested_steps))
    else:
        step_count = int(sample_total / batch_size)
    print("** test_steps: {} **".format(step_count))

    print("** load model **")
    if not use_best_weights:
        print("** use last weights **")
        chosen_weights = last_weights
    else:
        print("** use best weights **")
        chosen_weights = best_weights
    model = ModelFactory().get_model(
        class_names,
        model_name=base_model_name,
        use_base_weights=False,
        weights_path=chosen_weights,
    )

    print("** load test generator **")
    side = image_dimension
    test_sequence = AugmentedImageSequence(
        dataset_csv_file=os.path.join(output_dir, "dev.csv"),
        class_names=class_names,
        source_image_dir=image_source_dir,
        batch_size=batch_size,
        target_size=(side, side),
        augmenter=None,
        steps=step_count,
        shuffle_on_epoch_end=False,
    )

    print("** make prediction **")
    y_hat = model.predict_generator(test_sequence, verbose=1)
    y = test_sequence.get_y_true()

    test_log_path = os.path.join(output_dir, "test.log")
    print("** write log to {} **".format(test_log_path))
    aurocs = []
    with open(test_log_path, "w") as log:
        for idx, label in enumerate(class_names):
            try:
                score = roc_auc_score(y[:, idx], y_hat[:, idx])
                aurocs.append(score)
            except ValueError:
                # class could not be scored: logged as 0, left out of the mean
                score = 0
            log.write("{}: {}\n".format(label, score))
        mean_auroc = np.mean(aurocs)
        log.write("-------------------------\n")
        log.write("mean auroc: {}\n".format(mean_auroc))
        print("mean auroc: {}".format(mean_auroc))
from skimage.transform import resize
import cv2
from keras import backend as kb

app = Flask(__name__)

# Class labels handed to the model factory (14 entries).
class_names = [
    'Atelectasis', 'Cardiomegaly', 'Effusion', 'Infiltration', 'Mass', 'Nodule', 'Pneumonia', 'Pneumothorax', 'Consolidation', 'Edema', 'Emphysema', 'Fibrosis', 'Pleural_Thickening', 'Hernia'
]

# The model is built once at import time from a fixed weights file.
model_factory = ModelFactory()
model = model_factory.get_model(class_names, model_name='DenseNet121', use_base_weights=False, weights_path='models/best_weights.h5')
# NOTE(review): the default TF graph is captured here, presumably so request
# handlers can run predictions against it -- confirm against its users.
graph = tf.get_default_graph()


def load_image(image_file):
    """Open ``image_file``, convert it to RGB, divide by 255 and resize to 224x224.

    Returns the array produced by ``skimage.transform.resize``.
    """
    #image_path = os.path.join(self.source_image_dir, image_file)
    image = Image.open(image_file)
    image_array = np.asarray(image.convert("RGB"))
    image_array = image_array / 255.
    image_array = resize(image_array, (224, 224))
    return image_array


def get_output_layer(model, layer_name):
    # get the symbolic outputs of each "key" layer (we gave them unique names).
    # NOTE(review): the body of this function is not visible in this chunk.
def train_rsna_clf(train_data=None, validation_data=None, remove_running=True):
    """Train the RSNA classifier on top of a factory-built base model.

    Settings are read from ``./config.ini``. The base model is created for
    ``class_names1`` (optionally with previously trained weights) and its
    last layer is replaced for ``class_names2`` via ``modify_last_layer``.
    While training runs, a ``.training.lock`` file exists in ``output_dir``;
    it is removed when the function exits. Training history and AUROC values
    are pickled to ``<output_dir>/history.pkl``.

    Args:
        train_data: training data object; its ``.df`` attribute is passed to
            ``get_sample_counts`` and the object itself to
            ``AugmentedLabelSequence_clf``.
        validation_data: validation data object, used the same way.
        remove_running: when True an existing lock file is replaced; when
            False an existing lock file aborts with ``RuntimeError``.

    Raises:
        ValueError: if ``train_data``/``validation_data`` is missing, or if
            ``train_steps``/``validation_steps`` is neither 'auto' nor an
            integer.
        RuntimeError: if a lock file exists and ``remove_running`` is False.
    """
    # Fail early and clearly instead of an AttributeError on `None.df` after
    # the lock file has already been created.
    if train_data is None or validation_data is None:
        raise ValueError("train_data and validation_data are required")

    # parser config
    config_file = "./config.ini"
    cp = ConfigParser()
    cp.read(config_file)

    # default config
    output_dir = cp["DEFAULT"].get("output_dir")
    base_model_name = cp["DEFAULT"].get("base_model_name")
    class_names1 = cp["DEFAULT"].get("class_names1").split(",")
    class_names2 = cp["DEFAULT"].get("class_names2").split(",")

    # train config
    use_base_model_weights = cp["TRAIN"].getboolean("use_base_model_weights")
    use_trained_model_weights = cp["TRAIN"].getboolean(
        "use_trained_model_weights")
    use_best_weights = cp["TRAIN"].getboolean("use_best_weights")
    input_weights_name = cp["TRAIN"].get("input_weights_name")
    output_weights_name = cp["TRAIN"].get("output_weights_name")
    epochs = cp["TRAIN"].getint("epochs")
    batch_size = cp["TRAIN"].getint("batch_size")
    initial_learning_rate = cp["TRAIN"].getfloat("initial_learning_rate")
    generator_workers = cp["TRAIN"].getint("generator_workers")
    image_dimension = cp["TRAIN"].getint("image_dimension")
    train_steps = cp["TRAIN"].get("train_steps")
    patience_reduce_lr = cp["TRAIN"].getint("patience_reduce_lr")
    min_lr = cp["TRAIN"].getfloat("min_lr")
    validation_steps = cp["TRAIN"].get("validation_steps")
    positive_weights_multiply = cp["TRAIN"].getfloat(
        "positive_weights_multiply")

    # resuming mode: reload the saved training status if there is one
    training_stats = {}
    if use_trained_model_weights:
        print("** use trained model weights **")
        training_stats_file = os.path.join(output_dir, ".training_stats.json")
        if os.path.isfile(training_stats_file):
            # TODO: add loading previous learning rate?
            # `with` closes the file; the bare open() used before leaked it
            with open(training_stats_file) as stats_fh:
                training_stats = json.load(stats_fh)

    show_model_summary = cp["TRAIN"].getboolean("show_model_summary")
    # end parser config

    # check output_dir, create it if not exists
    if not os.path.isdir(output_dir):
        os.makedirs(output_dir)

    running_flag_file = os.path.join(output_dir, ".training.lock")
    if os.path.isfile(running_flag_file):
        if remove_running:
            os.remove(running_flag_file)
            open(running_flag_file, "a").close()
        else:
            raise RuntimeError("A process is running in this directory!!!")
    else:
        open(running_flag_file, "a").close()

    try:
        print(f"backup config file to {output_dir}")
        shutil.copy(config_file,
                    os.path.join(output_dir, os.path.split(config_file)[1]))

        # get train/dev sample counts
        train_counts, train_pos_counts = get_sample_counts(
            train_data.df, class_names2)
        validation_counts, _ = get_sample_counts(validation_data.df,
                                                 class_names2)

        # compute steps
        if train_steps == "auto":
            train_steps = int(train_counts / batch_size)
        else:
            try:
                train_steps = int(train_steps)
            except ValueError as err:
                raise ValueError(
                    f" train_steps: {train_steps} is invalid, "
                    "please use 'auto' or integer. "
                ) from err
        print(f"** train_steps: {train_steps} **")

        if validation_steps == "auto":
            validation_steps = int(validation_counts / batch_size)
        else:
            try:
                validation_steps = int(validation_steps)
            except ValueError as err:
                raise ValueError(
                    f" validation_steps: {validation_steps} is invalid, "
                    "please use 'auto' or integer. "
                ) from err
        print(f"** validation_steps: {validation_steps} **")

        # compute class weights
        print("** compute class weights from training data **")
        class_weights = get_class_weights(
            train_counts,
            train_pos_counts,
            multiply=positive_weights_multiply,
        )
        print("** class_weights **")
        print(class_weights)

        print("** load model **")
        if use_trained_model_weights:
            if use_best_weights:
                model_weights_file = os.path.join(
                    output_dir, f"best_{input_weights_name}")
            else:
                model_weights_file = os.path.join(output_dir,
                                                  input_weights_name)
        else:
            model_weights_file = None

        model_factory = ModelFactory()
        model = model_factory.get_model(
            class_names1,
            model_name=base_model_name,
            use_base_weights=use_base_model_weights,
            weights_path=model_weights_file,
            input_shape=(image_dimension, image_dimension, 3))
        model = modify_last_layer(model, class_names2)

        if show_model_summary:
            # summary() prints by itself; wrapping it in print() only added
            # a stray "None" line
            model.summary()

        # `augmenter` is resolved from the enclosing module scope
        train_sq = AugmentedLabelSequence_clf(
            train_data,
            batch_size=batch_size,
            target_size=(image_dimension, image_dimension),
            augmenter=augmenter,
            steps=train_steps,
        )
        validation_sq = AugmentedLabelSequence_clf(
            validation_data,
            batch_size=batch_size,
            target_size=(image_dimension, image_dimension),
            augmenter=augmenter,
            steps=validation_steps,
        )

        output_weights_path = os.path.join(output_dir, output_weights_name)
        print(f"** set output weights path to: {output_weights_path} **")

        print("** check multiple gpu availability **")
        gpus = len(os.getenv("CUDA_VISIBLE_DEVICES", "1").split(","))
        if gpus > 1:
            print(f"** multi_gpu_model is used! gpus={gpus} **")
            model_train = multi_gpu_model(model, gpus)
            # FIXME: currently (Keras 2.1.2) checkpoint doesn't work with multi_gpu_model
            checkpoint = MultiGPUModelCheckpoint(
                filepath=output_weights_path,
                base_model=model,
            )
        else:
            model_train = model
            checkpoint = ModelCheckpoint(
                output_weights_path,
                save_weights_only=True,
                save_best_only=True,
                verbose=1,
            )

        print("** compile model with class weights **")
        optimizer = Adam(lr=initial_learning_rate)
        model_train.compile(optimizer=optimizer, loss="binary_crossentropy")
        auroc = MultipleClassAUROC(
            sequence=validation_sq,
            class_names=class_names2,
            weights_path=output_weights_path,
            stats=training_stats,
            workers=generator_workers,
        )
        callbacks = [
            checkpoint,
            TensorBoard(log_dir=os.path.join(output_dir, "logs"),
                        batch_size=batch_size),
            ReduceLROnPlateau(monitor='val_loss', factor=0.1,
                              patience=patience_reduce_lr, verbose=1,
                              mode="min", min_lr=min_lr),
            auroc,
        ]

        print("** start training **")
        history = model_train.fit_generator(
            generator=train_sq,
            steps_per_epoch=train_steps,
            epochs=epochs,
            validation_data=validation_sq,
            validation_steps=validation_steps,
            callbacks=callbacks,
            class_weight=class_weights,
            workers=generator_workers,
            shuffle=False,
        )

        # dump history
        print("** dump history **")
        with open(os.path.join(output_dir, "history.pkl"), "wb") as f:
            pickle.dump({
                "history": history.history,
                "auroc": auroc.aurocs,
            }, f)
        print("** done! **")

    finally:
        # always release the lock, even when training failed
        os.remove(running_flag_file)
def main(fold,gender_train,gender_test): # parser config config_file = 'config_file.ini' cp = ConfigParser() cp.read(config_file) root_output_dir= cp["DEFAULT"].get("output_dir") # default config print(root_output_dir,gender_train) output_dir= root_output_dir + gender_train+'/Fold_'+str(fold)+'/output/' base_model_name = cp["DEFAULT"].get("base_model_name") class_names = cp["DEFAULT"].get("class_names").split(",") image_source_dir = cp["DEFAULT"].get("image_source_dir") # train config image_dimension = cp["TRAIN"].getint("image_dimension") # test config batch_size = cp["TEST"].getint("batch_size") test_steps = cp["TEST"].get("test_steps") use_best_weights = cp["TEST"].getboolean("use_best_weights") # parse weights file path output_weights_name = cp["TRAIN"].get("output_weights_name") weights_path = os.path.join(output_dir, output_weights_name) best_weights_path = os.path.join(output_dir, f"best_{output_weights_name}") # get test sample count test_counts, _ = get_sample_counts(root_output_dir+gender_train+'/Fold_'+str(fold),str(gender_test), class_names) # compute steps if test_steps == "auto": test_steps = int(test_counts / batch_size) else: try: test_steps = int(test_steps) except ValueError: raise ValueError(f""" test_steps: {test_steps} is invalid, please use 'auto' or integer. 
""") print(f"** test_steps: {test_steps} **") print("** load model **") if use_best_weights: print("** use best weights **") model_weights_path = best_weights_path else: print("** use last weights **") model_weights_path = weights_path model_factory = ModelFactory() model = model_factory.get_model( class_names, model_name=base_model_name, use_base_weights=False, weights_path=model_weights_path) print("** load test generator **") test_sequence = AugmentedImageSequence( dataset_csv_file=os.path.join(root_output_dir+gender_train+'/Fold_'+str(fold), str(gender_test)+".csv"), class_names=class_names, source_image_dir=image_source_dir, batch_size=batch_size, target_size=(image_dimension, image_dimension), augmenter=None, steps=test_steps, shuffle_on_epoch_end=False, ) print("** make prediction **") y_hat = model.predict_generator(test_sequence, verbose=1) y = test_sequence.get_y_true() y_pred_dir = output_dir + "y_pred_run_" + str(fold)+"_train"+gender_train+"_"+gender_test+ ".csv" y_true_dir = output_dir + "y_true_run_" + str(fold)+"_train"+gender_train+"_"+gender_test+ ".csv" np.savetxt(y_pred_dir, y_hat, delimiter=",") np.savetxt(y_true_dir, y, delimiter=",")
def load_model(config_file="./config.ini", change_arch=False, compile_=True):
    """Load the CheXNet model and optionally graft an RSNA classifier head.

    Args:
        config_file: path of the INI file to read.
        change_arch: when True, a ``Dense(10, relu)`` layer is attached to
            the second-to-last layer, concatenated with the original output
            layer and fed into a 3-way softmax named ``rsna_clf_output``;
            the returned model has that softmax as its only output.
        compile_: when True the returned model is compiled with ``rmsprop``.

    Returns:
        The (possibly modified) Keras model.

    Raises:
        ValueError: if ``test_steps`` is neither 'auto' nor an integer.
    """
    cp = ConfigParser()
    cp.read(config_file)

    # default config
    output_dir = cp["DEFAULT"].get("output_dir")
    base_model_name = cp["DEFAULT"].get("base_model_name")
    class_names = cp["DEFAULT"].get("class_names").split(",")

    # test config
    batch_size = cp["TEST"].getint("batch_size")
    test_steps = cp["TEST"].get("test_steps")
    use_best_weights = cp["TEST"].getboolean("use_best_weights")

    # parse weights file path
    output_weights_name = cp["TRAIN"].get("output_weights_name")
    weights_path = os.path.join(output_dir, output_weights_name)
    best_weights_path = os.path.join(output_dir, f"best_{output_weights_name}")

    # get test sample count
    test_counts, _ = get_sample_counts(output_dir, "test", class_names)

    # compute steps (only reported; the value is not needed to build the model)
    if test_steps == "auto":
        test_steps = int(test_counts / batch_size)
    else:
        try:
            test_steps = int(test_steps)
        except ValueError as err:
            raise ValueError(
                f" test_steps: {test_steps} is invalid, "
                "please use 'auto' or integer. "
            ) from err
    print(f"** test_steps: {test_steps} **")

    print("** load model **")
    if use_best_weights:
        print("** use best weights **")
        model_weights_path = best_weights_path
    else:
        print("** use last weights **")
        model_weights_path = weights_path

    # load CheXNet model:
    model_factory = ModelFactory()
    model = model_factory.get_model(
        class_names,
        model_name=base_model_name,
        use_base_weights=False,
        weights_path=model_weights_path,
    )

    if change_arch:
        # input layer, output layer:
        input_layer = model.get_layer(index=0)
        chex_output = model.get_layer(index=-1)
        # add second last layer:
        intermediate_layer = model.get_layer(index=-2)
        rsna_add_layer = Dense(10, activation='relu', name='rsna_add_layer')(
            intermediate_layer.output)  # params are tentative
        rsna_clf_output = Dense(3, activation='softmax',
                                name='rsna_clf_output')(
            concatenate([rsna_add_layer, chex_output.output]))
        model = Model(inputs=[input_layer.input], outputs=[rsna_clf_output])
        losses = {'rsna_clf_output': 'categorical_crossentropy'}
    else:
        # Previously `losses` was only bound on the change_arch path, so the
        # default call (change_arch=False, compile_=True) died with NameError.
        # NOTE(review): binary_crossentropy is assumed for the unmodified
        # model -- confirm it matches how the weights were trained.
        losses = 'binary_crossentropy'

    if compile_:
        print('** compile **')
        model.compile(optimizer='rmsprop', loss=losses, loss_weights=[1.])
    return model
def main():
    """Create class activation maps (CAM) for every row of the bbox list file.

    Settings come from ``./config.ini``. The "best" weights are looked up in
    the TRAIN ``dataset_csv_dir`` under ``best_<input_weights_name>``, the
    "last" weights in ``output_dir`` under ``output_weights_name``; the CAM
    section's ``use_best_weights`` picks between them. ``create_cam`` is
    called once per row of the bbox CSV and writes into ``<output_dir>/cam``.

    Raises:
        ValueError: if ``test_steps`` is neither 'auto' nor an integer.
    """
    # parser config
    config_file = "./config.ini"
    cp = ConfigParser()
    cp.read(config_file)

    # default config
    output_dir = cp["DEFAULT"].get("output_dir")
    # makedirs(exist_ok=True) also creates missing parent directories and
    # has no check-then-create race, unlike exists() + mkdir()
    os.makedirs(output_dir, exist_ok=True)
    base_model_name = cp["DEFAULT"].get("base_model_name")
    class_names = cp["DEFAULT"].get("class_names").split(",")
    image_source_dir = cp["DEFAULT"].get("image_source_dir")
    image_dimension = cp["TRAIN"].getint("image_dimension")
    batch_size = cp["TEST"].getint("batch_size")
    test_steps = cp["TEST"].get("test_steps")

    # parse weights file path
    output_weights_name = cp["TRAIN"].get("output_weights_name")
    weights_path = os.path.join(output_dir, output_weights_name)
    data_set_dir = cp["TRAIN"].get("dataset_csv_dir")
    input_weights_name = cp["TRAIN"].get("input_weights_name")
    best_weights_path = os.path.join(data_set_dir, f"best_{input_weights_name}")

    # get test sample count
    test_counts, _ = get_sample_counts(data_set_dir, "test", class_names)

    # compute steps
    if test_steps == "auto":
        test_steps = int(test_counts / batch_size)
    else:
        try:
            test_steps = int(test_steps)
        except ValueError as err:
            # keep the original parsing error attached as the cause
            raise ValueError(
                f" test_steps: {test_steps} is invalid, "
                "please use 'auto' or integer. "
            ) from err

    # CAM config
    bbox_list_file = cp["CAM"].get("bbox_list_file")
    use_best_weights = cp["CAM"].getboolean("use_best_weights")

    print("** load model **")
    if use_best_weights:
        print("** use best weights **")
        model_weights_path = best_weights_path
    else:
        print("** use last weights **")
        model_weights_path = weights_path
    model_factory = ModelFactory()
    model = model_factory.get_model(
        class_names,
        model_name=base_model_name,
        use_base_weights=False,
        weights_path=model_weights_path,
    )

    print("read bbox list file")
    # the first row of the file is skipped and explicit column names assigned
    df_images = pd.read_csv(bbox_list_file, header=None, skiprows=1)
    df_images.columns = ["file_name", "label", "x", "y", "w", "h"]

    print("create a generator for loading transformed images")
    cam_sequence = AugmentedImageSequence(
        dataset_csv_file=os.path.join(data_set_dir, "test.csv"),
        class_names=class_names,
        source_image_dir=image_source_dir,
        batch_size=batch_size,
        target_size=(image_dimension, image_dimension),
        augmenter=None,
        steps=test_steps,
        shuffle_on_epoch_end=False,
    )

    image_output_dir = os.path.join(output_dir, "cam")
    os.makedirs(image_output_dir, exist_ok=True)

    print("create CAM")
    df_images.apply(
        lambda g: create_cam(
            df_g=g,
            output_dir=image_output_dir,
            image_source_dir=image_source_dir,
            model=model,
            generator=cam_sequence,
            class_names=class_names,
        ),
        axis=1,
    )
def main():
    """Evaluate the configured model on the MIMIC test CSV and log AUROC.

    Settings come from ``./config.ini``; weights are looked up in the TRAIN
    ``dataset_csv_dir`` (``input_weights_name`` or its ``best_`` variant).
    Per-class ROC AUC values are written to ``<output_dir>/test.log``; a
    class that cannot be scored is logged as 0 and left out of the mean.

    Raises:
        ValueError: if ``test_steps`` is neither 'auto' nor an integer.
    """
    # parser config
    config_file = "./config.ini"
    cp = ConfigParser()
    cp.read(config_file)

    # default config
    output_dir = cp["DEFAULT"].get("output_dir")
    # makedirs(exist_ok=True) also creates missing parent directories and
    # has no check-then-create race, unlike exists() + mkdir()
    os.makedirs(output_dir, exist_ok=True)
    base_model_name = cp["DEFAULT"].get("base_model_name")
    class_names = cp["DEFAULT"].get("class_names").split(",")
    image_source_dir = cp["DEFAULT"].get("image_source_dir")
    data_set_dir = cp["TRAIN"].get("dataset_csv_dir")

    # train config
    image_dimension = cp["TRAIN"].getint("image_dimension")

    # test config
    batch_size = cp["TEST"].getint("batch_size")
    test_steps = cp["TEST"].get("test_steps")
    use_best_weights = cp["TEST"].getboolean("use_best_weights")

    # parse weights file path
    input_weights_name = cp["TRAIN"].get("input_weights_name")
    weights_path = os.path.join(data_set_dir, input_weights_name)
    best_weights_path = os.path.join(data_set_dir, f"best_{input_weights_name}")

    # get test sample count
    test_counts, _ = get_sample_counts(data_set_dir, "test", class_names)

    # compute steps
    if test_steps == "auto":
        test_steps = int(test_counts / batch_size)
    else:
        try:
            test_steps = int(test_steps)
        except ValueError as err:
            # keep the original parsing error attached as the cause
            raise ValueError(
                f" test_steps: {test_steps} is invalid, "
                "please use 'auto' or integer. "
            ) from err
    print(f"** test_steps: {test_steps} **")

    print("** load model **")
    if use_best_weights:
        print("** use best weights **")
        model_weights_path = best_weights_path
    else:
        print("** use last weights **")
        model_weights_path = weights_path
    model_factory = ModelFactory()
    model = model_factory.get_model(
        class_names,
        model_name=base_model_name,
        use_base_weights=False,
        weights_path=model_weights_path,
    )

    print("** load test generator **")
    test_sequence = AugmentedImageSequence(
        dataset_csv_file=os.path.join(data_set_dir, "MIMIC_data_test_1206_combined.csv"),
        class_names=class_names,
        source_image_dir=image_source_dir,
        batch_size=batch_size,
        target_size=(image_dimension, image_dimension),
        augmenter=None,
        steps=test_steps,
        shuffle_on_epoch_end=False,
    )

    print("** make prediction **")
    y_hat = model.predict_generator(test_sequence, verbose=1)
    y = test_sequence.get_y_true()

    test_log_path = os.path.join(output_dir, "test.log")
    print(f"** write log to {test_log_path} **")
    aurocs = []
    with open(test_log_path, "w") as f:
        for i, class_name in enumerate(class_names):
            try:
                score = roc_auc_score(y[:, i], y_hat[:, i])
                aurocs.append(score)
            except ValueError:
                # class could not be scored: logged as 0, left out of the mean
                score = 0
            f.write(f"{class_name}: {score}\n")
        # np.mean([]) yields nan plus a RuntimeWarning; state the nan
        # explicitly when no class could be scored
        mean_auroc = np.mean(aurocs) if aurocs else float("nan")
        f.write("-------------------------\n")
        f.write(f"mean auroc: {mean_auroc}\n")
        print(f"mean auroc: {mean_auroc}")
def main():
    """Train a two-class (Nodule / Pneumothorax) DenseNet121 classifier.

    The only command line option is ``--model_epoch`` (int, default 0); it
    selects the names of the weights and history files and is forwarded to
    the image sequences and the AUROC callback. All other settings are fixed
    in the function body. Weights, TensorBoard logs and the pickled history
    are written below ``./experiments/ae``.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--model_epoch', type=int, default=0)
    args = parser.parse_args()

    # Set Parameter #
    base_model_name = "DenseNet121"
    use_base_model_weights = True
    weights_path = None
    image_dimension = 224
    batch_size = 32
    epochs = 20
    class_names = ["Nodule", "Pneumothorax"]
    csv_path = './data/classification'
    image_source_dir = '/media/nfs/CXR/NIH/chest_xrays/NIH/data/images_1024x1024/'
    augmenter = None
    # If train_steps is set to None, will calculate train steps by len(train)/batch_size
    train_steps = None
    positive_weights_multiply = 1
    outputs_path = './experiments/ae'
    weights_name = f'weights{args.model_epoch}.h5'
    output_weights_path = os.path.join(outputs_path, weights_name)
    initial_learning_rate = 0.0001
    training_stats = {}

    # Make sure the output directory exists before anything is written to it;
    # otherwise saving weights or the history can fail on a fresh checkout.
    os.makedirs(outputs_path, exist_ok=True)

    # Get Sample and Total Count From Training Data and Compute Class Weights #
    train_counts, train_pos_counts = get_sample_counts(csv_path, "train", class_names)
    if train_steps is None:
        train_steps = int(train_counts / batch_size)

    dev_counts, _ = get_sample_counts(csv_path, "test", class_names)
    validation_steps = int(dev_counts / batch_size)

    print('***Compute Class Weights***')
    class_weights = get_class_weights(train_counts, train_pos_counts,
                                      multiply=positive_weights_multiply)
    print(class_weights)

    # Create Image Sequence #
    train_sequence = AugmentedImageSequence(
        dataset_csv_file=os.path.join(csv_path, "train.csv"),
        class_names=class_names,
        source_image_dir=image_source_dir,
        batch_size=batch_size,
        target_size=(image_dimension, image_dimension),
        augmenter=augmenter,
        steps=train_steps,
        model_epoch=args.model_epoch)

    validation_sequence = AugmentedImageSequence(
        dataset_csv_file=os.path.join(csv_path, "test.csv"),
        class_names=class_names,
        source_image_dir=image_source_dir,
        batch_size=batch_size,
        target_size=(image_dimension, image_dimension),
        augmenter=augmenter,
        steps=validation_steps,
        shuffle_on_epoch_end=False,
        model_epoch=args.model_epoch)

    # Build Model #
    factory = ModelFactory()
    model = factory.get_model(class_names,
                              model_name=base_model_name,
                              use_base_weights=use_base_model_weights,
                              weights_path=weights_path,
                              input_shape=(image_dimension, image_dimension, 3))

    print("** check multiple gpu availability **")
    gpus = len(os.getenv("CUDA_VISIBLE_DEVICES", "1").split(","))
    if gpus > 1:
        # the f prefix was missing, so a literal "{gpus}" was printed
        print(f"** multi_gpu_model is used! gpus={gpus} **")
        model_train = multi_gpu_model(model, gpus)
        # FIXME: currently (Keras 2.1.2) checkpoint doesn't work with multi_gpu_model
        checkpoint = MultiGPUModelCheckpoint(
            filepath=output_weights_path,
            base_model=model,
        )
    else:
        model_train = model
        checkpoint = ModelCheckpoint(
            output_weights_path,
            save_weights_only=True,
            save_best_only=True,
            verbose=1,
        )

    auroc = MultipleClassAUROC(sequence=validation_sequence,
                               class_names=class_names,
                               weights_path=output_weights_path,
                               stats=training_stats,
                               workers=8,
                               model_epoch=args.model_epoch)

    callbacks = [
        checkpoint,
        TensorBoard(log_dir=os.path.join(outputs_path, "logs"), batch_size=batch_size),
        ReduceLROnPlateau(monitor='val_loss', factor=0.1, patience=1,
                          verbose=1, mode="min", min_lr=1e-8),
        auroc,
    ]

    # Compile Model #
    print('*** Start Compiling ***')
    optimizer = Adam(lr=initial_learning_rate)
    model_train.compile(optimizer=optimizer, loss="binary_crossentropy")

    # Train #
    print("** start training **")
    history = model_train.fit_generator(
        generator=train_sequence,
        steps_per_epoch=train_steps,
        epochs=epochs,
        validation_data=validation_sequence,
        validation_steps=validation_steps,
        callbacks=callbacks,
        class_weight=class_weights,
        workers=8,
        shuffle=False,
    )

    # dump history
    print("** dump history **")
    with open(os.path.join(outputs_path, f"history{args.model_epoch}.pkl"), "wb") as f:
        pickle.dump({
            "history": history.history,
            "auroc": auroc.aurocs,
        }, f)
    print("** done! **")
def main():
    """Evaluate an InceptionResNetV2 model on ``testt.csv`` and log AUROC.

    All settings are hard-coded in the body. Predictions and labels are
    saved to ``y_hat_val.npy`` / ``y_val.npy`` and per-class ROC AUC values
    are written to ``./outputs/val.log``; a class that cannot be scored is
    logged as 0 and left out of the mean.

    The previous version of this block did not parse (unquoted class names
    and file name, unterminated string literals, a stray parenthesis); the
    values below are the quoted forms of what was written there.

    Raises:
        ValueError: if ``test_steps`` is neither 'auto' nor an integer.
    """
    # default config
    output_dir = './outputs'
    base_model_name = 'InceptionResNetV2'
    class_names = [
        'Atelectasis', 'Cardiomegaly', 'Effusion', 'Infiltration', 'Mass',
        'Nodule', 'Pneumonia', 'Pneumothorax', 'Consolidation', 'Edema',
        'Emphysema', 'Fibrosis', 'Pleural_Thickening', 'Hernia',
    ]
    image_source_dir = './Images'
    image_dimension = 341
    batch_size = 16
    test_steps = 1
    use_best_weights = True
    output_weights_name = 'weights.h5'
    # NOTE(review): the original value of weights_path was an unterminated
    # string; the usual <output_dir>/<output_weights_name> is assumed -- confirm.
    weights_path = os.path.join(output_dir, output_weights_name)
    best_weights_path = './outputs/best_auroc.h5'

    # get test sample count
    test_counts, _ = get_sample_counts(output_dir, "testt", class_names)

    # compute steps
    if test_steps == "auto":
        test_steps = int(test_counts / batch_size)
    else:
        try:
            test_steps = int(test_steps)
        except ValueError as err:
            raise ValueError(
                f" test_steps: {test_steps} is invalid, "
                "please use 'auto' or integer. "
            ) from err
    print(f"** test_steps: {test_steps} **")

    print("** load model **")
    if use_best_weights:
        print("** use best weights **")
        model_weights_path = best_weights_path
    else:
        print("** use last weights **")
        model_weights_path = weights_path
    model_factory = ModelFactory()
    model = model_factory.get_model(
        class_names,
        model_name=base_model_name,
        use_base_weights=False,
        weights_path=model_weights_path,
    )

    print("** load test generator **")
    test_sequence = AugmentedImageSequence2(
        dataset_csv_file=os.path.join(output_dir, "testt.csv"),
        class_names=class_names,
        source_image_dir=image_source_dir,
        batch_size=batch_size,
        target_size=(image_dimension, image_dimension),
        augmenter=None,
        steps=test_steps,
        shuffle_on_epoch_end=False,
    )

    print("** make prediction **")
    y_hat = model.predict_generator(test_sequence, verbose=1)
    y = test_sequence.get_y_true()
    np.save('y_hat_val.npy', y_hat)
    np.save('y_val.npy', y)

    test_log_path = "./outputs/val.log"
    print(f"** write log to {test_log_path} **")
    aurocs = []
    with open(test_log_path, "w") as f:
        for i, class_name in enumerate(class_names):
            try:
                score = roc_auc_score(y[:, i], y_hat[:, i])
                aurocs.append(score)
            except ValueError:
                # class could not be scored: logged as 0, left out of the mean
                score = 0
            f.write(f"{class_name}: {score}\n")
        mean_auroc = np.mean(aurocs)
        f.write("-------------------------\n")
        f.write(f"mean auroc: {mean_auroc}\n")
        print(f"mean auroc: {mean_auroc}")