def main():
    batch_size = 16
    epochs = 50

    save_dir = os.path.join(os.getcwd(), 'saved_models')
    if not os.path.isdir(save_dir):
        os.makedirs(save_dir)

    filepath = "saved_models/94482_23620_keras_cw_noDropOut_chexpert_pretrained_chexnet_1_{epoch:03d}_{val_loss:.5f}.h5"
    checkpoint = ModelCheckpoint(filepath, monitor='val_loss', verbose=1,
                                 save_best_only=False, mode='min')
    callbacks_list = [checkpoint]

    # new_model_name = '94482_23620_keras_chexpert_pretrained_chexnet_512_6_epochs_1.h5'
    base_model, model = get_model()

    # load old weights
    # old_model_name = 'keras_chexpert_pretrained_chexnet_512_6_epochs_10.h5'
    # model_path = os.path.join(save_dir, old_model_name)
    # model.load_weights(model_path)

    # print a model summary
    # print_summary(model)

    csv_file_path = 'chexpert/train_94482_frontal_6_classes_real_no_zeros_preprocessed.csv'
    # train_df = pd.read_csv(csv_file_path)
    class_weight = get_class_weight(csv_file_path, target_classes)
    train_generator = AugmentedImageSequence(dataset_csv_file=csv_file_path,
                                             class_names=target_classes,
                                             source_image_dir='./chexpert/',
                                             batch_size=batch_size)

    csv_file_path = 'chexpert/train_23620_frontal_6_classes_real_no_zeros_preprocessed.csv'
    # valid_df = pd.read_csv(csv_file_path)
    valid_generator = AugmentedImageSequence(dataset_csv_file=csv_file_path,
                                             class_names=target_classes,
                                             source_image_dir='./chexpert/',
                                             batch_size=batch_size)

    STEP_SIZE_TRAIN = train_generator.steps
    STEP_SIZE_VALID = valid_generator.steps

    model.fit_generator(generator=train_generator,
                        steps_per_epoch=STEP_SIZE_TRAIN,
                        validation_data=valid_generator,
                        validation_steps=STEP_SIZE_VALID,
                        epochs=epochs,
                        callbacks=callbacks_list,
                        class_weight=class_weight,
                        use_multiprocessing=True)

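# get_class_weight() is defined elsewhere in the project. A minimal sketch
# (an assumption, not the project's actual helper) of what it could compute:
# per-class weights inversely proportional to positive-label frequency in the
# training CSV, in the {class_index: weight} form Keras accepts.
import pandas as pd

def get_class_weight_sketch(csv_file_path, target_classes):
    # hypothetical helper for illustration only
    df = pd.read_csv(csv_file_path)
    n = len(df)
    # weight_i = (# negatives) / (# positives) for each class column
    return {
        i: (n - df[c].sum()) / max(df[c].sum(), 1)
        for i, c in enumerate(target_classes)
    }
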
def get_generator(csv_path, FLAGS, data_augmenter=None):
    return AugmentedImageSequence(
        dataset_csv_file=csv_path,
        label_columns=FLAGS.csv_label_columns,
        class_names=FLAGS.classes,
        multi_label_classification=FLAGS.multi_label_classification,
        source_image_dir=FLAGS.image_directory,
        batch_size=FLAGS.batch_size,
        target_size=FLAGS.image_target_size,
        augmenter=data_augmenter,
        shuffle_on_epoch_end=False,
    )

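# A hedged usage sketch for get_generator() above. FLAGS is whatever argparse /
# config object the surrounding script builds; the SimpleNamespace and values
# below only illustrate the attributes get_generator() reads, they are not the
# project's real configuration.
from types import SimpleNamespace

FLAGS = SimpleNamespace(
    csv_label_columns=["Report"],          # assumed label column(s)
    classes=["Atelectasis", "Effusion"],   # assumed class list
    multi_label_classification=True,
    image_directory="./images/",
    batch_size=16,
    image_target_size=(224, 224),
)
train_generator = get_generator("train.csv", FLAGS)
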
def get_enqueuer(csv, batch_size, FLAGS, tokenizer_wrapper, augmenter=None):
    data_generator = AugmentedImageSequence(
        dataset_csv_file=csv,
        class_names=FLAGS.csv_label_columns,
        tokenizer_wrapper=tokenizer_wrapper,
        source_image_dir=FLAGS.image_directory,
        batch_size=batch_size,
        target_size=FLAGS.image_target_size,
        augmenter=augmenter,
        shuffle_on_epoch_end=True,
    )
    enqueuer = OrderedEnqueuer(data_generator,
                               use_multiprocessing=False,
                               shuffle=False)
    return enqueuer, data_generator.steps

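# A minimal usage sketch for get_enqueuer() above (FLAGS, tokenizer_wrapper and
# the CSV path are assumed to come from the surrounding training script). The
# Keras OrderedEnqueuer prefetches batches in background workers; get() returns
# a generator that yields them in order.
enqueuer, steps = get_enqueuer("train.csv", 16, FLAGS, tokenizer_wrapper)
enqueuer.start(workers=2, max_queue_size=8)
batches = enqueuer.get()              # generator over (images, targets) batches
for _ in range(steps):
    images, targets = next(batches)
    # ... run one training step on the batch ...
enqueuer.stop()
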
def main():
    # parser config
    config_file = "./config.ini"
    cp = ConfigParser()
    cp.read(config_file)

    # default config
    output_dir = cp["DEFAULT"].get("output_dir")
    base_model_name = cp["DEFAULT"].get("base_model_name")
    class_names = cp["DEFAULT"].get("class_names").split(",")
    image_source_dir = cp["DEFAULT"].get("image_source_dir")

    # train config
    image_dimension = cp["TRAIN"].getint("image_dimension")

    # test config
    batch_size = cp["TEST"].getint("batch_size")
    test_steps = cp["TEST"].get("test_steps")
    use_best_weights = cp["TEST"].getboolean("use_best_weights")

    # parse weights file path
    output_weights_name = cp["TRAIN"].get("output_weights_name")
    weights_path = os.path.join(output_dir, output_weights_name)
    best_weights_path = os.path.join(output_dir, f"best_{output_weights_name}")

    # get test sample count
    test_counts, _ = get_sample_counts(output_dir, "test", class_names)

    # compute steps
    if test_steps == "auto":
        test_steps = int(test_counts / batch_size)
    else:
        try:
            test_steps = int(test_steps)
        except ValueError:
            raise ValueError(f"""
                test_steps: {test_steps} is invalid,
                please use 'auto' or integer.
                """)
    print(f"** test_steps: {test_steps} **")

    print("** load model **")
    if use_best_weights:
        print("** use best weights **")
        model_weights_path = best_weights_path
    else:
        print("** use last weights **")
        model_weights_path = weights_path
    model_factory = ModelFactory()
    model = model_factory.get_model(class_names,
                                    model_name=base_model_name,
                                    use_base_weights=False,
                                    weights_path=model_weights_path)

    print("** load test generator **")
    test_sequence = AugmentedImageSequence(
        dataset_csv_file=os.path.join(output_dir, "test.csv"),
        class_names=class_names,
        source_image_dir=image_source_dir,
        batch_size=batch_size,
        target_size=(image_dimension, image_dimension),
        augmenter=None,
        steps=test_steps,
        shuffle_on_epoch_end=False,
    )

    print("** make prediction **")
    logs = []
    starttime = time()
    y_hat = model.predict_generator(test_sequence, verbose=1)
    logs.append(time() - starttime)
    print("time: " + str(logs))
    y = test_sequence.get_y_true()

    test_log_path = os.path.join(output_dir, "test.log")
    print(f"** write log to {test_log_path} **")
    aurocs = []
    with open(test_log_path, "w") as f:
        for i in range(len(class_names)):
            try:
                score = roc_auc_score(y[:, i], y_hat[:, i])
                aurocs.append(score)
            except ValueError:
                score = 0
            f.write(f"{class_names[i]}: {score}\n")
        mean_auroc = np.mean(aurocs)
        f.write("-------------------------\n")
        f.write(f"mean auroc: {mean_auroc}\n")
        print(f"mean auroc: {mean_auroc}")

# compute steps
steps = int(training_counts / batch_size)
print(f"** train_steps: {steps} **")

print("** load training generator **")
tokenizer_wrapper = TokenizerWrapper(os.path.join(data_dir, all_data_csv),
                                     class_names[0],
                                     max_sequence_length,
                                     tokenizer_vocab_size)
data_generator = AugmentedImageSequence(
    dataset_csv_file=os.path.join(data_dir, training_csv),
    class_names=class_names,
    tokenizer_wrapper=tokenizer_wrapper,
    source_image_dir=image_source_dir,
    batch_size=batch_size,
    target_size=(image_dimension, image_dimension),
    augmenter=augmenter,
    steps=steps,
    shuffle_on_epoch_end=True,
)

medical_w2v = Medical_W2V_Wrapper()
embeddings = medical_w2v.get_embeddings_matrix_for_words(
    tokenizer_wrapper.get_word_tokens_list(), tokenizer_vocab_size)
print(embeddings.shape)
del medical_w2v

encoder = CNN_Encoder(embedding_dim)
decoder = RNN_Decoder(embedding_dim, units, tokenizer_vocab_size, embeddings)

def main():
    # parser config
    config_file = "./config.ini"
    cp = ConfigParser()
    cp.read(config_file)

    # default config
    output_dir = cp["DEFAULT"].get("output_dir")
    base_model_name = cp["DEFAULT"].get("base_model_name")
    class_names = cp["DEFAULT"].get("class_names").split(",")
    image_source_dir = cp["DEFAULT"].get("image_source_dir")
    image_dimension = cp["TRAIN"].getint("image_dimension")

    # parse weights file path
    output_weights_name = cp["TRAIN"].get("output_weights_name")
    weights_path = os.path.join(output_dir, output_weights_name)
    best_weights_path = os.path.join(output_dir, f"best_{output_weights_name}")

    # CAM config
    bbox_list_file = cp["CAM"].get("bbox_list_file")
    use_best_weights = cp["CAM"].getboolean("use_best_weights")

    print("** load model **")
    if use_best_weights:
        print("** use best weights **")
        model_weights_path = best_weights_path
    else:
        print("** use last weights **")
        model_weights_path = weights_path
    model_factory = ModelFactory()
    model = model_factory.get_model(class_names,
                                    model_name=base_model_name,
                                    use_base_weights=False,
                                    weights_path=model_weights_path)

    print("read bbox list file")
    df_images = pd.read_csv(bbox_list_file, header=None, skiprows=1)
    df_images.columns = ["file_name", "label", "x", "y", "w", "h"]

    print("create a generator for loading transformed images")
    cam_sequence = AugmentedImageSequence(
        dataset_csv_file=os.path.join(output_dir, "test.csv"),
        class_names=class_names,
        source_image_dir=image_source_dir,
        batch_size=1,
        target_size=(image_dimension, image_dimension),
        augmenter=None,
        steps=1,
        shuffle_on_epoch_end=False,
    )

    image_output_dir = os.path.join(output_dir, "cam")
    if not os.path.isdir(image_output_dir):
        os.makedirs(image_output_dir)

    print("create CAM")
    df_images.apply(
        lambda g: create_cam(
            df_g=g,
            output_dir=image_output_dir,
            image_source_dir=image_source_dir,
            model=model,
            generator=cam_sequence,
            class_names=class_names,
        ),
        axis=1,
    )

def train(self, log_dir, show_model=True):
    """ Training classification model """
    ###################################################################################
    augs = augmentation()
    optimizer = Adam(lr=config.LEARNING_RATE, beta_1=0.9, beta_2=0.999,
                     epsilon=1e-08, decay=0.0)
    early_stop = EarlyStopping(monitor="val_loss", min_delta=0,
                               patience=20, verbose=1)
    reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.8, patience=10,
                                  verbose=1, mode='auto', epsilon=0.0001,
                                  cooldown=5, min_lr=0.0001)
    ###################################################################################
    TRAIN_CSV_FP, tmp_train = tool.prepare_dataset(config.TRAIN)
    VAL_CSV_FP, tmp_val = tool.prepare_dataset(config.VAL)
    SAVE_WEIGHT_FP = os.path.join(log_dir, "{epoch:03d}-{val_loss:.4f}.h5")
    AUC_LOG_FP = os.path.join(log_dir, "auc.txt")

    # Make log directory if not exist
    if not os.path.isdir(log_dir):
        os.makedirs(log_dir)

    # Training dataset
    train_sequence = AugmentedImageSequence(
        csv_fp=TRAIN_CSV_FP,
        class_names=config.CLASS_NAMES,
        batch_size=config.BATCH_SIZE,
        target_size=(config.NET_INPUT_DIM, config.NET_INPUT_DIM),
        steps=config.TRAIN_STEPS,
        augmenter=augs,
    )

    # Validation dataset
    validation_sequence = AugmentedImageSequence(
        csv_fp=VAL_CSV_FP,
        class_names=config.CLASS_NAMES,
        batch_size=config.BATCH_SIZE,
        target_size=(config.NET_INPUT_DIM, config.NET_INPUT_DIM),
        steps=config.VAL_STEPS,
        shuffle_on_epoch_end=False,
    )

    # Load classification model
    model = ModelFactory().get_classification_model(
        class_num=len(config.CLASS_NAMES),
        model_name=config.MODEL_NAME,
        base_weights="imagenet",
        input_shape=(config.NET_INPUT_DIM, config.NET_INPUT_DIM, 3))
    if show_model:
        print(model.summary())
    model.compile(optimizer=optimizer, loss=config.LOSS, metrics=config.METRICS)

    # Callbacks
    checkpoint = ModelCheckpoint(SAVE_WEIGHT_FP, save_weights_only=False,
                                 save_best_only=False, verbose=0)
    tensorboard = TensorBoard(log_dir=os.path.join(log_dir, "logs"))
    csv_logger = CSVLogger(os.path.join(log_dir, "my_logger.csv"))
    auroc = MultipleClassAUROC(AUC_LOG_FP, validation_sequence,
                               config.CLASS_NAMES, config.GENERATOR_WORKERS)
    callbacks = [checkpoint, tensorboard, csv_logger, early_stop, reduce_lr, auroc]

    history = model.fit_generator(generator=train_sequence,
                                  steps_per_epoch=config.TRAIN_STEPS,
                                  epochs=config.EPOCH,
                                  verbose=2,
                                  validation_data=validation_sequence,
                                  callbacks=callbacks,
                                  workers=config.GENERATOR_WORKERS,
                                  shuffle=False)
    print("\nFinished training")

    if tmp_train:
        os.remove(TRAIN_CSV_FP)
    if tmp_val:
        os.remove(VAL_CSV_FP)

def main():
    # parser config
    config_file = "./config.ini"
    cp = ConfigParser()
    cp.read(config_file)

    # default config
    output_dir = cp["DEFAULT"].get("output_dir")
    image_source_dir = cp["DEFAULT"].get("image_source_dir")
    base_model_name = cp["DEFAULT"].get("base_model_name")
    class_names = cp["DEFAULT"].get("class_names").split(",")

    # train config
    use_base_model_weights = cp["TRAIN"].getboolean("use_base_model_weights")
    use_trained_model_weights = cp["TRAIN"].getboolean("use_trained_model_weights")
    use_best_weights = cp["TRAIN"].getboolean("use_best_weights")
    output_weights_name = cp["TRAIN"].get("output_weights_name")
    epochs = cp["TRAIN"].getint("epochs")
    batch_size = cp["TRAIN"].getint("batch_size")
    initial_learning_rate = cp["TRAIN"].getfloat("initial_learning_rate")
    generator_workers = cp["TRAIN"].getint("generator_workers")
    image_dimension = cp["TRAIN"].getint("image_dimension")
    train_steps = cp["TRAIN"].get("train_steps")
    patience_reduce_lr = cp["TRAIN"].getint("patience_reduce_lr")
    min_lr = cp["TRAIN"].getfloat("min_lr")
    validation_steps = cp["TRAIN"].get("validation_steps")
    positive_weights_multiply = cp["TRAIN"].getfloat("positive_weights_multiply")
    dataset_csv_dir = cp["TRAIN"].get("dataset_csv_dir")

    # if previously trained weights is used, never re-split
    if use_trained_model_weights:
        # resuming mode
        print("** use trained model weights **")
        # load training status for resuming
        training_stats_file = os.path.join(output_dir, ".training_stats.json")
        if os.path.isfile(training_stats_file):
            # TODO: add loading previous learning rate?
            training_stats = json.load(open(training_stats_file))
        else:
            training_stats = {}
    else:
        # start over
        training_stats = {}

    show_model_summary = cp["TRAIN"].getboolean("show_model_summary")
    # end parser config

    # check output_dir, create it if not exists
    if not os.path.isdir(output_dir):
        os.makedirs(output_dir)

    running_flag_file = os.path.join(output_dir, ".training.lock")
    if os.path.isfile(running_flag_file):
        raise RuntimeError("A process is running in this directory!!!")
    else:
        open(running_flag_file, "a").close()

    try:
        print(f"backup config file to {output_dir}")
        shutil.copy(config_file,
                    os.path.join(output_dir, os.path.split(config_file)[1]))

        datasets = ["train", "dev", "test"]
        for dataset in datasets:
            shutil.copy(os.path.join(dataset_csv_dir, f"{dataset}.csv"), output_dir)

        # get train/dev sample counts
        train_counts, train_pos_counts = get_sample_counts(output_dir, "train", class_names)
        dev_counts, _ = get_sample_counts(output_dir, "dev", class_names)

        # compute steps
        if train_steps == "auto":
            train_steps = int(train_counts / batch_size)
        else:
            try:
                train_steps = int(train_steps)
            except ValueError:
                raise ValueError(f"""
                train_steps: {train_steps} is invalid,
                please use 'auto' or integer.
                """)
        print(f"** train_steps: {train_steps} **")

        if validation_steps == "auto":
            validation_steps = int(dev_counts / batch_size)
        else:
            try:
                validation_steps = int(validation_steps)
            except ValueError:
                raise ValueError(f"""
                validation_steps: {validation_steps} is invalid,
                please use 'auto' or integer.
                """)
        print(f"** validation_steps: {validation_steps} **")

        # compute class weights
        print("** compute class weights from training data **")
        class_weights = get_class_weights(
            train_counts,
            train_pos_counts,
            multiply=positive_weights_multiply,
        )
        print("** class_weights **")
        print(class_weights)

        print("** load model **")
        if use_trained_model_weights:
            if use_best_weights:
                model_weights_file = os.path.join(output_dir, f"best_{output_weights_name}")
            else:
                model_weights_file = os.path.join(output_dir, output_weights_name)
        else:
            model_weights_file = None

        model_factory = ModelFactory()
        model = model_factory.get_model(
            class_names,
            model_name=base_model_name,
            use_base_weights=use_base_model_weights,
            weights_path=model_weights_file,
            input_shape=(image_dimension, image_dimension, 3))

        if show_model_summary:
            print(model.summary())

        print("** create image generators **")
        train_sequence = AugmentedImageSequence(
            dataset_csv_file=os.path.join(output_dir, "train.csv"),
            class_names=class_names,
            source_image_dir=image_source_dir,
            batch_size=batch_size,
            target_size=(image_dimension, image_dimension),
            augmenter=augmenter,
            steps=train_steps,
        )
        validation_sequence = AugmentedImageSequence(
            dataset_csv_file=os.path.join(output_dir, "dev.csv"),
            class_names=class_names,
            source_image_dir=image_source_dir,
            batch_size=batch_size,
            target_size=(image_dimension, image_dimension),
            augmenter=augmenter,
            steps=validation_steps,
            shuffle_on_epoch_end=False,
        )

        output_weights_path = os.path.join(output_dir, output_weights_name)
        print(f"** set output weights path to: {output_weights_path} **")

        print("** check multiple gpu availability **")
        gpus = len(os.getenv("CUDA_VISIBLE_DEVICES", "1").split(","))
        if gpus > 1:
            print(f"** multi_gpu_model is used! gpus={gpus} **")
            model_train = multi_gpu_model(model, gpus)
            # FIXME: currently (Keras 2.1.2) checkpoint doesn't work with multi_gpu_model
            checkpoint = MultiGPUModelCheckpoint(
                filepath=output_weights_path,
                base_model=model,
            )
        else:
            model_train = model
            checkpoint = ModelCheckpoint(
                output_weights_path,
                save_weights_only=True,
                save_best_only=True,
                verbose=1,
            )

        print("** compile model with class weights **")
        # model.compile(RAdam(), loss='mse')
        # optimizer = Adam(lr=initial_learning_rate)
        optimizer = RAdam(lr=initial_learning_rate)
        model_train.compile(optimizer=optimizer, loss=[focal_loss])
        lookahead = Lookahead(k=5, alpha=0.5)  # Initialize Lookahead
        lookahead.inject(model_train)  # add into model

        auroc = MultipleClassAUROC(
            sequence=validation_sequence,
            class_names=class_names,
            weights_path=output_weights_path,
            stats=training_stats,
            workers=generator_workers,
        )
        callbacks = [
            checkpoint,
            TensorBoard(log_dir=os.path.join(output_dir, "logs"),
                        batch_size=batch_size),
            ReduceLROnPlateau(monitor='val_loss', factor=0.1,
                              patience=patience_reduce_lr,
                              verbose=1, mode="min", min_lr=min_lr),
            auroc,
            EarlyStopping(monitor='val_loss', mode='min', verbose=1, patience=25),
        ]

        print("** start training **")
        history = model_train.fit_generator(
            generator=train_sequence,
            steps_per_epoch=train_steps,
            epochs=epochs,
            validation_data=validation_sequence,
            validation_steps=validation_steps,
            callbacks=callbacks,
            class_weight=class_weights,
            workers=generator_workers,
            shuffle=False,
        )

        # dump history
        print("** dump history **")
        with open(os.path.join(output_dir, "history.pkl"), "wb") as f:
            pickle.dump({
                "history": history.history,
                "auroc": auroc.aurocs,
            }, f)
        print("** done! **")
    finally:
        os.remove(running_flag_file)

def main(fold, gender_train, gender_test):
    # parser config
    config_file = 'config_file.ini'
    cp = ConfigParser()
    cp.read(config_file)

    root_output_dir = cp["DEFAULT"].get("output_dir")

    # default config
    print(root_output_dir, gender_train)
    output_dir = root_output_dir + gender_train + '/Fold_' + str(fold) + '/output/'
    base_model_name = cp["DEFAULT"].get("base_model_name")
    class_names = cp["DEFAULT"].get("class_names").split(",")
    image_source_dir = cp["DEFAULT"].get("image_source_dir")

    # train config
    image_dimension = cp["TRAIN"].getint("image_dimension")

    # test config
    batch_size = cp["TEST"].getint("batch_size")
    test_steps = cp["TEST"].get("test_steps")
    use_best_weights = cp["TEST"].getboolean("use_best_weights")

    # parse weights file path
    output_weights_name = cp["TRAIN"].get("output_weights_name")
    weights_path = os.path.join(output_dir, output_weights_name)
    best_weights_path = os.path.join(output_dir, f"best_{output_weights_name}")

    # get test sample count
    test_counts, _ = get_sample_counts(
        root_output_dir + gender_train + '/Fold_' + str(fold),
        str(gender_test), class_names)

    # compute steps
    if test_steps == "auto":
        test_steps = int(test_counts / batch_size)
    else:
        try:
            test_steps = int(test_steps)
        except ValueError:
            raise ValueError(f"""
                test_steps: {test_steps} is invalid,
                please use 'auto' or integer.
                """)
    print(f"** test_steps: {test_steps} **")

    print("** load model **")
    if use_best_weights:
        print("** use best weights **")
        model_weights_path = best_weights_path
    else:
        print("** use last weights **")
        model_weights_path = weights_path
    model_factory = ModelFactory()
    model = model_factory.get_model(
        class_names,
        model_name=base_model_name,
        use_base_weights=False,
        weights_path=model_weights_path)

    print("** load test generator **")
    test_sequence = AugmentedImageSequence(
        dataset_csv_file=os.path.join(
            root_output_dir + gender_train + '/Fold_' + str(fold),
            str(gender_test) + ".csv"),
        class_names=class_names,
        source_image_dir=image_source_dir,
        batch_size=batch_size,
        target_size=(image_dimension, image_dimension),
        augmenter=None,
        steps=test_steps,
        shuffle_on_epoch_end=False,
    )

    print("** make prediction **")
    y_hat = model.predict_generator(test_sequence, verbose=1)
    y = test_sequence.get_y_true()

    y_pred_dir = (output_dir + "y_pred_run_" + str(fold) + "_train"
                  + gender_train + "_" + gender_test + ".csv")
    y_true_dir = (output_dir + "y_true_run_" + str(fold) + "_train"
                  + gender_train + "_" + gender_test + ".csv")

    np.savetxt(y_pred_dir, y_hat, delimiter=",")
    np.savetxt(y_true_dir, y, delimiter=",")

def main():
    # parser config
    print("### Input configuration file ### \n")
    config_file = "./config.ini"
    cp = ConfigParser()
    cp.read(config_file)

    # default config
    print("### Read default configurations ### \n")
    output_dir = cp["DEFAULT"].get("output_dir")
    image_train_source_dir = cp["DEFAULT"].get("image_train_source_dir")
    image_valid_source_dir = cp["DEFAULT"].get("image_valid_source_dir")
    base_model_name = cp["DEFAULT"].get("base_model_name")
    class_names = cp["DEFAULT"].get("class_names").split(",")

    # train config
    print("### Reading training configurations ### \n")
    use_base_model_weights = cp["TRAIN"].getboolean("use_base_model_weights")
    use_trained_model_weights = cp["TRAIN"].getboolean("use_trained_model_weights")
    use_best_weights = cp["TRAIN"].getboolean("use_best_weights")
    output_weights_name = cp["TRAIN"].get("output_weights_name")
    epochs = cp["TRAIN"].getint("epochs")
    batch_size = cp["TRAIN"].getint("batch_size")
    initial_learning_rate = cp["TRAIN"].getfloat("initial_learning_rate")
    generator_workers = cp["TRAIN"].getint("generator_workers")
    image_dimension = cp["TRAIN"].getint("image_dimension")
    patience_reduce_lr = cp["TRAIN"].getint("patience_reduce_lr")
    min_lr = cp["TRAIN"].getfloat("min_lr")
    positive_weights_multiply = cp["TRAIN"].getfloat("positive_weights_multiply")
    dataset_csv_dir = cp["TRAIN"].get("dataset_csv_dir")

    # if previously trained weights is used, never re-split
    if use_trained_model_weights:
        # resuming mode
        print("** use trained model weights **")
        # load training status for resuming
        training_stats_file = os.path.join(output_dir, ".training_stats.json")
        if os.path.isfile(training_stats_file):
            # TODO: add loading previous learning rate?
            training_stats = json.load(open(training_stats_file))
        else:
            training_stats = {}
    else:
        # start over
        training_stats = {}

    print("### Show model summary ### \n")
    show_model_summary = cp["TRAIN"].getboolean("show_model_summary")
    # end parser config

    print("### Check output directory ### \n")
    # check output_dir, create it if not exists
    if not os.path.isdir(output_dir):
        os.makedirs(output_dir)

    running_flag_file = os.path.join(output_dir, ".training.lock")
    if os.path.isfile(running_flag_file):
        raise RuntimeError("A process is running in this directory!!!")
    else:
        open(running_flag_file, "a").close()

    try:
        print("### Backup config file to {} \n".format(output_dir))
        shutil.copy(config_file,
                    os.path.join(output_dir, os.path.split(config_file)[1]))

        datasets = ["train", "valid"]
        for dataset in datasets:
            shutil.copy(os.path.join(dataset_csv_dir, dataset + '.csv'), output_dir)

        # get train/dev sample counts
        print("### Get class frequencies ### \n")
        train_counts, train_pos_counts = get_sample_counts(output_dir, "train", class_names)
        dev_counts, _ = get_sample_counts(output_dir, "valid", class_names)

        # compute steps
        print("### Compute step size ### \n")
        train_steps = int(train_counts / batch_size)
        validation_steps = int(dev_counts / batch_size)

        # compute class weights
        print("### Class weights ### \n")
        class_weights = get_class_weights(
            train_counts,
            train_pos_counts,
            multiply=positive_weights_multiply,
        )
        print("### Class_weights ### \n")
        print(class_weights)
        print("\n")

        print("### Loading model ### \n")
        if use_trained_model_weights:
            if use_best_weights:
                model_weights_file = os.path.join(output_dir, "best_" + output_weights_name)
            else:
                model_weights_file = os.path.join(output_dir, output_weights_name)
        else:
            model_weights_file = None

        model_factory = ModelFactory()
        print("### Get model ### \n")
        model = model_factory.get_model(
            class_names,
            model_name=base_model_name,
            use_base_weights=use_base_model_weights,
            weights_path=model_weights_file,
            input_shape=(image_dimension, image_dimension, 3))

        print("Show model summary? {}".format(show_model_summary))
        if show_model_summary:
            print(model.summary())

        print("\n ### Create image generators ### \n")
        train_sequence = AugmentedImageSequence(
            dataset_csv_file=os.path.join(output_dir, "train.csv"),
            class_names=class_names,
            source_image_dir=image_train_source_dir,
            batch_size=batch_size,
            target_size=(image_dimension, image_dimension),
            augmenter=augmenter,
            steps=train_steps,
        )
        validation_sequence = AugmentedImageSequence(
            dataset_csv_file=os.path.join(output_dir, "valid.csv"),
            class_names=class_names,
            source_image_dir=image_valid_source_dir,
            batch_size=batch_size,
            target_size=(image_dimension, image_dimension),
            augmenter=augmenter,
            steps=validation_steps,
            shuffle_on_epoch_end=False,
        )

        output_weights_path = os.path.join(output_dir, output_weights_name)
        print("### Set output weights path to {} ### \n".format(output_weights_path))

        print("### Check multiple gpu availability ### \n")
        # gpus = len(os.getenv("CUDA_VISIBLE_DEVICES").split(","))
        if False:  # Turn off multiple gpu model
            print("### Multi_gpu_model is used! gpus={} ###".format(gpus))
            model_train = multi_gpu_model(model, gpus)
            # FIXME: currently (Keras 2.1.2) checkpoint doesn't work with multi_gpu_model
            checkpoint = MultiGPUModelCheckpoint(
                filepath=output_weights_path,
                base_model=model,
            )
        else:
            model_train = model
            checkpoint = ModelCheckpoint(
                output_weights_path,
                save_weights_only=True,
                save_best_only=True,
                verbose=1,
            )

        print("### Compile model with class weights ### \n")
        optimizer = Adam(lr=initial_learning_rate)
        model_train.compile(optimizer=optimizer, loss="binary_crossentropy")

        auroc = MultipleClassAUROC(
            sequence=validation_sequence,
            class_names=class_names,
            weights_path=output_weights_path,
            stats=training_stats,
            workers=generator_workers,
        )
        callbacks = [
            checkpoint,
            TensorBoard(log_dir=os.path.join(output_dir, "logs")),
            ReduceLROnPlateau(monitor='val_loss', factor=0.1,
                              patience=patience_reduce_lr,
                              verbose=1, mode="min", min_lr=min_lr),
            auroc,
        ]

        print("### Start training ### \n")
        history = model_train.fit(
            train_sequence,
            steps_per_epoch=train_steps,
            epochs=epochs,
            validation_data=validation_sequence,
            validation_steps=validation_steps,
            callbacks=callbacks,
            class_weight=class_weights,
            workers=generator_workers,
            shuffle=False,
        )

        # dump history
        print("### Dump history ### \n")
        with open(os.path.join(output_dir, "history.pkl"), "wb") as f:
            pickle.dump({
                "history": history.history,
                "auroc": auroc.aurocs,
            }, f)
        print("** done! **")
    finally:
        os.remove(running_flag_file)

def main():
    # parser config
    config_file = "./config.ini"
    cp = ConfigParser()
    cp.read(config_file)

    # default config
    output_dir = cp["DEFAULT"].get("output_dir")
    if not os.path.exists(output_dir):
        os.mkdir(output_dir)
    base_model_name = cp["DEFAULT"].get("base_model_name")
    class_names = cp["DEFAULT"].get("class_names").split(",")
    image_source_dir = cp["DEFAULT"].get("image_source_dir")
    image_dimension = cp["TRAIN"].getint("image_dimension")
    batch_size = cp["TEST"].getint("batch_size")
    test_steps = cp["TEST"].get("test_steps")

    # parse weights file path
    output_weights_name = cp["TRAIN"].get("output_weights_name")
    weights_path = os.path.join(output_dir, output_weights_name)
    data_set_dir = cp["TRAIN"].get("dataset_csv_dir")
    input_weights_name = cp["TRAIN"].get("input_weights_name")
    best_weights_path = os.path.join(data_set_dir, f"best_{input_weights_name}")

    # get test sample count
    test_counts, _ = get_sample_counts(data_set_dir, "test", class_names)

    # compute steps
    if test_steps == "auto":
        test_steps = int(test_counts / batch_size)
    else:
        try:
            test_steps = int(test_steps)
        except ValueError:
            raise ValueError(f"""
                test_steps: {test_steps} is invalid,
                please use 'auto' or integer.
                """)

    # CAM config
    bbox_list_file = cp["CAM"].get("bbox_list_file")
    use_best_weights = cp["CAM"].getboolean("use_best_weights")

    print("** load model **")
    if use_best_weights:
        print("** use best weights **")
        model_weights_path = best_weights_path
    else:
        print("** use last weights **")
        model_weights_path = weights_path
    model_factory = ModelFactory()
    model = model_factory.get_model(class_names,
                                    model_name=base_model_name,
                                    use_base_weights=False,
                                    weights_path=model_weights_path)

    print("read bbox list file")
    df_images = pd.read_csv(bbox_list_file, header=None, skiprows=1)
    df_images.columns = ["file_name", "label", "x", "y", "w", "h"]

    print("create a generator for loading transformed images")
    cam_sequence = AugmentedImageSequence(
        dataset_csv_file=os.path.join(data_set_dir, "test.csv"),
        class_names=class_names,
        source_image_dir=image_source_dir,
        batch_size=batch_size,
        target_size=(image_dimension, image_dimension),
        augmenter=None,
        steps=test_steps,
        shuffle_on_epoch_end=False,
    )

    image_output_dir = os.path.join(output_dir, "cam")
    if not os.path.isdir(image_output_dir):
        os.makedirs(image_output_dir)

    print("create CAM")
    df_images.apply(
        lambda g: create_cam(
            df_g=g,
            output_dir=image_output_dir,
            image_source_dir=image_source_dir,
            model=model,
            generator=cam_sequence,
            class_names=class_names,
        ),
        axis=1,
    )

def main():
    # parser config
    config_file = "./config.ini"
    cp = ConfigParser()
    cp.read(config_file)

    # default config
    weights_dir = cp["DEFAULT"].get("weights_dir")
    base_model_name = cp["DEFAULT"].get("base_model_name")
    chexnet_class_names = cp["DEFAULT"].get("chexnet_class_names").split(",")
    class_names = cp["DEFAULT"].get("class_names").split(",")
    image_source_dir = cp["DEFAULT"].get("image_source_dir")
    data_dir = cp["DEFAULT"].get("data_dir")

    # train config
    image_dimension = cp["TRAIN"].getint("image_dimension")

    # test config
    batch_size = cp["TEST"].getint("batch_size")
    test_steps = cp["TEST"].get("test_steps")

    # parse weights file path
    output_weights_name = cp["TRAIN"].get("output_weights_name")
    weights_path = os.path.join(weights_dir, output_weights_name)

    # get test sample count
    test_counts = get_sample_counts(data_dir, "all_data", class_names)

    # compute steps
    if test_steps == "auto":
        test_steps = int(test_counts / batch_size)
    else:
        try:
            test_steps = int(test_steps)
        except ValueError:
            raise ValueError(f"""
                test_steps: {test_steps} is invalid,
                please use 'auto' or integer.
                """)
    print(f"** test_steps: {test_steps} **")

    print("** load model **")
    model_weights_path = weights_path
    model_factory = ModelFactory()
    model = model_factory.get_model(chexnet_class_names,
                                    model_name=base_model_name,
                                    use_base_weights=False,
                                    weights_path=model_weights_path,
                                    pop_last_layer=True)
    model.summary()

    print("** load test generator **")
    test_sequence = AugmentedImageSequence(
        dataset_csv_file=os.path.join(data_dir, "all_data.csv"),
        class_names=class_names,
        source_image_dir=image_source_dir,
        batch_size=batch_size,
        target_size=(image_dimension, image_dimension),
        augmenter=None,
        steps=test_steps,
        shuffle_on_epoch_end=False,
    )

    print("** make prediction **")
    image, y = test_sequence.__getitem__(4)
    y_hat = model.predict(image)
    # y_hat = model.predict_generator(test_sequence, verbose=1)
    # y = test_sequence.get_y_true()
    print(y_hat.shape)

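# Follow-up sketch (an assumption, not part of the original script): with
# pop_last_layer=True the model acts as a feature extractor, so the same
# sequence can be walked batch by batch to build a feature matrix for the
# whole CSV instead of the single batch shown above.
import numpy as np

features = []
for batch_index in range(test_sequence.steps):
    images, _ = test_sequence[batch_index]      # AugmentedImageSequence implements __getitem__
    features.append(model.predict(images))
features = np.concatenate(features, axis=0)
print(features.shape)
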
def main():
    # parser config
    config_file = "./config.ini"
    cp = ConfigParser()
    cp.read(config_file)

    # default config
    output_dir = cp["DEFAULT"].get("output_dir")
    if not os.path.exists(output_dir):
        os.mkdir(output_dir)
    base_model_name = cp["DEFAULT"].get("base_model_name")
    class_names = cp["DEFAULT"].get("class_names").split(",")
    image_source_dir = cp["DEFAULT"].get("image_source_dir")
    data_set_dir = cp["TRAIN"].get("dataset_csv_dir")

    # train config
    image_dimension = cp["TRAIN"].getint("image_dimension")

    # test config
    batch_size = cp["TEST"].getint("batch_size")
    test_steps = cp["TEST"].get("test_steps")
    use_best_weights = cp["TEST"].getboolean("use_best_weights")

    # parse weights file path
    input_weights_name = cp["TRAIN"].get("input_weights_name")
    weights_path = os.path.join(data_set_dir, input_weights_name)
    best_weights_path = os.path.join(data_set_dir, "best_{}".format(input_weights_name))

    # get test sample count
    test_counts, _ = get_sample_counts(data_set_dir, "test", class_names)

    # compute steps
    if test_steps == "auto":
        test_steps = int(test_counts / batch_size)
    else:
        try:
            test_steps = int(test_steps)
        except ValueError:
            raise ValueError("""
                test_steps: {} is invalid,
                please use 'auto' or integer.
                """.format(test_steps))
    print("** test_steps: {} **".format(test_steps))

    print("** load model **")
    if use_best_weights:
        print("** use best weights **")
        model_weights_path = best_weights_path
    else:
        print("** use last weights **")
        model_weights_path = weights_path
    model_factory = ModelFactory()
    model = model_factory.get_model(class_names,
                                    model_name=base_model_name,
                                    use_base_weights=False,
                                    weights_path=model_weights_path)

    print("** load test generator **")
    test_sequence = AugmentedImageSequence(
        # dataset_csv_file=os.path.join(output_dir, "dev.csv"),
        dataset_csv_file=os.path.join(data_set_dir, "test.csv"),
        # dataset_csv_file=os.path.join(data_set_dir, "MIMIC_dataset.csv"),
        class_names=class_names,
        source_image_dir=image_source_dir,
        batch_size=batch_size,
        target_size=(image_dimension, image_dimension),
        augmenter=None,
        steps=test_steps,
        shuffle_on_epoch_end=False,
    )
    # test_sequence.dataset_df.to_csv(os.path.join(output_dir, 'test_data_frame.csv'))

    print("** make prediction **")
    y_hat = model.predict_generator(test_sequence, verbose=1)
    y = test_sequence.get_y_true()
    # np.savetxt(os.path.join(output_dir, 'y_hat_1205_default_weight.txt'), y_hat)
    np.savetxt(os.path.join(output_dir, 'y_0430.txt'), y)

    test_log_path = os.path.join(output_dir, "test.log")
    print("** write log to {} **".format(test_log_path))
    aurocs = []
    with open(test_log_path, "w") as f:
        for i in range(len(class_names)):
            try:
                score = roc_auc_score(y[:, i], y_hat[:, i])
                aurocs.append(score)
            except ValueError:
                score = 0
            f.write("{}: {}\n".format(class_names[i], score))
        mean_auroc = np.mean(aurocs)
        f.write("-------------------------\n")
        f.write("mean auroc: {}\n".format(mean_auroc))
        print("mean auroc: {}".format(mean_auroc))