Example #1
def main():
    batch_size = 16
    epochs = 50

    save_dir = os.path.join(os.getcwd(), 'saved_models')
    if not os.path.isdir(save_dir):
        os.makedirs(save_dir)

    filepath = "saved_models/94482_23620_keras_cw_noDropOut_chexpert_pretrained_chexnet_1_{epoch:03d}_{val_loss:.5f}.h5"
    checkpoint = ModelCheckpoint(filepath,
                                 monitor='val_loss',
                                 verbose=1,
                                 save_best_only=False,
                                 mode='min')
    callbacks_list = [checkpoint]

    #new_model_name = '94482_23620_keras_chexpert_pretrained_chexnet_512_6_epochs_1.h5'

    base_model, model = get_model()

    # load old weights
    #old_model_name = 'keras_chexpert_pretrained_chexnet_512_6_epochs_10.h5'
    #model_path = os.path.join(save_dir, old_model_name)
    # model.load_weights(model_path)

    # print a model summary
    # print_summary(model)

    csv_file_path = 'chexpert/train_94482_frontal_6_classes_real_no_zeros_preprocessed.csv'
    #train_df = pd.read_csv(csv_file_path)

    class_weight = get_class_weight(csv_file_path, target_classes)

    train_generator = AugmentedImageSequence(dataset_csv_file=csv_file_path,
                                             class_names=target_classes,
                                             source_image_dir='./chexpert/',
                                             batch_size=batch_size)

    csv_file_path = 'chexpert/train_23620_frontal_6_classes_real_no_zeros_preprocessed.csv'
    #valid_df = pd.read_csv(csv_file_path)

    valid_generator = AugmentedImageSequence(dataset_csv_file=csv_file_path,
                                             class_names=target_classes,
                                             source_image_dir='./chexpert/',
                                             batch_size=batch_size)

    STEP_SIZE_TRAIN = train_generator.steps
    STEP_SIZE_VALID = valid_generator.steps

    model.fit_generator(generator=train_generator,
                        steps_per_epoch=STEP_SIZE_TRAIN,
                        validation_data=valid_generator,
                        validation_steps=STEP_SIZE_VALID,
                        epochs=epochs,
                        callbacks=callbacks_list,
                        class_weight=class_weight,
                        use_multiprocessing=True)
Example #2
def get_generator(csv_path, FLAGS, data_augmenter=None):
    return AugmentedImageSequence(
        dataset_csv_file=csv_path,
        label_columns=FLAGS.csv_label_columns,
        class_names=FLAGS.classes,
        multi_label_classification=FLAGS.multi_label_classification,
        source_image_dir=FLAGS.image_directory,
        batch_size=FLAGS.batch_size,
        target_size=FLAGS.image_target_size,
        augmenter=data_augmenter,
        shuffle_on_epoch_end=False,
    )
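A rough usage sketch for the helper above; the FLAGS fields and values below are placeholders, not the project's actual configuration:

from types import SimpleNamespace

# Hypothetical FLAGS object for illustration only.
FLAGS = SimpleNamespace(
    csv_label_columns=["Finding Labels"],
    classes=["Atelectasis", "Cardiomegaly", "Effusion"],
    multi_label_classification=True,
    image_directory="./images/",
    batch_size=16,
    image_target_size=(224, 224),
)

valid_generator = get_generator("valid.csv", FLAGS)
print(valid_generator.steps)  # number of batches per epoch, as used in the other examples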
Example #3
def get_enqueuer(csv, batch_size, FLAGS, tokenizer_wrapper, augmenter=None):
    data_generator = AugmentedImageSequence(
        dataset_csv_file=csv,
        class_names=FLAGS.csv_label_columns,
        tokenizer_wrapper=tokenizer_wrapper,
        source_image_dir=FLAGS.image_directory,
        batch_size=batch_size,
        target_size=FLAGS.image_target_size,
        augmenter=augmenter,
        shuffle_on_epoch_end=True,
    )
    enqueuer = OrderedEnqueuer(data_generator,
                               use_multiprocessing=False,
                               shuffle=False)
    return enqueuer, data_generator.steps
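To consume the returned enqueuer, the standard Keras OrderedEnqueuer workflow applies; the worker count and the FLAGS/tokenizer_wrapper objects below are assumed to exist as in the snippet above:

enqueuer, steps = get_enqueuer("train.csv", 16, FLAGS, tokenizer_wrapper)
enqueuer.start(workers=4, max_queue_size=8)
batches = enqueuer.get()            # generator yielding batches in order
for _ in range(steps):
    images, targets = next(batches)
    # ... run one training step on the batch ...
enqueuer.stop()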
Example #4
def main():
    # parser config
    config_file = "./config.ini"
    cp = ConfigParser()
    cp.read(config_file)

    # default config
    output_dir = cp["DEFAULT"].get("output_dir")
    base_model_name = cp["DEFAULT"].get("base_model_name")
    class_names = cp["DEFAULT"].get("class_names").split(",")
    image_source_dir = cp["DEFAULT"].get("image_source_dir")

    # train config
    image_dimension = cp["TRAIN"].getint("image_dimension")

    # test config
    batch_size = cp["TEST"].getint("batch_size")
    test_steps = cp["TEST"].get("test_steps")
    use_best_weights = cp["TEST"].getboolean("use_best_weights")

    # parse weights file path
    output_weights_name = cp["TRAIN"].get("output_weights_name")
    weights_path = os.path.join(output_dir, output_weights_name)
    best_weights_path = os.path.join(output_dir, f"best_{output_weights_name}")

    # get test sample count
    test_counts, _ = get_sample_counts(output_dir, "test", class_names)

    # compute steps
    if test_steps == "auto":
        test_steps = int(test_counts / batch_size)
    else:
        try:
            test_steps = int(test_steps)
        except ValueError:
            raise ValueError(f"""
                test_steps: {test_steps} is invalid,
                please use 'auto' or integer.
                """)
    print(f"** test_steps: {test_steps} **")

    print("** load model **")
    if use_best_weights:
        print("** use best weights **")
        model_weights_path = best_weights_path
    else:
        print("** use last weights **")
        model_weights_path = weights_path
    model_factory = ModelFactory()
    model = model_factory.get_model(class_names,
                                    model_name=base_model_name,
                                    use_base_weights=False,
                                    weights_path=model_weights_path)

    print("** load test generator **")
    test_sequence = AugmentedImageSequence(
        dataset_csv_file=os.path.join(output_dir, "test.csv"),
        class_names=class_names,
        source_image_dir=image_source_dir,
        batch_size=batch_size,
        target_size=(image_dimension, image_dimension),
        augmenter=None,
        steps=test_steps,
        shuffle_on_epoch_end=False,
    )

    print("** make prediction **")
    logs = []
    starttime = time()
    y_hat = model.predict_generator(test_sequence, verbose=1)
    logs.append(time() - starttime)
    print("time: " + str(logs))
    y = test_sequence.get_y_true()

    test_log_path = os.path.join(output_dir, "test.log")
    print(f"** write log to {test_log_path} **")
    aurocs = []
    with open(test_log_path, "w") as f:
        for i in range(len(class_names)):
            try:
                score = roc_auc_score(y[:, i], y_hat[:, i])
                aurocs.append(score)
            except ValueError:
                score = 0
            f.write(f"{class_names[i]}: {score}\n")
        mean_auroc = np.mean(aurocs)
        f.write("-------------------------\n")
        f.write(f"mean auroc: {mean_auroc}\n")
        print(f"mean auroc: {mean_auroc}")
Example #5
# compute steps
steps = int(training_counts / batch_size)
print(f"** train_steps: {steps} **")

print("** load training generator **")

tokenizer_wrapper = TokenizerWrapper(os.path.join(data_dir, all_data_csv),
                                     class_names[0], max_sequence_length,
                                     tokenizer_vocab_size)

data_generator = AugmentedImageSequence(
    dataset_csv_file=os.path.join(data_dir, training_csv),
    class_names=class_names,
    tokenizer_wrapper=tokenizer_wrapper,
    source_image_dir=image_source_dir,
    batch_size=batch_size,
    target_size=(image_dimension, image_dimension),
    augmenter=augmenter,
    steps=steps,
    shuffle_on_epoch_end=True,
)

medical_w2v = Medical_W2V_Wrapper()
embeddings = medical_w2v.get_embeddings_matrix_for_words(
    tokenizer_wrapper.get_word_tokens_list(), tokenizer_vocab_size)
print(embeddings.shape)
del medical_w2v

encoder = CNN_Encoder(embedding_dim)
decoder = RNN_Decoder(embedding_dim, units, tokenizer_vocab_size, embeddings)
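The embeddings matrix has shape (tokenizer_vocab_size, embedding_dim); presumably RNN_Decoder seeds its embedding layer with it. A generic Keras sketch of that step, offered only as an assumption about the decoder's internals:

from tensorflow.keras.initializers import Constant
from tensorflow.keras.layers import Embedding

embedding_layer = Embedding(
    input_dim=tokenizer_vocab_size,
    output_dim=embedding_dim,
    embeddings_initializer=Constant(embeddings),  # pretrained medical word vectors
    trainable=True,
)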
Example #6
def main():
    # parser config
    config_file = "./config.ini"
    cp = ConfigParser()
    cp.read(config_file)

    # default config
    output_dir = cp["DEFAULT"].get("output_dir")
    base_model_name = cp["DEFAULT"].get("base_model_name")
    class_names = cp["DEFAULT"].get("class_names").split(",")
    image_source_dir = cp["DEFAULT"].get("image_source_dir")
    image_dimension = cp["TRAIN"].getint("image_dimension")

    # parse weights file path
    output_weights_name = cp["TRAIN"].get("output_weights_name")
    weights_path = os.path.join(output_dir, output_weights_name)
    best_weights_path = os.path.join(output_dir, f"best_{output_weights_name}")

    # CAM config
    bbox_list_file = cp["CAM"].get("bbox_list_file")
    use_best_weights = cp["CAM"].getboolean("use_best_weights")

    print("** load model **")
    if use_best_weights:
        print("** use best weights **")
        model_weights_path = best_weights_path
    else:
        print("** use last weights **")
        model_weights_path = weights_path
    model_factory = ModelFactory()
    model = model_factory.get_model(class_names,
                                    model_name=base_model_name,
                                    use_base_weights=False,
                                    weights_path=model_weights_path)

    print("read bbox list file")
    df_images = pd.read_csv(bbox_list_file, header=None, skiprows=1)
    df_images.columns = ["file_name", "label", "x", "y", "w", "h"]

    print("create a generator for loading transformed images")
    cam_sequence = AugmentedImageSequence(
        dataset_csv_file=os.path.join(output_dir, "test.csv"),
        class_names=class_names,
        source_image_dir=image_source_dir,
        batch_size=1,
        target_size=(image_dimension, image_dimension),
        augmenter=None,
        steps=1,
        shuffle_on_epoch_end=False,
    )

    image_output_dir = os.path.join(output_dir, "cam")
    if not os.path.isdir(image_output_dir):
        os.makedirs(image_output_dir)

    print("create CAM")
    df_images.apply(
        lambda g: create_cam(
            df_g=g,
            output_dir=image_output_dir,
            image_source_dir=image_source_dir,
            model=model,
            generator=cam_sequence,
            class_names=class_names,
        ),
        axis=1,
    )
Example #7
    def train(self, log_dir, show_model=True):
        """ Training classification model
        """
        ###################################################################################
        augs = augmentation()
        optimizer = Adam(lr=config.LEARNING_RATE,
                         beta_1=0.9,
                         beta_2=0.999,
                         epsilon=1e-08,
                         decay=0.0)
        early_stop = EarlyStopping(monitor="val_loss",
                                   min_delta=0,
                                   patience=20,
                                   verbose=1)
        reduce_lr = ReduceLROnPlateau(monitor='val_loss',
                                      factor=0.8,
                                      patience=10,
                                      verbose=1,
                                      mode='auto',
                                      epsilon=0.0001,
                                      cooldown=5,
                                      min_lr=0.0001)
        ###################################################################################

        TRAIN_CSV_FP, tmp_train = tool.prepare_dataset(config.TRAIN)
        VAL_CSV_FP, tmp_val = tool.prepare_dataset(config.VAL)

        SAVE_WEIGHT_FP = os.path.join(log_dir, "{epoch:03d}-{val_loss:.4f}.h5")
        AUC_LOG_FP = os.path.join(log_dir, "auc.txt")

        # Make log directory if not exist
        if not os.path.isdir(log_dir):
            os.makedirs(log_dir)

        # Training dataset
        train_sequence = AugmentedImageSequence(
            csv_fp=TRAIN_CSV_FP,
            class_names=config.CLASS_NAMES,
            batch_size=config.BATCH_SIZE,
            target_size=(config.NET_INPUT_DIM, config.NET_INPUT_DIM),
            steps=config.TRAIN_STEPS,
            augmenter=augs,
        )

        # Validation dataset
        validation_sequence = AugmentedImageSequence(
            csv_fp=VAL_CSV_FP,
            class_names=config.CLASS_NAMES,
            batch_size=config.BATCH_SIZE,
            target_size=(config.NET_INPUT_DIM, config.NET_INPUT_DIM),
            steps=config.VAL_STEPS,
            shuffle_on_epoch_end=False,
        )

        # Load classification model
        model = ModelFactory().get_classification_model(
            class_num=len(config.CLASS_NAMES),
            model_name=config.MODEL_NAME,
            base_weights="imagenet",
            input_shape=(config.NET_INPUT_DIM, config.NET_INPUT_DIM, 3))
        if show_model: print(model.summary())

        model.compile(optimizer=optimizer,
                      loss=config.LOSS,
                      metrics=config.METRICS)

        # Callbacks
        checkpoint = ModelCheckpoint(SAVE_WEIGHT_FP,
                                     save_weights_only=False,
                                     save_best_only=False,
                                     verbose=0)
        tensorboard = TensorBoard(log_dir=os.path.join(log_dir, "logs"))
        csv_logger = CSVLogger(os.path.join(log_dir, "my_logger.csv"))
        auroc = MultipleClassAUROC(AUC_LOG_FP, validation_sequence,
                                   config.CLASS_NAMES,
                                   config.GENERATOR_WORKERS)
        callbacks = [
            checkpoint, tensorboard, csv_logger, early_stop, reduce_lr, auroc
        ]

        history = model.fit_generator(generator=train_sequence,
                                      steps_per_epoch=config.TRAIN_STEPS,
                                      epochs=config.EPOCH,
                                      verbose=2,
                                      validation_data=validation_sequence,
                                      callbacks=callbacks,
                                      workers=config.GENERATOR_WORKERS,
                                      shuffle=False)

        print("\nFinished training")
        if tmp_train: os.remove(TRAIN_CSV_FP)
        if tmp_val: os.remove(VAL_CSV_FP)
Example #8
def main():
    # parser config
    config_file = "./config.ini"
    cp = ConfigParser()
    cp.read(config_file)

    # default config
    output_dir = cp["DEFAULT"].get("output_dir")
    image_source_dir = cp["DEFAULT"].get("image_source_dir")
    base_model_name = cp["DEFAULT"].get("base_model_name")
    class_names = cp["DEFAULT"].get("class_names").split(",")

    # train config
    use_base_model_weights = cp["TRAIN"].getboolean("use_base_model_weights")
    use_trained_model_weights = cp["TRAIN"].getboolean(
        "use_trained_model_weights")
    use_best_weights = cp["TRAIN"].getboolean("use_best_weights")
    output_weights_name = cp["TRAIN"].get("output_weights_name")
    epochs = cp["TRAIN"].getint("epochs")
    batch_size = cp["TRAIN"].getint("batch_size")
    initial_learning_rate = cp["TRAIN"].getfloat("initial_learning_rate")
    generator_workers = cp["TRAIN"].getint("generator_workers")
    image_dimension = cp["TRAIN"].getint("image_dimension")
    train_steps = cp["TRAIN"].get("train_steps")
    patience_reduce_lr = cp["TRAIN"].getint("patience_reduce_lr")
    min_lr = cp["TRAIN"].getfloat("min_lr")
    validation_steps = cp["TRAIN"].get("validation_steps")
    positive_weights_multiply = cp["TRAIN"].getfloat(
        "positive_weights_multiply")
    dataset_csv_dir = cp["TRAIN"].get("dataset_csv_dir")
    # if previously trained weights is used, never re-split
    if use_trained_model_weights:
        # resuming mode
        print("** use trained model weights **")
        # load training status for resuming
        training_stats_file = os.path.join(output_dir, ".training_stats.json")
        if os.path.isfile(training_stats_file):
            # TODO: add loading previous learning rate?
            with open(training_stats_file) as f:
                training_stats = json.load(f)
        else:
            training_stats = {}
    else:
        # start over
        training_stats = {}

    show_model_summary = cp["TRAIN"].getboolean("show_model_summary")
    # end parser config

    # check output_dir, create it if not exists
    if not os.path.isdir(output_dir):
        os.makedirs(output_dir)

    running_flag_file = os.path.join(output_dir, ".training.lock")
    if os.path.isfile(running_flag_file):
        raise RuntimeError("A process is running in this directory!!!")
    else:
        open(running_flag_file, "a").close()

    try:
        print(f"backup config file to {output_dir}")
        shutil.copy(config_file,
                    os.path.join(output_dir,
                                 os.path.split(config_file)[1]))

        datasets = ["train", "dev", "test"]
        for dataset in datasets:
            shutil.copy(os.path.join(dataset_csv_dir, f"{dataset}.csv"),
                        output_dir)

        # get train/dev sample counts
        train_counts, train_pos_counts = get_sample_counts(
            output_dir, "train", class_names)
        dev_counts, _ = get_sample_counts(output_dir, "dev", class_names)

        # compute steps
        if train_steps == "auto":
            train_steps = int(train_counts / batch_size)
        else:
            try:
                train_steps = int(train_steps)
            except ValueError:
                raise ValueError(f"""
                train_steps: {train_steps} is invalid,
                please use 'auto' or integer.
                """)
        print(f"** train_steps: {train_steps} **")

        if validation_steps == "auto":
            validation_steps = int(dev_counts / batch_size)
        else:
            try:
                validation_steps = int(validation_steps)
            except ValueError:
                raise ValueError(f"""
                validation_steps: {validation_steps} is invalid,
                please use 'auto' or integer.
                """)
        print(f"** validation_steps: {validation_steps} **")

        # compute class weights
        print("** compute class weights from training data **")
        class_weights = get_class_weights(
            train_counts,
            train_pos_counts,
            multiply=positive_weights_multiply,
        )
        print("** class_weights **")
        print(class_weights)

        print("** load model **")
        if use_trained_model_weights:
            if use_best_weights:
                model_weights_file = os.path.join(
                    output_dir, f"best_{output_weights_name}")
            else:
                model_weights_file = os.path.join(output_dir,
                                                  output_weights_name)
        else:
            model_weights_file = None

        model_factory = ModelFactory()
        model = model_factory.get_model(
            class_names,
            model_name=base_model_name,
            use_base_weights=use_base_model_weights,
            weights_path=model_weights_file,
            input_shape=(image_dimension, image_dimension, 3))

        if show_model_summary:
            print(model.summary())

        print("** create image generators **")
        train_sequence = AugmentedImageSequence(
            dataset_csv_file=os.path.join(output_dir, "train.csv"),
            class_names=class_names,
            source_image_dir=image_source_dir,
            batch_size=batch_size,
            target_size=(image_dimension, image_dimension),
            augmenter=augmenter,
            steps=train_steps,
        )
        validation_sequence = AugmentedImageSequence(
            dataset_csv_file=os.path.join(output_dir, "dev.csv"),
            class_names=class_names,
            source_image_dir=image_source_dir,
            batch_size=batch_size,
            target_size=(image_dimension, image_dimension),
            augmenter=augmenter,
            steps=validation_steps,
            shuffle_on_epoch_end=False,
        )

        output_weights_path = os.path.join(output_dir, output_weights_name)
        print(f"** set output weights path to: {output_weights_path} **")

        print("** check multiple gpu availability **")
        gpus = len(os.getenv("CUDA_VISIBLE_DEVICES", "1").split(","))
        if gpus > 1:
            print(f"** multi_gpu_model is used! gpus={gpus} **")
            model_train = multi_gpu_model(model, gpus)
            # FIXME: currently (Keras 2.1.2) checkpoint doesn't work with multi_gpu_model
            checkpoint = MultiGPUModelCheckpoint(
                filepath=output_weights_path,
                base_model=model,
            )
        else:
            model_train = model
            checkpoint = ModelCheckpoint(
                output_weights_path,
                save_weights_only=True,
                save_best_only=True,
                verbose=1,
            )

        print("** compile model with class weights **")
        #model.compile(RAdam(), loss='mse')
        #optimizer = Adam(lr=initial_learning_rate)
        optimizer = RAdam(lr=initial_learning_rate)
        model_train.compile(optimizer=optimizer, loss=[focal_loss])
        lookahead = Lookahead(k=5, alpha=0.5)  # Initialize Lookahead
        lookahead.inject(model_train)  # add into model
        auroc = MultipleClassAUROC(
            sequence=validation_sequence,
            class_names=class_names,
            weights_path=output_weights_path,
            stats=training_stats,
            workers=generator_workers,
        )
        callbacks = [
            checkpoint,
            TensorBoard(log_dir=os.path.join(output_dir, "logs"),
                        batch_size=batch_size),
            ReduceLROnPlateau(monitor='val_loss',
                              factor=0.1,
                              patience=patience_reduce_lr,
                              verbose=1,
                              mode="min",
                              min_lr=min_lr),
            auroc,
            EarlyStopping(monitor='val_loss',
                          mode='min',
                          verbose=1,
                          patience=25),
        ]

        print("** start training **")
        history = model_train.fit_generator(
            generator=train_sequence,
            steps_per_epoch=train_steps,
            epochs=epochs,
            validation_data=validation_sequence,
            validation_steps=validation_steps,
            callbacks=callbacks,
            class_weight=class_weights,
            workers=generator_workers,
            shuffle=False,
        )

        # dump history
        print("** dump history **")
        with open(os.path.join(output_dir, "history.pkl"), "wb") as f:
            pickle.dump({
                "history": history.history,
                "auroc": auroc.aurocs,
            }, f)
        print("** done! **")

    finally:
        os.remove(running_flag_file)
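The compile step above references a focal_loss defined elsewhere in the project; a common Keras-style binary focal loss, shown as an assumption about its likely shape rather than the project's exact implementation:

from tensorflow.keras import backend as K

def focal_loss(y_true, y_pred, gamma=2.0, alpha=0.25):
    # Binary focal loss (Lin et al., 2017) for multi-label outputs.
    y_pred = K.clip(y_pred, K.epsilon(), 1.0 - K.epsilon())
    pt = y_true * y_pred + (1.0 - y_true) * (1.0 - y_pred)
    weight = alpha * y_true + (1.0 - alpha) * (1.0 - y_true)
    return K.mean(-weight * K.pow(1.0 - pt, gamma) * K.log(pt), axis=-1)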
Example #9
def main(fold, gender_train, gender_test):
    # parser config
    config_file = 'config_file.ini'
    cp = ConfigParser()
    cp.read(config_file)

    root_output_dir = cp["DEFAULT"].get("output_dir")

    # default config
    print(root_output_dir, gender_train)
    output_dir = root_output_dir + gender_train + '/Fold_' + str(fold) + '/output/'

    base_model_name = cp["DEFAULT"].get("base_model_name")
    class_names = cp["DEFAULT"].get("class_names").split(",")
    image_source_dir = cp["DEFAULT"].get("image_source_dir")

    # train config
    image_dimension = cp["TRAIN"].getint("image_dimension")

    # test config
    batch_size = cp["TEST"].getint("batch_size")
    test_steps = cp["TEST"].get("test_steps")
    use_best_weights = cp["TEST"].getboolean("use_best_weights")

    # parse weights file path
    output_weights_name = cp["TRAIN"].get("output_weights_name")
    weights_path = os.path.join(output_dir, output_weights_name)
    best_weights_path = os.path.join(output_dir, f"best_{output_weights_name}")

    # get test sample count
    test_counts, _ = get_sample_counts(root_output_dir + gender_train + '/Fold_' + str(fold), str(gender_test), class_names)

    # compute steps
    if test_steps == "auto":
        test_steps = int(test_counts / batch_size)
    else:
        try:
            test_steps = int(test_steps)
        except ValueError:
            raise ValueError(f"""
                test_steps: {test_steps} is invalid,
                please use 'auto' or integer.
                """)
    print(f"** test_steps: {test_steps} **")

    print("** load model **")
    if use_best_weights:
        print("** use best weights **")
        model_weights_path = best_weights_path
    else:
        print("** use last weights **")
        model_weights_path = weights_path
    model_factory = ModelFactory()
    model = model_factory.get_model(
        class_names,
        model_name=base_model_name,
        use_base_weights=False,
        weights_path=model_weights_path)

    print("** load test generator **")
    test_sequence = AugmentedImageSequence(
        dataset_csv_file=os.path.join(root_output_dir + gender_train + '/Fold_' + str(fold), str(gender_test) + ".csv"),
        class_names=class_names,
        source_image_dir=image_source_dir,
        batch_size=batch_size,
        target_size=(image_dimension, image_dimension),
        augmenter=None,
        steps=test_steps,
        shuffle_on_epoch_end=False,
    )

    print("** make prediction **")

    y_hat = model.predict_generator(test_sequence, verbose=1)
    y = test_sequence.get_y_true()

    y_pred_dir = output_dir + "y_pred_run_" + str(fold) + "_train" + gender_train + "_" + gender_test + ".csv"
    y_true_dir = output_dir + "y_true_run_" + str(fold) + "_train" + gender_train + "_" + gender_test + ".csv"

    np.savetxt(y_pred_dir, y_hat, delimiter=",")
    np.savetxt(y_true_dir, y, delimiter=",")
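A possible follow-up step (not part of the snippet above) is to reload the saved CSVs from the paths built earlier and compute per-class AUROC, mirroring the evaluation in Example #4:

import numpy as np
from sklearn.metrics import roc_auc_score

y_hat = np.loadtxt(y_pred_dir, delimiter=",")
y = np.loadtxt(y_true_dir, delimiter=",")
aurocs = [roc_auc_score(y[:, i], y_hat[:, i]) for i in range(y.shape[1])]
print("mean auroc:", np.mean(aurocs))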
Example #10
def main():
    # parser config
    print("### Input configuration file ### \n")
    config_file = "./config.ini"
    cp = ConfigParser()
    cp.read(config_file)

    # default config
    print("### Read default configurations ### \n")
    output_dir = cp["DEFAULT"].get("output_dir")
    image_train_source_dir = cp["DEFAULT"].get("image_train_source_dir")
    image_valid_source_dir = cp["DEFAULT"].get("image_valid_source_dir")
    base_model_name = cp["DEFAULT"].get("base_model_name")
    class_names = cp["DEFAULT"].get("class_names").split(",")

    # train config
    print("### Reading training configurations ### \n")
    use_base_model_weights = cp["TRAIN"].getboolean("use_base_model_weights")
    use_trained_model_weights = cp["TRAIN"].getboolean(
        "use_trained_model_weights")
    use_best_weights = cp["TRAIN"].getboolean("use_best_weights")
    output_weights_name = cp["TRAIN"].get("output_weights_name")
    epochs = cp["TRAIN"].getint("epochs")
    batch_size = cp["TRAIN"].getint("batch_size")
    initial_learning_rate = cp["TRAIN"].getfloat("initial_learning_rate")
    generator_workers = cp["TRAIN"].getint("generator_workers")
    image_dimension = cp["TRAIN"].getint("image_dimension")
    patience_reduce_lr = cp["TRAIN"].getint("patience_reduce_lr")
    min_lr = cp["TRAIN"].getfloat("min_lr")
    positive_weights_multiply = cp["TRAIN"].getfloat(
        "positive_weights_multiply")
    dataset_csv_dir = cp["TRAIN"].get("dataset_csv_dir")

    # if previously trained weights is used, never re-split
    if use_trained_model_weights:
        # resuming mode
        print("** use trained model weights **")
        # load training status for resuming
        training_stats_file = os.path.join(output_dir, ".training_stats.json")
        if os.path.isfile(training_stats_file):
            # TODO: add loading previous learning rate?
            with open(training_stats_file) as f:
                training_stats = json.load(f)
        else:
            training_stats = {}
    else:
        # start over
        training_stats = {}

    print("### Show model summary ### \n")
    show_model_summary = cp["TRAIN"].getboolean("show_model_summary")
    # end parser config

    print("### Check output directory ### \n")
    # check output_dir, create it if not exists
    if not os.path.isdir(output_dir):
        os.makedirs(output_dir)

    running_flag_file = os.path.join(output_dir, ".training.lock")
    if os.path.isfile(running_flag_file):
        raise RuntimeError("A process is running in this directory!!!")
    else:
        open(running_flag_file, "a").close()
    try:
        print("### Backup config file to {} \n".format(output_dir))
        shutil.copy(config_file,
                    os.path.join(output_dir,
                                 os.path.split(config_file)[1]))

        datasets = ["train", "valid"]
        for dataset in datasets:
            shutil.copy(os.path.join(dataset_csv_dir, dataset + '.csv'),
                        output_dir)

        # get train/dev sample counts
        print("### Get class frequencies ### \n")
        train_counts, train_pos_counts = get_sample_counts(
            output_dir, "train", class_names)
        dev_counts, _ = get_sample_counts(output_dir, "valid", class_names)

        # compute steps
        print("### Compute step size ### \n")
        train_steps = int(train_counts / batch_size)
        validation_steps = int(dev_counts / batch_size)

        # compute class weights
        print("### Class weights ### \n")
        class_weights = get_class_weights(
            train_counts,
            train_pos_counts,
            multiply=positive_weights_multiply,
        )
        print("### Class_weights ### \n")
        print(class_weights)
        print("\n")

        print("### Loading model ### \n")
        if use_trained_model_weights:
            if use_best_weights:
                model_weights_file = os.path.join(
                    output_dir, "best_" + output_weights_name)
            else:
                model_weights_file = os.path.join(output_dir,
                                                  output_weights_name)
        else:
            model_weights_file = None

        model_factory = ModelFactory()
        print("### Get model ### \n")
        model = model_factory.get_model(
            class_names,
            model_name=base_model_name,
            use_base_weights=use_base_model_weights,
            weights_path=model_weights_file,
            input_shape=(image_dimension, image_dimension, 3))

        print("Show model summary? {}".format(show_model_summary))
        if show_model_summary:
            print(model.summary())

        print("\n ### Create image generators ### \n")
        train_sequence = AugmentedImageSequence(
            dataset_csv_file=os.path.join(output_dir, "train.csv"),
            class_names=class_names,
            source_image_dir=image_train_source_dir,
            batch_size=batch_size,
            target_size=(image_dimension, image_dimension),
            augmenter=augmenter,
            steps=train_steps,
        )
        validation_sequence = AugmentedImageSequence(
            dataset_csv_file=os.path.join(output_dir, "valid.csv"),
            class_names=class_names,
            source_image_dir=image_valid_source_dir,
            batch_size=batch_size,
            target_size=(image_dimension, image_dimension),
            augmenter=augmenter,
            steps=validation_steps,
            shuffle_on_epoch_end=False,
        )

        output_weights_path = os.path.join(output_dir, output_weights_name)
        print("### Set output weights path to {} ### \n".format(
            output_weights_path))

        print("### Check multiple gpu availability ### \n")
        #gpus = len(os.getenv("CUDA_VISIBLE_DEVICES").split(","))
        if False:  ## Turn off multiple gpu model
            print("### Multi_gpu_model is used! gpus={} ###".format(gpus))
            model_train = multi_gpu_model(model, gpus)
            # FIXME: currently (Keras 2.1.2) checkpoint doesn't work with multi_gpu_model
            checkpoint = MultiGPUModelCheckpoint(
                filepath=output_weights_path,
                base_model=model,
            )
        else:
            model_train = model
            checkpoint = ModelCheckpoint(
                output_weights_path,
                save_weights_only=True,
                save_best_only=True,
                verbose=1,
            )

        print("### Compile model with class weights ### \n")
        optimizer = Adam(lr=initial_learning_rate)
        model_train.compile(optimizer=optimizer, loss="binary_crossentropy")
        auroc = MultipleClassAUROC(
            sequence=validation_sequence,
            class_names=class_names,
            weights_path=output_weights_path,
            stats=training_stats,
            workers=generator_workers,
        )
        callbacks = [
            checkpoint,
            TensorBoard(log_dir=os.path.join(output_dir, "logs")),
            ReduceLROnPlateau(monitor='val_loss',
                              factor=0.1,
                              patience=patience_reduce_lr,
                              verbose=1,
                              mode="min",
                              min_lr=min_lr),
            auroc,
        ]

        print("### Start training ### \n")

        history = model_train.fit(
            train_sequence,
            steps_per_epoch=train_steps,
            epochs=epochs,
            validation_data=validation_sequence,
            validation_steps=validation_steps,
            callbacks=callbacks,
            class_weight=class_weights,
            workers=generator_workers,
            shuffle=False,
        )

        # dump history
        print("### Dump history ### \n")
        with open(os.path.join(output_dir, "history.pkl"), "wb") as f:
            pickle.dump({
                "history": history.history,
                "auroc": auroc.aurocs,
            }, f)
        print("** done! **")

    finally:
        os.remove(running_flag_file)
Example #11
def main():
    # parser config
    config_file = "./config.ini"
    cp = ConfigParser()
    cp.read(config_file)

    # default config
    output_dir = cp["DEFAULT"].get("output_dir")
    if not os.path.exists(output_dir):
        os.mkdir(output_dir)
    base_model_name = cp["DEFAULT"].get("base_model_name")
    class_names = cp["DEFAULT"].get("class_names").split(",")
    image_source_dir = cp["DEFAULT"].get("image_source_dir")
    image_dimension = cp["TRAIN"].getint("image_dimension")
    batch_size = cp["TEST"].getint("batch_size")
    test_steps = cp["TEST"].get("test_steps")

    # parse weights file path
    output_weights_name = cp["TRAIN"].get("output_weights_name")
    weights_path = os.path.join(output_dir, output_weights_name)
    data_set_dir = cp["TRAIN"].get("dataset_csv_dir")
    input_weights_name = cp["TRAIN"].get("input_weights_name")
    best_weights_path = os.path.join(data_set_dir,
                                     f"best_{input_weights_name}")

    # get test sample count
    test_counts, _ = get_sample_counts(data_set_dir, "test", class_names)

    # compute steps
    if test_steps == "auto":
        test_steps = int(test_counts / batch_size)
    else:
        try:
            test_steps = int(test_steps)
        except ValueError:
            raise ValueError(f"""
                test_steps: {test_steps} is invalid,
                please use 'auto' or integer.
                """)

    # CAM config
    bbox_list_file = cp["CAM"].get("bbox_list_file")
    use_best_weights = cp["CAM"].getboolean("use_best_weights")

    print("** load model **")
    if use_best_weights:
        print("** use best weights **")
        model_weights_path = best_weights_path
    else:
        print("** use last weights **")
        model_weights_path = weights_path
    model_factory = ModelFactory()
    model = model_factory.get_model(class_names,
                                    model_name=base_model_name,
                                    use_base_weights=False,
                                    weights_path=model_weights_path)

    print("read bbox list file")
    df_images = pd.read_csv(bbox_list_file, header=None, skiprows=1)
    df_images.columns = ["file_name", "label", "x", "y", "w", "h"]

    print("create a generator for loading transformed images")
    cam_sequence = AugmentedImageSequence(
        dataset_csv_file=os.path.join(data_set_dir, "test.csv"),
        class_names=class_names,
        source_image_dir=image_source_dir,
        batch_size=batch_size,
        target_size=(image_dimension, image_dimension),
        augmenter=None,
        steps=test_steps,
        shuffle_on_epoch_end=False,
    )

    image_output_dir = os.path.join(output_dir, "cam")
    if not os.path.isdir(image_output_dir):
        os.makedirs(image_output_dir)

    print("create CAM")
    df_images.apply(
        lambda g: create_cam(
            df_g=g,
            output_dir=image_output_dir,
            image_source_dir=image_source_dir,
            model=model,
            generator=cam_sequence,
            class_names=class_names,
        ),
        axis=1,
    )
Example #12
def main():
    # parser config
    config_file = "./config.ini"
    cp = ConfigParser()
    cp.read(config_file)

    # default config
    weights_dir = cp["DEFAULT"].get("weights_dir")
    base_model_name = cp["DEFAULT"].get("base_model_name")
    chexnet_class_names = cp["DEFAULT"].get("chexnet_class_names").split(",")
    class_names = cp["DEFAULT"].get("class_names").split(",")

    image_source_dir = cp["DEFAULT"].get("image_source_dir")
    data_dir = cp["DEFAULT"].get("data_dir")

    # train config
    image_dimension = cp["TRAIN"].getint("image_dimension")

    # test config
    batch_size = cp["TEST"].getint("batch_size")
    test_steps = cp["TEST"].get("test_steps")

    # parse weights file path
    output_weights_name = cp["TRAIN"].get("output_weights_name")
    weights_path = os.path.join(weights_dir, output_weights_name)

    # get test sample count
    test_counts, _ = get_sample_counts(data_dir, "all_data", class_names)  # unpack (total, per-class) counts as in the other examples
    # compute steps
    if test_steps == "auto":
        test_steps = int(test_counts / batch_size)
    else:
        try:
            test_steps = int(test_steps)
        except ValueError:
            raise ValueError(f"""
                test_steps: {test_steps} is invalid,
                please use 'auto' or integer.
                """)
    print(f"** test_steps: {test_steps} **")

    print("** load model **")

    model_weights_path = weights_path
    model_factory = ModelFactory()
    model = model_factory.get_model(chexnet_class_names,
                                    model_name=base_model_name,
                                    use_base_weights=False,
                                    weights_path=model_weights_path,
                                    pop_last_layer=True)
    model.summary()
    print("** load test generator **")
    test_sequence = AugmentedImageSequence(
        dataset_csv_file=os.path.join(data_dir, "all_data.csv"),
        class_names=class_names,
        source_image_dir=image_source_dir,
        batch_size=batch_size,
        target_size=(image_dimension, image_dimension),
        augmenter=None,
        steps=test_steps,
        shuffle_on_epoch_end=False,
    )

    print("** make prediction **")
    image, y = test_sequence[4]  # indexing the Sequence fetches a single batch

    y_hat = model.predict(image)
    # y_hat = model.predict_generator(test_sequence, verbose=1)
    # y = test_sequence.get_y_true()

    print(y_hat.shape)
Example #13
def main():
    # parser config
    config_file = "./config.ini"
    cp = ConfigParser()
    cp.read(config_file)

    # default config
    output_dir = cp["DEFAULT"].get("output_dir")
    if not os.path.exists(output_dir):
        os.mkdir(output_dir)
    base_model_name = cp["DEFAULT"].get("base_model_name")
    class_names = cp["DEFAULT"].get("class_names").split(",")
    image_source_dir = cp["DEFAULT"].get("image_source_dir")
    data_set_dir = cp["TRAIN"].get("dataset_csv_dir")

    # train config
    image_dimension = cp["TRAIN"].getint("image_dimension")

    # test config
    batch_size = cp["TEST"].getint("batch_size")
    test_steps = cp["TEST"].get("test_steps")
    use_best_weights = cp["TEST"].getboolean("use_best_weights")

    # parse weights file path
    input_weights_name = cp["TRAIN"].get("input_weights_name")
    weights_path = os.path.join(data_set_dir, input_weights_name)
    best_weights_path = os.path.join(data_set_dir,
                                     "best_{}".format(input_weights_name))

    # get test sample count
    test_counts, _ = get_sample_counts(data_set_dir, "test", class_names)

    # compute steps
    if test_steps == "auto":
        test_steps = int(test_counts / batch_size)
    else:
        try:
            test_steps = int(test_steps)
        except ValueError:
            raise ValueError("""
                test_steps: {} is invalid,
                please use 'auto' or integer.
                """.format(test_steps))
    print("** test_steps: {} **".format(test_steps))

    print("** load model **")
    if use_best_weights:
        print("** use best weights **")
        model_weights_path = best_weights_path
    else:
        print("** use last weights **")
        model_weights_path = weights_path
    model_factory = ModelFactory()
    model = model_factory.get_model(class_names,
                                    model_name=base_model_name,
                                    use_base_weights=False,
                                    weights_path=model_weights_path)

    print("** load test generator **")
    test_sequence = AugmentedImageSequence(
        #         dataset_csv_file=os.path.join(output_dir, "dev.csv"),
        dataset_csv_file=os.path.join(data_set_dir, "test.csv"),
        #         dataset_csv_file=os.path.join(data_set_dir, "MIMIC_dataset.csv"),
        class_names=class_names,
        source_image_dir=image_source_dir,
        batch_size=batch_size,
        target_size=(image_dimension, image_dimension),
        augmenter=None,
        steps=test_steps,
        shuffle_on_epoch_end=False,
    )
    #     test_sequence.dataset_df.to_csv(os.path.join(output_dir, 'test_data_frame.csv'))

    print("** make prediction **")
    y_hat = model.predict_generator(test_sequence, verbose=1)
    y = test_sequence.get_y_true()
    #     np.savetxt(os.path.join(output_dir, 'y_hat_1205_default_weight.txt'), y_hat)
    np.savetxt(os.path.join(output_dir, 'y_0430.txt'), y)

    test_log_path = os.path.join(output_dir, "test.log")
    print("** write log to {} **".format(test_log_path))
    aurocs = []
    with open(test_log_path, "w") as f:
        for i in range(len(class_names)):
            try:
                score = roc_auc_score(y[:, i], y_hat[:, i])
                aurocs.append(score)
            except ValueError:
                score = 0
            f.write("{}: {}\n".format(class_names[i], score))
        mean_auroc = np.mean(aurocs)
        f.write("-------------------------\n")
        f.write("mean auroc: {}\n".format(mean_auroc))
        print("mean auroc: {}".format(mean_auroc))