def main():
    # parser config
    config_file = "./config.ini"
    cp = ConfigParser()
    cp.read(config_file)

    # default config
    output_dir = cp["DEFAULT"].get("output_dir")
    base_model_name = cp["DEFAULT"].get("base_model_name")
    class_names = cp["DEFAULT"].get("class_names").split(",")
    image_source_dir = cp["DEFAULT"].get("image_source_dir")

    # train config
    image_dimension = cp["TRAIN"].getint("image_dimension")

    # test config
    batch_size = cp["TEST"].getint("batch_size")
    test_steps = cp["TEST"].get("test_steps")
    use_best_weights = cp["TEST"].getboolean("use_best_weights")

    # parse weights file path
    output_weights_name = cp["TRAIN"].get("output_weights_name")
    weights_path = os.path.join(output_dir, output_weights_name)
    best_weights_path = os.path.join(output_dir, f"best_{output_weights_name}")

    # get test sample count
    test_counts, _ = get_sample_counts(output_dir, "test", class_names)

    # compute steps
    if test_steps == "auto":
        test_steps = int(test_counts / batch_size)
    else:
        try:
            test_steps = int(test_steps)
        except ValueError:
            raise ValueError(f"""
                test_steps: {test_steps} is invalid,
                please use 'auto' or integer.
                """)
    print(f"** test_steps: {test_steps} **")

    print("** load model **")
    if use_best_weights:
        print("** use best weights **")
        model_weights_path = best_weights_path
    else:
        print("** use last weights **")
        model_weights_path = weights_path
    model_factory = ModelFactory()
    print("** weight path is {} **".format(model_weights_path))
    model = model_factory.get_model(
        class_names,
        model_name=base_model_name,
        use_base_weights=False,
        weights_path=model_weights_path)

    print("** load test generator **")
    test_ids = PatientInfo(
        'stage_1_test_images/',
        train=False,
        img_dir='stage_1_test_images/*',  # or image_source_dir + '*'
    )
def load_ori_model(config_file="./config.ini"):
    cp = ConfigParser()
    cp.read(config_file)

    # default config
    output_dir = cp["DEFAULT"].get("output_dir")
    base_model_name = cp["DEFAULT"].get("base_model_name")
    class_names = cp["DEFAULT"].get("class_names").split(",")
    image_source_dir = cp["DEFAULT"].get("image_source_dir")

    # train config
    image_dimension = cp["TRAIN"].getint("image_dimension")

    # test config
    batch_size = cp["TEST"].getint("batch_size")
    test_steps = cp["TEST"].get("test_steps")
    use_best_weights = cp["TEST"].getboolean("use_best_weights")

    # parse weights file path
    output_weights_name = cp["TRAIN"].get("output_weights_name")
    weights_path = os.path.join(output_dir, output_weights_name)
    best_weights_path = os.path.join(output_dir, f"best_{output_weights_name}")

    # get test sample count
    test_counts, _ = get_sample_counts(output_dir, "test", class_names)

    # compute steps
    if test_steps == "auto":
        test_steps = int(test_counts / batch_size)
    else:
        try:
            test_steps = int(test_steps)
        except ValueError:
            raise ValueError(f"""
                test_steps: {test_steps} is invalid,
                please use 'auto' or integer.
                """)
    print(f"** test_steps: {test_steps} **")

    print("** load model **")
    if use_best_weights:
        print("** use best weights **")
        model_weights_path = best_weights_path
    else:
        print("** use last weights **")
        model_weights_path = weights_path
    # load CheXNet model:
    model_factory = ModelFactory()
    model = model_factory.get_model(class_names,
                                    model_name=base_model_name,
                                    use_base_weights=False,
                                    weights_path=model_weights_path)
    return model
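A minimal usage sketch for load_ori_model above (not part of the original snippet), assuming the surrounding module and its helpers (ConfigParser, ModelFactory, get_sample_counts) are importable and that config.ini uses the usual image_dimension of 224; the dummy batch below is illustrative only.

import numpy as np

# Illustrative only: load the CheXNet-style model and run a dummy forward pass.
# The 1x224x224x3 input shape is an assumption (image_dimension=224 in config.ini),
# not a value taken from this snippet.
model = load_ori_model("./config.ini")
dummy_batch = np.zeros((1, 224, 224, 3), dtype=np.float32)
scores = model.predict(dummy_batch)  # one sigmoid score per class name
print(scores.shape)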
Example 3
def main():
    # parser config
    config_file = "./config.ini"
    cp = ConfigParser()
    cp.read(config_file)

    # default config
    output_dir = cp["DEFAULT"].get("output_dir")
    image_source_dir = cp["DEFAULT"].get("image_source_dir")
    train_patient_count = cp["DEFAULT"].getint("train_patient_count")
    dev_patient_count = cp["DEFAULT"].getint("dev_patient_count")
    data_entry_file = cp["DEFAULT"].get("data_entry_file")
    class_names = cp["DEFAULT"].get("class_names").split(",")

    # train config
    use_base_model_weights = cp["TRAIN"].getboolean("use_base_model_weights")
    use_trained_model_weights = cp["TRAIN"].getboolean(
        "use_trained_model_weights")
    use_best_weights = cp["TRAIN"].getboolean("use_best_weights")
    output_weights_name = cp["TRAIN"].get("output_weights_name")
    epochs = cp["TRAIN"].getint("epochs")
    batch_size = cp["TRAIN"].getint("batch_size")
    initial_learning_rate = cp["TRAIN"].getfloat("initial_learning_rate")
    train_steps = cp["TRAIN"].get("train_steps")
    patience_reduce_lr = cp["TRAIN"].getint("patience_reduce_lr")
    validation_steps = cp["TRAIN"].get("validation_steps")
    positive_weights_multiply = cp["TRAIN"].getfloat(
        "positive_weights_multiply")
    use_class_balancing = cp["TRAIN"].getboolean("use_class_balancing")
    use_default_split = cp["TRAIN"].getboolean("use_default_split")
    # if previously trained weights is used, never re-split
    if use_trained_model_weights:
        # resuming mode
        print(
            "** use trained model weights, turn on use_skip_split automatically **"
        )
        use_skip_split = True
        # load training status for resuming
        training_stats_file = os.path.join(output_dir, ".training_stats.json")
        if os.path.isfile(training_stats_file):
            # TODO: add loading previous learning rate?
            training_stats = json.load(open(training_stats_file))
        else:
            training_stats = {}
    else:
        # start over
        use_skip_split = cp["TRAIN"].getboolean("use_skip_split ")
        training_stats = {}

    split_dataset_random_state = cp["TRAIN"].getint(
        "split_dataset_random_state")
    show_model_summary = cp["TRAIN"].getboolean("show_model_summary")
    # end parser config

    # check output_dir, create it if not exists
    if not os.path.isdir(output_dir):
        os.makedirs(output_dir)

    running_flag_file = os.path.join(output_dir, ".training.lock")
    if os.path.isfile(running_flag_file):
        raise RuntimeError("A process is running in this directory!!!")
    else:
        open(running_flag_file, "a").close()

    try:
        print(f"backup config file to {output_dir}")
        shutil.copy(config_file,
                    os.path.join(output_dir,
                                 os.path.split(config_file)[1]))

        # split train/dev/test
        if use_default_split:
            datasets = ["train", "dev", "test"]
            for dataset in datasets:
                shutil.copy(f"./data/default_split/{dataset}.csv", output_dir)
        elif not use_skip_split:
            print("** split dataset **")
            split_data(
                data_entry_file,
                class_names,
                train_patient_count,
                dev_patient_count,
                output_dir,
                split_dataset_random_state,
            )

        # get train/dev sample counts
        train_counts, train_pos_counts = get_sample_counts(
            output_dir, "train", class_names)
        dev_counts, _ = get_sample_counts(output_dir, "dev", class_names)

        # compute steps
        if train_steps == "auto":
            train_steps = int(train_counts / batch_size)
        else:
            try:
                train_steps = int(train_steps)
            except ValueError:
                raise ValueError(f"""
                train_steps: {train_steps} is invalid,
                please use 'auto' or integer.
                """)
        print(f"** train_steps: {train_steps} **")

        if validation_steps == "auto":
            validation_steps = int(dev_counts / batch_size)
        else:
            try:
                validation_steps = int(validation_steps)
            except ValueError:
                raise ValueError(f"""
                validation_steps: {validation_steps} is invalid,
                please use 'auto' or integer.
                """)
        print(f"** validation_steps: {validation_steps} **")

        # compute class weights
        print("** compute class weights from training data **")
        class_weights = get_class_weights(
            train_counts,
            train_pos_counts,
            multiply=positive_weights_multiply,
            use_class_balancing=use_class_balancing)
        print("** class_weights **")
        for c, w in class_weights.items():
            print(f"  {c}: {w}")

        print("** load model **")
        if use_base_model_weights:
            base_model_weights_file = cp["TRAIN"].get(
                "base_model_weights_file")
        else:
            base_model_weights_file = None
        if use_trained_model_weights:
            if use_best_weights:
                model_weights_file = os.path.join(
                    output_dir, f"best_{output_weights_name}")
            else:
                model_weights_file = os.path.join(output_dir,
                                                  output_weights_name)
        else:
            model_weights_file = None
        model = get_model(class_names, base_model_weights_file,
                          model_weights_file)
        if show_model_summary:
            model.summary()  # prints the summary; summary() itself returns None

        # recreate symlink folder for ImageDataGenerator
        symlink_dir_name = "image_links"
        create_symlink(image_source_dir, output_dir, symlink_dir_name)

        print("** create image generators **")
        train_data_path = f"{output_dir}/{symlink_dir_name}/train/"
        train_generator = custom_image_generator(
            ImageDataGenerator(horizontal_flip=True, rescale=1. / 255),
            train_data_path,
            batch_size=batch_size,
            class_names=class_names,
        )
        dev_data_path = f"{output_dir}/{symlink_dir_name}/dev/"
        dev_generator = custom_image_generator(
            ImageDataGenerator(horizontal_flip=True, rescale=1. / 255),
            dev_data_path,
            batch_size=batch_size,
            class_names=class_names,
        )

        output_weights_path = os.path.join(output_dir, output_weights_name)
        print(f"** set output weights path to: {output_weights_path} **")

        print("** check multiple gpu availability **")
        gpus = len(os.getenv("CUDA_VISIBLE_DEVICES", "1").split(","))
        if gpus > 1:
            print(f"** multi_gpu_model is used! gpus={gpus} **")
            model_train = multi_gpu_model(model, gpus)
            # FIXME: currently (Keras 2.1.2) checkpoint doesn't work with multi_gpu_model
            checkpoint = MultiGPUModelCheckpoint(
                filepath=output_weights_path,
                base_model=model,
            )
        else:
            model_train = model
            checkpoint = ModelCheckpoint(output_weights_path)

        print("** compile model with class weights **")
        optimizer = Adam(lr=initial_learning_rate)
        model_train.compile(optimizer=optimizer, loss="binary_crossentropy")
        auroc = MultipleClassAUROC(
            generator=dev_generator,
            steps=validation_steps,
            class_names=class_names,
            weights_path=output_weights_path,
            stats=training_stats,
        )
        callbacks = [
            checkpoint,
            TensorBoard(log_dir=os.path.join(output_dir, "logs"),
                        batch_size=batch_size),
            ReduceLROnPlateau(monitor='val_loss',
                              factor=0.1,
                              patience=patience_reduce_lr,
                              verbose=1),
            auroc,
        ]

        print("** training start **")
        history = model_train.fit_generator(
            generator=train_generator,
            steps_per_epoch=train_steps,
            epochs=epochs,
            validation_data=dev_generator,
            validation_steps=validation_steps,
            callbacks=callbacks,
            class_weight=class_weights,
        )

        # dump history
        print("** dump history **")
        with open(os.path.join(output_dir, "history.pkl"), "wb") as f:
            pickle.dump({
                "history": history.history,
                "auroc": auroc.aurocs,
            }, f)
        print("** done! **")

    finally:
        os.remove(running_flag_file)
Example 4
def train_rsna_clf(train_data=None, validation_data=None, remove_running=True):
    # parser config
    config_file = "./config.ini"
    cp = ConfigParser()
    cp.read(config_file)

    # default config
    output_dir = cp["DEFAULT"].get("output_dir")
    image_source_dir = cp["DEFAULT"].get("image_source_dir")
    base_model_name = cp["DEFAULT"].get("base_model_name")
    class_names1 = cp["DEFAULT"].get("class_names1").split(",")
    class_names2 = cp["DEFAULT"].get("class_names2").split(",")

    # train config
    train_image_source_dir = cp["TRAIN"].get("train_image_source_dir")
    train_class_info = cp["TRAIN"].get("train_class_info")
    train_box_info = cp["TRAIN"].get("train_box_info")
    use_base_model_weights = cp["TRAIN"].getboolean("use_base_model_weights")
    use_trained_model_weights = cp["TRAIN"].getboolean(
        "use_trained_model_weights")
    use_best_weights = cp["TRAIN"].getboolean("use_best_weights")
    input_weights_name = cp["TRAIN"].get("input_weights_name")
    output_weights_name = cp["TRAIN"].get("output_weights_name")
    epochs = cp["TRAIN"].getint("epochs")
    batch_size = cp["TRAIN"].getint("batch_size")
    initial_learning_rate = cp["TRAIN"].getfloat("initial_learning_rate")
    generator_workers = cp["TRAIN"].getint("generator_workers")
    image_dimension = cp["TRAIN"].getint("image_dimension")
    train_steps = cp["TRAIN"].get("train_steps")
    patience_reduce_lr = cp["TRAIN"].getint("patience_reduce_lr")
    min_lr = cp["TRAIN"].getfloat("min_lr")
    validation_steps = cp["TRAIN"].get("validation_steps")
    positive_weights_multiply = cp["TRAIN"].getfloat(
        "positive_weights_multiply")
    dataset_csv_dir = cp["TRAIN"].get("dataset_csv_dir")
    # if previously trained weights is used, never re-split
    if use_trained_model_weights:
        # resuming mode
        print("** use trained model weights **")
        # load training status for resuming
        training_stats_file = os.path.join(output_dir, ".training_stats.json")
        if os.path.isfile(training_stats_file):
            # TODO: add loading previous learning rate?
            training_stats = json.load(open(training_stats_file))
        else:
            training_stats = {}
    else:
        # start over
        training_stats = {}

    show_model_summary = cp["TRAIN"].getboolean("show_model_summary")
    # end parser config

    # check output_dir, create it if not exists
    if not os.path.isdir(output_dir):
        os.makedirs(output_dir)

    running_flag_file = os.path.join(output_dir, ".training.lock")
    if os.path.isfile(running_flag_file):
        if remove_running:
            os.remove(running_flag_file)
            open(running_flag_file, "a").close()
        else:
            raise RuntimeError("A process is running in this directory!!!")
    else:
        open(running_flag_file, "a").close()

    try:
        print(f"backup config file to {output_dir}")
        shutil.copy(config_file,
                    os.path.join(output_dir,
                                 os.path.split(config_file)[1]))

        # get train/dev sample counts
        train_counts, train_pos_counts = get_sample_counts(
            train_data.df, class_names2)
        validation_counts, _ = get_sample_counts(validation_data.df,
                                                 class_names2)

        # compute steps
        if train_steps == "auto":
            train_steps = int(train_counts / batch_size)
        else:
            try:
                train_steps = int(train_steps)
            except ValueError:
                raise ValueError(f"""
                train_steps: {train_steps} is invalid,
                please use 'auto' or integer.
                """)
        print(f"** train_steps: {train_steps} **")

        if validation_steps == "auto":
            validation_steps = int(validation_counts / batch_size)
        else:
            try:
                validation_steps = int(validation_steps)
            except ValueError:
                raise ValueError(f"""
                validation_steps: {validation_steps} is invalid,
                please use 'auto' or integer.
                """)
        print(f"** validation_steps: {validation_steps} **")

        # compute class weights
        print("** compute class weights from training data **")
        class_weights = get_class_weights(
            train_counts,
            train_pos_counts,
            multiply=positive_weights_multiply,
        )
        print("** class_weights **")
        print(class_weights)

        print("** load model **")
        if use_trained_model_weights:
            if use_best_weights:
                model_weights_file = os.path.join(
                    output_dir, f"best_{input_weights_name}")
            else:
                model_weights_file = os.path.join(output_dir,
                                                  input_weights_name)
        else:
            model_weights_file = None

        model_factory = ModelFactory()
        model = model_factory.get_model(
            class_names1,
            model_name=base_model_name,
            use_base_weights=use_base_model_weights,
            weights_path=model_weights_file,
            input_shape=(image_dimension, image_dimension, 3))
        model = modify_last_layer(model, class_names2)

        if show_model_summary:
            model.summary()  # prints the summary; summary() itself returns None

        train_sq = AugmentedLabelSequence_clf(
            train_data,
            batch_size=batch_size,
            target_size=(image_dimension, image_dimension),
            augmenter=augmenter,
            steps=train_steps,
        )
        validation_sq = AugmentedLabelSequence_clf(
            validation_data,
            batch_size=batch_size,
            target_size=(image_dimension, image_dimension),
            augmenter=augmenter,
            steps=validation_steps,
        )

        output_weights_path = os.path.join(output_dir, output_weights_name)
        print(f"** set output weights path to: {output_weights_path} **")

        print("** check multiple gpu availability **")
        gpus = len(os.getenv("CUDA_VISIBLE_DEVICES", "1").split(","))
        if gpus > 1:
            print(f"** multi_gpu_model is used! gpus={gpus} **")
            model_train = multi_gpu_model(model, gpus)
            # FIXME: currently (Keras 2.1.2) checkpoint doesn't work with multi_gpu_model
            checkpoint = MultiGPUModelCheckpoint(
                filepath=output_weights_path,
                base_model=model,
            )
        else:
            model_train = model
            checkpoint = ModelCheckpoint(
                output_weights_path,
                save_weights_only=True,
                save_best_only=True,
                verbose=1,
            )

        print("** compile model with class weights **")
        optimizer = Adam(lr=initial_learning_rate)
        model_train.compile(optimizer=optimizer, loss="binary_crossentropy")
        auroc = MultipleClassAUROC(
            sequence=validation_sq,
            class_names=class_names2,
            weights_path=output_weights_path,
            stats=training_stats,
            workers=generator_workers,
        )
        callbacks = [
            checkpoint,
            TensorBoard(log_dir=os.path.join(output_dir, "logs"),
                        batch_size=batch_size),
            ReduceLROnPlateau(monitor='val_loss',
                              factor=0.1,
                              patience=patience_reduce_lr,
                              verbose=1,
                              mode="min",
                              min_lr=min_lr),
            auroc,
        ]

        print("** start training **")
        history = model_train.fit_generator(
            generator=train_sq,
            steps_per_epoch=train_steps,
            epochs=epochs,
            validation_data=validation_sq,
            validation_steps=validation_steps,
            callbacks=callbacks,
            class_weight=class_weights,
            workers=generator_workers,
            shuffle=False,
        )

        # dump history
        print("** dump history **")
        with open(os.path.join(output_dir, "history.pkl"), "wb") as f:
            pickle.dump({
                "history": history.history,
                "auroc": auroc.aurocs,
            }, f)
        print("** done! **")

    finally:
        os.remove(running_flag_file)
Example 5
def main(fold, gender_train, gender_test):
    # parser config
    config_file = 'config_file.ini'
    cp = ConfigParser()
    cp.read(config_file)

    root_output_dir= cp["DEFAULT"].get("output_dir") 

    # default config 
    print(root_output_dir,gender_train)   
    output_dir= root_output_dir + gender_train+'/Fold_'+str(fold)+'/output/'

    base_model_name = cp["DEFAULT"].get("base_model_name")
    class_names = cp["DEFAULT"].get("class_names").split(",")
    image_source_dir = cp["DEFAULT"].get("image_source_dir")

    # train config
    image_dimension = cp["TRAIN"].getint("image_dimension")

    # test config
    batch_size = cp["TEST"].getint("batch_size")
    test_steps = cp["TEST"].get("test_steps")
    use_best_weights = cp["TEST"].getboolean("use_best_weights")

    # parse weights file path
    output_weights_name = cp["TRAIN"].get("output_weights_name")
    weights_path = os.path.join(output_dir, output_weights_name)
    best_weights_path = os.path.join(output_dir, f"best_{output_weights_name}")

    # get test sample count
    test_counts, _ = get_sample_counts(
        root_output_dir + gender_train + '/Fold_' + str(fold),
        str(gender_test), class_names)

    # compute steps
    if test_steps == "auto":
        test_steps = int(test_counts / batch_size)
    else:
        try:
            test_steps = int(test_steps)
        except ValueError:
            raise ValueError(f"""
                test_steps: {test_steps} is invalid,
                please use 'auto' or integer.
                """)
    print(f"** test_steps: {test_steps} **")

    print("** load model **")
    if use_best_weights:
        print("** use best weights **")
        model_weights_path = best_weights_path
    else:
        print("** use last weights **")
        model_weights_path = weights_path
    model_factory = ModelFactory()
    model = model_factory.get_model(
        class_names,
        model_name=base_model_name,
        use_base_weights=False,
        weights_path=model_weights_path)

    print("** load test generator **")
    test_sequence = AugmentedImageSequence(
        dataset_csv_file=os.path.join(
            root_output_dir + gender_train + '/Fold_' + str(fold),
            str(gender_test) + ".csv"),
        class_names=class_names,
        source_image_dir=image_source_dir,
        batch_size=batch_size,
        target_size=(image_dimension, image_dimension),
        augmenter=None,
        steps=test_steps,
        shuffle_on_epoch_end=False,
    )

    print("** make prediction **")

    y_hat = model.predict_generator(test_sequence, verbose=1)
    y = test_sequence.get_y_true()

    y_pred_dir = output_dir + "y_pred_run_" + str(fold)+"_train"+gender_train+"_"+gender_test+ ".csv"
    y_true_dir = output_dir + "y_true_run_" + str(fold)+"_train"+gender_train+"_"+gender_test+ ".csv"


    np.savetxt(y_pred_dir, y_hat, delimiter=",")
    np.savetxt(y_true_dir, y, delimiter=",")
Example 6
def cxpl(model_dir, data_dir, results_subdir, random_seed, resolution):
    np.random.seed(random_seed)
    tf.set_random_seed(np.random.randint(1 << 31))
    session_conf = tf.ConfigProto(intra_op_parallelism_threads=1, inter_op_parallelism_threads=1)
    sess = tf.Session(graph=tf.get_default_graph(), config=session_conf)
    set_session(sess)

    # parser config
    config_file = model_dir+ "/config.ini"
    print("Config File Path:", config_file,flush=True)
    assert os.path.isfile(config_file)
    cp = ConfigParser()
    cp.read(config_file)

    output_dir = os.path.join(results_subdir, "classification_results/test")
    print("Output Directory:", output_dir,flush=True)
    if not os.path.isdir(output_dir):
        os.makedirs(output_dir)


    # default config
    image_dimension = cp["TRAIN"].getint("image_dimension")
    gan_resolution = resolution
    batch_size = cp["TEST"].getint("batch_size")
    use_best_weights = cp["TEST"].getboolean("use_best_weights")

    if use_best_weights:
        print("** Using BEST weights",flush=True)
        model_weights_path = os.path.join(results_subdir, "classification_results/train/best_weights.h5")
    else:
        print("** Using LAST weights",flush=True)
        model_weights_path = os.path.join(results_subdir, "classification_results/train/weights.h5")

    print("** DenseNet Input Resolution:", image_dimension, flush=True)
    print("** GAN Image Resolution:", gan_resolution, flush=True)

    # get test sample count
    test_dir = os.path.join(results_subdir, "inference/test")
    shutil.copy(test_dir+"/test.csv", output_dir)

    # Get class names 
    class_names = get_class_names(output_dir,"test")

    tfrecord_dir_te = os.path.join(data_dir, "test")
    test_counts, _ = get_sample_counts(output_dir, "test", class_names)
    
    # get indicies (all of csv file for validation)
    print("** test counts:", test_counts, flush=True)

    # compute steps
    test_steps = int(np.floor(test_counts / batch_size))
    print("** test_steps:", test_steps, flush=True)

    log2_record = int(np.log2(gan_resolution))
    record_file_ending = "*"+ np.str(log2_record)+ ".tfrecords"
    print("** resolution ", gan_resolution, " corresponds to ", record_file_ending, " TFRecord file.", flush=True)

    # Get Model
    # ------------------------------------
    input_shape=(image_dimension, image_dimension, 3)
    img_input = Input(shape=input_shape)

    base_model = DenseNet121(
        include_top = False, 
        weights = None,
        input_tensor = img_input,
        input_shape = input_shape,
        pooling = "avg")

    x = base_model.output
    predictions = Dense(len(class_names), activation="sigmoid", name="predictions")(x)
    model = Model(inputs=img_input, outputs = predictions)

    print(" ** load model from:", model_weights_path, flush=True)
    model.load_weights(model_weights_path)
    # ------------------------------------

    print("** load test generator **", flush=True)
    test_seq = TFWrapper(
            tfrecord_dir=tfrecord_dir_te,
            record_file_endings = record_file_ending,
            batch_size = batch_size,
            model_target_size = (image_dimension, image_dimension),
            steps = None,
            augment=False,
            shuffle=False,
            prefetch=True,
            repeat=False)

    print("** make prediction **", flush=True)
    test_seq.initialise() 
    x_all, y_all = test_seq.get_all_test_data()
    print("X-Test  Shape:", x_all.shape,flush=True)
    print("Y-Test  Shape:", y_all.shape,flush=True)

    print("----------------------------------------", flush=True)
    print("Test Model AUROC", flush=True)
    y_pred = model.predict(x_all)
    current_auroc = []
    for i in range(len(class_names)):
        try:
            score = roc_auc_score(y_all[:, i], y_pred[:, i])
        except ValueError:
            score = 0
        current_auroc.append(score)
        print(i+1,class_names[i],": ", score, flush=True)
    mean_auroc = np.mean(current_auroc)
    print("Mean auroc: ", mean_auroc,flush=True)

    print("----------------------------------------", flush=True)
    downscale_factor  = 8
    num_models_to_use = 3
    num_test_images   = 100
    print("Number of Models to use:", num_models_to_use, flush=True)
    print("Number of Test images:", num_test_images, flush=True)
    x_tr, y_tr = x_all[num_test_images:], y_all[num_test_images:]
    x_te, y_te = x_all[0:num_test_images], y_all[0:num_test_images]

    downsample_factors = (downscale_factor,downscale_factor)
    print("Downsample Factors:", downsample_factors,flush=True)
    model_builder = UNetModelBuilder(downsample_factors, num_layers=2, num_units=8, activation="relu",
                                     p_dropout=0.0, verbose=0, batch_size=32, learning_rate=0.001)
    print("Model build done.",flush=True)
    masking_operation = ZeroMasking()
    loss = categorical_crossentropy

    explainer = CXPlain(model, model_builder, masking_operation, loss, 
                    num_models=num_models_to_use, downsample_factors=downsample_factors, flatten_for_explained_model=False)
    print("Explainer build done.",flush=True)

    explainer.fit(x_tr, y_tr)
    print("Explainer fit done.",flush=True)

    try:
        attr, conf = explainer.explain(x_te, confidence_level=0.80)
        np.save(output_dir+"/x_cxpl.npy", x_te)
        np.save(output_dir+"/y_cxpl.npy", y_te)
        np.save(output_dir+"/attr.npy", attr)
        np.save(output_dir+"/conf.npy", conf)
        print("Explainer explain done and saved.",flush=True)
    except Exception as ef:
        print(ef, flush=True)
Example 7
from tokenizer_wrapper import TokenizerWrapper
import matplotlib.pyplot as plt

config_file = "./config.ini"
cp = ConfigParser()
cp.read(config_file)
class_names = cp["Captioning_Model"].get("class_names").split(",")
image_source_dir = cp["Data"].get("image_source_dir")
data_dir = cp["Data"].get("data_dir")
all_data_csv = cp['Data'].get('all_data_csv')
training_csv = cp['Data'].get('training_set_csv')

image_dimension = cp["Chexnet_Default"].getint("image_dimension")

batch_size = cp["Captioning_Model_Train"].getint("batch_size")
training_counts = get_sample_counts(data_dir, training_csv)
EPOCHS = cp["Captioning_Model_Train"].getint("epochs")

max_sequence_length = cp['Captioning_Model'].getint('max_sequence_length')
tokenizer_vocab_size = cp['Captioning_Model'].getint('tokenizer_vocab_size')

BUFFER_SIZE = cp["Captioning_Model"].getint("buffer_size")
embedding_dim = cp["Captioning_Model"].getint("embedding_dim")
units = cp["Captioning_Model"].getint("units")

checkpoint_path = cp["Captioning_Model_Train"].get("ckpt_path")
continue_from_last_ckpt = cp["Captioning_Model_Train"].getboolean(
    "continue_from_last_ckpt")
# compute steps
steps = int(training_counts / batch_size)
print(f"** train_steps: {steps} **")
Example 8
def test(model_dir, data_dir, results_subdir, random_seed, resolution):
    np.random.seed(random_seed)
    tf.set_random_seed(np.random.randint(1 << 31))
    session_conf = tf.ConfigProto(intra_op_parallelism_threads=1,
                                  inter_op_parallelism_threads=1)
    sess = tf.Session(graph=tf.get_default_graph(), config=session_conf)
    set_session(sess)

    # parser config
    config_file = model_dir + "/config.ini"
    print("Config File Path:", config_file, flush=True)
    assert os.path.isfile(config_file)
    cp = ConfigParser()
    cp.read(config_file)

    # default config
    image_dimension = cp["TRAIN"].getint("image_dimension")
    batch_size = cp["TEST"].getint("batch_size")
    use_best_weights = cp["TEST"].getboolean("use_best_weights")

    print("** DenseNet input resolution:", image_dimension, flush=True)
    print("** GAN image resolution:", resolution, flush=True)

    log2_record = int(np.log2(resolution))
    record_file_ending = "*" + np.str(log2_record) + ".tfrecords"
    print("** Resolution ",
          resolution,
          " corresponds to ",
          record_file_ending,
          " TFRecord file.",
          flush=True)

    output_dir = os.path.join(
        results_subdir,
        "classification_results_res_" + np.str(2**log2_record) + "/test")
    print("Output Directory:", output_dir, flush=True)
    if not os.path.isdir(output_dir):
        os.makedirs(output_dir)

    if use_best_weights:
        print("** Using BEST weights", flush=True)
        model_weights_path = os.path.join(
            results_subdir, "classification_results_res_" +
            np.str(2**log2_record) + "/train/best_weights.h5")
    else:
        print("** Using LAST weights", flush=True)
        model_weights_path = os.path.join(
            results_subdir, "classification_results_res_" +
            np.str(2**log2_record) + "/train/weights.h5")

    # get test sample count
    shutil.copy(results_subdir[:-4] + "/test/test.csv", output_dir)
    tfrecord_dir_te = os.path.join(data_dir, "test")
    class_names = get_class_names(output_dir, "test")

    test_counts, _ = get_sample_counts(output_dir, "test", class_names)

    # get indicies (all of csv file for validation)
    print("** test counts:", test_counts, flush=True)

    # compute steps
    test_steps = int(np.floor(test_counts / batch_size))
    print("** test_steps:", test_steps, flush=True)

    # Get Model
    # ------------------------------------
    input_shape = (image_dimension, image_dimension, 3)
    img_input = Input(shape=input_shape)

    base_model = DenseNet121(include_top=False,
                             weights=None,
                             input_tensor=img_input,
                             input_shape=input_shape,
                             pooling="avg")

    x = base_model.output
    predictions = Dense(len(class_names),
                        activation="sigmoid",
                        name="predictions")(x)
    model = Model(inputs=img_input, outputs=predictions)

    print(" ** load model from:", model_weights_path, flush=True)
    model.load_weights(model_weights_path)
    # ------------------------------------

    print("** load test generator **", flush=True)
    test_seq = TFWrapper(tfrecord_dir=tfrecord_dir_te,
                         record_file_endings=record_file_ending,
                         batch_size=batch_size,
                         model_target_size=(image_dimension, image_dimension),
                         steps=None,
                         augment=False,
                         shuffle=False,
                         prefetch=True,
                         repeat=False)

    print("** make prediction **", flush=True)
    test_seq.initialise()  #MAKE SURE REINIT
    y_hat = model.predict_generator(test_seq, workers=0)
    test_seq.initialise()  #MAKE SURE REINIT
    y = test_seq.get_y_true()
    test_log_path = os.path.join(output_dir, "test.log")
    print("** write log to", test_log_path, flush=True)
    aurocs = []
    tpr_fpr_thr = []
    with open(test_log_path, "w") as f:
        for i in range(len(class_names)):
            tpr, fpr, thr = roc_curve(y[:, i], y_hat[:, i])
            roc_rates = np.concatenate(
                (fpr.reshape(-1, 1), tpr.reshape(-1, 1), thr.reshape(-1, 1)),
                axis=1)
            tpr_fpr_thr.append(roc_rates)
            try:
                score = roc_auc_score(y[:, i], y_hat[:, i])
                if score < 0.5:
                    score = 1. - score
                aurocs.append(score)
            except ValueError:
                score = 0
            f.write(str(class_names[i]) + " : " + str(score) + "\n")
        mean_auroc = np.mean(aurocs)
        f.write("-------------------------\n")
        f.write("mean auroc: " + np.str(mean_auroc) + "\n")
        print("mean auroc:", mean_auroc, flush=True)

    roc_char = np.asarray(tpr_fpr_thr)
    np.save(output_dir + "/roc_char.npy", roc_char)
    print("Saved ROC data (TPR, FPR, THR) to:",
          output_dir + "/roc_char.npy",
          flush=True)
Example 9
def nn(model_dir, data_dir, results_subdir, random_seed, resolution):
    np.random.seed(random_seed)
    tf.set_random_seed(np.random.randint(1 << 31))
    session_conf = tf.ConfigProto(intra_op_parallelism_threads=1,
                                  inter_op_parallelism_threads=1)
    sess = tf.Session(graph=tf.get_default_graph(), config=session_conf)
    set_session(sess)

    # parser config
    config_file = model_dir + "/config.ini"
    print("Config File Path:", config_file, flush=True)
    assert os.path.isfile(config_file)
    cp = ConfigParser()
    cp.read(config_file)

    output_dir = os.path.join(results_subdir, "classification_results/nn")
    train_outdir = os.path.join(results_subdir, "classification_results/train")
    print("Output Directory:", output_dir, flush=True)

    # default config
    image_dimension = cp["TRAIN"].getint("image_dimension")
    gan_resolution = resolution
    batch_size = cp["TEST"].getint("batch_size")
    use_best_weights = cp["TEST"].getboolean("use_best_weights")

    if use_best_weights:
        print("** Using BEST weights", flush=True)
        model_weights_path = os.path.join(
            results_subdir, "classification_results/nn/best_weights.h5")
    else:
        print("** Using LAST weights", flush=True)
        model_weights_path = os.path.join(
            results_subdir, "classification_results/nn/weights.h5")

    print("** DenseNet Input Resolution:", image_dimension, flush=True)
    print("** GAN Image Resolution:", gan_resolution, flush=True)

    tfrecord_dir_tr = os.path.join(data_dir, "train")
    tfrecord_dir_te = os.path.join(results_subdir, "inference/test")
    # Get class names
    class_names = get_class_names(train_outdir, "train")
    counts, _ = get_sample_counts(train_outdir, "train", class_names)

    # get indicies (all of csv file for validation)
    print("** counts:", counts, flush=True)
    # compute steps
    train_steps = int(np.floor(counts / batch_size))
    print("** t_steps:", train_steps, flush=True)

    log2_record = int(np.log2(gan_resolution))
    record_file_ending = "*" + np.str(log2_record) + ".tfrecords"
    print("** resolution ",
          gan_resolution,
          " corresponds to ",
          record_file_ending,
          " TFRecord file.",
          flush=True)

    # Get Model
    # ------------------------------------
    input_shape = (image_dimension, image_dimension, 3)
    img_input = Input(shape=input_shape)

    base_model = DenseNet121(include_top=False,
                             weights=None,
                             input_tensor=img_input,
                             input_shape=input_shape,
                             pooling="avg")

    x = base_model.output
    predictions = Dense(len(class_names),
                        activation="sigmoid",
                        name="predictions")(x)
    model = Model(inputs=img_input, outputs=predictions)

    print(" ** load model from:", model_weights_path, flush=True)
    model.load_weights(model_weights_path)
    # ------------------------------------
    # Extract representation layer output:
    layer_name = 'avg_pool'
    intermediate_layer_model = Model(
        inputs=model.input, outputs=model.get_layer(layer_name).output)

    #intermediate_output = intermediate_layer_model(data)

    def renorm_and_save_npy(x, name):
        imagenet_mean = np.array([0.485, 0.456, 0.406])
        imagenet_std = np.array([0.229, 0.224, 0.225])
        x = x * imagenet_std + imagenet_mean
        save_path = output_dir + "/" + name + ".npy"
        np.save(save_path, x)
        print("** save npy images under: ", save_path, flush=True)

    def save_array(x, name):
        save_path = output_dir + "/" + name + ".npy"
        np.save(save_path, x)
        print("** save npy images under: ", save_path, flush=True)

    # Load test Inference images
    test_bs = 200
    print("** load inference images, save random n=", test_bs, flush=True)
    test_seq = TFWrapper(tfrecord_dir=tfrecord_dir_te,
                         record_file_endings=record_file_ending,
                         batch_size=test_bs,
                         model_target_size=(image_dimension, image_dimension),
                         steps=None,
                         augment=False,
                         shuffle=False,
                         prefetch=True,
                         repeat=False)
    test_seq.initialise()
    x, x_orig, x_label = test_seq.__getitem__(0)
    renorm_and_save_npy(x, name="real_inf_224")
    renorm_and_save_npy(x_orig, name="real_inf_256")
    save_array(x_label, name="real_inf_label")

    print("** Compute inf latent rep **", flush=True)
    x_latrep = intermediate_layer_model.predict(x)
    print("** Latent Size: ", x_latrep.shape, flush=True)

    # Load train Inference images
    print("** load train generator **", flush=True)
    train_seq = TFWrapper(tfrecord_dir=tfrecord_dir_tr,
                          record_file_endings=record_file_ending,
                          batch_size=batch_size,
                          model_target_size=(image_dimension, image_dimension),
                          steps=train_steps,
                          augment=False,
                          shuffle=False,
                          prefetch=True,
                          repeat=False)
    train_seq.initialise()
    print("** generator loaded **", flush=True)
    # Loop through training data and compute minimums
    H, H_orig = image_dimension, 256
    W, W_orig = image_dimension, 256
    D = 3
    BS = batch_size
    n = test_bs
    LS = x_latrep.shape[1]
    cur_nn_imgs = np.zeros((n, H, W, D))  #Current nn images
    cur_nn_imgs_orig = np.zeros((n, H_orig, W_orig, D))
    cur_nn_labels = np.zeros((n, x_label.shape[1]))
    cur_cos_min = np.ones((n, 1)) * 10000  #Current minimum cosine distance

    time_old = time.time()
    print("** Start nn determination **", flush=True)
    for i in range(0, train_steps):
        # Get batch images and lat. reps
        y, y_orig, y_label = train_seq.__getitem__(i)  #[BS,H,W,D]
        y_latrep = intermediate_layer_model.predict(y)  #[BS,LS]

        #y_reshaped = y.reshape([BS,1,H,W,D])   #Reshape for tiling [BS,1,H,W,D]
        #y_orig_reshaped = y_orig.reshape([BS,1,H_orig,W_orig,D])
        #y_label_reshaped = y_label.reshape([BS,1,x_label.shape[1]])

        y_tiled = np.tile(y, [1, n, 1, 1, 1])  #Tile: [BS,n,H,W,D]
        y_orig_tiled = np.tile(y_orig, [1, n, 1, 1, 1])
        y_label_tiled = np.tile(y_label, [1, n, 1])

        cosdis = np.ones(
            (n, BS)) - cosine_similarity(x_latrep, y_latrep)  #[n,BS]
        argmin_cosdis = np.argmin(cosdis, axis=1)  #[n,1]
        min_cosdis = np.min(cosdis, axis=1).reshape(n, 1)  #[n,1]

        min_y = y_tiled[:, argmin_cosdis].reshape(
            n, H, W, D)  #[n,H,W,D]: Min. Cosdis for each inf_img from batch
        min_y_orig = y_orig_tiled[:,
                                  argmin_cosdis].reshape(n, H_orig, W_orig, D)
        min_ylabel = y_label_tiled[:, argmin_cosdis].reshape(
            (n, x_label.shape[1]))

        t = np.where(
            min_cosdis < cur_cos_min
        )  #Indicies where min. cosdistance is smaller then current

        cur_cos_min[t[0]] = min_cosdis[t[0]]  #Update current cosdis minima
        cur_nn_imgs[t[0]] = min_y[t[0]]  #Update current nn images
        cur_nn_imgs_orig[t[0]] = min_y_orig[t[0]]
        cur_nn_labels[t[0]] = min_ylabel[t[0]]

        if i % 100 == 0 and i > 0:
            time_new = time.time()
            print("Iteration ", i, "/", train_steps,
                  "took %.2f seconds" % (time_new - time_old))
            time_old = time_new
            print("Current mean cos-distance:", np.mean(cur_cos_min))

    print("** Loop Done **", flush=True)
    renorm_and_save_npy(cur_nn_imgs, name="nn_images_224")
    renorm_and_save_npy(cur_nn_imgs_orig, name="nn_images_256")
    save_array(cur_cos_min, name="cosdistance_minimum")
    save_array(cur_nn_labels, name="nn_labels")
Example 10
def main():
    # parser config
    config_file = "./config.ini"
    cp = ConfigParser()
    cp.read(config_file)

    # default config
    weights_dir = cp["DEFAULT"].get("weights_dir")
    base_model_name = cp["DEFAULT"].get("base_model_name")
    chexnet_class_names = cp["DEFAULT"].get("chexnet_class_names").split(",")
    class_names = cp["DEFAULT"].get("class_names").split(",")

    image_source_dir = cp["DEFAULT"].get("image_source_dir")
    data_dir = cp["DEFAULT"].get("data_dir")

    # train config
    image_dimension = cp["TRAIN"].getint("image_dimension")

    # test config
    batch_size = cp["TEST"].getint("batch_size")
    test_steps = cp["TEST"].get("test_steps")

    # parse weights file path
    output_weights_name = cp["TRAIN"].get("output_weights_name")
    weights_path = os.path.join(weights_dir, output_weights_name)

    # get test sample count
    test_counts = get_sample_counts(data_dir, "all_data", class_names)
    # compute steps
    if test_steps == "auto":
        test_steps = int(test_counts / batch_size)
    else:
        try:
            test_steps = int(test_steps)
        except ValueError:
            raise ValueError(f"""
                test_steps: {test_steps} is invalid,
                please use 'auto' or integer.
                """)
    print(f"** test_steps: {test_steps} **")

    print("** load model **")

    model_weights_path = weights_path
    model_factory = ModelFactory()
    model = model_factory.get_model(chexnet_class_names,
                                    model_name=base_model_name,
                                    use_base_weights=False,
                                    weights_path=model_weights_path,
                                    pop_last_layer=True)
    model.summary()
    print("** load test generator **")
    test_sequence = AugmentedImageSequence(
        dataset_csv_file=os.path.join(data_dir, "all_data.csv"),
        class_names=class_names,
        source_image_dir=image_source_dir,
        batch_size=batch_size,
        target_size=(image_dimension, image_dimension),
        augmenter=None,
        steps=test_steps,
        shuffle_on_epoch_end=False,
    )

    print("** make prediction **")
    image, y = test_sequence.__getitem__(4)

    y_hat = model.predict(image)
    # y_hat = model.predict_generator(test_sequence, verbose=1)
    # y = test_sequence.get_y_true()

    print(y_hat.shape)
Example 11
def main():

    # Instantiate the config parser.
    # As long as a configuration file exists in the local directory of this
    # training script, it will be used to configure the run.

    # TODO : Add a README for the configuration file used to configure this training cycle
    config_file = "./sample_config.ini"
    cp = ConfigParser()
    cp.read(config_file)

    # set a bunch of default config
    output_directory = cp["DEFAULT"].get("output_directory")
    image_source_directory = cp["DEFAULT"].get("image_source_directory")
    # TODO tie in base model name for model verioning for SavedModels
    base_model_name = cp["DEFAULT"].get("base_model_name")
    # Class names are passed in as array within the configuration script
    class_names = cp["DEFAULT"].get("class_names").split(",")
    model_version = cp["DEFAULT"].get("model_version")
    tensorboard_log_dir = cp["DEFAULT"].get("tensorboard_log_dir")

    # training configuration
    # See sample_config.ini for explanation of all of the parameters
    use_base_model_weights = cp["TRAIN"].getboolean("use_base_model_weights")
    use_trained_model_weights = cp["TRAIN"].getboolean("use_trained_model_weights")
    use_best_weights = cp["TRAIN"].getboolean("use_best_weights")
    output_weights_name = cp["TRAIN"].get("output_weights_name")
    epochs = cp["TRAIN"].getint("epochs")
    batch_size = cp["TRAIN"].getint("batch_size")
    initial_learning_rate = cp["TRAIN"].getfloat("initial_learning_rate")
    generator_workers = cp["TRAIN"].getint("generator_workers")
    image_dimension = cp["TRAIN"].getint("image_dimension")
    train_steps = cp["TRAIN"].get("train_steps")
    patience_reduce_lr = cp["TRAIN"].getint("patience_reduce_lr")
    min_learning_rate = cp["TRAIN"].getfloat("min_learning_rate")
    validation_steps = cp["TRAIN"].get("validation_steps")
    positive_weights_multiply = cp["TRAIN"].getfloat("positive_weights_multiply")
    dataset_csv_dir = cp["TRAIN"].get("dataset_csv_dir")

    if use_trained_model_weights:
        print("<<< Using pretrained model weights! >>>")
        training_stats_file = os.path.join(output_directory, ".training_stats.json")
        if os.path.isfile(training_stats_file):
            training_stats = json.load(open(training_stats_file))
        else: 
            training_stats = {}
    else:
        # start over again
        training_stats = {}
    
    show_model_summary = cp["TRAIN"].getboolean("show_model_summary")
    # end configuration parser

    utility.check_create_output_dir(output_directory)
    utility.create_tensorboard_log_dir(tensorboard_log_dir)
    
    try:

        utility.backup_config_file(output_directory, config_file)

        datasets = ["train", "validation", "test"]
        for dataset in datasets:
            shutil.copy(os.path.join(dataset_csv_dir, f"{dataset}.csv"), output_directory)

        train_counts, train_pos_counts = utility.get_sample_counts(output_directory, "train", class_names)
        validation_counts, _ = utility.get_sample_counts(output_directory, "validation", class_names)

        # compute steps

        # train steps var defined in config ini file
        # if set to standard auto, normalize train_steps
        # wrt batch_size, otherwise take user input
        if train_steps == "auto":
            train_steps = int(train_counts / batch_size)
        else:
            try:
                train_steps = int(train_steps)
            except ValueError:
                raise ValueError(f"""
                train_steps : {train_steps} is invalid,
                please use 'auto' or specify an integer.
                """)
        print(f" <<< train_steps : {train_steps} >>>")

        if validation_steps == "auto":
            validation_steps = int(validation_counts / batch_size)
        else:
            try:
                validation_steps = int(validation_steps)
            except ValueError:
                raise ValueError(f"""
                validation_steps : {validation_steps} is invalid,
                please use 'auto' or specify an integer.
                """)
        print(f" <<< validation_steps : {validation_steps} >>>")

        # class weights
        class_weights = utility.get_class_weights(
            train_counts,
            train_pos_counts,
            multiply=positive_weights_multiply,
        )
        print(f"class_weights : {class_weights}")

        print(" <<< Loading Model >>>")
        if use_trained_model_weights:
            if use_best_weights:
                model_weights_file = os.path.join(output_directory, f"best_{output_weights_name}")
            else:
                model_weights_file = os.path.join(output_directory, output_weights_name)
        else:
            model_weights_file = None
        
        model_factory = modelwrap.Models()
        model = model_factory.get_model(
            class_names=class_names,
            use_base_weights=use_base_model_weights,
            weights_path=model_weights_file,
            input_shape=(image_dimension,image_dimension,3)
        )

        if show_model_summary:
            model.summary()  # prints the summary; summary() itself returns None
        
        print(" <<< Creating Image Generators >>> ")
        train_sequence = generator.AugmentedImageSequence(
            dataset_csv_file=os.path.join(output_directory, "train.csv"),
            class_names=class_names,
            source_image_dir=image_source_directory,
            batch_size=batch_size,
            target_size=(image_dimension, image_dimension),
            augmenter=utility.augmenter(),
            steps=train_steps,
        )
        
        validation_sequence = generator.AugmentedImageSequence(
            dataset_csv_file=os.path.join(output_directory, "validation.csv"),
            class_names=class_names,
            source_image_dir=image_source_directory,
            batch_size=batch_size,
            target_size=(image_dimension, image_dimension),
            augmenter=utility.augmenter(),
            steps=validation_steps,
            shuffle_on_epoch_end=False,
        )

        output_weights_path = os.path.join(output_directory, output_weights_name)
        print(f" <<< Set Output Weights Path to : {output_weights_path}")

        # TODO implement multi-gpu support

        model_train = model
        checkpoint = ModelCheckpoint(
            output_weights_path,
            save_weights_only=True,
            save_best_only=True,
            verbose=1
        )

        print(" <<< Compile model and class weights >>>")
        optimizer = Adam(lr=initial_learning_rate)
        model_train.compile(
            optimizer=optimizer, loss="binary_crossentropy"
        )

        auroc = MultiClassAUROC(
            sequence=validation_sequence,
            class_names=class_names,
            weights_path=output_weights_path,
            stats=training_stats,
            workers=generator_workers,
        )

        # serving_checkpoint = ServingCheckpoint(
        #     output_directory=output_directory,
        #     model=model,
        #     model_version=model_version,
        # )

        callbacks = [
            checkpoint,
            TensorBoard(log_dir=os.path.join(tensorboard_log_dir), batch_size=batch_size),
            ReduceLROnPlateau(monitor='val_loss', factor=0.1, patience=patience_reduce_lr,
                            verbose=1, mode="min", min_lr=min_learning_rate),
            auroc
            ]

        print(" <<< Starting Model Training >>> ")
        history = model_train.fit_generator(
            generator=train_sequence,
            steps_per_epoch=train_steps,
            epochs=epochs,
            validation_data=validation_sequence,
            validation_steps=validation_steps,
            callbacks=callbacks,
            class_weight=class_weights,
            workers=generator_workers,
            shuffle=False,
        )

        model_class_weights = tf.convert_to_tensor(model_train.layers[-1].get_weights()[0], tf.float32)
        model_final_conv_layer = utility.get_output_layer(model_train, "bn")
        

        tensor_info_input = tf.saved_model.utils.build_tensor_info(model_train.input)
        tensor_info_output = tf.saved_model.utils.build_tensor_info(model_train.output)
        tensor_info_class_weights = tf.saved_model.utils.build_tensor_info(model_class_weights)
        tensor_info_final_conv_layer = tf.saved_model.utils.build_tensor_info(model_final_conv_layer.output)

        # export model for serving
        export_base_path = output_directory
        export_path = os.path.join(
            tf.compat.as_bytes(export_base_path),
            tf.compat.as_bytes(model_version)
        )

        prediction_signature = (
            tf.saved_model.signature_def_utils.build_signature_def(
                inputs={'images': tensor_info_input},
                outputs={'prediction': tensor_info_output, 'class_weights': tensor_info_class_weights, 'final_conv_layer': tensor_info_final_conv_layer},
                method_name=tf.saved_model.signature_constants.PREDICT_METHOD_NAME
            )
        )

        print(f" <<< Exporting Trained Model to {export_path} >>> ")
        builder = tf.saved_model.builder.SavedModelBuilder(export_path)

        with K.get_session() as sess:
            builder.add_meta_graph_and_variables(
                sess=sess, 
                tags=[tf.saved_model.tag_constants.SERVING],
                signature_def_map={'predict': prediction_signature}
            )
            builder.save()

        print(" <<< Export History >>>")
        with open(os.path.join(output_directory, "history.pkl"), "wb") as f:
            pickle.dump({
                "history": history.history,
                "auroc": auroc.aurocs,
            }, f)
        print(" <<< Export Complete! >>> ")

    finally:
        utility.delete_training_lock(output_directory)
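A short, hedged sketch (assuming TensorFlow 1.x, matching the tf.saved_model.builder calls above) showing how the exported SavedModel could be loaded back and its 'predict' signature inspected; the export path below is a placeholder standing in for output_directory/model_version from the config.

import tensorflow as tf

# Hypothetical sanity check of the SavedModel exported by main() above,
# using the same TF1-style tf.saved_model API.
export_path = "./experiments/1"  # placeholder, not a path from this snippet

with tf.Session(graph=tf.Graph()) as sess:
    meta_graph = tf.saved_model.loader.load(
        sess, [tf.saved_model.tag_constants.SERVING], export_path)
    # The builder above registered the signature under the key 'predict'.
    print(meta_graph.signature_def["predict"].inputs)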
Example 12
def main():
    # parser config
    config_file = "./config.ini"
    cp = ConfigParser()
    cp.read(config_file)

    # default config
    output_dir = cp["DEFAULT"].get("output_dir")
    if not os.path.exists(output_dir):
        os.mkdir(output_dir)
    base_model_name = cp["DEFAULT"].get("base_model_name")
    class_names = cp["DEFAULT"].get("class_names").split(",")
    image_source_dir = cp["DEFAULT"].get("image_source_dir")
    data_set_dir = cp["TRAIN"].get("dataset_csv_dir")

    # train config
    image_dimension = cp["TRAIN"].getint("image_dimension")

    # test config
    batch_size = cp["TEST"].getint("batch_size")
    test_steps = cp["TEST"].get("test_steps")
    use_best_weights = cp["TEST"].getboolean("use_best_weights")

    # parse weights file path
    input_weights_name = cp["TRAIN"].get("input_weights_name")
    weights_path = os.path.join(data_set_dir, input_weights_name)
    best_weights_path = os.path.join(data_set_dir,
                                     "best_{}".format(input_weights_name))

    # get test sample count
    test_counts, _ = get_sample_counts(data_set_dir, "test", class_names)

    # compute steps
    if test_steps == "auto":
        test_steps = int(test_counts / batch_size)
    else:
        try:
            test_steps = int(test_steps)
        except ValueError:
            raise ValueError("""
                test_steps: {} is invalid,
                please use 'auto' or integer.
                """.format(test_steps))
    print("** test_steps: {} **".format(test_steps))

    print("** load model **")
    if use_best_weights:
        print("** use best weights **")
        model_weights_path = best_weights_path
    else:
        print("** use last weights **")
        model_weights_path = weights_path
    model_factory = ModelFactory()
    model = model_factory.get_model(class_names,
                                    model_name=base_model_name,
                                    use_base_weights=False,
                                    weights_path=model_weights_path)

    print("** load test generator **")
    test_sequence = AugmentedImageSequence(
        #         dataset_csv_file=os.path.join(output_dir, "dev.csv"),
        dataset_csv_file=os.path.join(data_set_dir, "test.csv"),
        #         dataset_csv_file=os.path.join(data_set_dir, "MIMIC_dataset.csv"),
        class_names=class_names,
        source_image_dir=image_source_dir,
        batch_size=batch_size,
        target_size=(image_dimension, image_dimension),
        augmenter=None,
        steps=test_steps,
        shuffle_on_epoch_end=False,
    )
    #     test_sequence.dataset_df.to_csv(os.path.join(output_dir, 'test_data_frame.csv'))

    print("** make prediction **")
    y_hat = model.predict_generator(test_sequence, verbose=1)
    y = test_sequence.get_y_true()
    #     np.savetxt(os.path.join(output_dir, 'y_hat_1205_default_weight.txt'), y_hat)
    np.savetxt(os.path.join(output_dir, 'y_0430.txt'), y)

    test_log_path = os.path.join(output_dir, "test.log")
    print("** write log to {} **".format(test_log_path))
    aurocs = []
    with open(test_log_path, "w") as f:
        for i in range(len(class_names)):
            try:
                score = roc_auc_score(y[:, i], y_hat[:, i])
                aurocs.append(score)
            except ValueError:
                score = 0
            f.write("{}: {}\n".format(class_names[i], score))
        mean_auroc = np.mean(aurocs)
        f.write("-------------------------\n")
        f.write("mean auroc: {}\n".format(mean_auroc))
        print("mean auroc: {}".format(mean_auroc))
Example n. 13
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--model_epoch', type=int, default=0)
    args = parser.parse_args()

    # Set Parameter #
    base_model_name = "DenseNet121"
    use_base_model_weights = True
    weights_path = None
    image_dimension = 224
    batch_size = 32
    epochs = 20
    class_names = ["Nodule", "Pneumothorax"]
    csv_path = './data/classification'
    image_source_dir = '/media/nfs/CXR/NIH/chest_xrays/NIH/data/images_1024x1024/'
    augmenter = None
    # If train_steps is None, it is computed as len(train) / batch_size
    train_steps = None
    positive_weights_multiply = 1
    outputs_path = './experiments/ae'
    weights_name = f'weights{args.model_epoch}.h5'
    output_weights_path = os.path.join(outputs_path, weights_name)
    initial_learning_rate = 0.0001
    training_stats = {}

    # Get Sample and Total Count From Training Data and Compute Class Weights #
    train_counts, train_pos_counts = get_sample_counts(csv_path, "train",
                                                       class_names)
    if train_steps is None:
        train_steps = int(train_counts / batch_size)
    dev_counts, _ = get_sample_counts(csv_path, "test", class_names)
    validation_steps = int(dev_counts / batch_size)
    print('***Compute Class Weights***')
    class_weights = get_class_weights(train_counts,
                                      train_pos_counts,
                                      multiply=positive_weights_multiply)
    print(class_weights)

    # Create Image Sequence #

    train_sequence = AugmentedImageSequence(
        dataset_csv_file=os.path.join(csv_path, "train.csv"),
        class_names=class_names,
        source_image_dir=image_source_dir,
        batch_size=batch_size,
        target_size=(image_dimension, image_dimension),
        augmenter=augmenter,
        steps=train_steps,
        model_epoch=args.model_epoch)

    validation_sequence = AugmentedImageSequence(
        dataset_csv_file=os.path.join(csv_path, "test.csv"),
        class_names=class_names,
        source_image_dir=image_source_dir,
        batch_size=batch_size,
        target_size=(image_dimension, image_dimension),
        augmenter=augmenter,
        steps=validation_steps,
        shuffle_on_epoch_end=False,
        model_epoch=args.model_epoch)

    # Build Model #
    factory = ModelFactory()
    model = factory.get_model(class_names,
                              model_name=base_model_name,
                              use_base_weights=use_base_model_weights,
                              weights_path=None,
                              input_shape=(image_dimension, image_dimension,
                                           3))

    print("** check multiple gpu availability **")
    gpus = len(os.getenv("CUDA_VISIBLE_DEVICES", "1").split(","))
    if gpus > 1:
        print("** multi_gpu_model is used! gpus={gpus} **")
        model_train = multi_gpu_model(model, gpus)
        # FIXME: currently (Keras 2.1.2) checkpoint doesn't work with multi_gpu_model
        checkpoint = MultiGPUModelCheckpoint(
            filepath=output_weights_path,
            base_model=model,
        )
    else:
        model_train = model
        checkpoint = ModelCheckpoint(
            output_weights_path,
            save_weights_only=True,
            save_best_only=True,
            verbose=1,
        )

    auroc = MultipleClassAUROC(sequence=validation_sequence,
                               class_names=class_names,
                               weights_path=output_weights_path,
                               stats=training_stats,
                               workers=8,
                               model_epoch=args.model_epoch)
    callbacks = [
        checkpoint,
        TensorBoard(log_dir=os.path.join(outputs_path, "logs"),
                    batch_size=batch_size),
        ReduceLROnPlateau(monitor='val_loss',
                          factor=0.1,
                          patience=1,
                          verbose=1,
                          mode="min",
                          min_lr=1e-8),
        auroc,
    ]

    # Compile Model #
    print('*** Start Compiling ***')
    optimizer = Adam(lr=initial_learning_rate)
    model_train.compile(optimizer=optimizer, loss="binary_crossentropy")

    # Train #
    print("** start training **")
    history = model_train.fit_generator(
        generator=train_sequence,
        steps_per_epoch=train_steps,
        epochs=epochs,
        validation_data=validation_sequence,
        validation_steps=validation_steps,
        callbacks=callbacks,
        class_weight=class_weights,
        workers=8,
        shuffle=False,
    )
    # dump history
    print("** dump history **")
    with open(os.path.join(outputs_path, f"history{args.model_epoch}.pkl"),
              "wb") as f:
        pickle.dump({
            "history": history.history,
            "auroc": auroc.aurocs,
        }, f)
    print("** done! **")
Example n. 14
from PIL import Image

config_file = "./config.ini"
cp = ConfigParser()
cp.read(config_file)

class_names = cp["Captioning_Model"].get("class_names").split(",")
image_source_dir = cp["Data"].get("image_source_dir")
data_dir = cp["Data"].get("data_dir")
all_data_csv = cp['Data'].get('all_data_csv')
testing_csv = cp['Data'].get('training_set_csv')

image_dimension = cp["Chexnet_Default"].getint("image_dimension")

batch_size = cp["Captioning_Model_Inference"].getint("batch_size")
testing_counts = get_sample_counts(data_dir, testing_csv)

max_sequence_length = cp['Captioning_Model'].getint('max_sequence_length')
tokenizer_vocab_size = cp['Captioning_Model'].getint('tokenizer_vocab_size')

# These two variables describe the shape of the extracted image feature vectors
features_shape = cp["Captioning_Model"].getint("features_shape")
attention_features_shape = cp["Captioning_Model"].getint(
    "attention_features_shape")

BUFFER_SIZE = cp["Captioning_Model"].getint("buffer_size")
embedding_dim = cp["Captioning_Model"].getint("embedding_dim")
units = cp["Captioning_Model"].getint("units")

checkpoint_path = cp["Captioning_Model_Train"].get("ckpt_path")
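
For reference, the snippet above only reads keys; the sketch below reconstructs a config.ini with the assumed sections and keys (every value is a placeholder, only the section and key names come from the code).

from configparser import ConfigParser

cp = ConfigParser()
cp["Data"] = {
    "image_source_dir": "/path/to/images",
    "data_dir": "./data",
    "all_data_csv": "all_data.csv",
    "training_set_csv": "train.csv",
}
cp["Chexnet_Default"] = {"image_dimension": "224"}
cp["Captioning_Model"] = {
    "class_names": "ClassA,ClassB",        # placeholder class list
    "max_sequence_length": "100",
    "tokenizer_vocab_size": "5000",
    "features_shape": "1024",
    "attention_features_shape": "49",
    "buffer_size": "1000",
    "embedding_dim": "256",
    "units": "512",
}
cp["Captioning_Model_Inference"] = {"batch_size": "16"}
cp["Captioning_Model_Train"] = {"ckpt_path": "./checkpoints"}

with open("config.ini", "w") as f:
    cp.write(f)
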
Example n. 15
def main():
    # parser config
    config_file = "../../config.ini"
    cp = ConfigParser()
    cp.read(config_file)
    # test config
    output_dir = cp["TEST"].get("output_dir")
    batch_size = cp["TEST"].getint("batch_size")
    model_name = cp["TEST"].get("model_name")
    # parse weights file path
    output_weights_name = cp["TRAIN"].get("output_weights_name")
    weights_path = os.path.join(output_dir, output_weights_name)
    time_steps = cp["TRAIN"].getint("time_steps")
    input_dim_openface = cp["TRAIN"].getint("input_dim_openface")
    input_dim_openpose = cp["TRAIN"].getint("input_dim_openpose")
    input_dim_rgb400 = cp["TRAIN"].getint("input_dim_rgb400")
    csv_source_dir_openface = cp["DEFAULT"].get("csv_source_dir_openface")
    csv_source_dir_openpose = cp["DEFAULT"].get("csv_source_dir_openpose")
    csv_source_dir_rgb400 = cp["DEFAULT"].get("csv_source_dir_rgb400")
    featrue_name = cp["TEST"].get("featrue_name")
    cuDNN = cp["TRAIN"].getboolean("cuDNN")
    bidirect = cp["TEST"].getboolean("bidirect")
    units = cp["TEST"].getint("units")
    layers = cp["TEST"].getint("layers")
    with_FC_dropout_layers = cp["TEST"].getboolean("with_FC_dropout_layers")
    initializer = cp["TEST"].get("initializer")
    regularizers_fuc = cp["TEST"].get("regularizers_fuc")
    regularizers_l1 = cp["TEST"].getfloat("regularizers_l1")
    regularizers_l2 = cp["TEST"].getfloat("regularizers_l2")
    if regularizers_fuc == 'l1':
        regularizers_fuc = regularizers.l1(regularizers_l1)
    elif regularizers_fuc == 'l2':
        regularizers_fuc = regularizers.l2(regularizers_l2)
    elif regularizers_fuc == 'l1_l2':
        regularizers_fuc = regularizers.l1_l2(regularizers_l1, regularizers_l2)
    else:
        regularizers_fuc = None
    units_layers = [units] * layers
    test_csv = cp["TEST"].get("test_csv")

    # compute steps
    # test_counts = get_sample_counts("test")
    test_counts = get_sample_counts("val")
    test_steps = ceil(test_counts / batch_size)
    print(f"** test_steps: {test_steps} **")

    print("** load model **")
    model_factory = ModelFactory(regularizers=regularizers_fuc,
                                 initializer=initializer,
                                 with_FC_dropout_layers=with_FC_dropout_layers)
    model_fun = getattr(model_factory, f'get_model_{model_name}')
    model = model_fun(
        TIME_STEPS=time_steps,
        INPUT_DIM=eval(f'input_dim_{featrue_name}'),
        weights_path=weights_path,
        CuDNN=cuDNN,
        bidirect=bidirect,
        units=units_layers,
    )

    print("** load test generator **")
    test_sequence = FeatruesSequence(
        dataset_csv_file=os.path.join('data', test_csv),
        csv_source_dir=eval(f'csv_source_dir_{featrue_name}'),
        batch_size=batch_size,
        shuffle_on_epoch_end=False,
        test=True)

    gpus = len(os.getenv("CUDA_VISIBLE_DEVICES", "0").split(","))
    if gpus > 1:
        print(f"** multi_gpu_model is used! gpus={gpus} **")
        model_train = multi_gpu_model(model, gpus)
    else:
        model_train = model

    print("** make prediction **")
    # model_train.compile(optimizer=Adam(), loss="mean_squared_error")
    prob_array = model_train.predict_generator(test_sequence,
                                               steps=test_steps,
                                               max_queue_size=8,
                                               workers=8,
                                               use_multiprocessing=True,
                                               verbose=1)
    print(prob_array)
    list(map(out_put_prediction, prob_array))
    print(pred_list)
    df_test = pd.read_csv(os.path.join('data', test_csv))
    df_test['predictions'] = pred_list
    df_test['probs'] = prob_array
    df_test[['openface_file', 'predictions',
             'probs']].to_csv(os.path.join(output_dir, 'result.csv'),
                              index=False)
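
The eval() lookups on featrue_name above can be replaced by an explicit mapping; the helper below is only a suggested alternative, not part of the original script.

def resolve_input_dim(featrue_name, input_dim_openface, input_dim_openpose, input_dim_rgb400):
    # Map the config's feature name to its input dimension without eval().
    input_dims = {
        "openface": input_dim_openface,
        "openpose": input_dim_openpose,
        "rgb400": input_dim_rgb400,
    }
    try:
        return input_dims[featrue_name]
    except KeyError:
        raise ValueError(f"unknown featrue_name: {featrue_name!r}")
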
Example n. 16
def main():
    # parser config
    config_file = "./config.ini"
    cp = ConfigParser()
    cp.read(config_file)

    # default config
    output_dir = cp["DEFAULT"].get("output_dir")
    class_names = cp["DEFAULT"].get("class_names").split(",")

    # test config
    batch_size = cp["TEST"].getint("batch_size")
    use_best_weights = cp["TEST"].getboolean("use_best_weights")

    # parse weights file path
    output_weights_name = cp["TRAIN"].get("output_weights_name")
    weights_path = os.path.join(output_dir, output_weights_name)
    best_weights_path = os.path.join(output_dir, f"best_{output_weights_name}")

    # get test sample count
    test_counts, _ = get_sample_counts(output_dir, "test", class_names)

    symlink_dir_name = "image_links"
    test_data_path = f"{output_dir}/{symlink_dir_name}/test/"

    step_test = int(test_counts / batch_size)
    print("** load test generator **")
    test_generator = custom_image_generator(
        ImageDataGenerator(horizontal_flip=True, rescale=1. / 255),
        test_data_path,
        batch_size=batch_size,
        class_names=class_names, cam=False
    )
    test_generator_orig = custom_image_generator(
        ImageDataGenerator(horizontal_flip=True, rescale=1. / 255),
        test_data_path,
        batch_size=batch_size,
        class_names=class_names, cam=True)
    x, y = load_generator_data(test_generator, step_test, len(class_names))
    x_orig, _ = load_generator_data(test_generator_orig, step_test, len(class_names))

    print("** load model **")
    model = get_model(class_names)
    if use_best_weights:
        print("** use best weights **")
        model.load_weights(best_weights_path)
    else:
        print("** use last weights **")
        model.load_weights(weights_path)

    print("** make prediction **")
    y_hat = model.predict(x, verbose=1)

    test_log_path = os.path.join(output_dir, "test.log")
    print(f"** write log to {test_log_path} **")
    aurocs = []
    with open(test_log_path, "w") as f:
        for i in range(len(class_names)):
            try:
                score = roc_auc_score(y[i], y_hat[i])
                aurocs.append(score)
            except ValueError:
                score = 0
            f.write(f"{class_names[i]}: {score}\n")
        mean_auroc = np.mean(aurocs)
        f.write("-------------------------\n")
        f.write(f"mean auroc: {mean_auroc}\n")

    grad_cam(model, class_names, y, y_hat, x, x_orig)
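
grad_cam() is a repo helper; as a rough, framework-agnostic illustration of the class-activation-map idea behind it, the sketch below weights the final conv feature maps by the Dense-layer weights for one class. All names and shapes here are assumptions, not the helper's actual implementation.

import numpy as np

def compute_cam(conv_features, dense_weights, class_idx):
    # conv_features: (h, w, c) activations of the last conv/BN layer for one image
    # dense_weights: (c, n_classes) kernel of the final sigmoid Dense layer
    cam = conv_features @ dense_weights[:, class_idx]   # (h, w) weighted sum over channels
    cam = np.maximum(cam, 0.0)                           # keep positive evidence only
    cam /= cam.max() + 1e-8                              # normalize to [0, 1]
    return cam                                           # upsample before overlaying on the image
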
Example n. 17
def train(model_dir, results_subdir, random_seed, resolution):
    np.random.seed(random_seed)
    tf.set_random_seed(np.random.randint(1 << 31))
    session_conf = tf.ConfigProto(intra_op_parallelism_threads=1,
                                  inter_op_parallelism_threads=1)
    session_conf.gpu_options.allow_growth = True
    sess = tf.Session(graph=tf.get_default_graph(), config=session_conf)
    set_session(sess)

    # parser config
    config_file = model_dir + "/config.ini"
    print("Config File Path:", config_file, flush=True)
    assert os.path.isfile(config_file)
    cp = ConfigParser()
    cp.read(config_file)

    # default config
    base_model_name = cp["DEFAULT"].get("base_model_name")

    # train config
    path_model_base_weights = cp["TRAIN"].get("path_model_base_weights")
    use_trained_model_weights = cp["TRAIN"].getboolean(
        "use_trained_model_weights")
    use_best_weights = cp["TRAIN"].getboolean("use_best_weights")
    output_weights_name = cp["TRAIN"].get("output_weights_name")
    epochs = cp["TRAIN"].getint("epochs")
    batch_size = cp["TRAIN"].getint("batch_size")
    initial_learning_rate = cp["TRAIN"].getfloat("initial_learning_rate")
    image_dimension = cp["TRAIN"].getint("image_dimension")
    patience_reduce_lr = cp["TRAIN"].getint("patience_reduce_lr")
    min_lr = cp["TRAIN"].getfloat("min_lr")
    positive_weights_multiply = cp["TRAIN"].getfloat(
        "positive_weights_multiply")
    patience = cp["TRAIN"].getint("patience")
    samples_per_epoch = cp["TRAIN"].getint("samples_per_epoch")
    reduce_lr = cp["TRAIN"].getfloat("reduce_lr")

    print("** DenseNet input resolution:", image_dimension, flush=True)
    print("** GAN image resolution:", resolution, flush=True)
    print("** Patience epochs", patience, flush=True)
    print("** Samples per epoch:", samples_per_epoch, flush=True)

    log2_record = int(np.log2(resolution))
    record_file_ending = "*" + str(log2_record) + ".tfrecords"
    print("** Resolution ",
          resolution,
          " corresponds to ",
          record_file_ending,
          " TFRecord file.",
          flush=True)

    output_dir = os.path.join(
        results_subdir,
        "classification_results_res_" + str(2**log2_record) + "/train")
    print("Output Directory:", output_dir, flush=True)
    if not os.path.isdir(output_dir):
        os.makedirs(output_dir)

    # if previously trained weights is used, never re-split
    if use_trained_model_weights:
        print("** use trained model weights **", flush=True)
        training_stats_file = os.path.join(output_dir, ".training_stats.json")
        if os.path.isfile(training_stats_file):
            # TODO: add loading previous learning rate?
            training_stats = json.load(open(training_stats_file))
        else:
            training_stats = {}
    else:
        # start over
        training_stats = {}

    show_model_summary = cp["TRAIN"].getboolean("show_model_summary")
    running_flag_file = os.path.join(output_dir, ".training.lock")
    if os.path.isfile(running_flag_file):
        raise RuntimeError("A process is running in this directory!!!")
    else:
        open(running_flag_file, "a").close()

    try:
        print("backup config file to", output_dir, flush=True)
        shutil.copy(config_file,
                    os.path.join(output_dir,
                                 os.path.split(config_file)[1]))

        tfrecord_dir_tr = os.path.join(results_subdir[:-4], "train")
        tfrecord_dir_vl = os.path.join(results_subdir[:-4], "valid")

        shutil.copy(tfrecord_dir_tr + "/train.csv", output_dir)
        shutil.copy(tfrecord_dir_vl + "/valid.csv", output_dir)

        # Get class names
        class_names = get_class_names(output_dir, "train")

        # get train sample counts
        train_counts, train_pos_counts = get_sample_counts(
            output_dir, "train", class_names)
        valid_counts, _ = get_sample_counts(output_dir, "valid", class_names)

        print("Total Training Data:", train_counts, flush=True)
        print("Total Validation Data:", valid_counts, flush=True)
        train_steps = int(min(samples_per_epoch, train_counts) / batch_size)
        print("** train_steps:", train_steps, flush=True)
        validation_steps = int(np.floor(valid_counts / batch_size))
        print("** validation_steps:", validation_steps, flush=True)

        # compute class weights
        print("** compute class weights from training data **", flush=True)
        class_weights = get_class_weights(
            train_counts,
            train_pos_counts,
            multiply=positive_weights_multiply,
        )
        print("** class_weights **", flush=True)
        print(class_weights)

        print("** load model **", flush=True)
        if use_trained_model_weights:
            if use_best_weights:
                model_weights_file = os.path.join(
                    output_dir, "best_" + output_weights_name)
            else:
                model_weights_file = os.path.join(output_dir,
                                                  output_weights_name)
        else:
            model_weights_file = None

        # Use downloaded weights
        if os.path.isfile(path_model_base_weights):
            base_weights = path_model_base_weights
            print("** Base weights will be loaded.", flush=True)
        else:
            base_weights = None
            print("** No Base weights.", flush=True)

        # Get Model
        # ------------------------------------
        input_shape = (image_dimension, image_dimension, 3)
        img_input = Input(shape=input_shape)

        base_model = DenseNet121(include_top=False,
                                 weights=base_weights,
                                 input_tensor=img_input,
                                 input_shape=input_shape,
                                 pooling="avg")

        x = base_model.output
        predictions = Dense(len(class_names),
                            activation="sigmoid",
                            name="predictions")(x)
        model = Model(inputs=img_input, outputs=predictions)

        if use_trained_model_weights and model_weights_file != None:
            print("** load model weights_path:",
                  model_weights_file,
                  flush=True)
            model.load_weights(model_weights_file)
        # ------------------------------------

        if show_model_summary:
            print(model.summary())

        print("** create image generators", flush=True)
        train_seq = TFWrapper(tfrecord_dir=tfrecord_dir_tr,
                              record_file_endings=record_file_ending,
                              batch_size=batch_size,
                              model_target_size=(image_dimension,
                                                 image_dimension),
                              steps=train_steps,
                              augment=True,
                              shuffle=True,
                              prefetch=True,
                              repeat=True)

        valid_seq = TFWrapper(tfrecord_dir=tfrecord_dir_vl,
                              record_file_endings=record_file_ending,
                              batch_size=batch_size,
                              model_target_size=(image_dimension,
                                                 image_dimension),
                              steps=None,
                              augment=False,
                              shuffle=False,
                              prefetch=True,
                              repeat=True)

        # Initialise train and valid iterators
        print("** Initialise train and valid iterators", flush=True)
        train_seq.initialise()
        valid_seq.initialise()

        output_weights_path = os.path.join(output_dir, output_weights_name)
        print("** set output weights path to:",
              output_weights_path,
              flush=True)

        print("** SINGLE_gpu_model is used!", flush=True)
        model_train = model
        checkpoint = ModelCheckpoint(
            output_weights_path,
            save_weights_only=True,
            save_best_only=False,
            verbose=1,
        )

        print("** compile model with class weights **", flush=True)
        optimizer = Adam(lr=initial_learning_rate)
        model_train.compile(optimizer=optimizer, loss="binary_crossentropy")

        auroc = MultipleClassAUROC(sequence=valid_seq,
                                   class_names=class_names,
                                   weights_path=output_weights_path,
                                   stats=training_stats,
                                   early_stop_p=patience,
                                   learn_rate_p=patience_reduce_lr,
                                   learn_rate_f=reduce_lr,
                                   min_lr=min_lr,
                                   workers=0)

        callbacks = [
            checkpoint,
            TensorBoard(log_dir=os.path.join(output_dir, "logs"),
                        batch_size=batch_size), auroc
        ]

        print("** start training **", flush=True)
        history = model_train.fit_generator(
            generator=train_seq,
            steps_per_epoch=train_steps,
            epochs=epochs,
            validation_data=valid_seq,
            validation_steps=validation_steps,
            callbacks=callbacks,
            class_weight=class_weights,
            workers=0,
            shuffle=False,
        )

        # dump history
        print("** dump history **", flush=True)
        with open(os.path.join(output_dir, "history.pkl"), "wb") as f:
            pickle.dump({
                "history": history.history,
                "auroc": auroc.aurocs,
            }, f)
        print("** done! **", flush=True)

    finally:
        os.remove(running_flag_file)
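
TFWrapper is a repo helper around the resolution-suffixed TFRecord files; a rough tf.data sketch of what such a wrapper might do is given below. The feature names 'image' and 'label' and the label length are assumptions.

import os
import tensorflow as tf

def sketch_tfrecord_dataset(tfrecord_dir, record_file_ending, batch_size, image_dimension):
    pattern = os.path.join(tfrecord_dir, record_file_ending)   # e.g. "*8.tfrecords" for 256 px
    files = tf.data.Dataset.list_files(pattern, shuffle=True)
    dataset = files.interleave(tf.data.TFRecordDataset, cycle_length=4)

    def _parse(example_proto):
        features = {
            "image": tf.FixedLenFeature([], tf.string),     # assumed feature name
            "label": tf.FixedLenFeature([14], tf.float32),  # assumed multi-label vector
        }
        parsed = tf.parse_single_example(example_proto, features)
        image = tf.image.decode_png(parsed["image"], channels=3)
        image = tf.image.resize_images(image, (image_dimension, image_dimension))
        return image, parsed["label"]

    return dataset.map(_parse).batch(batch_size).prefetch(1).repeat()
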
Example n. 18
def main():
    # parser config
    config_file = "./config.ini"
    cp = ConfigParser()
    cp.read(config_file)

    # default config
    output_dir = cp["DEFAULT"].get("output_dir")
    if not os.path.exists(output_dir):
        os.mkdir(output_dir)
    base_model_name = cp["DEFAULT"].get("base_model_name")
    class_names = cp["DEFAULT"].get("class_names").split(",")
    image_source_dir = cp["DEFAULT"].get("image_source_dir")
    image_dimension = cp["TRAIN"].getint("image_dimension")
    batch_size = cp["TEST"].getint("batch_size")
    test_steps = cp["TEST"].get("test_steps")

    # parse weights file path
    output_weights_name = cp["TRAIN"].get("output_weights_name")
    weights_path = os.path.join(output_dir, output_weights_name)
    data_set_dir = cp["TRAIN"].get("dataset_csv_dir")
    input_weights_name = cp["TRAIN"].get("input_weights_name")
    best_weights_path = os.path.join(data_set_dir,
                                     f"best_{input_weights_name}")

    # get test sample count
    test_counts, _ = get_sample_counts(data_set_dir, "test", class_names)

    # compute steps
    if test_steps == "auto":
        test_steps = int(test_counts / batch_size)
    else:
        try:
            test_steps = int(test_steps)
        except ValueError:
            raise ValueError(f"""
                test_steps: {test_steps} is invalid,
                please use 'auto' or integer.
                """)

    # CAM config
    bbox_list_file = cp["CAM"].get("bbox_list_file")
    use_best_weights = cp["CAM"].getboolean("use_best_weights")

    print("** load model **")
    if use_best_weights:
        print("** use best weights **")
        model_weights_path = best_weights_path
    else:
        print("** use last weights **")
        model_weights_path = weights_path
    model_factory = ModelFactory()
    model = model_factory.get_model(class_names,
                                    model_name=base_model_name,
                                    use_base_weights=False,
                                    weights_path=model_weights_path)

    print("read bbox list file")
    df_images = pd.read_csv(bbox_list_file, header=None, skiprows=1)
    df_images.columns = ["file_name", "label", "x", "y", "w", "h"]

    print("create a generator for loading transformed images")
    cam_sequence = AugmentedImageSequence(
        dataset_csv_file=os.path.join(data_set_dir, "test.csv"),
        class_names=class_names,
        source_image_dir=image_source_dir,
        batch_size=batch_size,
        target_size=(image_dimension, image_dimension),
        augmenter=None,
        steps=test_steps,
        shuffle_on_epoch_end=False,
    )

    image_output_dir = os.path.join(output_dir, "cam")
    if not os.path.isdir(image_output_dir):
        os.makedirs(image_output_dir)

    print("create CAM")
    df_images.apply(
        lambda g: create_cam(
            df_g=g,
            output_dir=image_output_dir,
            image_source_dir=image_source_dir,
            model=model,
            generator=cam_sequence,
            class_names=class_names,
        ),
        axis=1,
    )
Example n. 19
def modelpredict():
    # parser config
    config_file = "./config.ini"
    cp = ConfigParser()
    cp.read(config_file)

    # default config
    output_dir = cp["DEFAULT"].get("output_dir")
    class_names = cp["DEFAULT"].get("class_names").split(",")

    # test config
    batch_size = cp["DEV"].getint("batch_size")
    use_best_weights = cp["DEV"].getboolean("use_best_weights")

    # parse weights file path
    output_weights_name = cp["TRAIN"].get("output_weights_name")
    weights_path = os.path.join(output_dir, output_weights_name)
    best_weights_path = os.path.join(output_dir, f"best_{output_weights_name}")

    # get test sample count
    test_counts, _ = get_sample_counts(output_dir, "dev", class_names)

    symlink_dir_name = "image_links"
    dev_data_path = f"{output_dir}/{symlink_dir_name}/dev/"

    step_test = int(test_counts / batch_size)
    print("** load dev generator **")
    test_generator = custom_image_generator(
        ImageDataGenerator(horizontal_flip=True, rescale=1. / 255),
        dev_data_path,
        batch_size=batch_size,
        class_names=class_names,
    )
    x, y = load_generator_data(test_generator, step_test)

    print("** load model **")
    model = get_model(class_names)
    if use_best_weights:
        print("** use best weights **")
        model.load_weights(best_weights_path)
    else:
        print("** use last weights **")
        model.load_weights(weights_path)

    print("** make prediction **")
    y_hat = model.predict(x)
    # print(y_hat)
    # dev_log_path = os.path.join(output_dir, "dev.log")
    # print(f"** write log to {dev_log_path} **")
    # aurocs = []
    # with open(dev_log_path, "w") as f:
    #     for i in range(len(class_names)):
    #         try:
    #             score = roc_auc_score(y[i], y_hat[i])
    #             aurocs.append(score)
    #         except ValueError:
    #             score = 0
    #         f.write(f"{class_names[i]}: {score}\n")
    #     mean_auroc = np.mean(aurocs)
    #     f.write("-------------------------\n")
    #     f.write(f"mean auroc: {mean_auroc}\n")
    return y, y_hat
def load_model(config_file="./config.ini", change_arch=False, compile_=True):
    cp = ConfigParser()
    cp.read(config_file)

    # default config
    output_dir = cp["DEFAULT"].get("output_dir")
    base_model_name = cp["DEFAULT"].get("base_model_name")
    class_names = cp["DEFAULT"].get("class_names").split(",")
    image_source_dir = cp["DEFAULT"].get("image_source_dir")

    # train config
    image_dimension = cp["TRAIN"].getint("image_dimension")

    # test config
    batch_size = cp["TEST"].getint("batch_size")
    test_steps = cp["TEST"].get("test_steps")
    use_best_weights = cp["TEST"].getboolean("use_best_weights")

    # parse weights file path
    output_weights_name = cp["TRAIN"].get("output_weights_name")
    weights_path = os.path.join(output_dir, output_weights_name)
    best_weights_path = os.path.join(output_dir, f"best_{output_weights_name}")

    # get test sample count
    test_counts, _ = get_sample_counts(output_dir, "test", class_names)

    # compute steps
    if test_steps == "auto":
        test_steps = int(test_counts / batch_size)
    else:
        try:
            test_steps = int(test_steps)
        except ValueError:
            raise ValueError(f"""
                test_steps: {test_steps} is invalid,
                please use 'auto' or integer.
                """)
    print(f"** test_steps: {test_steps} **")

    print("** load model **")
    if use_best_weights:
        print("** use best weights **")
        model_weights_path = best_weights_path
    else:
        print("** use last weights **")
        model_weights_path = weights_path
    # load CheXNet model:
    model_factory = ModelFactory()
    model = model_factory.get_model(class_names,
                                    model_name=base_model_name,
                                    use_base_weights=False,
                                    weights_path=model_weights_path)
    if change_arch:
        #return model
        # input layer, output layer:
        input_layer = model.get_layer(index=0)
        chex_output = model.get_layer(index=-1)
        # add second last layer:
        intermediate_layer = model.get_layer(index=-2)
        rsna_add_layer = Dense(10, activation='relu', name='rsna_add_layer')(
            intermediate_layer.output)  # params are tentative
        rsna_clf_output = Dense(3,
                                activation='softmax',
                                name='rsna_clf_output')(concatenate(
                                    [rsna_add_layer, chex_output.output]))
        model = Model(inputs=[input_layer.input], outputs=[rsna_clf_output])
        losses = {'rsna_clf_output': 'categorical_crossentropy'}
        if compile_:
            print('** compile **')
            model.compile(optimizer='rmsprop', loss=losses, loss_weights=[1.])
    else:
        if compile_:
            print('** compile **')
            # the unchanged CheXNet head is multi-label, so compile with binary cross-entropy
            # (the original code referenced `losses`, which is only defined in the change_arch branch)
            model.compile(optimizer='rmsprop', loss='binary_crossentropy')
    return model
Example n. 21
def main():
    # parser config
    config_file = "./config.ini"
    cp = ConfigParser()
    cp.read(config_file)

    # default config
    output_dir = cp["DEFAULT"].get("output_dir")
    base_model_name = cp["DEFAULT"].get("base_model_name")
    class_names = cp["DEFAULT"].get("class_names").split(",")
    image_source_dir = cp["DEFAULT"].get("image_source_dir")

    # train config
    image_dimension = cp["TRAIN"].getint("image_dimension")

    # test config
    batch_size = cp["TEST"].getint("batch_size")
    test_steps = cp["TEST"].get("test_steps")
    use_best_weights = cp["TEST"].getboolean("use_best_weights")

    # parse weights file path
    output_weights_name = cp["TRAIN"].get("output_weights_name")
    weights_path = os.path.join(output_dir, output_weights_name)
    best_weights_path = os.path.join(output_dir, f"best_{output_weights_name}")

    # get test sample count
    test_counts, _ = get_sample_counts(output_dir, "test", class_names)

    # compute steps
    if test_steps == "auto":
        test_steps = int(test_counts / batch_size)
    else:
        try:
            test_steps = int(test_steps)
        except ValueError:
            raise ValueError(f"""
                test_steps: {test_steps} is invalid,
                please use 'auto' or integer.
                """)
    print(f"** test_steps: {test_steps} **")

    print("** load model **")
    if use_best_weights:
        print("** use best weights **")
        model_weights_path = best_weights_path
    else:
        print("** use last weights **")
        model_weights_path = weights_path
    model_factory = ModelFactory()
    model = model_factory.get_model(class_names,
                                    model_name=base_model_name,
                                    use_base_weights=False,
                                    weights_path=model_weights_path)

    print("** load test generator **")
    test_sequence = AugmentedImageSequence(
        dataset_csv_file=os.path.join(output_dir, "test.csv"),
        class_names=class_names,
        source_image_dir=image_source_dir,
        batch_size=batch_size,
        target_size=(image_dimension, image_dimension),
        augmenter=None,
        steps=test_steps,
        shuffle_on_epoch_end=False,
    )

    print("** make prediction **")
    logs = []
    starttime = time()
    y_hat = model.predict_generator(test_sequence, verbose=1)
    logs.append(time() - starttime)
    print("time: " + str(logs))
    y = test_sequence.get_y_true()

    test_log_path = os.path.join(output_dir, "test.log")
    print(f"** write log to {test_log_path} **")
    aurocs = []
    with open(test_log_path, "w") as f:
        for i in range(len(class_names)):
            try:
                score = roc_auc_score(y[:, i], y_hat[:, i])
                aurocs.append(score)
            except ValueError:
                score = 0
            f.write(f"{class_names[i]}: {score}\n")
        mean_auroc = np.mean(aurocs)
        f.write("-------------------------\n")
        f.write(f"mean auroc: {mean_auroc}\n")
        print(f"mean auroc: {mean_auroc}")
Example n. 22
def main():
    # parser config
    config_file = "./config.ini"
    cp = ConfigParser()
    cp.read(config_file)

    # default config
    output_dir = cp["DEFAULT"].get("output_dir")
    image_source_dir = cp["DEFAULT"].get("image_source_dir")
    train_patient_count = cp["DEFAULT"].getint("train_patient_count")
    dev_patient_count = cp["DEFAULT"].getint("dev_patient_count")
    data_entry_file = cp["DEFAULT"].get("data_entry_file")
    class_names = cp["DEFAULT"].get("class_names").split(",")

    # train config
    use_base_model_weights = cp["TRAIN"].getboolean("use_base_model_weights")
    use_trained_model_weights = cp["TRAIN"].getboolean("use_trained_model_weights")
    use_best_weights = cp["TRAIN"].getboolean("use_best_weights")
    output_weights_name = cp["TRAIN"].get("output_weights_name")
    epochs = cp["TRAIN"].getint("epochs")
    batch_size = cp["TRAIN"].getint("batch_size")
    initial_learning_rate = cp["TRAIN"].getfloat("initial_learning_rate")
    train_steps = cp["TRAIN"].get("train_steps")
    patience_reduce_lr = cp["TRAIN"].getint("patience_reduce_lr")
    validation_steps = cp["TRAIN"].get("validation_steps")
    positive_weights_multiply = cp["TRAIN"].getfloat("positive_weights_multiply")
    use_class_balancing = cp["TRAIN"].getboolean("use_class_balancing")
    use_default_split = cp["TRAIN"].getboolean("use_default_split")
    # if previously trained weights is used, never re-split
    if use_trained_model_weights:
        # resuming mode
        print("** use trained model weights, turn on use_skip_split automatically **")
        use_skip_split = True
        # load training status for resuming
        training_stats_file = os.path.join(output_dir, ".training_stats.json")
        if os.path.isfile(training_stats_file):
            # TODO: add loading previous learning rate?
            training_stats = json.load(open(training_stats_file))
        else:
            training_stats = {}
    else:
        # start over
        use_skip_split = cp["TRAIN"].getboolean("use_skip_split")
        training_stats = {}

    split_dataset_random_state = cp["TRAIN"].getint("split_dataset_random_state")
    show_model_summary = cp["TRAIN"].getboolean("show_model_summary")
    # end parser config

    # check output_dir, create it if not exists
    if not os.path.isdir(output_dir):
        os.makedirs(output_dir)

    running_flag_file = os.path.join(output_dir, ".training.lock")
    if os.path.isfile(running_flag_file):
        raise RuntimeError("A process is running in this directory!!!")
    else:
        open(running_flag_file, "a").close()

    try:
        print(f"backup config file to {output_dir}")
        shutil.copy(config_file, os.path.join(output_dir, os.path.split(config_file)[1]))

        # split train/dev/test
        if use_default_split:
            datasets = ["train", "dev", "test"]
            for dataset in datasets:
                shutil.copy(f"./data/default_split/{dataset}.csv", output_dir)
        elif not use_skip_split:
            print("** split dataset **")
            split_data(
                data_entry_file,
                class_names,
                train_patient_count,
                dev_patient_count,
                output_dir,
                split_dataset_random_state,
            )

        # get train/dev sample counts
        train_counts, train_pos_counts = get_sample_counts(output_dir, "train", class_names)
        dev_counts, _ = get_sample_counts(output_dir, "dev", class_names)

        # compute steps
        if train_steps == "auto":
            train_steps = int(train_counts / batch_size)
        else:
            try:
                train_steps = int(train_steps)
            except ValueError:
                raise ValueError(f"""
                train_steps: {train_steps} is invalid,
                please use 'auto' or integer.
                """)
        print(f"** train_steps: {train_steps} **")

        if validation_steps == "auto":
            validation_steps = int(dev_counts / batch_size)
        else:
            try:
                validation_steps = int(validation_steps)
            except ValueError:
                raise ValueError(f"""
                validation_steps: {validation_steps} is invalid,
                please use 'auto' or integer.
                """)
        print(f"** validation_steps: {validation_steps} **")

        # compute class weights
        print("** compute class weights from training data **")
        class_weights = get_class_weights(
            train_counts,
            train_pos_counts,
            multiply=positive_weights_multiply,
            use_class_balancing=use_class_balancing
        )
        print("** class_weights **")
        for c, w in class_weights.items():
            print(f"  {c}: {w}")

        print("** load model **")
        if use_base_model_weights:
            base_model_weights_file = cp["TRAIN"].get("base_model_weights_file")
        else:
            base_model_weights_file = None
        if use_trained_model_weights:
            if use_best_weights:
                model_weights_file = os.path.join(output_dir, f"best_{output_weights_name}")
            else:
                model_weights_file = os.path.join(output_dir, output_weights_name)
        else:
            model_weights_file = None
        model = get_model(class_names, base_model_weights_file, model_weights_file)
        if show_model_summary:
            print(model.summary())

        # recreate symlink folder for ImageDataGenerator
        symlink_dir_name = "image_links"
        create_symlink(image_source_dir, output_dir, symlink_dir_name)

        print("** create image generators **")
        train_data_path = f"{output_dir}/{symlink_dir_name}/train/"
        train_generator = custom_image_generator(
            ImageDataGenerator(horizontal_flip=True, rescale=1./255),
            train_data_path,
            batch_size=batch_size,
            class_names=class_names,
        )
        dev_data_path = f"{output_dir}/{symlink_dir_name}/dev/"
        dev_generator = custom_image_generator(
            ImageDataGenerator(horizontal_flip=True, rescale=1./255),
            dev_data_path,
            batch_size=batch_size,
            class_names=class_names,
        )

        output_weights_path = os.path.join(output_dir, output_weights_name)
        print(f"** set output weights path to: {output_weights_path} **")

        print("** check multiple gpu availability **")
        gpus = len(os.getenv("CUDA_VISIBLE_DEVICES", "1").split(","))
        if gpus > 1:
            print(f"** multi_gpu_model is used! gpus={gpus} **")
            model_train = multi_gpu_model(model, gpus)
            # FIXME: currently (Keras 2.1.2) checkpoint doesn't work with multi_gpu_model
            checkpoint = MultiGPUModelCheckpoint(
                filepath=output_weights_path,
                base_model=model,
            )
        else:
            model_train = model
            checkpoint = ModelCheckpoint(output_weights_path)

        print("** compile model with class weights **")
        optimizer = Adam(lr=initial_learning_rate)
        model_train.compile(optimizer=optimizer, loss="binary_crossentropy")
        auroc = MultipleClassAUROC(
            generator=dev_generator,
            steps=validation_steps,
            class_names=class_names,
            weights_path=output_weights_path,
            stats=training_stats,
        )
        callbacks = [
            checkpoint,
            TensorBoard(log_dir=os.path.join(output_dir, "logs"), batch_size=batch_size),
            ReduceLROnPlateau(monitor='val_loss', factor=0.1, patience=patience_reduce_lr, verbose=1),
            auroc,
        ]

        print("** training start **")
        history = model_train.fit_generator(
            generator=train_generator,
            steps_per_epoch=train_steps,
            epochs=epochs,
            validation_data=dev_generator,
            validation_steps=validation_steps,
            callbacks=callbacks,
            class_weight=class_weights,
        )

        # dump history
        print("** dump history **")
        with open(os.path.join(output_dir, "history.pkl"), "wb") as f:
            pickle.dump({
                "history": history.history,
                "auroc": auroc.aurocs,
            }, f)
        print("** done! **")

    finally:
        os.remove(running_flag_file)
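
The ".training.lock" create/remove pattern appears in several of these examples; a small context-manager sketch (not part of the original code) makes the cleanup automatic even when training raises.

import os
from contextlib import contextmanager

@contextmanager
def training_lock(output_dir, lock_name=".training.lock"):
    # Refuse to start if another run holds the lock; always release it on exit.
    lock_path = os.path.join(output_dir, lock_name)
    if os.path.isfile(lock_path):
        raise RuntimeError("A process is running in this directory!!!")
    open(lock_path, "a").close()
    try:
        yield lock_path
    finally:
        os.remove(lock_path)

# Hypothetical usage:
# with training_lock(output_dir):
#     run_training()
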