Code example #1
def main(config_path, type, n_cluster, n_neighbors):
    """
    Ablation study on the Contrastive (SimCLR) or AE representation.
    """
    # Load config file
    cfg = Config(settings=None)
    cfg.load_config(config_path)

    # Update n_cluster and n_neighbors if provided
    if n_cluster > 0:
        cfg.settings['n_cluster'] = n_cluster
    if n_neighbors > 0:
        cfg.settings['n_neighbors'] = n_neighbors

    # Get path to output
    OUTPUT_PATH = cfg.settings['PATH']['OUTPUT'] + cfg.settings[
        'Experiment_Name'] + datetime.today().strftime('%Y_%m_%d_%Hh%M') + '/'
    # make output dir
    if not os.path.isdir(OUTPUT_PATH + 'model/'):
        os.makedirs(OUTPUT_PATH + 'model/', exist_ok=True)
    if not os.path.isdir(OUTPUT_PATH + 'results/'):
        os.makedirs(OUTPUT_PATH + 'results/', exist_ok=True)
    if not os.path.isdir(OUTPUT_PATH + 'logs/'):
        os.makedirs(OUTPUT_PATH + 'logs/', exist_ok=True)

    for seed_i, seed in enumerate(cfg.settings['seeds']):
        ############################### Set Up #################################
        # initialize logger
        logging.basicConfig(level=logging.INFO)
        logger = logging.getLogger()
        try:
            logger.handlers[1].stream.close()
            logger.removeHandler(logger.handlers[1])
        except IndexError:
            pass
        logger.setLevel(logging.INFO)
        formatter = logging.Formatter(
            '%(asctime)s | %(levelname)s | %(message)s')
        log_file = OUTPUT_PATH + 'logs/' + f'log_{seed_i+1}.txt'
        file_handler = logging.FileHandler(log_file)
        file_handler.setLevel(logging.INFO)
        file_handler.setFormatter(formatter)
        logger.addHandler(file_handler)

        # print path
        logger.info(f"Log file : {log_file}")
        logger.info(f"Data path : {cfg.settings['PATH']['DATA']}")
        logger.info(f"Outputs path : {OUTPUT_PATH}" + "\n")

        # Set seed
        if seed != -1:
            random.seed(seed)
            np.random.seed(seed)
            torch.manual_seed(seed)
            torch.cuda.manual_seed(seed)
            torch.cuda.manual_seed_all(seed)
            torch.backends.cudnn.deterministic = True
            logger.info(
                f"Set seed {seed_i+1:02}/{len(cfg.settings['seeds']):02} to {seed}"
            )

        # set number of thread
        if cfg.settings['n_thread'] > 0:
            torch.set_num_threads(cfg.settings['n_thread'])

        # check if GPU available
        cfg.settings['device'] = torch.device(
            'cuda') if torch.cuda.is_available() else torch.device('cpu')
        # Print technical info in logger
        logger.info(f"Device : {cfg.settings['device']}")
        logger.info(f"Number of thread : {cfg.settings['n_thread']}")

        ############################### Split Data #############################
        # Load data informations
        df_info = pd.read_csv(cfg.settings['PATH']['DATA_INFO'])
        df_info = df_info.drop(df_info.columns[0], axis=1)
        # remove low contrast images (all black)
        df_info = df_info[df_info.low_contrast == 0]

        # Train Validation Test Split
        spliter = MURA_TrainValidTestSplitter(
            df_info,
            train_frac=cfg.settings['Split']['train_frac'],
            ratio_known_normal=cfg.settings['Split']['known_normal'],
            ratio_known_abnormal=cfg.settings['Split']['known_abnormal'],
            random_state=42)
        spliter.split_data(verbose=False)
        train_df = spliter.get_subset('train')
        valid_df = spliter.get_subset('valid')
        test_df = spliter.get_subset('test')

        # print info to logger
        for key, value in cfg.settings['Split'].items():
            logger.info(f"Split param {key} : {value}")
        logger.info("Split Summary \n" +
                    str(spliter.print_stat(returnTable=True)))

        ############################### Load model #############################
        if type == 'SimCLR':
            net = Encoder(MLP_Neurons_layer=[512, 256, 128])
            init_key = 'repr_net_dict'
        elif type == 'AE':
            net = AE_net(MLP_Neurons_layer_enc=[512, 256, 128],
                         MLP_Neurons_layer_dec=[128, 256, 512],
                         output_channels=1)
            init_key = 'ae_net_dict'
        else:
            raise ValueError(
                f"Unsupported representation type '{type}'. Use 'SimCLR' or 'AE'.")

        net = net.to(cfg.settings['device'])
        pretrain_state_dict = torch.load(cfg.settings['model_path_name'] +
                                         f'{seed_i+1}.pt',
                                         map_location=cfg.settings['device'])
        net.load_state_dict(pretrain_state_dict[init_key])
        logger.info('Model weights successfully loaded from ' +
                    cfg.settings['model_path_name'] + f'{seed_i+1}.pt')

        ablation_model = NearestNeighbors(
            net,
            kmeans_reduction=cfg.settings['kmeans_reduction'],
            batch_size=cfg.settings['batch_size'],
            n_job_dataloader=cfg.settings['num_worker'],
            print_batch_progress=cfg.settings['print_batch_progress'],
            device=cfg.settings['device'])

        ############################### Training ###############################
        # make dataset
        train_dataset = MURA_Dataset(
            train_df,
            data_path=cfg.settings['PATH']['DATA'],
            load_mask=True,
            load_semilabels=True,
            output_size=cfg.settings['Split']['img_size'],
            data_augmentation=False)
        valid_dataset = MURA_Dataset(
            valid_df,
            data_path=cfg.settings['PATH']['DATA'],
            load_mask=True,
            load_semilabels=True,
            output_size=cfg.settings['Split']['img_size'],
            data_augmentation=False)
        test_dataset = MURA_Dataset(
            test_df,
            data_path=cfg.settings['PATH']['DATA'],
            load_mask=True,
            load_semilabels=True,
            output_size=cfg.settings['Split']['img_size'],
            data_augmentation=False)

        logger.info("Online preprocessing pipeline : \n" +
                    str(train_dataset.transform) + "\n")

        # Train
        ablation_model.train(train_dataset,
                             n_cluster=cfg.settings['n_cluster'])

        # Evaluate
        logger.info(
            f'--- Validation with {cfg.settings["n_neighbors"]} neighbors')
        ablation_model.evaluate(valid_dataset,
                                n_neighbors=cfg.settings['n_neighbors'],
                                mode='valid')
        logger.info(f'--- Test with {cfg.settings["n_neighbors"]} neighbors')
        ablation_model.evaluate(test_dataset,
                                n_neighbors=cfg.settings['n_neighbors'],
                                mode='test')

        ############################## Save Results ############################
        ablation_model.save_results(OUTPUT_PATH +
                                    f'results/results_{seed_i+1}.json')
        logger.info("Results saved at " + OUTPUT_PATH +
                    f"results/results_{seed_i+1}.json")

        ablation_model.save_model(OUTPUT_PATH +
                                  f'model/ablation_{seed_i+1}.pt')
        logger.info("model saved at " + OUTPUT_PATH +
                    f"model/abaltion_{seed_i+1}.pt")

    # save config file
    cfg.settings['device'] = str(cfg.settings['device'])
    cfg.save_config(OUTPUT_PATH + 'config.json')
    logger.info("Config saved at " + OUTPUT_PATH + "config.json")
Code example #2
def main(ratio_known_abnormal_i):
    """
    Joint DSAD experiment with different fractions of known abnormal samples:
    0.0%, 0.25%, 0.5%, 1.0%, 5.0%, and 10.0%, over a single replicate.
    """
    # initialize logger
    logging.basicConfig(level=logging.INFO)
    logger = logging.getLogger()
    try:
        logger.handlers[1].stream.close()
        logger.removeHandler(logger.handlers[1])
    except IndexError:
        pass
    logger.setLevel(logging.INFO)
    formatter = logging.Formatter('%(asctime)s | %(levelname)s | %(message)s')
    log_file = OUTPUT_PATH + 'logs/' + f'log_{ratio_known_abnormal_i:.2%}.txt'
    file_handler = logging.FileHandler(log_file)
    file_handler.setLevel(logging.INFO)
    file_handler.setFormatter(formatter)
    logger.addHandler(file_handler)

    # print path and main docstring with experiment summary
    logger.info('Brief summary of experiment : \n' + main.__doc__)
    logger.info(f'Log file : {log_file}')
    logger.info(f'Data path : {DATA_PATH}')
    logger.info(f'Outputs path : {OUTPUT_PATH}' + '\n')

    ############################## Make datasets ###############################
    # load data_info
    df_info = pd.read_csv(DATA_INFO_PATH)
    df_info = df_info.drop(df_info.columns[0], axis=1)
    # remove low contrast images (all black)
    df_info = df_info[df_info.low_contrast == 0]

    # Train Validation Test Split
    spliter = MURA_TrainValidTestSplitter(
        df_info,
        train_frac=train_frac,
        ratio_known_normal=ratio_known_normal,
        ratio_known_abnormal=ratio_known_abnormal_i,
        random_state=42)
    spliter.split_data(verbose=False)
    train_df = spliter.get_subset('train')
    valid_df = spliter.get_subset('valid')
    test_df = spliter.get_subset('test')
    # make datasets
    train_dataset = MURA_Dataset(train_df,
                                 data_path=DATA_PATH,
                                 load_mask=True,
                                 load_semilabels=True,
                                 output_size=img_size,
                                 data_augmentation=True)
    valid_dataset = MURA_Dataset(valid_df,
                                 data_path=DATA_PATH,
                                 load_mask=True,
                                 load_semilabels=True,
                                 output_size=img_size,
                                 data_augmentation=False)
    test_dataset = MURA_Dataset(test_df,
                                data_path=DATA_PATH,
                                load_mask=True,
                                load_semilabels=True,
                                output_size=img_size,
                                data_augmentation=False)
    # print info to logger
    logger.info(f'Train fraction : {train_frac:.0%}')
    logger.info(f'Fraction known normal : {ratio_known_normal:.0%}')
    logger.info(f'Fraction known abnormal : {ratio_known_abnormal_i:.0%}')
    logger.info('Split Summary \n' + str(spliter.print_stat(returnTable=True)))
    logger.info('Online preprocessing pipeline : \n' +
                str(train_dataset.transform) + '\n')

    ################################ Set Up ####################################
    # Set seed
    if seed != -1:
        random.seed(seed)
        np.random.seed(seed)
        torch.manual_seed(seed)
        torch.cuda.manual_seed(seed)
        torch.backends.cudnn.deterministic = True
        logger.info(f'Set seed to {seed}')

    # set number of thread
    if n_thread > 0:
        torch.set_num_threads(n_thread)

    # print info in logger
    logger.info(f'Device : {device}')
    logger.info(f'Number of thread : {n_thread}')
    logger.info(
        f'Number of dataloader worker for Joint DeepSAD : {n_jobs_dataloader}'
        + '\n')

    ######################### Networks Initialization ##########################
    net = AE_SVDD_Hybrid(pretrain_ResNetEnc=ae_pretrain,
                         output_channels=ae_out_size[0],
                         return_svdd_embed=True)
    net = net.to(device)

    # add info to logger
    logger.info(f'Network : {net.__class__.__name__}')
    logger.info(f'Autoencoder pretrained on ImageNet : {ae_pretrain}')
    logger.info(f'DeepSAD eta : {eta}')
    logger.info('Network architecture: \n' +
                summary_string(net, (1, img_size, img_size),
                               device=str(device),
                               batch_size=batch_size) + '\n')

    # initialization of the Model
    jointDeepSAD = JointDeepSAD(net, eta=eta)

    if model_path_to_load:
        jointDeepSAD.load_model(model_path_to_load, map_location=device)
        logger.info(f'Model Loaded from {model_path_to_load}' + '\n')

    ################################ Training ##################################
    # add parameter info
    logger.info(f'Joint DeepSAD number of epoch : {n_epoch}')
    logger.info(
        f'Joint DeepSAD number of pretraining epoch: {n_epoch_pretrain}')
    logger.info(f'Joint DeepSAD learning rate : {lr}')
    logger.info(f'Joint DeepSAD learning rate milestone : {lr_milestone}')
    logger.info(f'Joint DeepSAD weight_decay : {weight_decay}')
    logger.info(f'Joint DeepSAD optimizer : Adam')
    logger.info(f'Joint DeepSAD batch_size {batch_size}')
    logger.info(
        f'Joint DeepSAD number of dataloader worker : {n_jobs_dataloader}')
    logger.info(
        f'Joint DeepSAD criterion weighting : {criterion_weight[0]} Reconstruction loss + {criterion_weight[1]} SVDD embedding loss'
        + '\n')

    # train DeepSAD
    jointDeepSAD.train(train_dataset,
                       lr=lr,
                       n_epoch=n_epoch,
                       n_epoch_pretrain=n_epoch_pretrain,
                       lr_milestone=lr_milestone,
                       batch_size=batch_size,
                       weight_decay=weight_decay,
                       device=device,
                       n_jobs_dataloader=n_jobs_dataloader,
                       print_batch_progress=print_batch_progress,
                       criterion_weight=criterion_weight,
                       valid_dataset=None)

    # validate DeepSAD
    jointDeepSAD.validate(valid_dataset,
                          device=device,
                          n_jobs_dataloader=n_jobs_dataloader,
                          print_batch_progress=print_batch_progress,
                          criterion_weight=criterion_weight)

    # test DeepSAD
    jointDeepSAD.test(test_dataset,
                      device=device,
                      n_jobs_dataloader=n_jobs_dataloader,
                      print_batch_progress=print_batch_progress,
                      criterion_weight=criterion_weight)

    # save results
    jointDeepSAD.save_results(
        OUTPUT_PATH +
        f'results/JointDeepSAD_results_{ratio_known_abnormal_i:.2%}.json')
    logger.info(
        'Test results saved at ' + OUTPUT_PATH +
        f'results/JointDeepSAD_results_{ratio_known_abnormal_i:.2%}.json')

    # save model
    jointDeepSAD.save_model(
        OUTPUT_PATH +
        f'model/JointDeepSAD_model_{ratio_known_abnormal_i:.2%}.pt')
    logger.info('Model saved at ' + OUTPUT_PATH +
                f'model/JointDeepSAD_model_{ratio_known_abnormal_i:.2%}.pt')
Code example #3
def main(seed_i):
    """
    Train the AutoEncoder and the DeepSAD model jointly, following the work of
    Lukas Ruff et al. (2019) adapted to the MURA dataset (preprocessing inspired
    by the work of Davletshina et al. (2020)). The network structure is a ResNet18
    AutoEncoder up to the adaptive average pooling layer. The AE embedding is
    thus (512, 16, 16). This embedding is further processed through 3 convolutional
    layers (specific to the SVDD embedding generation) to produce the SVDD
    embedding of dimension 512. The network is trained with two loss functions: a
    masked MSE loss for the reconstruction and the DeepSAD loss on the embedding.
    The two losses are scaled to be comparable by performing one forward pass prior
    to training. The Encoder is not initialized with weights trained on ImageNet.
    The AE masked reconstruction loss is not computed for known abnormal samples
    so that the AE learns to reconstruct normal samples only. The network input is
    masked: only the body part is kept and the background is set to zero. The AE is
    pretrained over 5 epochs in order to improve the initialization of the
    hypersphere center (we hypothesize that with a pretrained AE the hypersphere
    center estimation will be more meaningful). Note that the 'affine' parameter of
    the BatchNorm2d layers has been set to False in this implementation.
    """
    # initialize logger
    logging.basicConfig(level=logging.INFO)
    logger = logging.getLogger()
    try:
        logger.handlers[1].stream.close()
        logger.removeHandler(logger.handlers[1])
    except IndexError:
        pass
    logger.setLevel(logging.INFO)
    formatter = logging.Formatter('%(asctime)s | %(levelname)s | %(message)s')
    log_file = OUTPUT_PATH + 'logs/' + f'log_{seed_i+1}.txt'
    file_handler = logging.FileHandler(log_file)
    file_handler.setLevel(logging.INFO)
    file_handler.setFormatter(formatter)
    logger.addHandler(file_handler)

    # print path and main docstring with experiment summary
    logger.info('Brief summary of experiment : \n' + main.__doc__)
    logger.info(f'Log file : {log_file}')
    logger.info(f'Data path : {DATA_PATH}')
    logger.info(f'Outputs path : {OUTPUT_PATH}' + '\n')

    ############################## Make datasets ###############################
    # load data_info
    df_info = pd.read_csv(DATA_INFO_PATH)
    df_info = df_info.drop(df_info.columns[0], axis=1)
    # remove low contrast images (all black)
    df_info = df_info[df_info.low_contrast == 0]

    #df_info = df_info[df_info.body_part == 'HAND']

    # Train Validation Test Split
    spliter = MURA_TrainValidTestSplitter(
        df_info,
        train_frac=train_frac,
        ratio_known_normal=ratio_known_normal,
        ratio_known_abnormal=ratio_known_abnormal,
        random_state=42)
    spliter.split_data(verbose=False)
    train_df = spliter.get_subset('train')
    valid_df = spliter.get_subset('valid')
    test_df = spliter.get_subset('test')
    # make datasets
    train_dataset = MURA_Dataset(train_df,
                                 data_path=DATA_PATH,
                                 load_mask=True,
                                 load_semilabels=True,
                                 output_size=img_size,
                                 data_augmentation=True)
    valid_dataset = MURA_Dataset(valid_df,
                                 data_path=DATA_PATH,
                                 load_mask=True,
                                 load_semilabels=True,
                                 output_size=img_size,
                                 data_augmentation=False)
    test_dataset = MURA_Dataset(test_df,
                                data_path=DATA_PATH,
                                load_mask=True,
                                load_semilabels=True,
                                output_size=img_size,
                                data_augmentation=False)
    # print info to logger
    logger.info(f'Train fraction : {train_frac:.0%}')
    logger.info(f'Fraction known normal : {ratio_known_normal:.0%}')
    logger.info(f'Fraction known abnormal : {ratio_known_abnormal:.0%}')
    logger.info('Split Summary \n' + str(spliter.print_stat(returnTable=True)))
    logger.info('Online preprocessing pipeline : \n' +
                str(train_dataset.transform) + '\n')

    ################################ Set Up ####################################
    # Set seed
    seed = seeds[seed_i]
    if seed != -1:
        random.seed(seed)
        np.random.seed(seed)
        torch.manual_seed(seed)
        torch.cuda.manual_seed(seed)
        torch.backends.cudnn.deterministic = True
        logger.info(f'Set seed {seed_i+1:02}/{n_seeds:02} to {seed}')

    # set number of thread
    if n_thread > 0:
        torch.set_num_threads(n_thread)

    # print info in logger
    logger.info(f'Device : {device}')
    logger.info(f'Number of thread : {n_thread}')
    logger.info(
        f'Number of dataloader worker for Joint DeepSAD : {n_jobs_dataloader}'
        + '\n')

    ######################### Networks Initialization ##########################
    net = AE_SVDD_Hybrid(pretrain_ResNetEnc=ae_pretrain,
                         output_channels=ae_out_size[0],
                         return_svdd_embed=True)
    net = net.to(device)

    # add info to logger
    logger.info(f'Network : {net.__class__.__name__}')
    logger.info(f'Autoencoder pretrained on ImageNet : {ae_pretrain}')
    logger.info(f'DeepSAD eta : {eta}')
    logger.info('Network architecture: \n' +
                summary_string(net, (1, img_size, img_size),
                               device=str(device),
                               batch_size=batch_size) + '\n')

    # initialization of the Model
    jointDeepSAD = JointDeepSAD(net, eta=eta)

    if model_path_to_load:
        jointDeepSAD.load_model(model_path_to_load, map_location=device)
        logger.info(f'Model Loaded from {model_path_to_load}' + '\n')

    ################################ Training ##################################
    # add parameter info
    logger.info(f'Joint DeepSAD number of epoch : {n_epoch}')
    logger.info(
        f'Joint DeepSAD number of pretraining epoch: {n_epoch_pretrain}')
    logger.info(f'Joint DeepSAD learning rate : {lr}')
    logger.info(f'Joint DeepSAD learning rate milestone : {lr_milestone}')
    logger.info(f'Joint DeepSAD weight_decay : {weight_decay}')
    logger.info(f'Joint DeepSAD optimizer : Adam')
    logger.info(f'Joint DeepSAD batch_size {batch_size}')
    logger.info(
        f'Joint DeepSAD number of dataloader worker : {n_jobs_dataloader}')
    logger.info(
        f'Joint DeepSAD criterion weighting : {criterion_weight[0]} Reconstruction loss + {criterion_weight[1]} SVDD embedding loss'
        + '\n')

    # train DeepSAD
    jointDeepSAD.train(train_dataset,
                       lr=lr,
                       n_epoch=n_epoch,
                       n_epoch_pretrain=n_epoch_pretrain,
                       lr_milestone=lr_milestone,
                       batch_size=batch_size,
                       weight_decay=weight_decay,
                       device=device,
                       n_jobs_dataloader=n_jobs_dataloader,
                       print_batch_progress=print_batch_progress,
                       criterion_weight=criterion_weight)

    # validate DeepSAD
    jointDeepSAD.validate(valid_dataset,
                          device=device,
                          n_jobs_dataloader=n_jobs_dataloader,
                          print_batch_progress=print_batch_progress,
                          criterion_weight=criterion_weight)

    # test DeepSAD
    jointDeepSAD.test(test_dataset,
                      device=device,
                      n_jobs_dataloader=n_jobs_dataloader,
                      print_batch_progress=print_batch_progress,
                      criterion_weight=criterion_weight)

    # save results
    jointDeepSAD.save_results(OUTPUT_PATH +
                              f'results/JointDeepSAD_results_{seed_i+1}.json')
    logger.info('Test results saved at ' + OUTPUT_PATH +
                f'results/JointDeepSAD_results_{seed_i+1}.json' + '\n')

    # save model
    jointDeepSAD.save_model(OUTPUT_PATH +
                            f'model/JointDeepSAD_model_{seed_i+1}.pt')
    logger.info('Model saved at ' + OUTPUT_PATH +
                f'model/JointDeepSAD_model_{seed_i+1}.pt')
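
The docstring of the script above describes two criteria combined through criterion_weight: a masked MSE reconstruction loss that skips known abnormal samples, and the DeepSAD loss on the SVDD embedding. The following is a minimal sketch of those two terms, assuming a semi-label convention of +1 for known normal, 0 for unknown and -1 for known abnormal; the tensor names, the eta weighting and the exact reduction are illustrative and may differ from the actual JointDeepSAD implementation.

# Illustrative sketch only -- masked MSE + DeepSAD criterion (hypothetical names).
import torch

def masked_mse(rec, img, mask, semi_label):
    # Squared error restricted to the body-part mask; known abnormal samples
    # (semi_label == -1) are excluded so the AE only learns normal anatomy.
    keep = (semi_label != -1).float().view(-1, 1, 1, 1)
    err = ((rec - img) ** 2) * mask * keep
    return err.sum() / mask.sum().clamp(min=1.0)

def deepsad_loss(embed, center, semi_label, eta=1.0, eps=1e-6):
    # Squared distance to the hypersphere center; known abnormal samples are
    # pushed away through the eta-weighted inverse distance.
    dist = torch.sum((embed - center) ** 2, dim=1)
    return torch.where(semi_label == -1, eta / (dist + eps), dist).mean()

# Dummy usage with equal criterion weights.
img = torch.rand(4, 1, 512, 512)
rec = torch.rand(4, 1, 512, 512)
mask = (torch.rand(4, 1, 512, 512) > 0.5).float()
embed, center = torch.randn(4, 512), torch.zeros(512)
semi_label = torch.tensor([0, 1, -1, 0])
criterion_weight = (0.5, 0.5)
loss = (criterion_weight[0] * masked_mse(rec, img, mask, semi_label)
        + criterion_weight[1] * deepsad_loss(embed, center, semi_label))
print(loss.item())
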
Code example #4
def main(config_path):
    """
    Train a DSAD on the MURA dataset using a SimCLR pretraining.
    """
    # Load config file
    cfg = Config(settings=None)
    cfg.load_config(config_path)

    # Get path to output
    OUTPUT_PATH = cfg.settings['PATH']['OUTPUT'] + cfg.settings[
        'Experiment_Name'] + datetime.today().strftime('%Y_%m_%d_%Hh%M') + '/'
    # make output dir
    if not os.path.isdir(OUTPUT_PATH + 'model/'):
        os.makedirs(OUTPUT_PATH + 'model/', exist_ok=True)
    if not os.path.isdir(OUTPUT_PATH + 'results/'):
        os.makedirs(OUTPUT_PATH + 'results/', exist_ok=True)
    if not os.path.isdir(OUTPUT_PATH + 'logs/'):
        os.makedirs(OUTPUT_PATH + 'logs/', exist_ok=True)

    for seed_i, seed in enumerate(cfg.settings['seeds']):
        ############################### Set Up #################################
        # initialize logger
        logging.basicConfig(level=logging.INFO)
        logger = logging.getLogger()
        try:
            logger.handlers[1].stream.close()
            logger.removeHandler(logger.handlers[1])
        except IndexError:
            pass
        logger.setLevel(logging.INFO)
        formatter = logging.Formatter(
            '%(asctime)s | %(levelname)s | %(message)s')
        log_file = OUTPUT_PATH + 'logs/' + f'log_{seed_i+1}.txt'
        file_handler = logging.FileHandler(log_file)
        file_handler.setLevel(logging.INFO)
        file_handler.setFormatter(formatter)
        logger.addHandler(file_handler)

        # print path
        logger.info(f"Log file : {log_file}")
        logger.info(f"Data path : {cfg.settings['PATH']['DATA']}")
        logger.info(f"Outputs path : {OUTPUT_PATH}" + "\n")

        # Set seed
        if seed != -1:
            random.seed(seed)
            np.random.seed(seed)
            torch.manual_seed(seed)
            torch.cuda.manual_seed(seed)
            torch.cuda.manual_seed_all(seed)
            torch.backends.cudnn.deterministic = True
            logger.info(
                f"Set seed {seed_i+1:02}/{len(cfg.settings['seeds']):02} to {seed}"
            )

        # set number of thread
        if cfg.settings['n_thread'] > 0:
            torch.set_num_threads(cfg.settings['n_thread'])

        # check if GPU available
        cfg.settings['device'] = torch.device(
            'cuda') if torch.cuda.is_available() else torch.device('cpu')
        # Print technical info in logger
        logger.info(f"Device : {cfg.settings['device']}")
        logger.info(f"Number of thread : {cfg.settings['n_thread']}")

        ############################### Split Data #############################
        # Load data informations
        df_info = pd.read_csv(cfg.settings['PATH']['DATA_INFO'])
        df_info = df_info.drop(df_info.columns[0], axis=1)
        # remove low contrast images (all black)
        df_info = df_info[df_info.low_contrast == 0]

        # Train Validation Test Split
        spliter = MURA_TrainValidTestSplitter(
            df_info,
            train_frac=cfg.settings['Split']['train_frac'],
            ratio_known_normal=cfg.settings['Split']['known_normal'],
            ratio_known_abnormal=cfg.settings['Split']['known_abnormal'],
            random_state=42)
        spliter.split_data(verbose=False)
        train_df = spliter.get_subset('train')
        valid_df = spliter.get_subset('valid')
        test_df = spliter.get_subset('test')

        # print info to logger
        for key, value in cfg.settings['Split'].items():
            logger.info(f"Split param {key} : {value}")
        logger.info("Split Summary \n" +
                    str(spliter.print_stat(returnTable=True)))

        ############################# Build Model  #############################
        # make networks
        net_CLR = SimCLR_net(
            MLP_Neurons_layer=cfg.settings['SimCLR']['MLP_head'])
        net_CLR = net_CLR.to(cfg.settings['device'])
        net_DSAD = SimCLR_net(
            MLP_Neurons_layer=cfg.settings['DSAD']['MLP_head'])
        net_DSAD = net_DSAD.to(cfg.settings['device'])
        # print network architecture
        net_architecture = summary_string(
            net_CLR, (1, cfg.settings['Split']['img_size'],
                      cfg.settings['Split']['img_size']),
            batch_size=cfg.settings['SimCLR']['batch_size'],
            device=str(cfg.settings['device']))
        logger.info("SimCLR net architecture: \n" + net_architecture + '\n')
        net_architecture = summary_string(
            net_DSAD, (1, cfg.settings['Split']['img_size'],
                       cfg.settings['Split']['img_size']),
            batch_size=cfg.settings['DSAD']['batch_size'],
            device=str(cfg.settings['device']))
        logger.info("DSAD net architecture: \n" + net_architecture + '\n')

        # make model
        clr_DSAD = SimCLR_DSAD(net_CLR,
                               net_DSAD,
                               tau=cfg.settings['SimCLR']['tau'],
                               eta=cfg.settings['DSAD']['eta'])

        ############################# Train SimCLR #############################
        # make datasets
        train_dataset_CLR = MURADataset_SimCLR(
            train_df,
            data_path=cfg.settings['PATH']['DATA'],
            output_size=cfg.settings['Split']['img_size'],
            mask_img=True)
        valid_dataset_CLR = MURADataset_SimCLR(
            valid_df,
            data_path=cfg.settings['PATH']['DATA'],
            output_size=cfg.settings['Split']['img_size'],
            mask_img=True)
        test_dataset_CLR = MURADataset_SimCLR(
            test_df,
            data_path=cfg.settings['PATH']['DATA'],
            output_size=cfg.settings['Split']['img_size'],
            mask_img=True)

        logger.info("SimCLR Online preprocessing pipeline : \n" +
                    str(train_dataset_CLR.transform) + "\n")

        # Load model if required
        if cfg.settings['SimCLR']['model_path_to_load']:
            clr_DSAD.load_repr_net(
                cfg.settings['SimCLR']['model_path_to_load'],
                map_location=cfg.settings['device'])
            logger.info(
                f"SimCLR Model Loaded from {cfg.settings['SimCLR']['model_path_to_load']}"
                + "\n")

        # print Train parameters
        for key, value in cfg.settings['SimCLR'].items():
            logger.info(f"SimCLR {key} : {value}")

        # Train SimCLR
        clr_DSAD.train_SimCLR(
            train_dataset_CLR,
            valid_dataset=None,
            n_epoch=cfg.settings['SimCLR']['n_epoch'],
            batch_size=cfg.settings['SimCLR']['batch_size'],
            lr=cfg.settings['SimCLR']['lr'],
            weight_decay=cfg.settings['SimCLR']['weight_decay'],
            lr_milestones=cfg.settings['SimCLR']['lr_milestone'],
            n_job_dataloader=cfg.settings['SimCLR']['num_worker'],
            device=cfg.settings['device'],
            print_batch_progress=cfg.settings['print_batch_progress'])

        # Evaluate SimCLR to get embeddings
        clr_DSAD.evaluate_SimCLR(
            valid_dataset_CLR,
            batch_size=cfg.settings['SimCLR']['batch_size'],
            n_job_dataloader=cfg.settings['SimCLR']['num_worker'],
            device=cfg.settings['device'],
            print_batch_progress=cfg.settings['print_batch_progress'],
            set='valid')

        clr_DSAD.evaluate_SimCLR(
            test_dataset_CLR,
            batch_size=cfg.settings['SimCLR']['batch_size'],
            n_job_dataloader=cfg.settings['SimCLR']['num_worker'],
            device=cfg.settings['device'],
            print_batch_progress=cfg.settings['print_batch_progress'],
            set='test')

        # save repr net
        clr_DSAD.save_repr_net(OUTPUT_PATH + f'model/SimCLR_net_{seed_i+1}.pt')
        logger.info("SimCLR model saved at " + OUTPUT_PATH +
                    f"model/SimCLR_net_{seed_i+1}.pt")

        # save Results
        clr_DSAD.save_results(OUTPUT_PATH + f'results/results_{seed_i+1}.json')
        logger.info("Results saved at " + OUTPUT_PATH +
                    f"results/results_{seed_i+1}.json")

        ######################## Transfer Encoder Weight #######################

        clr_DSAD.transfer_encoder()

        ############################## Train DSAD ##############################
        # make dataset
        train_dataset_AD = MURA_Dataset(
            train_df,
            data_path=cfg.settings['PATH']['DATA'],
            load_mask=True,
            load_semilabels=True,
            output_size=cfg.settings['Split']['img_size'])
        valid_dataset_AD = MURA_Dataset(
            valid_df,
            data_path=cfg.settings['PATH']['DATA'],
            load_mask=True,
            load_semilabels=True,
            output_size=cfg.settings['Split']['img_size'])
        test_dataset_AD = MURA_Dataset(
            test_df,
            data_path=cfg.settings['PATH']['DATA'],
            load_mask=True,
            load_semilabels=True,
            output_size=cfg.settings['Split']['img_size'])

        logger.info("DSAD Online preprocessing pipeline : \n" +
                    str(train_dataset_AD.transform) + "\n")

        # Load model if required
        if cfg.settings['DSAD']['model_path_to_load']:
            clr_DSAD.load_AD(cfg.settings['DSAD']['model_path_to_load'],
                             map_location=cfg.settings['device'])
            logger.info(
                f"DSAD Model Loaded from {cfg.settings['DSAD']['model_path_to_load']} \n"
            )

        # print Train parameters
        for key, value in cfg.settings['DSAD'].items():
            logger.info(f"DSAD {key} : {value}")

        # Train DSAD
        clr_DSAD.train_AD(
            train_dataset_AD,
            valid_dataset=valid_dataset_AD,
            n_epoch=cfg.settings['DSAD']['n_epoch'],
            batch_size=cfg.settings['DSAD']['batch_size'],
            lr=cfg.settings['DSAD']['lr'],
            weight_decay=cfg.settings['DSAD']['weight_decay'],
            lr_milestone=cfg.settings['DSAD']['lr_milestone'],
            n_job_dataloader=cfg.settings['DSAD']['num_worker'],
            device=cfg.settings['device'],
            print_batch_progress=cfg.settings['print_batch_progress'])
        logger.info('--- Validation')
        clr_DSAD.evaluate_AD(
            valid_dataset_AD,
            batch_size=cfg.settings['DSAD']['batch_size'],
            n_job_dataloader=cfg.settings['DSAD']['num_worker'],
            device=cfg.settings['device'],
            print_batch_progress=cfg.settings['print_batch_progress'],
            set='valid')
        logger.info('--- Test')
        clr_DSAD.evaluate_AD(
            test_dataset_AD,
            batch_size=cfg.settings['DSAD']['batch_size'],
            n_job_dataloader=cfg.settings['DSAD']['num_worker'],
            device=cfg.settings['device'],
            print_batch_progress=cfg.settings['print_batch_progress'],
            set='test')

        # save DSAD
        clr_DSAD.save_AD(OUTPUT_PATH + f'model/DSAD_{seed_i+1}.pt')
        logger.info("model saved at " + OUTPUT_PATH +
                    f"model/DSAD_{seed_i+1}.pt")

        ########################## Save Results ################################
        # save Results
        clr_DSAD.save_results(OUTPUT_PATH + f'results/results_{seed_i+1}.json')
        logger.info("Results saved at " + OUTPUT_PATH +
                    f"results/results_{seed_i+1}.json")

    # save config file
    cfg.settings['device'] = str(cfg.settings['device'])
    cfg.save_config(OUTPUT_PATH + 'config.json')
    logger.info("Config saved at " + OUTPUT_PATH + "config.json")
Code example #5
def main(seed_i):
    """
    Implementation of the unsupervised DROCC model proposed by Goyal et al. (2020).
    The model uses a binary classifier with a ResNet18 backbone. The output is a
    logit that serves as the anomaly score. The loss is computed as a Binary Cross
    Entropy loss with logits. The training consists of a few initial epochs trained
    only on the normal samples. Each subsequent epoch then starts with the generation
    of adversarial examples. The adversarial search is performed only on normal
    samples since we want the network to learn the manifold of normal samples. It
    uses a slightly modified projected gradient descent algorithm. The samples and
    adversarial samples are then passed through the network as in a standard
    classification task. Note that the input samples are masked with the mask
    generated in the preprocessing steps.
    """
    # initialize logger
    logging.basicConfig(level=logging.INFO)
    logger = logging.getLogger()
    try:
        logger.handlers[1].stream.close()
        logger.removeHandler(logger.handlers[1])
    except IndexError:
        pass
    logger.setLevel(logging.INFO)
    formatter = logging.Formatter('%(asctime)s | %(levelname)s | %(message)s')
    log_file = OUTPUT_PATH + 'logs/' + f'log_{seed_i+1}.txt'
    file_handler = logging.FileHandler(log_file)
    file_handler.setLevel(logging.INFO)
    file_handler.setFormatter(formatter)
    logger.addHandler(file_handler)

    # print path and main docstring with experiment summary
    logger.info('Brief summary of experiment : \n' + main.__doc__)
    logger.info(f'Log file : {log_file}')
    logger.info(f'Data path : {DATA_PATH}')
    logger.info(f'Outputs path : {OUTPUT_PATH}' + '\n')

    ############################## Make datasets ###############################
    # load data_info
    df_info = pd.read_csv(DATA_INFO_PATH)
    df_info = df_info.drop(df_info.columns[0], axis=1)
    # remove low contrast images (all black)
    df_info = df_info[df_info.low_contrast == 0]

    # Train Validation Test Split
    spliter = MURA_TrainValidTestSplitter(
        df_info,
        train_frac=train_frac,
        ratio_known_normal=ratio_known_normal,
        ratio_known_abnormal=ratio_known_abnormal,
        random_state=42)
    spliter.split_data(verbose=False)
    train_df = spliter.get_subset('train')
    valid_df = spliter.get_subset('valid')
    test_df = spliter.get_subset('test')
    # make datasets
    train_dataset = MURA_Dataset(train_df,
                                 data_path=DATA_PATH,
                                 load_mask=True,
                                 load_semilabels=True,
                                 output_size=img_size)
    valid_dataset = MURA_Dataset(valid_df,
                                 data_path=DATA_PATH,
                                 load_mask=True,
                                 load_semilabels=True,
                                 output_size=img_size)
    test_dataset = MURA_Dataset(test_df,
                                data_path=DATA_PATH,
                                load_mask=True,
                                load_semilabels=True,
                                output_size=img_size)
    # print info to logger
    logger.info(f'Train fraction : {train_frac:.0%}')
    logger.info(f'Fraction known normal : {ratio_known_normal:.0%}')
    logger.info(f'Fraction known abnormal : {ratio_known_abnormal:.0%}')
    logger.info('Split Summary \n' + str(spliter.print_stat(returnTable=True)))
    logger.info('Online preprocessing pipeline : \n' +
                str(train_dataset.transform) + '\n')

    ################################ Set Up ####################################
    # Set seed
    seed = seeds[seed_i]
    if seed != -1:
        random.seed(seed)
        np.random.seed(seed)
        torch.manual_seed(seed)
        torch.cuda.manual_seed(seed)
        torch.backends.cudnn.deterministic = True
        logger.info(f'Set seed {seed_i+1:02}/{n_seeds:02} to {seed}')

    # set number of thread
    if n_thread > 0:
        torch.set_num_threads(n_thread)

    # print info in logger
    logger.info(f'Device : {device}')
    logger.info(f'Number of thread : {n_thread}')
    logger.info(
        f'Number of dataloader worker for {Experiment_Name} : {n_jobs_dataloader}'
        + '\n')

    ######################### Networks Initialization ##########################
    net = ResNet18_binary(pretrained=pretrain)
    net = net.to(device)

    # add info to logger
    logger.info(f'Network : {net.__class__.__name__}')
    logger.info(f'ResNet18 pretrained on ImageNet : {pretrain}')
    logger.info('Network architecture: \n' +
                summary_string(net, (1, img_size, img_size),
                               device=str(device),
                               batch_size=batch_size) + '\n')

    # initialization of the Model
    drocc = DROCC(net, r)

    if model_path_to_load:
        drocc.load_model(model_path_to_load, map_location=device)
        logger.info(f'Model Loaded from {model_path_to_load}' + '\n')

    ################################ Training ##################################
    # add parameter info
    logger.info(f'{Experiment_Name} radius r : {r}')
    logger.info(f'{Experiment_Name} gamma : {gamma}')
    logger.info(f'{Experiment_Name} adversarial importance mu : {mu}')
    logger.info(f'{Experiment_Name} number of initial epoch : {n_epoch_init}')
    logger.info(f'{Experiment_Name} number of epoch : {n_epoch}')
    logger.info(
        f'{Experiment_Name} number of adversarial search epoch: {n_epoch_adv}')
    logger.info(f'{Experiment_Name} learning rate : {lr}')
    logger.info(
        f'{Experiment_Name} adversarial search learning rate : {lr_adv}')
    logger.info(f'{Experiment_Name} learning rate milestone : {lr_milestone}')
    logger.info(f'{Experiment_Name} weight_decay : {weight_decay}')
    logger.info(f'{Experiment_Name} optimizer : Adam')
    logger.info(f'{Experiment_Name} batch_size {batch_size}')
    logger.info(
        f'{Experiment_Name} number of dataloader worker : {n_jobs_dataloader}')

    # train DROCC
    drocc.train(train_dataset,
                gamma=gamma,
                mu=mu,
                lr=lr,
                lr_adv=lr_adv,
                lr_milestone=lr_milestone,
                weight_decay=weight_decay,
                n_epoch=n_epoch,
                n_epoch_init=n_epoch_init,
                n_epoch_adv=n_epoch_adv,
                batch_size=batch_size,
                device=device,
                n_jobs_dataloader=n_jobs_dataloader,
                LFOC=use_LFOC,
                print_batch_progress=print_batch_progress)

    # validate DROCC
    drocc.validate(valid_dataset,
                   device=device,
                   n_jobs_dataloader=n_jobs_dataloader,
                   print_batch_progress=print_batch_progress)

    # test DROCC
    drocc.test(test_dataset,
               device=device,
               n_jobs_dataloader=n_jobs_dataloader,
               print_batch_progress=print_batch_progress)

    # save results
    drocc.save_results(OUTPUT_PATH +
                       f'results/{Experiment_Name}_results_{seed_i+1}.json')
    logger.info('Test results saved at ' + OUTPUT_PATH +
                f'results/{Experiment_Name}_results_{seed_i+1}.json' + '\n')

    # save model
    drocc.save_model(OUTPUT_PATH +
                     f'model/{Experiment_Name}_model_{seed_i+1}.pt')
    logger.info('Model saved at ' + OUTPUT_PATH +
                f'model/{Experiment_Name}_model_{seed_i+1}.pt')
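
The DROCC docstring above mentions an adversarial search over normal samples using a slightly modified projected gradient descent. The sketch below illustrates the general idea following Goyal et al. (2020): gradient ascent on the classification loss, followed by projection of the perturbation onto the annulus r <= ||h||_2 <= gamma * r. The labeling convention (1 = normal, 0 = abnormal), the step sizes and the names are assumptions, not the project's DROCC class.

# Illustrative sketch only -- DROCC-style adversarial search (assumed convention:
# label 1 = normal, 0 = abnormal; net is assumed to output one logit per sample).
import torch
import torch.nn.functional as F

def adversarial_search(net, x_normal, r, gamma=2.0, lr_adv=0.01, n_step=5):
    # Start from a small random perturbation around the normal images (N, C, H, W).
    h = torch.randn_like(x_normal) * 1e-3
    for _ in range(n_step):
        h.requires_grad_(True)
        logits = net(x_normal + h).squeeze(1)
        # Ascend on the loss of labeling the perturbed points as abnormal, so the
        # search moves toward points the current classifier still scores as normal.
        loss = F.binary_cross_entropy_with_logits(logits, torch.zeros_like(logits))
        grad = torch.autograd.grad(loss, h)[0]
        with torch.no_grad():
            grad_norm = grad.flatten(1).norm(p=2, dim=1).view(-1, 1, 1, 1) + 1e-12
            h = h + lr_adv * grad / grad_norm
            # Project the perturbation onto the annulus r <= ||h||_2 <= gamma * r.
            h_norm = h.flatten(1).norm(p=2, dim=1).view(-1, 1, 1, 1) + 1e-12
            h = h * h_norm.clamp(min=r, max=gamma * r) / h_norm
    # The returned points are then used as negative (abnormal) examples in training.
    return (x_normal + h).detach()
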
Code example #6
def main(seed_i):
    """
    Extension of the deep multi-sphere SVDD to semi-supervised settings, inspired
    by the DSAD of Ruff et al. (2020).

    The MSAD loss is changed to use sqrt(dist) for normal samples and 1/(dist^2)
    for abnormal samples. The network is pretrained for longer (30 epochs) to get
    a better KMeans initialization. The anomaly score is dist - R.
    """
    # initialize logger
    logging.basicConfig(level=logging.INFO)
    logger = logging.getLogger()
    try:
        logger.handlers[1].stream.close()
        logger.removeHandler(logger.handlers[1])
    except IndexError:
        pass
    logger.setLevel(logging.INFO)
    formatter = logging.Formatter('%(asctime)s | %(levelname)s | %(message)s')
    log_file = OUTPUT_PATH + 'logs/' + f'log_{seed_i+1}.txt'
    file_handler = logging.FileHandler(log_file)
    file_handler.setLevel(logging.INFO)
    file_handler.setFormatter(formatter)
    logger.addHandler(file_handler)

    # print path and main docstring with experiment summary
    logger.info('Brief summary of experiment : \n' + main.__doc__)
    logger.info(f'Log file : {log_file}')
    logger.info(f'Data path : {DATA_PATH}')
    logger.info(f'Outputs path : {OUTPUT_PATH}' + '\n')

    ############################## Make datasets ###############################
    # load data_info
    df_info = pd.read_csv(DATA_INFO_PATH)
    df_info = df_info.drop(df_info.columns[0], axis=1)
    # remove low contrast images (all black)
    df_info = df_info[df_info.low_contrast == 0]

    # Train Validation Test Split
    spliter = MURA_TrainValidTestSplitter(
        df_info,
        train_frac=train_frac,
        ratio_known_normal=ratio_known_normal,
        ratio_known_abnormal=ratio_known_abnormal,
        random_state=42)
    spliter.split_data(verbose=False)
    train_df = spliter.get_subset('train')
    valid_df = spliter.get_subset('valid')
    test_df = spliter.get_subset('test')
    # make datasets
    train_dataset = MURA_Dataset(train_df,
                                 data_path=DATA_PATH,
                                 load_mask=True,
                                 load_semilabels=True,
                                 output_size=img_size)
    valid_dataset = MURA_Dataset(valid_df,
                                 data_path=DATA_PATH,
                                 load_mask=True,
                                 load_semilabels=True,
                                 output_size=img_size)
    test_dataset = MURA_Dataset(test_df,
                                data_path=DATA_PATH,
                                load_mask=True,
                                load_semilabels=True,
                                output_size=img_size)
    # print info to logger
    logger.info(f'Train fraction : {train_frac:.0%}')
    logger.info(f'Fraction known normal : {ratio_known_normal:.0%}')
    logger.info(f'Fraction known abnormal : {ratio_known_abnormal:.0%}')
    logger.info('Split Summary \n' + str(spliter.print_stat(returnTable=True)))
    logger.info('Online preprocessing pipeline : \n' +
                str(train_dataset.transform) + '\n')

    ################################ Set Up ####################################
    # Set seed
    seed = seeds[seed_i]
    if seed != -1:
        random.seed(seed)
        np.random.seed(seed)
        torch.manual_seed(seed)
        torch.cuda.manual_seed(seed)
        torch.cuda.manual_seed_all(seed)
        torch.backends.cudnn.deterministic = True
        torch.backends.cudnn.benchmark = False
        logger.info(f'Set seed {seed_i+1:02}/{n_seeds:02} to {seed}')

    # set number of thread
    if n_thread > 0:
        torch.set_num_threads(n_thread)

    # print info in logger
    logger.info(f'Device : {device}')
    logger.info(f'Number of thread : {n_thread}')
    logger.info(
        f'Number of dataloader worker for {Experiment_Name} : {n_jobs_dataloader}'
        + '\n')

    ######################### Networks Initialization ##########################
    net = AE_SVDD_Hybrid(pretrain_ResNetEnc=ae_pretrain,
                         output_channels=ae_out_size[0],
                         return_svdd_embed=True)
    net = net.to(device)

    # add info to logger
    logger.info(f'Network : {net.__class__.__name__}')
    logger.info(f'Autoencoder pretrained on ImageNet : {ae_pretrain}')
    logger.info('Network architecture: \n' +
                summary_string(net, (1, img_size, img_size),
                               device=str(device),
                               batch_size=batch_size) + '\n')

    # initialization of the Model
    jointDMSAD = joint_DMSAD(net, eta=eta, gamma=gamma)

    if model_path_to_load:
        jointDMSAD.load_model(model_path_to_load, map_location=device)
        logger.info(f'Model Loaded from {model_path_to_load}' + '\n')

    ################################ Training ##################################
    # add parameter info
    logger.info(f'{Experiment_Name} eta : {eta}')
    logger.info(f'{Experiment_Name} gamma : {gamma}')
    logger.info(f'{Experiment_Name} number of epoch : {n_epoch}')
    logger.info(
        f'{Experiment_Name} number of pretraining epoch: {n_epoch_pretrain}')
    logger.info(
        f'{Experiment_Name} number of initial hypersphere: {n_sphere_init}')
    logger.info(f'{Experiment_Name} learning rate : {lr}')
    logger.info(f'{Experiment_Name} learning rate milestones : {lr_milestone}')
    logger.info(f'{Experiment_Name} weight_decay : {weight_decay}')
    logger.info(f'{Experiment_Name} optimizer : Adam')
    logger.info(f'{Experiment_Name} batch_size {batch_size}')
    logger.info(
        f'{Experiment_Name} number of dataloader worker : {n_jobs_dataloader}')
    logger.info(
        f'{Experiment_Name} criterion weighting : {criterion_weight[0]} Reconstruction loss + {criterion_weight[1]} MSAD embedding loss'
    )
    logger.info(
        f'{Experiment_Name} reset scaling epoch : {reset_scaling_epoch}')

    # train DMSAD
    jointDMSAD.train(train_dataset,
                     valid_dataset=valid_dataset,
                     n_sphere_init=n_sphere_init,
                     n_epoch=n_epoch,
                     n_epoch_pretrain=n_epoch_pretrain,
                     lr=lr,
                     weight_decay=weight_decay,
                     lr_milestone=lr_milestone,
                     criterion_weight=criterion_weight,
                     reset_scaling_epoch=reset_scaling_epoch,
                     batch_size=batch_size,
                     n_jobs_dataloader=n_jobs_dataloader,
                     device=device,
                     print_batch_progress=print_batch_progress)

    # validate DMSAD
    jointDMSAD.validate(valid_dataset,
                        batch_size=batch_size,
                        n_jobs_dataloader=n_jobs_dataloader,
                        criterion_weight=criterion_weight,
                        device=device,
                        print_batch_progress=print_batch_progress)

    # test DMSAD
    jointDMSAD.test(test_dataset,
                    batch_size=batch_size,
                    n_jobs_dataloader=n_jobs_dataloader,
                    criterion_weight=criterion_weight,
                    device=device,
                    print_batch_progress=print_batch_progress)

    # save results
    jointDMSAD.save_results(
        OUTPUT_PATH + f'results/{Experiment_Name}_results_{seed_i+1}.json')
    logger.info('Test results saved at ' + OUTPUT_PATH +
                f'results/{Experiment_Name}_results_{seed_i+1}.json' + '\n')

    # save model
    jointDMSAD.save_model(OUTPUT_PATH +
                          f'model/{Experiment_Name}_model_{seed_i+1}.pt')
    logger.info('Model saved at ' + OUTPUT_PATH +
                f'model/{Experiment_Name}_model_{seed_i+1}.pt')
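
The docstring of the DMSAD script above states that the loss uses sqrt(dist) for normal samples, 1/(dist^2) for known abnormal samples, and that the anomaly score is dist - R. A small illustrative sketch of those formulas follows; the exact distance convention (squared vs. plain Euclidean), the eta weighting and the tensor names are assumptions, not the project's joint_DMSAD implementation.

# Illustrative sketch only -- modified MSAD criterion and dist - R anomaly score
# (hypothetical names; semi_label == -1 marks known abnormal samples).
import torch

def msad_loss(embed, centers, semi_label, eta=1.0, eps=1e-6):
    # Squared distance to every sphere center; each sample is assigned to the closest one.
    dist2, sphere_idx = (torch.cdist(embed, centers) ** 2).min(dim=1)
    normal_term = torch.sqrt(dist2 + eps)         # sqrt(dist) pulls normal samples inward
    abnormal_term = eta / (dist2 ** 2 + eps)      # 1/(dist^2) pushes known abnormal outward
    return torch.where(semi_label == -1, abnormal_term, normal_term).mean(), sphere_idx

def anomaly_score(embed, centers, radii):
    # Score = distance to the closest sphere center minus that sphere's radius R.
    dist, sphere_idx = torch.cdist(embed, centers).min(dim=1)
    return dist - radii[sphere_idx]

embed = torch.randn(8, 512)
centers = torch.randn(4, 512)                     # e.g. KMeans-initialized centers
radii = torch.rand(4)
semi_label = torch.tensor([0, 0, -1, 0, 1, 0, -1, 0])
loss, _ = msad_loss(embed, centers, semi_label)
print(loss.item(), anomaly_score(embed, centers, radii)[:3])
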
Code example #7
def main(seed_i):
    """
    Train a DeepSAD model following the work and code structure of Lukas Ruff et
    al. (2019), adapted to the MURA dataset (preprocessing inspired by the work of
    Davletshina et al. (2020)). The DeepSAD network structure is a ResNet18 Encoder.
    The Encoder is pretrained via Autoencoder training. The Autoencoder itself is
    not initialized with weights trained on ImageNet. The best threshold on the
    scores is selected on the validation set as the one maximizing the F1-score.
    The ROC AUC is reported on the test and validation sets.
    """
    # initialize logger
    logging.basicConfig(level=logging.INFO)
    logger = logging.getLogger()
    try:
        logger.handlers[1].stream.close()
        logger.removeHandler(logger.handlers[1])
    except IndexError:
        pass
    logger.setLevel(logging.INFO)
    formatter = logging.Formatter('%(asctime)s | %(levelname)s | %(message)s')
    log_file = OUTPUT_PATH + 'logs/' + f'log_{seed_i+1}.txt'
    file_handler = logging.FileHandler(log_file)
    file_handler.setLevel(logging.INFO)
    file_handler.setFormatter(formatter)
    logger.addHandler(file_handler)

    # print path
    logger.info('Brief summary of experiment : \n' + main.__doc__)
    if Note is not None: logger.info(Note + '\n')
    logger.info(f'Log file : {log_file}')
    logger.info(f'Data path : {DATA_PATH}')
    logger.info(f'Outputs path : {OUTPUT_PATH}' + '\n')

    ############################## Make datasets ###############################
    # load data_info
    df_info = pd.read_csv(DATA_INFO_PATH)
    df_info = df_info.drop(df_info.columns[0], axis=1)
    # remove low contrast images (all black)
    df_info = df_info[df_info.low_contrast == 0]

    # Train Validation Test Split
    spliter = MURA_TrainValidTestSplitter(
        df_info,
        train_frac=train_frac,
        ratio_known_normal=ratio_known_normal,
        ratio_known_abnormal=ratio_known_abnormal,
        random_state=42)
    spliter.split_data(verbose=False)
    train_df = spliter.get_subset('train')
    valid_df = spliter.get_subset('valid')
    test_df = spliter.get_subset('test')
    # make datasets
    train_dataset = MURA_Dataset(train_df,
                                 data_path=DATA_PATH,
                                 load_mask=True,
                                 load_semilabels=True,
                                 output_size=img_size)
    valid_dataset = MURA_Dataset(valid_df,
                                 data_path=DATA_PATH,
                                 load_mask=True,
                                 load_semilabels=True,
                                 output_size=img_size)
    test_dataset = MURA_Dataset(test_df,
                                data_path=DATA_PATH,
                                load_mask=True,
                                load_semilabels=True,
                                output_size=img_size)
    # print info to logger
    logger.info(f'Train fraction : {train_frac:.0%}')
    logger.info(f'Fraction known normal : {ratio_known_normal:.0%}')
    logger.info(f'Fraction known abnormal : {ratio_known_abnormal:.0%}')
    logger.info('Split Summary \n' + str(spliter.print_stat(returnTable=True)))
    logger.info('Online preprocessing pipeline : \n' +
                str(train_dataset.transform) + '\n')

    ################################ Set Up ####################################
    # Set seed
    seed = seeds[seed_i]
    if seed != -1:
        random.seed(seed)
        np.random.seed(seed)
        torch.manual_seed(seed)
        torch.cuda.manual_seed(seed)
        torch.backends.cudnn.deterministic = True
        logger.info(f'Set seed {seed_i+1:02}/{n_seeds:02} to {seed}')

    # set number of thread
    if n_thread > 0:
        torch.set_num_threads(n_thread)

    # print info in logger
    logger.info(f'Device : {device}')
    logger.info(f'Number of thread : {n_thread}')
    logger.info(
        f'Number of dataloader worker for DeepSAD : {n_jobs_dataloader}')
    logger.info(
        f'Autoencoder number of dataloader worker : {ae_n_jobs_dataloader}' +
        '\n')

    ######################### Networks Initialization ##########################
    ae_net = AE_ResNet18(embed_dim=embed_dim,
                         pretrain_ResNetEnc=ae_pretrain,
                         output_size=ae_out_size)
    ae_net = ae_net.to(device)
    net = ResNet18_Encoder(embed_dim=embed_dim, pretrained=False)
    net = net.to(device)

    # initialization of the Model
    deepSAD = DeepSAD(net, ae_net=ae_net, eta=eta)
    # add info to logger
    logger.info(f'Autoencoder : {ae_net.__class__.__name__}')
    logger.info(f'Encoder : {net.__class__.__name__}')
    logger.info(f'Embedding dimension : {embed_dim}')
    logger.info(f'Autoencoder pretrained on ImageNet : {ae_pretrain}')
    logger.info(f'DeepSAD eta : {eta}')
    logger.info(
        'Autoencoder architecture: \n' +
        summary_string(ae_net, (1, img_size, img_size), device=str(device)) +
        '\n')

    if model_path_to_load:
        deepSAD.load_model(model_path_to_load,
                           load_ae=True,
                           map_location=device)
        logger.info(f'Model Loaded from {model_path_to_load}' + '\n')

    ############################## Pretraining #################################
    logger.info(f'Pretraining DeepSAD via Autoencoder : {pretrain}')
    if pretrain:
        # add parameter info
        logger.info(f'Autoencoder number of epoch : {ae_n_epoch}')
        logger.info(f'Autoencoder learning rate : {ae_lr}')
        logger.info(f'Autoencoder learning rate milestone : {ae_lr_milestone}')
        logger.info(f'Autoencoder weight_decay : {ae_weight_decay}')
        logger.info(f'Autoencoder optimizer : Adam')
        logger.info(f'Autoencoder batch_size {ae_batch_size}' + '\n')
        # train AE
        deepSAD.pretrain(train_dataset,
                         valid_dataset,
                         test_dataset,
                         lr=ae_lr,
                         n_epoch=ae_n_epoch,
                         lr_milestone=ae_lr_milestone,
                         batch_size=ae_batch_size,
                         weight_decay=ae_weight_decay,
                         device=device,
                         n_jobs_dataloader=ae_n_jobs_dataloader,
                         print_batch_progress=print_batch_progress)

    ################################ Training ##################################
    # add parameter info
    logger.info(f'DeepSAD number of epoch : {n_epoch}')
    logger.info(f'DeepSAD learning rate : {lr}')
    logger.info(f'DeepSAD learning rate milestone : {lr_milestone}')
    logger.info(f'DeepSAD weight_decay : {weight_decay}')
    logger.info('DeepSAD optimizer : Adam')
    logger.info(f'DeepSAD batch_size : {batch_size}')
    logger.info(f'DeepSAD number of dataloader worker : {n_jobs_dataloader}' +
                '\n')

    # train DeepSAD
    deepSAD.train(train_dataset,
                  lr=lr,
                  n_epoch=n_epoch,
                  lr_milestone=lr_milestone,
                  batch_size=batch_size,
                  weight_decay=weight_decay,
                  device=device,
                  n_jobs_dataloader=n_jobs_dataloader,
                  print_batch_progress=print_batch_progress)

    # validate DeepSAD
    deepSAD.validate(valid_dataset,
                     device=device,
                     n_jobs_dataloader=n_jobs_dataloader,
                     print_batch_progress=print_batch_progress)

    # test DeepSAD
    deepSAD.test(test_dataset,
                 device=device,
                 n_jobs_dataloader=n_jobs_dataloader,
                 print_batch_progress=print_batch_progress)

    # save results
    deepSAD.save_results(OUTPUT_PATH +
                         f'results/DeepSAD_results_{seed_i+1}.json')
    logger.info('Test results saved at ' + OUTPUT_PATH +
                f'results/DeepSAD_results_{seed_i+1}.json' + '\n')
    # save model
    deepSAD.save_model(OUTPUT_PATH + f'model/DeepSAD_model_{seed_i+1}.pt')
    logger.info('Model saved at ' + OUTPUT_PATH +
                f'model/DeepSAD_model_{seed_i+1}.pt')
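
The listing above ends by writing one result file and one model checkpoint per seed. As a purely hypothetical follow-up (not part of the original script), the per-seed JSON files could be collected and summarised along these lines; the directory layout mirrors the calls above, but the placeholder OUTPUT_PATH and the 'test'/'auc' keys are assumptions about what DeepSAD.save_results stores, made only for illustration.

import glob
import json

import numpy as np

# Hypothetical post-processing of the files written by deepSAD.save_results().
# OUTPUT_PATH below is a placeholder; the 'test'/'auc' keys are assumed.
OUTPUT_PATH = './Outputs/Experiment_2021_01_01_00h00/'
result_files = sorted(glob.glob(OUTPUT_PATH + 'results/DeepSAD_results_*.json'))

aucs = []
for path in result_files:
    with open(path, 'r') as fn:
        results = json.load(fn)
    aucs.append(results.get('test', {}).get('auc', float('nan')))

print(f'Test AUC over {len(aucs)} seed(s) : '
      f'{np.nanmean(aucs):.3f} +/- {np.nanstd(aucs):.3f}')
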
Code example #8
0
def main(seed_i):
    """
    Implementation of the unsupervised ARAE model proposed by Salehi et al (2020).
    This unsupervised method apply a projected gradient descent algorithm to find
    a more meaningful lattent space for the autoencoder. The encoder composed of
    a ResNet18 encoder. The decoder is composed of a mirrored ResNet18. The latent
    space has dimension (16,16,512).
    """
    # initialize logger
    logging.basicConfig(level=logging.INFO)
    logger = logging.getLogger()
    try:
        logger.handlers[1].stream.close()
        logger.removeHandler(logger.handlers[1])
    except IndexError:
        pass
    logger.setLevel(logging.INFO)
    formatter = logging.Formatter('%(asctime)s | %(levelname)s | %(message)s')
    log_file = OUTPUT_PATH + 'logs/' + f'log_{seed_i+1}.txt'
    file_handler = logging.FileHandler(log_file)
    file_handler.setLevel(logging.INFO)
    file_handler.setFormatter(formatter)
    logger.addHandler(file_handler)

    # print path and main docstring with experiment summary
    logger.info('Brief summary of experiment : \n' + main.__doc__)
    logger.info(f'Log file : {log_file}')
    logger.info(f'Data path : {DATA_PATH}')
    logger.info(f'Outputs path : {OUTPUT_PATH}' + '\n')

    ############################## Make datasets ###############################
    # load data_info
    df_info = pd.read_csv(DATA_INFO_PATH)
    df_info = df_info.drop(df_info.columns[0], axis=1)
    # remove low contrast images (all black)
    df_info = df_info[df_info.low_contrast == 0]
    # keep only hands
    df_info = df_info[df_info.body_part == 'HAND']

    # Train Validation Test Split
    spliter = MURA_TrainValidTestSplitter(df_info, train_frac=train_frac,
                                          ratio_known_normal=ratio_known_normal,
                                          ratio_known_abnormal=ratio_known_abnormal, random_state=42)
    spliter.split_data(verbose=False)
    train_df = spliter.get_subset('train')
    valid_df = spliter.get_subset('valid')
    test_df = spliter.get_subset('test')
    # make datasets
    train_dataset = MURA_Dataset(train_df, data_path=DATA_PATH, load_mask=True,
                                 load_semilabels=True, output_size=img_size)
    valid_dataset = MURA_Dataset(valid_df, data_path=DATA_PATH, load_mask=True,
                                 load_semilabels=True, output_size=img_size)
    test_dataset = MURA_Dataset(test_df, data_path=DATA_PATH, load_mask=True,
                                load_semilabels=True, output_size=img_size)
    # print info to logger
    logger.info(f'Train fraction : {train_frac:.0%}')
    logger.info(f'Fraction known normal : {ratio_known_normal:.0%}')
    logger.info(f'Fraction known abnormal : {ratio_known_abnormal:.0%}')
    logger.info('Split Summary \n' + str(spliter.print_stat(returnTable=True)))
    logger.info('Online preprocessing pipeline : \n' + str(train_dataset.transform) + '\n')

    ################################ Set Up ####################################
    # Set seed
    seed = seeds[seed_i]
    if seed != -1:
        random.seed(seed)
        np.random.seed(seed)
        torch.manual_seed(seed)
        torch.cuda.manual_seed(seed)
        torch.backends.cudnn.deterministic = True
        logger.info(f'Set seed {seed_i+1:02}/{n_seeds:02} to {seed}')

    # set number of thread
    if n_thread > 0:
        torch.set_num_threads(n_thread)

    # print info in logger
    logger.info(f'Device : {device}')
    logger.info(f'Number of thread : {n_thread}')
    logger.info(f'Number of dataloader worker for {Experiment_Name} : {n_jobs_dataloader}' + '\n')

    ######################### Networks Initialization ##########################
    net = AE_ResNet18(pretrain_ResNetEnc=pretrain, output_channels=ae_output_size[0])
    net = net.to(device)

    # add info to logger
    logger.info(f'Network : {net.__class__.__name__}')
    logger.info(f'ResNet18 pretrained on ImageNet : {pretrain}')
    logger.info('Network architecture: \n' + summary_string(net, (1, img_size, img_size), device=str(device), batch_size=batch_size) + '\n')

    # initialization of the Model
    arae = ARAE(net, gamma, epsilon)

    if model_path_to_load:
        arae.load_model(model_path_to_load, map_location=device)
        logger.info(f'Model Loaded from {model_path_to_load}' + '\n')

    ################################ Training ##################################
    # add parameter info
    logger.info(f'{Experiment_Name} epsilon : {epsilon}')
    logger.info(f'{Experiment_Name} adversarial importance gamma : {gamma}')
    logger.info(f'{Experiment_Name} number of epoch : {n_epoch}')
    logger.info(f'{Experiment_Name} number of adversarial search epoch: {n_epoch_adv}')
    logger.info(f'{Experiment_Name} learning rate : {lr}')
    logger.info(f'{Experiment_Name} adversarial search learning rate : {lr_adv}')
    method_adv = 'PGD' if use_PGD else 'FGSM'
    logger.info(f'{Experiment_Name} adversarial search method : {method_adv}')
    logger.info(f'{Experiment_Name} learning rate milestone : {lr_milestone}')
    logger.info(f'{Experiment_Name} weight_decay : {weight_decay}')
    logger.info(f'{Experiment_Name} optimizer : Adam')
    logger.info(f'{Experiment_Name} batch_size : {batch_size}')
    logger.info(f'{Experiment_Name} number of dataloader worker : {n_jobs_dataloader}')

    # train ARAE
    arae.train(train_dataset, lr=lr, lr_adv=lr_adv, lr_milestone=lr_milestone,
               weight_decay=weight_decay, n_epoch=n_epoch, n_epoch_adv=n_epoch_adv, use_PGD=use_PGD,
               batch_size=batch_size, device=device, n_jobs_dataloader=n_jobs_dataloader,
               print_batch_progress=print_batch_progress, valid_dataset=valid_dataset)

    # validate ARAE
    arae.validate(valid_dataset, device=device,
                  n_jobs_dataloader=n_jobs_dataloader,
                  print_batch_progress=print_batch_progress)

    # test ARAE
    arae.test(test_dataset, device=device,
              n_jobs_dataloader=n_jobs_dataloader,
              print_batch_progress=print_batch_progress)

    # save results
    arae.save_results(OUTPUT_PATH + f'results/{Experiment_Name}_results_{seed_i+1}.json')
    logger.info('Test results saved at ' + OUTPUT_PATH + f'results/{Experiment_Name}_results_{seed_i+1}.json' + '\n')

    # save model
    arae.save_model(OUTPUT_PATH + f'model/{Experiment_Name}_model_{seed_i+1}.pt')
    logger.info('Model saved at ' + OUTPUT_PATH + f'model/{Experiment_Name}_model_{seed_i+1}.pt')
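
The docstring of this example describes an FGSM/PGD-style adversarial search on the inputs. Since the ARAE.train implementation itself is not shown in this listing, the following is only a minimal, self-contained sketch of what such an input-space perturbation step typically looks like; the function name, the L-infinity projection and the plain MSE reconstruction loss are assumptions for illustration, not the actual ARAE code.

import torch
import torch.nn.functional as F

def adversarial_perturbation(net, x, epsilon, lr_adv, n_epoch_adv, use_PGD=True):
    # Hypothetical sketch of the adversarial search described in the docstring:
    # look for a perturbation delta inside an L-inf ball of radius epsilon that
    # increases the reconstruction loss (FGSM = single step, PGD = several steps).
    # Assumes `net` maps an input batch to its reconstruction.
    delta = torch.zeros_like(x, requires_grad=True)
    n_step = n_epoch_adv if use_PGD else 1
    for _ in range(n_step):
        loss = F.mse_loss(net(x + delta), x)
        loss.backward()
        with torch.no_grad():
            delta += lr_adv * delta.grad.sign()  # gradient ascent on the loss
            delta.clamp_(-epsilon, epsilon)      # project back onto the L-inf ball
        delta.grad.zero_()
    return (x + delta).detach()

# Toy usage with a dummy convolutional autoencoder, purely for illustration.
toy_net = torch.nn.Sequential(torch.nn.Conv2d(1, 8, 3, padding=1),
                              torch.nn.ReLU(),
                              torch.nn.Conv2d(8, 1, 3, padding=1))
x = torch.rand(4, 1, 64, 64)
x_adv = adversarial_perturbation(toy_net, x, epsilon=0.05, lr_adv=0.01, n_epoch_adv=5)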