Esempio n. 1
0
    # read configure file
    with open(args.configure) as f:
        cfg = json.load(f)

    # comment for this experiment: leave here
    comment = cfg["session"]["sess_name"]

    # create dir to save log and checkpoint
    save_path = cfg['train']['save_path']
    time_str = str(datetime.now().strftime("%Y%m%d-%H%M"))
    sess_name = cfg["session"]["sess_name"]
    log_dir = make_dir_epoch_time(save_path, sess_name, time_str)

    # create logger
    log_file = logger.make_file(log_dir, 'result.txt')
    logger.log_initilize(log_file)
    tb_writer = logger.make_writer(log_dir=log_dir)
    logging.info(
        f"Start Tensorboard with tensorboard --logdir {log_dir}, view at http://localhost:6006/"
    )
    print("--------All checkpoint will be saved to ```{}```--------".format(
        log_dir))

    cls_model = model_loader(cfg)
    print("Successfully imported model module")
    print("Done Loading!!!\n")
    time.sleep(2.7)

    main(
        config=cfg,
Esempio n. 2
0
def main():
    # read configure file
    with open("cfgs/tenes.cfg") as f:
        cfg = json.load(f)

    # using parsed configurations to create a dataset
    data = cfg["data"]["data_csv_name"]
    data_path = cfg["data"]["data_path"]
    batch_size = int(cfg["data"]["batch_size"])
    validation_split = float(cfg["data"]["validation_ratio"])
    # create dataset
    training_set = pd.read_csv(data, usecols=["image_name", "target"])
    training_set["image_name"] = training_set["image_name"] + '.jpg'
    training_set = shuffle(training_set)
    training_set = training_set.sample(25000)

    print(training_set['target'].value_counts())
    train, test, _, _ = dataloader.data_split(training_set, validation_split)

    training_set = dataloader.ClassificationDataset(train, data_path,
                                                    transform.train_transform)

    testing_set = dataloader.ClassificationDataset(test, data_path,
                                                   transform.val_transform)
    # create dataloaders
    # global train_loader
    # global val_loader
    train_loader = torch.utils.data.DataLoader(
        training_set,
        batch_size=batch_size,
        shuffle=True,
    )
    val_loader = torch.utils.data.DataLoader(
        testing_set,
        batch_size=batch_size,
        shuffle=False,
    )

    logging.info("Dataset and Dataloaders created")
    # create a model
    extractor_name = cfg["train"]["extractor"]
    model = cls.ClassificationModel(model_name=extractor_name).create_model()
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    logging.info("Using device: {} ".format(device))
    # convert to suitable device
    # global model
    model = model.to(device)
    logging.info("Model created...")

    # create a metric for evaluating
    # global train_metrics
    # global val_metrics
    train_metrics = metrics.Metrics(cfg["train"]["metrics"])
    val_metrics = metrics.Metrics(cfg["train"]["metrics"])
    print("Metrics implemented successfully")

    # method to optimize the model
    # read settings from json file
    loss_function = cfg["optimizer"]["loss"]
    optimizers = cfg["optimizer"]["name"]
    learning_rate = cfg["optimizer"]["lr"]

    # initlize optimizing methods : lr, scheduler of lr, optimizer
    try:
        # if the loss function comes from nn package
        criterion = getattr(
            nn, loss_function,
            "The loss {} is not available".format(loss_function))
    except:
        # use custom loss
        criterion = getattr(
            custom_loss,
            loss_function,
            "The loss {} is not available".format(loss_function),
        )
    criterion = custom_loss.FocalLoss()
    optimizer = getattr(torch.optim, optimizers,
                        "The optimizer {} is not available".format(optimizers))
    optimizer = optimizer(model.parameters(), lr=learning_rate)
    save_method = cfg["train"]["lr_scheduler_factor"]
    patiences = cfg["train"]["patience"]
    lr_factor = cfg["train"]["reduce_lr_factor"]
    scheduler = ReduceLROnPlateau(optimizer,
                                  save_method,
                                  patience=patiences,
                                  factor=lr_factor)

    # before training, let's create a file for logging model result
    time_str = str(datetime.now().strftime("%Y%m%d-%H%M"))
    log_file = logger.make_file(cfg["session"]["sess_name"], time_str)
    logger.log_initilize(log_file)
    print("Beginning training...")
    # export the result to log file
    logging.info("-----")
    logging.info("session name: {} \n".format(cfg["session"]["sess_name"]))
    logging.info("Training size: " + str(len(train)))
    logging.info("Validation size: " + str(len(test)))
    logging.info(model)
    logging.info("\n")
    logging.info("CONFIGS \n")
    # logging the configs:

    # training models
    num_epoch = int(cfg["train"]["num_epoch"])
    best_val_acc = 0
    for i in range(0, num_epoch):
        loss, val_loss, train_result, val_result = trainer.train_one_epoch(
            model,
            train_loader,
            val_loader,
            device,
            optimizer,
            criterion,
            train_metrics,
            val_metrics,
        )

        # lr scheduling
        scheduler.step(val_loss)
        logging.info(
            "Epoch {} / {} \n Training loss: {} - Other training metrics: ".
            format(i + 1, num_epoch, loss))
        logging.info(train_result)
        logging.info(
            " \n Validation loss : {} - Other validation metrics:".format(
                val_loss))
        logging.info(val_result)
        logging.info("\n")
        # saving epoch with best validation accuracy
        if best_val_acc < float(val_result["f1_score"]):
            logging.info("Validation f1= " + str(val_result["f1_score"]) +
                         "===> Save best epoch")
            best_val_acc = val_result["f1_score"]
            torch.save(
                model.state_dict(),
                "saved/models/" + time_str + "-" +
                cfg["train"]["save_as_name"],
            )
        else:
            logging.info("Validation f1= " + str(val_result["f1_score"]) +
                         "===> No saving")
            continue

    # testing on test set
    test_data = cfg["data"]["test_csv_name"]
    data_path = cfg["data"]["data_path"]
    test_df = pd.read_csv(test_data, usecols=["image_name", "target"])
    test_df['image_name'] = test_df['image_name'] + '.jpg'
    # prepare the dataset
    testing_set = dataloader.TestDataset(test_df, 'dataset/test/test',
                                         transform.test_transform)

    # make dataloader
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    test_loader = torch.utils.data.DataLoader(testing_set,
                                              batch_size=16,
                                              shuffle=False)
    print("\n Inference on the testing set")

    # load the test model and making inference
    test_model = cls.ClassificationModel(
        model_name=extractor_name).create_model()
    model_path = os.path.join("saved/models",
                              time_str + "-" + cfg["train"]["save_as_name"])
    test_model.load_state_dict(torch.load(model_path))
    test_model = test_model.to(device)
    logging.info(tester.test_result(test_model, test_loader, device))
Esempio n. 3
0
def main():
    parser = argparse.ArgumentParser(description='NA')
    parser.add_argument('-c',
                        '--configure',
                        default='cfgs/chexphoto.cfg',
                        help='JSON file')
    parser.add_argument('-cp',
                        '--checkpoint',
                        default=None,
                        help='checkpoint path')
    args = parser.parse_args()
    checkpoint = args.checkpoint
    # read configure file
    with open(args.configure) as f:
        cfg = json.load(f)
    time_str = str(datetime.now().strftime("%Y%m%d-%H%M"))
    tensorboard_writer = logger.make_writer(cfg["session"]["sess_name"],
                                            time_str)
    # using parsed configurations to create a dataset
    data = cfg["data"]["data_csv_name"]
    valid = cfg['data']['test_csv_name']
    data_path = cfg["data"]["data_path"]
    batch_size = int(cfg["data"]["batch_size"])
    validation_split = float(cfg["data"]["validation_ratio"])
    # create dataset
    training_set = pd.read_csv(data, usecols=["file_name", "label"])
    valid_set = pd.read_csv(valid, usecols=["file_name", "label"])
    # train, test, _, _ = dataloader.data_split(training_set, validation_split)

    training_set = dataloader.ClassificationDataset(training_set, data_path,
                                                    transform.train_transform)

    testing_set = dataloader.ClassificationDataset(valid_set, data_path,
                                                   transform.val_transform)
    # create dataloaders
    # global train_loader
    # global val_loader
    #SAmpler to prevent inbalance data label
    # train_loader = torch.utils.data.DataLoader(training_set,sampler=ImbalancedDatasetSampler(training_set, callback_get_label=lambda x, i: tuple(x[i][1].tolist())),batch_size=batch_size,)

    #End sampler
    train_loader = torch.utils.data.DataLoader(
        training_set,
        batch_size=batch_size,
        shuffle=True,
    )
    val_loader = torch.utils.data.DataLoader(
        testing_set,
        batch_size=batch_size,
        shuffle=False,
    )
    # val_loader = torch.utils.data.DataLoader(testing_set,sampler=ImbalancedDatasetSampler(testing_set, callback_get_label=lambda x, i: tuple(x[i][1].tolist())),batch_size=batch_size,)

    logging.info("Dataset and Dataloaders created")
    # create a model
    extractor_name = cfg["train"]["extractor"]
    model = cls.ClassificationModel(model_name=extractor_name).create_model()
    #load checkpoint to continue training
    if checkpoint is not None:
        print('...Load checkpoint from {}'.format(checkpoint))
        checkpoint = torch.load(checkpoint)
        model.load_state_dict(checkpoint)
        print('...Checkpoint loaded')

        classifier = nn.Sequential(nn.Linear(1408, 512, bias=True),
                                   nn.ReLU(inplace=True),
                                   nn.Linear(512, 6, bias=True))

        # create classfier
        # replace the last linear layer with your custom classifier
        # model._avg_pooling = SPPLayer([1,2,4])
        model._fc = classifier
        # model.last_linear = self.cls
        # select with layers to unfreeze
        params = list(model.parameters())
        len_param = len(params)
        # for index,param in enumerate(model.parameters()):
        #     if index == (len_param -1):
        #         param.requires_grad = True
        #     else:
        #         param.requires_grad = False
        # for param in model.parameters():
        #     print(param.requires_grad)

    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    logging.info("Using device: {} ".format(device))
    # convert to suitable device
    # global model
    model = model.to(device)
    logging.info("Model created...")

    # create a metric for evaluating
    # global train_metrics
    # global val_metrics
    train_metrics = metrics.Metrics(cfg["train"]["metrics"])
    val_metrics = metrics.Metrics(cfg["train"]["metrics"])
    print("Metrics implemented successfully")

    # method to optimize the model
    # read settings from json file
    loss_function = cfg["optimizer"]["loss"]
    optimizers = cfg["optimizer"]["name"]
    learning_rate = cfg["optimizer"]["lr"]

    # initlize optimizing methods : lr, scheduler of lr, optimizer
    try:
        # if the loss function comes from nn package
        criterion = getattr(
            nn, loss_function,
            "The loss {} is not available".format(loss_function))

    except:
        # use custom loss
        criterion = getattr(
            custom_loss,
            loss_function,
            "The loss {} is not available".format(loss_function),
        )
    criterion = criterion()
    optimizer = getattr(torch.optim, optimizers,
                        "The optimizer {} is not available".format(optimizers))
    max_lr = 3e-3  # Maximum LR
    min_lr = 1e-5  # Minimum LR
    t_max = 10  # How many epochs to go from max_lr to min_lr
    # optimizer = torch.optim.Adam(
    # params=model.parameters(), lr=max_lr, amsgrad=False)
    optimizer = optimizer(model.parameters(), lr=learning_rate)
    save_method = cfg["train"]["lr_scheduler_factor"]
    patiences = cfg["train"]["patience"]
    lr_factor = cfg["train"]["reduce_lr_factor"]
    # scheduler = ReduceLROnPlateau(
    #     optimizer, save_method, patience=patiences, factor=lr_factor
    # )
    scheduler = CosineAnnealingLR(optimizer, T_max=t_max, eta_min=min_lr)

    # before training, let's create a file for logging model result

    log_file = logger.make_file(cfg["session"]["sess_name"], time_str)

    logger.log_initilize(log_file)
    print("Beginning training...")
    # export the result to log file
    f = open("saved/logs/traning_{}.txt".format(cfg["session"]["sess_name"]),
             "a")
    logging.info("-----")
    logging.info("session name: {} \n".format(cfg["session"]["sess_name"]))
    logging.info(model)
    logging.info("\n")
    logging.info("CONFIGS \n")
    # logging the configs:
    # logging.info(f.read())
    # training models
    num_epoch = int(cfg["train"]["num_epoch"])
    best_val_acc = 0
    for i in range(0, num_epoch):
        loss, val_loss, train_result, val_result = trainer.train_one_epoch(
            model,
            train_loader,
            val_loader,
            device,
            optimizer,
            criterion,
            train_metrics,
            val_metrics,
        )

        # lr scheduling

        logging.info(
            "Epoch {} / {} \n Training loss: {} - Other training metrics: ".
            format(i + 1, num_epoch, loss))
        print("Epoch {} / {} \n Training acc: {} - Other training metrics: ".
              format(i + 1, num_epoch, train_result["accuracy_score"]))
        print("Epoch {} / {} \n Training loss: {} - Other training metrics: ".
              format(i + 1, num_epoch, loss))
        f.write(
            "Epoch {} / {} \n Training loss: {} - Other training metrics: ".
            format(i + 1, num_epoch, loss))
        f.write("Epoch {} / {} \n Training acc: {} - Other training metrics: ".
                format(i + 1, num_epoch, train_result["accuracy_score"]))
        tensorboard_writer.add_scalar("training accuracy",
                                      train_result["accuracy_score"], i + 1)
        tensorboard_writer.add_scalar("training f1_score",
                                      train_result["f1_score"], i + 1)
        tensorboard_writer.add_scalar("training metrics", loss, i + 1)
        logging.info(train_result)
        logging.info(
            " \n Validation loss : {} - Other validation metrics:".format(
                val_loss))
        print(
            "Epoch {} / {} \n valid acc: {} - Other training metrics: ".format(
                i + 1, num_epoch, val_result["accuracy_score"]))
        f.write(" \n Validation loss : {} - Other validation metrics:".format(
            val_loss))
        tensorboard_writer.add_scalar("valid accuracy",
                                      val_result["accuracy_score"], i + 1)
        tensorboard_writer.add_scalar("valid f1_score", val_result["f1_score"],
                                      i + 1)
        tensorboard_writer.add_scalar("valid metrics", val_loss, i + 1)
        logging.info(val_result)
        logging.info("\n")
        # saving epoch with best validation accuracy
        if best_val_acc < float(val_result["accuracy_score"]):
            logging.info("Validation accuracy= " +
                         str(val_result["accuracy_score"]) +
                         "===> Save best epoch")
            f.write("Validation accuracy= " +
                    str(val_result["accuracy_score"]) + "===> Save best epoch")
            best_val_acc = val_result["accuracy_score"]
            torch.save(
                model.state_dict(),
                "saved/models/" + time_str + "-" +
                cfg["train"]["save_as_name"],
            )
        scheduler.step(val_loss)
        # else:
        #     # logging.info(
        #     #     "Validation accuracy= "+ str(val_result["accuracy_score"])+ "===> No saving"
        #     # )
        #     continue

    # testing on test set
    test_data = cfg["data"]["test_csv_name"]
    data_path = cfg["data"]["data_path"]
    test_df = pd.read_csv(test_data, usecols=["file_name", "label"])

    # prepare the dataset
    testing_set = dataloader.ClassificationDataset(test_df, data_path,
                                                   transform.val_transform)

    # make dataloader
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    test_loader = torch.utils.data.DataLoader(testing_set,
                                              batch_size=32,
                                              shuffle=False)
    print("Inference on the testing set")

    # load the test model and making inference
    test_model = cls.ClassificationModel(
        model_name=extractor_name).create_model()
    model_path = os.path.join("saved/models",
                              time_str + "-" + cfg["train"]["save_as_name"])
    test_model.load_state_dict(torch.load(model_path))
    test_model = test_model.to(device)
    logging.info(tester.test_result(test_model, test_loader, device, cfg))
Esempio n. 4
0
    except:
        print("Cannot import data loader module".format(module_name))

    # choose model classification class
    module_name = cfg["train"]["model.class"]
    try:
        cls = getattr(model, module_name)
        print("Successfully imported model module")
    except:
        print("Cannot import model module".format(module_name))

    # get num of class
    num_of_class = len(cfg["data"]["label_dict"])
    # create logger
    time_str = str(datetime.now().strftime("%Y%m%d-%H%M"))
    log_file = logger.make_file(cfg["session"]["sess_name"], time_str)
    logger.log_initilize(log_file)

    for fold in fold_list:
        # running at fold n
        print("Currently running on {}".format(fold))
        time.sleep(3)
        main(
            comment=comment,
            checkpoint=checkpoint,
            collocation=collocation,
            model=cls,
            dataset=dataset,
            validation_flag=validation_flag,
            current_fold=fold,
            logger=logger,