Ejemplo n.º 1
0
 def __init__(self, device=0, logger_name=None):
     """Set up the learner: model, optimizer, training engine, logger.

     Args:
         device: torch device (index or string) the model is moved to;
             also forwarded to the parent class and the Engine.
         logger_name: name passed to logging.getLogger; None selects
             the root logger.
     """
     super().__init__(device=device)
     # SE-ResNeXt50 backbone with ImageNet-pretrained weights.
     self.model = SEResnext50_32x4d(pretrained="imagenet")
     self.model.to(device)
     self.optimizer = torch.optim.Adam(self.model.parameters(), lr=1e-4)
     # Engine wraps the train/predict loops for this model+optimizer pair.
     self.Engine = Engine(self.model, self.optimizer, device)
     self.logger = logging.getLogger(logger_name)
Ejemplo n.º 2
0
def predict(image_path, model):
    """Run inference on a single image with a trained model.

    The image is normalized with the module-level MEAN/STD constants,
    wrapped in a one-element dataset/loader, and scored through the
    project Engine. Returns a flat numpy array of raw model outputs.
    """
    normalize = albumentations.Normalize(
        mean=MEAN, std=STD, max_pixel_value=255.0, always_apply=True
    )
    augmentations = albumentations.Compose([normalize])

    # Single-image dataset; the zero target is a dummy (unused at inference).
    dataset = ClassificationDataset(
        image_paths=[image_path],
        targets=[0],
        resize=None,
        augmentations=augmentations,
    )
    loader = torch.utils.data.DataLoader(
        dataset=dataset, batch_size=1, shuffle=False, num_workers=0
    )

    engine = Engine(model=model, optimizer=None, device=DEVICE)
    preds = engine.predict(data_loader=loader)
    return np.vstack(preds).reshape(-1)
Ejemplo n.º 3
0
def train(fold):
    """Train ResNeXt101 on all folds except `fold`, validating on `fold`.

    Reads the k-fold csv, builds image-path/target lists for the train and
    validation splits, then runs up to EPOCHS epochs of Engine.train /
    Engine.evaluate with AUC-driven LR scheduling and early stopping. The
    best checkpoint is saved to MODEL_PATH/model_fold_{fold}.bin via the
    EarlyStopping callback.
    """
    training_data_path = TRAINING_DATA_PATH
    df = pd.read_csv(TRAIN_FOLDS)

    # Held-out fold becomes the validation split.
    df_train = df[df.kfold != fold].reset_index(drop=True)
    df_valid = df[df.kfold == fold].reset_index(drop=True)

    model = ResNext101_64x4d(pretrained="imagenet")
    model.to(DEVICE)

    train_images = df_train.image_name.values.tolist()
    train_images = [
        os.path.join(training_data_path, i + ".jpg") for i in train_images
    ]
    train_targets = df_train.target.values

    valid_images = df_valid.image_name.values.tolist()
    valid_images = [
        os.path.join(training_data_path, i + ".jpg") for i in valid_images
    ]
    valid_targets = df_valid.target.values

    train_loader = train_dataloader(images=train_images, targets=train_targets)
    valid_loader = valid_dataloader(images=valid_images, targets=valid_targets)

    optimizer = torch.optim.Adam(model.parameters(), lr=LEARNING_RATE)

    # mode="max": the scheduler steps on AUC, which we want to maximize.
    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer,
                                                           patience=3,
                                                           threshold=0.001,
                                                           mode="max")

    es = EarlyStopping(patience=3, mode="max")

    for epoch in range(EPOCHS):
        train_loss = Engine.train(train_loader,
                                  model,
                                  optimizer,
                                  device=DEVICE)
        predictions, valid_loss = Engine.evaluate(valid_loader,
                                                  model,
                                                  device=DEVICE)
        # Engine.evaluate returns per-batch outputs; flatten to one vector.
        predictions = np.vstack((predictions)).ravel()
        auc = metrics.roc_auc_score(valid_targets, predictions)
        print(f"Epoch = {epoch}, AUC = {auc}")
        scheduler.step(auc)

        # EarlyStopping also checkpoints the model on AUC improvement.
        es(auc,
           model,
           model_path=os.path.join(MODEL_PATH, f"model_fold_{fold}.bin"))
        if es.early_stop:
            print("Early stopping")
            break
Ejemplo n.º 4
0
class SEResnext50_32x4dLearner(ActiveLearner):
    """Active-learning wrapper around an SE-ResNeXt50 binary classifier.

    Bundles the model, an Adam optimizer and the project Engine, and
    exposes inference / fit / score entry points for the active-learning
    driver.
    """

    def __init__(self, device=0, logger_name=None):
        """Build the pretrained model, optimizer, Engine and logger."""
        super().__init__(device=device)
        self.model = SEResnext50_32x4d(pretrained="imagenet")
        self.model.to(device)
        self.optimizer = torch.optim.Adam(self.model.parameters(), lr=1e-4)
        self.Engine = Engine(self.model, self.optimizer, device)
        self.logger = logging.getLogger(logger_name)

    def inference(self, dataset, bs=64):
        """Predict class probabilities for every item in `dataset`.

        Returns {'class_probabilities': (N, 2) array}. Raw outputs are
        squashed with a sigmoid, then stacked as [p, 1-p].
        NOTE(review): column 0 is the sigmoid of the raw output and
        column 1 its complement — confirm the consumer expects
        [positive, negative] in that order.
        """
        loader = torch.utils.data.DataLoader(
            dataset, batch_size=bs, shuffle=False, num_workers=4
        )
        predictions = self.Engine.predict(loader)
        predictions = np.vstack((predictions)).ravel()
        probabilities = 1 / (1 + np.exp(-predictions))
        probabilities = np.stack([probabilities, 1-probabilities], axis=1)
        return {'class_probabilities': probabilities}

    def fit(self, train_dataset, epochs=50, train_bs=32, **kwargs):
        """Train on the currently labeled dataset for `epochs` epochs.

        Logs the label distribution, then runs Engine.train once per
        epoch. Returns {'target_distribution': pandas value counts}.
        NOTE(review): pd.value_counts is called on a list of numpy
        values pulled per-sample from the dataset — confirm these are
        scalars (not arrays), otherwise the counts call will misbehave.
        """
        labeled_targets = [x['targets'].numpy()
                           for x in tqdm.tqdm(train_dataset)]
        train_target_distrib = pd.value_counts(labeled_targets)
        self.logger.info('Targets labeled distribution :')
        self.logger.info(train_target_distrib)

        if self.cuda_available:
            self.model.cuda()
        self.model.train()
        train_loader = torch.utils.data.DataLoader(
            train_dataset, batch_size=train_bs, shuffle=True, num_workers=4
        )
        # NOTE(review): this optimizer is created but self.Engine keeps
        # using the one from __init__ — confirm which one should train.
        optimizer = torch.optim.Adam(self.model.parameters(), lr=1e-4)

        for epoch in tqdm.tqdm(range(epochs)):
            train_loss = self.Engine.train(train_loader)
        return {'target_distribution': train_target_distrib}

    def score(self, valid_dataset, batch_size=64):
        """Compute validation AUC on `valid_dataset`; returns {'auc': float}."""
        self.model.eval()
        valid_loader = torch.utils.data.DataLoader(
            valid_dataset, batch_size=batch_size, shuffle=False, num_workers=4
        )
        # valid_loss = self.Engine.evaluate(valid_loader)
        # print(f'Validation loss : {valid_loss:.3f}')
        predictions = self.Engine.predict(valid_loader)
        predictions = np.vstack((predictions)).ravel()
        valid_targets = valid_dataset.targets
        auc = metrics.roc_auc_score(valid_targets, predictions)
        print(f"AUC = {auc:.3f}")
        return {'auc': auc}
Ejemplo n.º 5
0
def predict(image_path, model):
    """Score one image with `model`; return a flat numpy array of outputs."""
    # ImageNet channel statistics used for normalization.
    mean = (0.485, 0.456, 0.406)
    std = (0.229, 0.224, 0.225)

    aug = albumentations.Compose([
        albumentations.Normalize(
            mean, std, max_pixel_value=255.0, always_apply=True
        ),
    ])

    # One-element dataset with a dummy target (inference only).
    dataset = ClassificationLoader(
        image_paths=[image_path],
        targets=[0],
        resize=None,
        augmentations=aug,
    )
    loader = torch.utils.data.DataLoader(
        dataset, batch_size=1, shuffle=False, num_workers=0
    )

    preds = Engine.predict(loader, model, DEVICE)
    return np.vstack(preds).ravel()
Ejemplo n.º 6
0
def test(fold):
    """Predict on the full test set with the checkpoint saved for `fold`.

    Returns a flat numpy array of raw model outputs in test-csv order.
    """
    image_dir = config.TEST_IMAGES
    device = config.DEVICE
    batch_size = config.TEST_BS

    df_test = pd.read_csv(config.TEST_FILE).reset_index(drop=True)

    # Restore the trained fold checkpoint (no pretrained download needed).
    model_ = model.SE_Resnext50_32x4d(pretrained=None)
    checkpoint = os.path.join(config.MODEL_PATH, f"model_fold_{fold}.bin")
    model_.load_state_dict(torch.load(checkpoint))
    model_.to(device)

    # ImageNet normalization only — no other test-time augmentation.
    mean = (0.485, 0.456, 0.406)
    std = (0.229, 0.224, 0.225)
    test_aug = alb.Compose(
        [alb.Normalize(mean, std, max_pixel_value=255.0, always_apply=True)]
    )

    image_paths = [
        os.path.join(image_dir, name + ".jpg")
        for name in df_test.image_name.values.tolist()
    ]
    # Dummy zero targets keep the loader API satisfied at inference.
    dummy_targets = np.zeros(len(image_paths))

    dataset = ClassificationLoader(
        image_paths=image_paths,
        targets=dummy_targets,
        resize=(256, 256),
        augmentations=test_aug,
    )
    loader = torch.utils.data.DataLoader(
        dataset, batch_size=batch_size, shuffle=False, num_workers=4
    )

    preds = Engine.predict(loader, model_, device=device)
    return np.vstack(preds).ravel()
Ejemplo n.º 7
0
def predict(image_path, model):
    """Score a single image with `model`; returns a flat numpy array.

    NOTE(review): `mean` and `std` are not defined in this function —
    presumably they are module-level globals; confirm they exist in
    this module, otherwise this raises NameError at runtime.
    """

    aug = albumentations.Compose(
        [
            albumentations.Normalize(mean, std, max_pixel_value=255.0, always_apply=True)
        ]
    )

    # One-element dataset; the zero target is a dummy (unused at inference).
    images = [image_path]
    targets = [0]

    test_dataset = ClassificationLoader(
        image_paths=images,
        targets=targets,
        resize=None,
        augmentations=aug,
    )

    test_loader = torch.utils.data.DataLoader(
        test_dataset, batch_size=1, shuffle=False, num_workers=4
    )

    predictions = Engine.predict(test_loader, model, DEVICE)
    # Flatten the per-batch outputs into one vector.
    predictions = np.vstack((predictions)).ravel()

    return predictions
Ejemplo n.º 8
0
def predict_result(image_path, model):
    """Predict for a single image via the shared test_dataloader helper.

    Returns a flat numpy array with the model output(s) for the image.
    """
    # One-element batch; target 0 is a placeholder required by the loader.
    loader = test_dataloader(images=[image_path], targets=[0])
    preds = Engine.predict(loader, model=model, device=DEVICE)
    return np.vstack(preds).ravel()
Ejemplo n.º 9
0
def predict(fold):
    """Predict test-set outputs with the model checkpoint for `fold`.

    Returns a flat numpy array of raw model outputs, ordered like the
    test csv rows.
    """
    test_data_path = ''
    model_path = ""
    df_test = pd.read_csv("/.../test.csv")
    # Dummy targets: the loader requires them, but they are unused at inference.
    df_test.loc[:, 'target'] = 0

    device = 'cuda'
    test_bs = 32  #test batch size
    #valid_bs = 16

    #normalize image pixel values (ImageNet statistics for this model)
    mean = (0.485, 0.456, 0.406)
    std = (0.229, 0.224, 0.225)

    #for image augmentation
    test_aug = albumentations.Compose([
        albumentations.Normalize(mean,
                                 std,
                                 max_pixel_value=255.0,
                                 always_apply=True),
    ])

    test_images = df_test.image_name.values.tolist()
    # BUG FIX: was `testing_data_path` (never defined) -> NameError.
    test_images = [
        os.path.join(test_data_path, i + '.jpg') for i in test_images
    ]
    test_targets = df_test.target.values

    # BUG FIX: keyword was `augmentation=`; every other call site in this
    # codebase passes `augmentations=`.
    test_dataset = ClassificationLoader(image_paths=test_images,
                                        targets=test_targets,
                                        resize=None,
                                        augmentations=test_aug)

    # BUG FIX: shuffle must be False so predictions stay aligned with
    # df_test row order for the submission file.
    test_loader = torch.utils.data.DataLoader(test_dataset,
                                              batch_size=test_bs,
                                              shuffle=False,
                                              num_workers=4)

    model = SEResNext50_32x4d(pretrained='imagenet')
    model.load_state_dict(torch.load(os.path.join(model_path, f"model{fold}")))
    model.to(device)

    predictions = Engine.predict(test_loader, model, device)
    return np.vstack(predictions).ravel()
Ejemplo n.º 10
0
def predict(fold):
    """Predict test-set outputs with the checkpoint saved for `fold`.

    Prompts for the resized-image directory and the test csv, runs the
    model over the whole test set and returns a flat numpy array of raw
    outputs in csv order.
    """
    # train image path
    test_data_path = input("Enter the test data path (resized image): ")
    # csv data path that was created from folds
    test_csv_path = input("Enter the test.csv file path: ")
    df_test = pd.read_csv(test_csv_path)
    # Dummy targets: required by the loader, unused at inference time.
    df_test.loc[:, "target"] = 0
    model_path = "model/"
    device = "cuda"
    test_batch_size = 16
    # BUG FIX: mean was (0.485, 0.456, 0.225) — the third value was copied
    # from the std tuple; the ImageNet blue-channel mean is 0.406.
    mean = (0.485, 0.456, 0.406)
    standard_deviation = (0.229, 0.224, 0.225)

    test_aug = albumentations.Compose([
        albumentations.Normalize(mean,
                                 std=standard_deviation,
                                 max_pixel_value=255.0,
                                 always_apply=True)
    ])

    # train image mapping
    test_images = df_test.image_name.values.tolist()
    test_images = [
        os.path.join(test_data_path, i + ".jpg") for i in test_images
    ]
    test_targets = df_test.target.values

    # create valid dataset
    test_dataset = ClassificationLoader(image_paths=test_images,
                                        targets=test_targets,
                                        resize=None,
                                        augmentations=test_aug)
    # validation data loader (shuffle=False keeps csv row order)
    test_loader = torch.utils.data.DataLoader(test_dataset,
                                              batch_size=test_batch_size,
                                              shuffle=False,
                                              num_workers=4)

    # import model and restore the fold checkpoint
    model = SEResNext(pretrained='imagenet')
    model.load_state_dict(
        torch.load(os.path.join(model_path, f"model{fold}.bin")))

    model.to(device)

    predictions = Engine.predict(test_loader, model, device)
    return np.vstack(predictions).ravel()
Ejemplo n.º 11
0
def predict(fold):
    """Run the fold checkpoint over the test set.

    Returns a flat numpy array of raw model outputs in test-csv order.
    """

    training_data_path = "/media/vatsal/Movies & Games/down_siim-isic-melanoma-classification/test224/"
    model_path = "/media/vatsal/Movies & Games/Melenoma-Deep-Learning/model/"
    df_test = pd.read_csv(
        "/media/vatsal/Movies & Games/down_siim-isic-melanoma-classification/test.csv"
    )
    # Dummy targets: the loader requires them, unused at inference.
    df_test.loc[:, "target"] = 0

    device = "cuda"
    test_bs = 8
    # ImageNet normalization statistics.
    mean = (0.485, 0.456, 0.406)
    std = (0.229, 0.224, 0.225)

    test_aug = albumentations.Compose([
        albumentations.Normalize(mean,
                                 std,
                                 max_pixel_value=255.0,
                                 always_apply=True),
    ])

    test_images = df_test.image_name.values.tolist()
    test_images = [
        os.path.join(training_data_path, i + ".jpg") for i in test_images
    ]
    test_targets = df_test.target.values

    test_dataset = ClassificationLoader(
        image_paths=test_images,
        targets=test_targets,
        resize=None,
        augmentations=test_aug,
    )

    # FIX: batch size was hard-coded to 8, silently ignoring test_bs above;
    # use the variable so a single edit changes both.
    test_loader = torch.utils.data.DataLoader(test_dataset,
                                              batch_size=test_bs,
                                              shuffle=False,
                                              num_workers=4)

    model = SEResNext50_32x4d(pretrained=None)
    model.load_state_dict(
        torch.load(os.path.join(model_path, f"model{fold}.bin")))
    model.to(device)

    predictions = Engine.predict(test_loader, model, device=device)
    predictions = np.vstack(predictions).ravel()
    return predictions
def predict(fold):
    """Predict test-set outputs with the checkpoint for `fold`.

    Returns a flat numpy array of raw model outputs in test-csv order.
    """
    test_data_path = ""
    model_path = ""
    df_test = pd.read_csv("")
    # Dummy targets: required by the loader, unused at inference.
    df_test.loc[:, "target"] = 0
    device = "cuda"
    test_bs = 16
    # BUG FIX: std was (0.229, 0.244, 0.255) — digit-transposition typo of
    # the standard ImageNet std (0.229, 0.224, 0.225) used elsewhere here.
    mean = (0.485, 0.456, 0.406)
    std = (0.229, 0.224, 0.225)

    test_aug = albumentations.Compose(
        [
            albumentations.Normalize(mean, std, max_pixel_value=255.0, always_apply=True),
        ]
    )

    test_images = df_test.image_name.values.tolist()
    test_images = [os.path.join(test_data_path, i + ".jpg") for i in test_images]
    test_targets = np.zeros(len(test_images))


    test_dataset = ClassificationLoader(
        image_paths=test_images,
        targets=test_targets,
        resize=None,
        augmentations=test_aug
    )

    test_loader = torch.utils.data.DataLoader(
        test_dataset,
        batch_size=test_bs,
        shuffle=False,
        num_workers=4
    )

    model = SEResNext50_32x4d(pretrained=None)
    model.load_state_dict(torch.load(os.path.join(model_path, f"model{fold}.bin")))
    model.to(device)

    predictions = Engine.predict(
        test_loader,
        model,
        device=device
    )
    return np.vstack(predictions).ravel()
Ejemplo n.º 13
0
def evaluate_for_best_epoch(
    fold,
    model_path,
    device,
    valid_loader,
    model_name,
    valid_targets,
    epoch="final",
    meta_features=None,
):
    """Reload a saved checkpoint and log its validation AUC to wandb.

    Rebuilds the model from the yaml config, loads weights from
    `model_path`, evaluates on `valid_loader`, computes AUC against
    `valid_targets` and logs it as `best_valid_auc`.

    NOTE(review): `np.save(oof_file, valid_targets)` stores the *targets*
    in the out-of-fold file, not the predictions — confirm this is
    intended (an OOF file usually holds predictions).
    """
    args = get_args()
    with open(args.config) as file:
        config_file = yaml.load(file, Loader=yaml.FullLoader)
    config = wandb.config  # Initialize config
    config.update(config_file)

    print(f"Evaluating on epoch {epoch} from {model_path}")
    model = get_model(
        config.model_backbone,
        config.model_name,
        config.num_classes,
        config.input_size,
        config.use_metadata,
        meta_features,
    )
    model.load_state_dict(torch.load(model_path))

    model.to(device, non_blocking=True)
    predictions, valid_loss = Engine.evaluate(
        valid_loader,
        model,
        device=device,
        wandb=wandb,
        epoch=epoch,
        upload_image=True,
        use_sigmoid=True,
    )
    # Flatten per-batch outputs into one vector.
    predictions = np.vstack((predictions)).ravel()

    auc = metrics.roc_auc_score(valid_targets, predictions)
    # One OOF file per fold.
    oof_file = config.oof_file.replace(".npy", "_" + str(fold) + ".npy")
    np.save(oof_file, valid_targets)
    print(f"Epoch = {epoch}, AUC = {auc}")
    wandb.log({
        "best_valid_auc": auc,
    })
Ejemplo n.º 14
0
def get_tta_prediction(
    tta: int, test_loader, model, device: str, use_sigmoid: bool, prediction_length: int
) -> np.ndarray:
    """Average model predictions over `tta` test-time-augmentation passes.

    Args:
        tta: number of stochastic passes to average (must be >= 1).
        test_loader: data loader whose augmentations vary per pass.
        model: trained model handed to Engine.predict.
        device: device string for Engine.predict.
        use_sigmoid: whether Engine applies a sigmoid to raw outputs.
        prediction_length: number of samples, sizing the accumulator.

    Returns:
        Flat numpy array of length `prediction_length` with the mean
        prediction across passes.
    """
    prediction = np.zeros(prediction_length)
    for tta_id in range(tta):
        print(f"using tta at tta_id {tta_id}")
        # BUG FIX: the `use_sigmoid` argument was ignored — the call
        # hard-coded use_sigmoid=True. Forward the parameter instead.
        predictions_tta = Engine.predict(
            test_loader, model, device=device, use_sigmoid=use_sigmoid
        )

        predictions_tta = np.vstack(predictions_tta).ravel()
        prediction += predictions_tta
    prediction /= tta
    return prediction
Ejemplo n.º 15
0
def predict(fold):
    """Predict on the test set with the saved checkpoint for `fold`.

    Returns a flat numpy array of raw model outputs in test-csv order.
    """
    # config : path, df, device, epochs, train_bs
    test_path = img_path + "/test3/"
    df = pd.read_csv(data_path + "/test.csv")
    device = "cuda" if torch.cuda.is_available() else "cpu"
    epochs = 1
    test_bs = 18
    df_test = df

    # Aug : ImageNet normalization only (no TTA).
    mean = (0.485, 0.456, 0.406)
    std = (0.229, 0.224, 0.225)
    aug = albumentations.Compose([
        albumentations.Normalize(
            mean, std, max_pixel_value=255.0, always_apply=True
        )
    ])

    # Dataset: image paths plus dummy zero targets for the loader API.
    image_files = [
        test_path + name + ".jpg" for name in df.image_name.values.tolist()
    ]
    print(image_files[0])
    dummy_targets = np.zeros(len(image_files))
    dataset = ClassificationLoader(
        image_paths=image_files,
        targets=dummy_targets,
        resize=None,
        augmentations=aug,
    )
    loader = torch.utils.data.DataLoader(
        dataset, batch_size=test_bs, shuffle=False, num_workers=4
    )

    # Model: restore the fold checkpoint on top of pretrained weights.
    net = SEResnext50_32x4d(pretrained=True)
    checkpoint = save_path + f"/200712_fold_{fold}.bin"
    net.load_state_dict(torch.load(checkpoint))
    net = net.to(device)

    # Inference.
    preds = Engine.predict(loader, net, device=device)
    return np.vstack(preds).flatten()
Ejemplo n.º 16
0
def predict(fold):
    """Predict on the test set with the checkpoint saved for `fold`.

    Returns a flat numpy array of raw model outputs in test-csv order.
    """
    test_data_path = "/home/prakhar/Desktop/ml/Melanoma_Detection/input/"
    model_path = "/home/prakhar/Desktop/ml/Melanoma_Detection/model_weights/"
    df_test = pd.read_csv(
        "/home/prakhar/Desktop/ml/Melanoma_Detection/input/test.csv")
    # Dummy targets: required by the loader, unused at inference.
    df_test.loc[:, "target"] = 0
    device = "cuda"
    test_bs = 16
    # BUG FIX: std third channel was 0.255; the commented-out reference
    # values in the original confirmed the ImageNet std is 0.225.
    mean = (0.485, 0.456, 0.406)
    std = (0.229, 0.224, 0.225)

    test_aug = albumentations.Compose([
        albumentations.Normalize(mean,
                                 std,
                                 max_pixel_value=255.0,
                                 always_apply=True)
    ])

    test_images = df_test.image_name.values.tolist()
    test_images = [
        os.path.join(test_data_path, i + '.jpg') for i in test_images
    ]
    test_targets = df_test.target.values

    test_dataset = ClassificationDataLoader(image_paths=test_images,
                                            targets=test_targets,
                                            resize=None,
                                            augmentations=test_aug)

    # BUG FIX: batch_size referenced undefined `valid_bs` -> NameError;
    # use the test_bs defined above.
    test_loader = torch.utils.data.DataLoader(test_dataset,
                                              batch_size=test_bs,
                                              shuffle=False,
                                              num_workers=4)

    model = SEResNext50_32x4d(pretrained='imagenet')
    # BUG FIX: was `model.state_dict(torch.load(...))`, which never loads
    # the weights (state_dict() only reads them); must be load_state_dict.
    model.load_state_dict(
        torch.load(os.path.join(model_path, f"model{fold}.bin")))
    model.to(device)

    predictions = Engine.predict(test_loader, model, device)

    return np.vstack(predictions).ravel()
Ejemplo n.º 17
0
def predict(fold):
    """Score the test set with the checkpoint for `fold`.

    Returns a flat numpy array of raw model outputs in test-csv order.
    """
    print(f"Predicting fold #{fold}")
    test_data_path = "/mnt/Data/MelanomaClassification/input/kaggle/working/test224/"
    df = pd.read_csv("/mnt/Data/MelanomaClassification/input/test.csv")
    device = "cuda"
    model_path = os.path.join("/mnt/Data/MelanomaClassification/models/",
                              f"model_fold_{fold}.bin")

    # ImageNet normalization only.
    mean = (0.485, 0.456, 0.406)
    std = (0.229, 0.224, 0.225)
    aug = albumentations.Compose([
        albumentations.Normalize(
            mean, std, max_pixel_value=255.0, always_apply=True
        )
    ])

    # Dummy zero targets satisfy the loader API at inference.
    image_paths = [
        os.path.join(test_data_path, name + ".png")
        for name in df.image_name.values.tolist()
    ]
    dummy_targets = np.zeros(len(image_paths))

    dataset = ClassificationLoader(
        image_paths=image_paths,
        targets=dummy_targets,
        resize=None,
        augmentations=aug,
    )
    loader = torch.utils.data.DataLoader(
        dataset, batch_size=16, shuffle=False, num_workers=4
    )

    # Restore the trained checkpoint (no pretrained download).
    net = SEResNext50_32x4d(pretrained=None)
    net.load_state_dict(torch.load(model_path))
    net.to(device)

    preds = Engine.predict(loader, net, device=device)
    return np.vstack(preds).ravel()
Ejemplo n.º 18
0
def predict(image_path, fold):
    """Score a single image on CPU with the checkpoint for `fold`.

    Returns the scalar raw model output for the image.
    """

    device = "cpu"
    model_path = "../model/"

    # ImageNet normalization statistics.
    mean = (0.485, 0.456, 0.406)
    std = (0.229, 0.224, 0.225)

    test_aug = albumentations.Compose([
        albumentations.Normalize(mean,
                                 std,
                                 max_pixel_value=255.0,
                                 always_apply=True),
    ])

    # One-element dataset with a dummy target.
    test_images = [image_path]
    test_targets = [0]

    test_dataset = ClassificationLoader(
        image_paths=test_images,
        targets=test_targets,
        resize=None,
        augmentations=test_aug,
    )

    test_loader = torch.utils.data.DataLoader(test_dataset,
                                              batch_size=8,
                                              shuffle=False,
                                              num_workers=4)

    model = SEResNext50_32x4d(pretrained=None)
    # FIX: device is "cpu", so map GPU-saved tensors onto the CPU —
    # without map_location, loading a CUDA checkpoint fails on a
    # CPU-only machine (the original comment already warned about this).
    model.load_state_dict(
        torch.load(os.path.join(model_path, f"model_fold_{fold}.bin"),
                   map_location=torch.device(device))
    )

    predictions = Engine.predict(test_loader, model, device=device)
    predictions = np.vstack(predictions).ravel()
    print(predictions)
    return predictions[0]
Ejemplo n.º 19
0
def predict(fold):
    """Predict on the test set with the saved model for `fold`.

    Returns a flat numpy array of raw model outputs in test-csv order.
    """
    # df load: paths and runtime configuration
    test_data_path = img_path + "/test3/"
    df = pd.read_csv(data_path + "/test.csv")
    device = "cuda" if torch.cuda.is_available() else "cpu"
    model_path = saved_path + f"/model_fold_{fold}.bin"
    test_bs = 32

    # augmentation: ImageNet normalization only
    mean = (0.485, 0.456, 0.406)
    std = (0.229, 0.224, 0.225)
    aug = albumentations.Compose([
        albumentations.Normalize(
            mean, std, max_pixel_value=255.0, always_apply=True
        )
    ])

    # dataset: dummy zero targets keep the loader API satisfied
    image_files = [
        test_data_path + name + ".jpg"
        for name in df.image_name.values.tolist()
    ]
    dummy_targets = np.zeros(len(image_files))
    dataset = ClassificationLoader(
        image_paths=image_files,
        targets=dummy_targets,
        resize=None,
        augmentations=aug,
    )

    # data loader
    loader = torch.utils.data.DataLoader(
        dataset, batch_size=test_bs, shuffle=False, num_workers=4
    )

    # model: restore the fold checkpoint on top of pretrained weights
    net = SEResnext50_32x4d(pretrained=True)
    net.load_state_dict(torch.load(model_path))
    net.to(device)

    preds = Engine.predict(loader, net, device)
    return np.vstack(preds).ravel()
Ejemplo n.º 20
0
        image_paths=train_images,
        targets=train_targets,
        resize=(128, 128),
        augmentations=aug,
    ).fetch(batch_size=16, num_workers=4, drop_last=False, shuffle=True, tpu=False)

    valid_loader = ClassificationDataLoader(
        image_paths=valid_images,
        targets=valid_targets,
        resize=(128, 128),
        augmentations=aug,
    ).fetch(batch_size=16, num_workers=4, drop_last=False, shuffle=False, tpu=False)

    optimizer = torch.optim.Adam(model.parameters(), lr=5e-4)
    scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=15, gamma=0.6)
    eng = Engine(model, optimizer, device=args.device)

    for epoch in range(args.epochs):
        train_loss = eng.train(train_loader)
        valid_loss = eng.evaluate(valid_loader)
        print(f"{epoch}, Train Loss={train_loss} Valid Loss={valid_loss}")

    test_df = pd.read_csv(os.path.join(args.data_path, "test.csv"))
    images = test_df.image_id.values.tolist()
    images = [os.path.join(args.data_path, "images", i + ".jpg") for i in images]
    targets = np.zeros((len(images), 4))

    aug = albumentations.Compose(
        [albumentations.Normalize(mean, std, max_pixel_value=255.0, always_apply=True)]
    )
Ejemplo n.º 21
0
def train(fold):
    """Train one CV fold end-to-end with wandb tracking.

    Reads hyperparameters from the yaml config referenced by CLI args,
    builds train/valid datasets (optionally merged with supplement data
    and per-image metadata), trains with RAdam (optionally wrapped in
    SWA) under ReduceLROnPlateau-on-AUC and early stopping, then
    re-evaluates the best (and, if enabled, SWA) checkpoint.
    """
    args = get_args()
    with open(args.config) as file:
        config_file = yaml.load(file, Loader=yaml.FullLoader)

    wandb.init(
        project="siim2020",
        entity="siim_melanoma",
        # name=f"20200718-effb0-adamw-consineaneal-{fold}",
        name=f"2017-2018-rexnet-test-{fold}",
        #name=f"swav-test-{fold}",
        #name=f"RAdam-b6-384x384-{fold}"
    )
    config = wandb.config  # Initialize config
    config.update(config_file)
    device = config.device

    # Per-fold checkpoint path from the config template.
    model_path = config.model_path.format(fold)

    seed_everything(config.seed)
    df = pd.read_csv(config.train_csv_fold)
    df_train = df[df.kfold != fold].reset_index(drop=True)
    df_train["image_name"] = config.training_data_path + df_train[
        "image_name"] + ".jpg"

    # Optionally merge extra positive samples (even-numbered tfrecords,
    # target==1 only) from a supplement csv, then reshuffle.
    if config.supplement_data["use_supplement"]:
        print(f"training shape before merge {df_train.shape}")
        df_supplement = pd.read_csv(config.supplement_data["csv_file"])
        df_supplement = df_supplement[df_supplement["tfrecord"] % 2 == 0]
        df_supplement = df_supplement[df_supplement["target"] == 1]
        df_supplement["image_name"] = (config.supplement_data["file_path"] +
                                       df_supplement["image_name"] + ".jpg")
        df_train = pd.concat([df_train, df_supplement]).reset_index(drop=True)
        df_train = df_train.sample(
            frac=1, random_state=config.seed).reset_index(drop=True)
        del df_supplement
        print(f"training shape after merge {df_train.shape}")

    df_valid = df[df.kfold == fold].reset_index(drop=True)
    df_valid["image_name"] = config.training_data_path + df_valid[
        "image_name"] + ".jpg"

    # Patient metadata features are optional; meta_features=None disables them.
    if config.use_metadata:
        df_train, meta_features = get_meta_feature(df_train)
        df_valid, _ = get_meta_feature(df_valid)
    else:
        meta_features = None

    model = get_model(
        config.model_backbone,
        config.model_name,
        config.num_classes,
        config.input_size,
        config.use_metadata,
        meta_features,
    )

    model = model.to(config.device)
    print("watching model")
    wandb.watch(model, log="all")

    # ImageNet normalization statistics; heavy augmentation on train only.
    mean = (0.485, 0.456, 0.406)
    std = (0.229, 0.224, 0.225)
    train_aug = albumentations.Compose([
        AdvancedHairAugmentation(hairs_folder="../input/melanoma-hairs/"),
        # albumentations.augmentations.transforms.CenterCrop(64, 64, p=0.8),
        albumentations.augmentations.transforms.RandomBrightnessContrast(),
        albumentations.augmentations.transforms.HueSaturationValue(),
        # Microscope(p=0.4),
        albumentations.augmentations.transforms.RandomResizedCrop(
            config.input_size, config.input_size, scale=(0.7, 1.0), p=0.4),
        albumentations.augmentations.transforms.VerticalFlip(p=0.4),
        albumentations.augmentations.transforms.Cutout(p=0.3),  # doesnt work
        albumentations.ShiftScaleRotate(shift_limit=0.0625,
                                        scale_limit=0.1,
                                        rotate_limit=15),
        albumentations.Flip(p=0.5),
        RandomAugMix(severity=7, width=7, alpha=5, p=0.3),
        # albumentations.augmentations.transforms.Resize(
        #    config.input_size, config.input_size, p=1
        # ),
        albumentations.Normalize(mean,
                                 std,
                                 max_pixel_value=255.0,
                                 always_apply=True),
    ])

    valid_aug = albumentations.Compose([
        albumentations.Normalize(mean,
                                 std,
                                 max_pixel_value=255.0,
                                 always_apply=True),
    ])

    # image_name already holds full paths (prefixed above).
    train_images = df_train.image_name.values.tolist()
    # train_images = [
    #    os.path.join(config.training_data_path, i + ".jpg") for i in train_images
    # ]
    train_targets = df_train.target.values

    valid_images = df_valid.image_name.values.tolist()
    # valid_images = [
    #    os.path.join(config.training_data_path, i + ".jpg") for i in valid_images
    # ]
    valid_targets = df_valid.target.values

    train_dataset = ClassificationDataset(
        image_paths=train_images,
        targets=train_targets,
        resize=None,
        augmentations=train_aug,
        meta_features=meta_features,
        df_meta_features=df_train,
    )

    train_loader = torch.utils.data.DataLoader(
        train_dataset,
        batch_size=config.batch_size,
        # num_workers=4,
        num_workers=1,
        pin_memory=True,
        shuffle=True,
        #sampler=BalanceClassSampler(labels=train_targets, mode="upsampling"),
        drop_last=True,
    )

    valid_dataset = ClassificationDataset(
        image_paths=valid_images,
        targets=valid_targets,
        resize=None,
        augmentations=valid_aug,
        meta_features=meta_features,
        df_meta_features=df_valid,
    )

    valid_loader = torch.utils.data.DataLoader(
        valid_dataset,
        batch_size=config.test_batch_size,
        shuffle=False,
        # num_workers=4,
        num_workers=1,
        pin_memory=True,
        # drop_last=True
    )

    #optimizer = torch.optim.AdamW(model.parameters(), lr=config.lr)
    optimizer = RAdam(model.parameters(), lr=config.lr)
    if config.swa["use_swa"]:
        optimizer = SWA(optimizer, swa_start=12, swa_freq=1)

    # mode="max": the scheduler steps on validation AUC.
    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer,
                                                           patience=2,
                                                           threshold=0.0001,
                                                           mode="max")
    # scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(
    #    optimizer, len(train_loader) * config.epochs
    # )

    #scheduler = torch.optim.lr_scheduler.CyclicLR(
    #   optimizer,
    #   base_lr=config.lr / 10,
    #   max_lr=config.lr * 100,
    #   mode="triangular2",
    #   cycle_momentum=False,
    #)

    #scheduler = torch.optim.lr_scheduler.OneCycleLR(
    #    optimizer, max_lr=3e-3, steps_per_epoch=len(train_loader), epochs=config.epochs
    #)

    es = EarlyStopping(patience=6, mode="max")
    # NOTE(review): scaler is False (a bool) when fp16 is off — Engine.train
    # presumably treats any falsy scaler as "no AMP"; confirm.
    if config.fp16:
        print("************* using fp16 *************")
        scaler = GradScaler()
    else:
        scaler = False

    for epoch in range(config.epochs):
        train_loss = Engine.train(
            train_loader,
            model,
            optimizer,
            device=config.device,
            wandb=wandb,
            accumulation_steps=config.accumulation_steps,
            fp16=config.fp16,
            scaler=scaler,
        )
        predictions, valid_loss = Engine.evaluate(
            valid_loader,
            model,
            device=config.device,
            wandb=wandb,
            epoch=epoch,
            upload_image=False,
            use_sigmoid=True,
        )
        # Flatten per-batch outputs into one vector for AUC.
        predictions = np.vstack((predictions)).ravel()

        auc = metrics.roc_auc_score(valid_targets, predictions)
        print(f"Epoch = {epoch}, AUC = {auc}")
        wandb.log({
            "valid_auc": auc,
        })

        scheduler.step(auc)

        # EarlyStopping also checkpoints the model on AUC improvement.
        es(auc, model, model_path=model_path)
        if es.early_stop:
            print("Early stopping")
            break
    if config.swa["use_swa"]:
        print("saving the model using SWA")
        optimizer.swap_swa_sgd()
        torch.save(model.state_dict(), config.swa["model_path"].format(fold))

    # Re-evaluate the best (early-stopped) checkpoint, and the SWA
    # checkpoint when SWA is enabled.
    evaluate_for_best_epoch(
        fold,
        model_path,
        config.device,
        valid_loader,
        config.model_name,
        valid_targets,
        "final",
        meta_features=meta_features,
    )
    if config.swa["use_swa"]:
        model_path = config.swa["model_path"].format(fold)
        evaluate_for_best_epoch(
            fold,
            model_path,
            config.device,
            valid_loader,
            config.model_name,
            valid_targets,
            "swa",
            meta_features=meta_features,
        )
Ejemplo n.º 22
0
def train(fold):
    """Train a SEResnext50_32x4d melanoma classifier on one CV fold.

    Rows whose ``fold`` column equals `fold` form the validation split;
    all other rows form the training split.  The best checkpoint (by
    validation AUC) is saved through EarlyStopping.

    Args:
        fold: integer fold id used to split ``train_folds.csv``.
    """
    # config
    train_path = img_path + "/train3/"
    df = pd.read_csv(data_path + "/train_folds.csv")
    device = "cuda" if torch.cuda.is_available() else "cpu"
    epochs = 1
    train_bs = 32
    valid_bs = 32

    # fold split
    df_train = df[df.fold != fold].reset_index(drop=True)
    df_valid = df[df.fold == fold].reset_index(drop=True)

    # augmentations -- ImageNet normalization stats
    mean = (0.485, 0.456, 0.406)
    std = (0.229, 0.224, 0.225)
    train_aug = albumentations.Compose([
        albumentations.Normalize(mean,
                                 std,
                                 max_pixel_value=255.0,
                                 always_apply=True),
        albumentations.ShiftScaleRotate(shift_limit=0.0625,
                                        scale_limit=0.1,
                                        rotate_limit=15),
        albumentations.VerticalFlip(p=0.5),
        albumentations.HorizontalFlip(p=0.5),
    ])
    valid_aug = albumentations.Compose([
        albumentations.Normalize(mean,
                                 std,
                                 max_pixel_value=255.0,
                                 always_apply=True),
    ])

    # datasets (train_path is the same prefix the original built inline)
    train_imgs = df_train.image_name.values.tolist()
    train_imgs = [train_path + file + ".jpg" for file in train_imgs]
    train_targets = df_train.target.values

    valid_imgs = df_valid.image_name.values.tolist()
    valid_imgs = [train_path + file + ".jpg" for file in valid_imgs]
    valid_targets = df_valid.target.values

    train_dataset = ClassificationLoader(
        image_paths=train_imgs,
        targets=train_targets,
        resize=None,
        augmentations=train_aug,
    )
    valid_dataset = ClassificationLoader(
        image_paths=valid_imgs,
        targets=valid_targets,
        resize=None,
        augmentations=valid_aug,
    )
    train_loader = torch.utils.data.DataLoader(train_dataset,
                                               batch_size=train_bs,
                                               shuffle=True,
                                               num_workers=4)
    # BUG FIX: the validation loader must NOT be shuffled -- the AUC below
    # compares `valid_targets` (dataframe order) with `preds` (loader
    # order); shuffling misaligns the two and makes the metric meaningless.
    valid_loader = torch.utils.data.DataLoader(valid_dataset,
                                               batch_size=valid_bs,
                                               shuffle=False,
                                               num_workers=4)

    # model / optimization
    model = SEResnext50_32x4d(pretrained=True)
    model = model.to(device)
    model_path = save_path + f"/200712_fold_{fold}.bin"
    optimizer = torch.optim.Adam(model.parameters(), lr=1e-4)
    # mode="max" because the scheduler/early-stopping track AUC (higher is better)
    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer,
                                                           patience=3,
                                                           threshold=0.001,
                                                           mode="max")
    es = EarlyStopping(patience=3, mode="max")

    for epoch in range(epochs):
        train_loss = Engine.train(train_loader,
                                  model,
                                  optimizer,
                                  device=device)
        preds, valid_loss = Engine.evaluate(valid_loader, model, device=device)
        preds = np.vstack(preds).ravel()
        auc = metrics.roc_auc_score(valid_targets, preds)
        print(f"Epoch {epoch}, AUC {auc}")
        scheduler.step(auc)
        es(auc, model, model_path=model_path)
        if es.early_stop:
            print("early stop")
            break
Ejemplo n.º 23
0
def train(fold):
    """Run one cross-validation fold of SEResnext50_32x4d training.

    Splits ``train_folds.csv`` on its ``kfold`` column, trains for up to
    50 epochs with ReduceLROnPlateau on validation AUC, and checkpoints
    the best model via EarlyStopping.
    """
    training_data_path = "../input/siic-isic-224x224-images/train/"
    df = pd.read_csv("/kaggle/working/train_folds.csv")
    device = "cuda"
    epochs = 50
    train_bs = 32
    valid_bs = 16

    df_train = df[df.kfold != fold].reset_index(drop=True)
    df_valid = df[df.kfold == fold].reset_index(drop=True)

    # ImageNet normalization statistics
    mean = (0.485, 0.456, 0.406)
    std = (0.229, 0.224, 0.225)

    # geometric augmentation only on the training split
    train_aug = albumentations.Compose(
        [
            albumentations.Normalize(mean, std, max_pixel_value=255.0, always_apply=True),
            albumentations.ShiftScaleRotate(shift_limit=0.0625, scale_limit=0.1, rotate_limit=15),
            albumentations.Flip(p=0.5)
        ]
    )
    valid_aug = albumentations.Compose(
        [
            albumentations.Normalize(mean, std, max_pixel_value=255.0, always_apply=True)
        ]
    )

    def _loader(frame, aug, batch_size, shuffle):
        # Build image paths + targets from a fold dataframe and wrap them
        # in a ClassificationLoader-backed DataLoader.
        names = frame.image_name.values.tolist()
        paths = [os.path.join(training_data_path, n + ".png") for n in names]
        dataset = ClassificationLoader(
            image_paths=paths,
            targets=frame.target.values,
            resize=None,
            augmentations=aug,
        )
        return torch.utils.data.DataLoader(
            dataset, batch_size=batch_size, shuffle=shuffle, num_workers=4
        )

    train_loader = _loader(df_train, train_aug, train_bs, shuffle=True)
    # validation is never shuffled so predictions stay aligned with targets
    valid_loader = _loader(df_valid, valid_aug, valid_bs, shuffle=False)
    valid_targets = df_valid.target.values

    model = SEResnext50_32x4d(pretrained="imagenet")
    model.to(device)

    optimizer = torch.optim.Adam(model.parameters(), lr=1e-4)
    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
        optimizer,
        patience=3,
        threshold=0.001,
        mode="max"
    )
    es = EarlyStopping(patience=5, mode="max")

    for epoch in range(epochs):
        train_loss = Engine.train(train_loader, model, optimizer, device=device)
        predictions, valid_loss = Engine.evaluate(
            valid_loader, model, device=device
        )
        predictions = np.vstack(predictions).ravel()
        auc = metrics.roc_auc_score(valid_targets, predictions)
        print(f"Epoch = {epoch}, AUC = {auc}")
        scheduler.step(auc)

        es(auc, model, model_path=f"model_fold_{fold}.bin")
        if es.early_stop:
            print("Early stopping")
            break
Ejemplo n.º 24
0
def train():
    """Train CV_Model on a 70/30 random split of ``images_labeled.csv``.

    Uses the Engine wrapper for the train/predict loop, ReduceLROnPlateau
    on validation AUC, and EarlyStopping to checkpoint the best model.
    """
    df = pd.read_csv(DATA_PATH + "images_labeled.csv")
    X_train, X_test, y_train, y_test = train_test_split(df.image.values,
                                                        df.label.values,
                                                        test_size=0.3,
                                                        random_state=42,
                                                        shuffle=True)

    # normalization only (no geometric augmentation in this pipeline)
    train_aug = albumentations.Compose([
        albumentations.Normalize(mean=MEAN,
                                 std=STD,
                                 max_pixel_value=255.0,
                                 always_apply=True)
    ])

    valid_aug = albumentations.Compose([
        albumentations.Normalize(mean=MEAN,
                                 std=STD,
                                 max_pixel_value=255.0,
                                 always_apply=True)
    ])

    train_images = [os.path.join(DATA_PATH, filename) for filename in X_train]
    valid_images = [os.path.join(DATA_PATH, filename) for filename in X_test]

    train_dataset = ClassificationDataset(
        image_paths=train_images,
        targets=y_train,
        resize=None,
        augmentations=train_aug,
    )

    train_loader = torch.utils.data.DataLoader(train_dataset,
                                               batch_size=TRAIN_BS,
                                               shuffle=True,
                                               num_workers=4)

    valid_dataset = ClassificationDataset(
        image_paths=valid_images,
        targets=y_test,
        resize=None,
        augmentations=valid_aug,
    )

    # validation loader stays unshuffled so predictions align with y_test
    valid_loader = torch.utils.data.DataLoader(valid_dataset,
                                               batch_size=VALID_BS,
                                               shuffle=False,
                                               num_workers=4)

    model = CV_Model(pretrained="imagenet")
    model.to(DEVICE)

    optimizer = torch.optim.Adam(model.parameters(), lr=1e-4)
    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer,
                                                           patience=3,
                                                           mode="max")

    es = EarlyStopping(patience=5, mode="max")

    # Engine is loop-invariant (model/optimizer/device never change), so
    # build it once instead of re-constructing it every epoch.
    engine = Engine(model=model, optimizer=optimizer, device=DEVICE)

    for epoch in range(EPOCHS):
        engine.train(train_loader)
        predictions = engine.predict(valid_loader)
        predictions = np.vstack(predictions).ravel()
        auc = metrics.roc_auc_score(y_test, predictions)
        print(f"Epoch = {epoch}, AUC = {auc}")
        scheduler.step(auc)
        es(auc, model, model_path=os.path.join(MODEL_PATH, "model.bin"))
        if es.early_stop:
            print("Early stopping")
            break
Ejemplo n.º 25
0
def train(fold):
    """Train SEResNext50_32x4d on one fold of ``train_folds.csv``.

    Fixes applied relative to the previous version:
    - ``val_targets`` was taken from ``df_train`` instead of ``df_val``,
      so the AUC was computed against the wrong labels.
    - the evaluation step referenced an undefined lowercase ``engine``;
      the class-level ``Engine`` API is used throughout now.
    - EarlyStopping was handed a directory; it needs a checkpoint file path.
    - the training loader is now shuffled (validation stays unshuffled).
    """
    training_data_path = "/content/train_images/"
    df = pd.read_csv("train_folds.csv")
    model_path = "/content/checkpoints/"
    device = "cpu"
    epochs = 10
    train_bs = 32
    val_bs = 16
    # ImageNet normalization stats
    mean = (0.485, 0.456, 0.406)
    std = (0.229, 0.224, 0.225)

    df_train = df[df.kfold != fold].reset_index(drop=True)
    df_val = df[df.kfold == fold].reset_index(drop=True)

    train_aug = albumentations.Compose(
        [
            albumentations.Normalize(mean, std, max_pixel_value=255, always_apply=True)
        ]
    )

    val_aug = albumentations.Compose(
        [
            albumentations.Normalize(mean, std, max_pixel_value=255, always_apply=True)
        ]
    )

    train_images = df_train.image_name.values.tolist()
    train_images = [os.path.join(training_data_path, i + ".jpg") for i in train_images]
    train_targets = df_train.target.values

    val_images = df_val.image_name.values.tolist()
    val_images = [os.path.join(training_data_path, i + ".jpg") for i in val_images]
    # BUG FIX: targets must come from the validation frame, not df_train
    val_targets = df_val.target.values

    train_dataset = ClassificationLoader(
        image_paths=train_images,
        targets=train_targets,
        resize=None,
        augmentations=train_aug
    )
    train_loder = torch.utils.data.DataLoader(
        train_dataset,
        batch_size=train_bs,
        shuffle=True,  # shuffle training batches each epoch
        num_workers=4
    )

    val_dataset = ClassificationLoader(
        image_paths=val_images,
        targets=val_targets,
        resize=None,
        augmentations=val_aug
    )

    # default shuffle=False keeps predictions aligned with val_targets
    val_loder = torch.utils.data.DataLoader(
        val_dataset,
        batch_size=val_bs,
        num_workers=4
    )

    model = SEResNext50_32x4d(pretrained='imagenet')
    model.to(device)
    optimizer = torch.optim.Adam(model.parameters(), lr=1e-4)
    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
        optimizer,
        patience=3,
        mode="max"
    )
    es = EarlyStopping(patience=5, mode="max")
    for epoch in range(epochs):
        training_loss = Engine.train(train_loder, model, optimizer, device=device)
        # Engine.evaluate returns (per-batch predictions, validation loss)
        predictions, val_loss = Engine.evaluate(val_loder, model, device=device)
        predictions = np.vstack(predictions).ravel()
        auc = metrics.roc_auc_score(val_targets, predictions)
        scheduler.step(auc)
        print(f"epoch = {epoch}, auc = {auc}")
        # checkpoint to a file inside the checkpoints directory
        es(auc, model, model_path=os.path.join(model_path, f"model_fold_{fold}.bin"))
        if es.early_stop:
            print("early stopping")
            break
Ejemplo n.º 26
0
def train(fold):
    """Train SEResNext50_32x4d on one fold with apex fp16 (opt_level O1).

    BUG FIX: evaluation previously ran on ``train_loader`` while the AUC
    was computed against ``valid_targets`` -- a size/order mismatch.  It
    now evaluates on ``valid_loader``; ``optimizer`` is also no longer
    passed to ``Engine.evaluate``, which does not need it.
    """
    training_data_path = "/home/abhishek/workspace/melanoma/input/jpeg/train224/"
    model_path = "/home/abhishek/workspace/melanoma-deep-learning"
    df = pd.read_csv("/home/abhishek/workspace/melanoma/input/train_folds.csv")
    device = "cuda"
    epochs = 50
    train_bs = 32
    valid_bs = 16
    # ImageNet normalization stats
    mean = (0.485, 0.456, 0.406)
    std = (0.229, 0.224, 0.225)

    df_train = df[df.kfold != fold].reset_index(drop=True)
    df_valid = df[df.kfold == fold].reset_index(drop=True)

    train_aug = albumentations.Compose([
        albumentations.Normalize(mean,
                                 std,
                                 max_pixel_value=255.0,
                                 always_apply=True),
    ])

    valid_aug = albumentations.Compose([
        albumentations.Normalize(mean,
                                 std,
                                 max_pixel_value=255.0,
                                 always_apply=True),
    ])

    train_images = df_train.image_name.values.tolist()
    train_images = [
        os.path.join(training_data_path, i + ".jpg") for i in train_images
    ]
    train_targets = df_train.target.values

    valid_images = df_valid.image_name.values.tolist()
    valid_images = [
        os.path.join(training_data_path, i + ".jpg") for i in valid_images
    ]
    valid_targets = df_valid.target.values

    train_dataset = ClassificationLoader(image_paths=train_images,
                                         targets=train_targets,
                                         resize=None,
                                         augmentations=train_aug)

    train_loader = torch.utils.data.DataLoader(train_dataset,
                                               batch_size=train_bs,
                                               shuffle=True,
                                               num_workers=4)

    valid_dataset = ClassificationLoader(image_paths=valid_images,
                                         targets=valid_targets,
                                         resize=None,
                                         augmentations=valid_aug)

    # unshuffled so predictions stay aligned with valid_targets
    valid_loader = torch.utils.data.DataLoader(valid_dataset,
                                               batch_size=valid_bs,
                                               shuffle=False,
                                               num_workers=4)

    model = SEResNext50_32x4d(pretrained="imagenet")
    model.to(device)

    optimizer = torch.optim.Adam(model.parameters(), lr=1e-4)
    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer,
                                                           patience=3,
                                                           mode="max")

    # apex automatic mixed precision, conservative O1 level
    model, optimizer = amp.initialize(model,
                                      optimizer,
                                      opt_level="O1",
                                      verbosity=0)

    es = EarlyStopping(patience=5, mode="max")
    for epoch in range(epochs):
        training_loss = Engine.train(train_loader,
                                     model,
                                     optimizer,
                                     device,
                                     fp16=True)
        # BUG FIX: evaluate on the validation loader, not the training loader
        predictions, valid_loss = Engine.evaluate(valid_loader, model,
                                                  device=device)
        predictions = np.vstack(predictions).ravel()
        auc = metrics.roc_auc_score(valid_targets, predictions)
        scheduler.step(auc)
        print(f"epoch={epoch}, auc={auc}")
        es(auc, model, os.path.join(model_path, f"model{fold}.bin"))
        if es.early_stop:
            print("early stopping")
            break
Ejemplo n.º 27
0
def train(fold):
    """Train SEResNext on one fold, with paths read interactively.

    Fixes applied relative to the previous version:
    - ImageNet mean third channel was 0.225; the standard value is 0.406.
    - ``valid_images`` was built from ``df_train`` instead of ``df_valid``,
      so validation ran on training images.
    - apex ``opt_level`` was the string '01' (zero-one); apex expects 'O1'.
    - evaluation ran on ``train_loader`` while AUC used ``valid_targets``.
    - ``np.vstack((predictions).ravel())`` called ``.ravel()`` on a Python
      list (AttributeError); stack first, then ravel.
    """
    # train image path
    training_data_path = input("Enter the train data path (resized image): ")
    # csv data path that was created from folds
    fold_csv_path = input("Enter the train_fold.csv file path: ")
    df = pd.read_csv(fold_csv_path)
    model_path = "model/"
    device = "cuda"
    epochs = 30
    train_batch_size = 32
    valid_batch_size = 16
    # ImageNet normalization stats (mean was previously 0.225 in the blue channel)
    mean = (0.485, 0.456, 0.406)
    standard_deviation = (0.229, 0.224, 0.225)

    df_train = df[df.kfold != fold].reset_index(drop=True)
    df_valid = df[df.kfold == fold].reset_index(drop=True)

    # normalize images
    train_aug = albumentations.Compose([
        albumentations.Normalize(mean,
                                 std=standard_deviation,
                                 max_pixel_value=255.0,
                                 always_apply=True)
    ])

    valid_aug = albumentations.Compose([
        albumentations.Normalize(mean,
                                 std=standard_deviation,
                                 max_pixel_value=255.0,
                                 always_apply=True)
    ])

    # train image mapping
    train_images = df_train.image_name.values.tolist()
    train_images = [
        os.path.join(training_data_path, i + ".jpg") for i in train_images
    ]
    train_targets = df_train.target.values

    # BUG FIX: validation images must come from df_valid, not df_train
    valid_images = df_valid.image_name.values.tolist()
    valid_images = [
        os.path.join(training_data_path, i + ".jpg") for i in valid_images
    ]
    valid_targets = df_valid.target.values

    # create train loader
    train_dataset = ClassificationLoader(image_paths=train_images,
                                         targets=train_targets,
                                         resize=None,
                                         augmentations=train_aug)
    # train loader
    train_loader = torch.utils.data.DataLoader(train_dataset,
                                               batch_size=train_batch_size,
                                               shuffle=True,
                                               num_workers=4)

    # create valid dataset
    valid_dataset = ClassificationLoader(image_paths=valid_images,
                                         targets=valid_targets,
                                         resize=None,
                                         augmentations=valid_aug)
    # validation data loader -- never shuffled, keeps AUC alignment
    valid_loader = torch.utils.data.DataLoader(valid_dataset,
                                               batch_size=valid_batch_size,
                                               shuffle=False,
                                               num_workers=4)

    # import model
    model = SEResNext(pretrained='imagenet')
    model.to(device)
    #
    optimizer = torch.optim.Adam(model.parameters(), lr=1e-4)
    # dynamic learning rate reducing based on validation measurements.
    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
        # https://pytorch.org/docs/master/optim.html#torch.optim.lr_scheduler.ReduceLROnPlateau
        optimizer,
        patience=4,
        mode='max',
    )
    # use apex for mixed precision training
    # amp: Automatic Mixed Precision -- BUG FIX: opt_level is 'O1', not '01'
    model, optimizer = amp.initialize(model,
                                      optimizer,
                                      opt_level='O1',
                                      verbosity=0)
    # earlystopping
    es = EarlyStopping(patience=5, mode='max')
    # train the train data
    # use the wtfml module for calculating loss and evaluation
    for epoch in range(epochs):
        training_loss = Engine.train(train_loader,
                                     model,
                                     optimizer,
                                     device,
                                     fp16=True)
        # BUG FIX: evaluate on the validation loader, not the training loader
        predictions, valid_loss = Engine.evaluate(valid_loader, model,
                                                  device=device)
        # BUG FIX: stack the per-batch arrays first, then flatten
        predictions = np.vstack(predictions).ravel()
        auc = metrics.roc_auc_score(valid_targets, predictions)
        scheduler.step(auc)
        print(f"epoch = {epoch}, auc= {auc}")
        es(auc, model, os.path.join(model_path, f"model{fold}.bin"))

        if es.early_stop:
            print("Early Stopping")
            break
Ejemplo n.º 28
0
            num_workers=4,
            drop_last=False,
            shuffle=True,
            tpu=False)

    valid_loader = ClassificationDataLoader(
        image_paths=valid_images,
        targets=valid_targets,
        resize=(128, 128),
        augmentations=aug,
    ).fetch(batch_size=16,
            num_workers=4,
            drop_last=False,
            shuffle=False,
            tpu=False)

    optimizer = torch.optim.Adam(model.parameters(), lr=5e-4)
    scheduler = torch.optim.lr_scheduler.StepLR(optimizer,
                                                step_size=15,
                                                gamma=0.6)

    for epoch in range(args.epochs):
        train_loss = Engine.train(train_loader,
                                  model,
                                  optimizer,
                                  device=args.device)

        valid_loss = Engine.evaluate(valid_loader, model, device=args.device)

        print(f"{epoch}, Train Loss={train_loss} Valid Loss={valid_loss}")
Ejemplo n.º 29
0
def train(fold):
    """Train SEResNext50_32x4d on one fold with apex mixed precision.

    Fixes applied relative to the previous version:
    - the validation loader was shuffled, which breaks the AUC alignment
      the code's own comment warns about.
    - ``ClassificationLoader`` was called with ``augmentation=`` instead
      of the ``augmentations=`` keyword used everywhere else.
    - apex ``opt_level`` was '01' (zero-one); apex expects 'O1'.
    - evaluation ran on ``train_loader`` while AUC used ``valid_targets``.
    """
    training_data_path = ''
    model_path = ""
    df = pd.read_csv("/.../train_folds.csv")
    device = "cuda"
    epochs = 50
    train_bs = 32  #train batch size
    valid_bs = 16

    #normalize image pixel values
    mean = (0.485, 0.456, 0.406)  #these values are for this model
    std = (0.229, 0.224, 0.225)

    df_train = df[df.kfold != fold].reset_index(
        drop=True)  #absolutely removes the previous index
    df_valid = df[df.kfold == fold].reset_index(drop=True)

    #for image augmentation
    train_aug = albumentations.Compose([
        albumentations.Normalize(mean,
                                 std,
                                 max_pixel_value=255.0,
                                 always_apply=True),
    ])

    valid_aug = albumentations.Compose([
        albumentations.Normalize(mean,
                                 std,
                                 max_pixel_value=255.0,
                                 always_apply=True),
    ])

    train_images = df_train.image_name.values.tolist()
    train_images = [
        os.path.join(training_data_path, i + '.jpg') for i in train_images
    ]
    train_targets = df_train.target.values

    valid_images = df_valid.image_name.values.tolist()
    valid_images = [
        os.path.join(training_data_path, i + '.jpg') for i in valid_images
    ]
    valid_targets = df_valid.target.values

    # keyword is `augmentations` (plural) in ClassificationLoader
    train_dataset = ClassificationLoader(image_paths=train_images,
                                         targets=train_targets,
                                         resize=None,
                                         augmentations=train_aug)

    train_loader = torch.utils.data.DataLoader(train_dataset,
                                               batch_size=train_bs,
                                               shuffle=True,
                                               num_workers=4)

    valid_dataset = ClassificationLoader(image_paths=valid_images,
                                         targets=valid_targets,
                                         resize=None,
                                         augmentations=valid_aug)

    # BUG FIX: must not be shuffled -- AUC below relies on loader order
    # matching valid_targets
    valid_loader = torch.utils.data.DataLoader(valid_dataset,
                                               batch_size=valid_bs,
                                               shuffle=False,
                                               num_workers=4)

    model = SEResNext50_32x4d(pretrained='imagenet')
    model.to(device)

    optimizer = torch.optim.Adam(model.parameters(), lr=1e-4)
    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(  # reduce learning rate if it plateaus at any level
        optimizer,
        patience=3,
        mode=
        "max"  #max because we'll be using scheduler on AUC(area under ROC curve)
    )

    model, optimizer = amp.initialize(  #apex is used for mixed precision training, it trains faster with less memory
        model,
        optimizer,
        opt_level="O1",  # BUG FIX: letter O, not zero
        verbosity=0)

    es = EarlyStopping(patience=5, mode="max")
    for epoch in range(epochs):
        training_loss = Engine.train(train_loader,
                                     model,
                                     optimizer,
                                     device,
                                     fp16=True)
        # BUG FIX: evaluate on the validation loader, not the training loader
        predictions, valid_loss = Engine.evaluate(valid_loader, model,
                                                  device=device)
        predictions = np.vstack(predictions).ravel()
        auc = metrics.roc_auc_score(
            valid_targets, predictions
        )  #this is why valid_data should not be shuffled as opposed to training data
        scheduler.step(auc)
        print("epoch={}, auc={}".format(epoch, auc))
        es(auc, model, os.path.join(model_path, f"model{fold}.bin"))
        if es.early_stop:
            print('early stopping')
            break
Ejemplo n.º 30
0
        resize=(128, 128),
        augmentations=aug,
    ).fetch(batch_size=16,
            num_workers=4,
            drop_last=False,
            shuffle=False,
            tpu=False)

    optimizer = torch.optim.Adam(model.parameters(), lr=5e-4)
    scheduler = torch.optim.lr_scheduler.StepLR(optimizer,
                                                step_size=15,
                                                gamma=0.6)

    for epoch in range(args.epochs):
        train_loss = Engine.train(train_loader,
                                  model,
                                  optimizer,
                                  device=args.device)
        predictions, valid_loss = Engine.evaluate(valid_loader,
                                                  model,
                                                  device=args.device)
        predictions = np.vstack((predictions))
        roc_1 = metrics.roc_auc_score(valid_targets[:, 0], predictions[:, 0])
        roc_2 = metrics.roc_auc_score(valid_targets[:, 1], predictions[:, 1])
        roc_3 = metrics.roc_auc_score(valid_targets[:, 2], predictions[:, 2])
        roc_4 = metrics.roc_auc_score(valid_targets[:, 3], predictions[:, 3])
        mean_roc = (roc_1 + roc_2 + roc_3 + roc_4) / 4
        print(
            f"Epoch={epoch}, Train Loss={train_loss} Valid Loss={valid_loss}, Mean ROC AUC={mean_roc}"
        )

    test_df = pd.read_csv(os.path.join(args.data_path, "test.csv"))