Beispiel #1
0
def predict(image_path, model):
    """Run ``model`` on a single image and return a flat prediction array."""
    # ImageNet normalization statistics.
    norm_mean = (0.485, 0.456, 0.406)
    norm_std = (0.229, 0.224, 0.225)

    augment = albumentations.Compose([
        albumentations.Normalize(
            norm_mean, norm_std, max_pixel_value=255.0, always_apply=True),
    ])

    dataset = ClassificationLoader(
        image_paths=[image_path],
        targets=[0],  # dummy target; unused at inference time
        resize=None,
        augmentations=augment,
    )

    loader = torch.utils.data.DataLoader(
        dataset, batch_size=1, shuffle=False, num_workers=0)

    preds = Engine.predict(loader, model, DEVICE)
    return np.vstack(preds).ravel()
def test(fold):
    """Run inference for one fold's saved checkpoint over the test set.

    Reads all paths and the batch size from ``config``, loads
    ``model_fold_{fold}.bin``, and returns a flat numpy array of predictions
    in the same order as the rows of the test CSV.
    """
    test_img_path  = config.TEST_IMAGES
    device = config.DEVICE
    test_bs = config.TEST_BS

    df_test = pd.read_csv(config.TEST_FILE).reset_index(drop=True)
    # pretrained=None: weights come from the fold checkpoint loaded below.
    model_ = model.SE_Resnext50_32x4d(pretrained=None)
    model_.load_state_dict(torch.load(os.path.join(config.MODEL_PATH, f"model_fold_{fold}.bin")))
    model_.to(device)

    # ImageNet normalization statistics.
    mean = (0.485, 0.456, 0.406)
    std = (0.229, 0.224, 0.225)

    test_aug = alb.Compose([alb.Normalize(mean, std, max_pixel_value=255.0, always_apply=True)])

    test_imgs = df_test.image_name.values.tolist()
    test_imgs = [os.path.join(test_img_path, i + ".jpg") for i in test_imgs]
    # Dummy zero targets: the loader requires them but inference ignores them.
    test_targets = np.zeros(len(test_imgs))

    test_dataset = ClassificationLoader(
        image_paths = test_imgs,
        targets=test_targets,
        resize=(256,256),
        augmentations=test_aug
    )

    # shuffle=False keeps predictions aligned with df_test rows.
    test_loader = torch.utils.data.DataLoader(
        test_dataset, batch_size= test_bs, shuffle=False, num_workers=4
    )

    predictions = Engine.predict(test_loader, model_, device=device)
    predictions = np.vstack((predictions)).ravel()

    return predictions
Beispiel #3
0
def predict(image_path, model):
    """Predict on a single image with the given model.

    Fix: ``mean`` and ``std`` were referenced without being defined anywhere
    in this scope (NameError unless matching globals happened to exist);
    define the standard ImageNet statistics locally, matching the sibling
    predict functions.
    """
    # ImageNet normalization statistics.
    mean = (0.485, 0.456, 0.406)
    std = (0.229, 0.224, 0.225)

    aug = albumentations.Compose(
        [
            albumentations.Normalize(mean, std, max_pixel_value=255.0, always_apply=True)
        ]
    )

    images = [image_path]
    targets = [0]  # dummy target; unused at inference time

    test_dataset = ClassificationLoader(
        image_paths=images,
        targets=targets,
        resize=None,
        augmentations=aug,
    )

    test_loader = torch.utils.data.DataLoader(
        test_dataset, batch_size=1, shuffle=False, num_workers=4
    )

    predictions = Engine.predict(test_loader, model, DEVICE)
    predictions = np.vstack(predictions).ravel()

    return predictions
Beispiel #4
0
def train_dataloader(images, targets):
    """Build a shuffled DataLoader over the training images."""
    return DataLoader(
        dataset=ClassificationLoader(
            image_paths=images,
            targets=targets,
            resize=None,
            augmentations=train_albumentation(),
        ),
        batch_size=TRAIN_BS,
        shuffle=True,
        num_workers=10,
    )
Beispiel #5
0
def test_dataloader(images, targets):
    """Build an unshuffled, single-sample DataLoader for evaluation."""
    return DataLoader(
        dataset=ClassificationLoader(
            image_paths=images,
            targets=targets,
            resize=None,
            augmentations=valid_albumentation(),
        ),
        batch_size=1,
        shuffle=False,
        num_workers=0,
    )
Beispiel #6
0
def predict(fold):
    """Predict test-set probabilities with the checkpoint saved for ``fold``.

    Fixes: the DataLoader hard-coded ``batch_size=8`` instead of using the
    ``test_bs`` variable defined above it; the unused ``epochs`` local was
    removed.
    """
    training_data_path = "/media/vatsal/Movies & Games/down_siim-isic-melanoma-classification/test224/"
    model_path = "/media/vatsal/Movies & Games/Melenoma-Deep-Learning/model/"
    df_test = pd.read_csv(
        "/media/vatsal/Movies & Games/down_siim-isic-melanoma-classification/test.csv"
    )
    df_test.loc[:, "target"] = 0  # dummy targets; unused at inference time

    device = "cuda"
    test_bs = 8
    # ImageNet normalization statistics.
    mean = (0.485, 0.456, 0.406)
    std = (0.229, 0.224, 0.225)

    test_aug = albumentations.Compose([
        albumentations.Normalize(mean,
                                 std,
                                 max_pixel_value=255.0,
                                 always_apply=True),
    ])

    test_images = df_test.image_name.values.tolist()
    test_images = [
        os.path.join(training_data_path, i + ".jpg") for i in test_images
    ]
    test_targets = df_test.target.values

    test_dataset = ClassificationLoader(
        image_paths=test_images,
        targets=test_targets,
        resize=None,
        augmentations=test_aug,
    )

    # shuffle=False keeps predictions aligned with df_test rows.
    test_loader = torch.utils.data.DataLoader(test_dataset,
                                              batch_size=test_bs,
                                              shuffle=False,
                                              num_workers=4)

    # pretrained=None: weights come from the fold checkpoint.
    model = SEResNext50_32x4d(pretrained=None)
    model.load_state_dict(
        torch.load(os.path.join(model_path, f"model{fold}.bin")))
    model.to(device)

    predictions = Engine.predict(test_loader, model, device=device)
    predictions = np.vstack(predictions).ravel()
    return predictions
Beispiel #7
0
def predict(fold):
    """Predict on the test set with the model saved for ``fold``.

    Fixes over the original:
    - ``testing_data_path`` was undefined (NameError at the path join);
      use the ``test_data_path`` defined above.
    - ``ClassificationLoader`` keyword corrected to ``augmentations``
      (matching every other call site in this file).
    - the test loader no longer shuffles, so predictions stay aligned with
      the rows of ``df_test``.
    - ``pretrained=None``: the fold checkpoint is loaded immediately after,
      so requesting ImageNet weights was wasted work.
    - unused ``epochs`` removed.
    """
    test_data_path = ''
    model_path = ""
    df_test = pd.read_csv("/.../test.csv")
    df_test.loc[:, 'target'] = 0  # dummy targets; unused at inference time

    device = 'cuda'
    test_bs = 32  # test batch size

    # ImageNet normalization statistics.
    mean = (0.485, 0.456, 0.406)
    std = (0.229, 0.224, 0.225)

    test_aug = albumentations.Compose([
        albumentations.Normalize(mean,
                                 std,
                                 max_pixel_value=255.0,
                                 always_apply=True),
    ])

    test_images = df_test.image_name.values.tolist()
    test_images = [
        os.path.join(test_data_path, i + '.jpg') for i in test_images
    ]
    test_targets = df_test.target.values

    test_dataset = ClassificationLoader(image_paths=test_images,
                                        targets=test_targets,
                                        resize=None,
                                        augmentations=test_aug)

    test_loader = torch.utils.data.DataLoader(test_dataset,
                                              batch_size=test_bs,
                                              shuffle=False,
                                              num_workers=4)

    model = SEResNext50_32x4d(pretrained=None)
    model.load_state_dict(torch.load(os.path.join(model_path, f"model{fold}")))
    model.to(device)

    predictions = Engine.predict(test_loader, model, device)
    return np.vstack(predictions).ravel()
Beispiel #8
0
def predict(fold):
    """Predict on the test set with the checkpoint for ``fold``.

    Fix: the normalization mean was ``(0.485, 0.456, 0.225)`` — the third
    value is the blue-channel *std*, not the mean. The standard ImageNet
    mean is ``(0.485, 0.456, 0.406)``, which is what the companion ``train``
    code must use for the checkpoint's weights to be meaningful.
    Unused ``epochs`` removed.
    """
    # test image path (resized images)
    test_data_path = input("Enter the test data path (resized image): ")
    # csv data path that was created from folds
    test_csv_path = input("Enter the test.csv file path: ")
    df_test = pd.read_csv(test_csv_path)
    df_test.loc[:, "target"] = 0  # dummy targets; unused at inference time
    model_path = "model/"
    device = "cuda"
    test_batch_size = 16
    # ImageNet normalization statistics.
    mean = (0.485, 0.456, 0.406)
    standard_deviation = (0.229, 0.224, 0.225)

    test_aug = albumentations.Compose([
        albumentations.Normalize(mean,
                                 std=standard_deviation,
                                 max_pixel_value=255.0,
                                 always_apply=True)
    ])

    # test image mapping
    test_images = df_test.image_name.values.tolist()
    test_images = [
        os.path.join(test_data_path, i + ".jpg") for i in test_images
    ]
    test_targets = df_test.target.values

    # create test dataset
    test_dataset = ClassificationLoader(image_paths=test_images,
                                        targets=test_targets,
                                        resize=None,
                                        augmentations=test_aug)
    # test data loader (shuffle=False keeps row alignment with df_test)
    test_loader = torch.utils.data.DataLoader(test_dataset,
                                              batch_size=test_batch_size,
                                              shuffle=False,
                                              num_workers=4)

    # import model; the fold checkpoint overwrites the weights below
    model = SEResNext(pretrained='imagenet')
    model.load_state_dict(
        torch.load(os.path.join(model_path, f"model{fold}.bin")))

    model.to(device)

    predictions = Engine.predict(test_loader, model, device)
    return np.vstack(predictions).ravel()
Beispiel #9
0
def test_dataloader(images, targets):
    """Build a single-sample, unshuffled DataLoader with MEAN/STD normalization."""
    normalize = albumentations.Compose([
        albumentations.Normalize(
            MEAN, STD, max_pixel_value=255.0, always_apply=True)
    ])
    dataset = ClassificationLoader(
        image_paths=images,
        targets=targets,
        resize=None,
        augmentations=normalize,
    )
    return DataLoader(
        dataset=dataset, batch_size=1, shuffle=False, num_workers=0)
def predict(fold):
    """Predict on the test set with the checkpoint saved for ``fold``.

    Fix: the normalization std was ``(0.229, 0.244, 0.255)`` — digit-swapped
    typos of the standard ImageNet std ``(0.229, 0.224, 0.225)`` used by the
    matching training code. Unused ``epochs`` removed.
    """
    test_data_path = ""
    model_path = ""
    df_test = pd.read_csv("")
    df_test.loc[:, "target"] = 0  # dummy targets; unused at inference time
    device = "cuda"
    test_bs = 16
    # ImageNet normalization statistics.
    mean = (0.485, 0.456, 0.406)
    std = (0.229, 0.224, 0.225)

    test_aug = albumentations.Compose(
        [
            albumentations.Normalize(mean, std, max_pixel_value=255.0, always_apply=True),
        ]
    )

    test_images = df_test.image_name.values.tolist()
    test_images = [os.path.join(test_data_path, i + ".jpg") for i in test_images]
    test_targets = np.zeros(len(test_images))

    test_dataset = ClassificationLoader(
        image_paths=test_images,
        targets=test_targets,
        resize=None,
        augmentations=test_aug
    )

    # shuffle=False keeps predictions aligned with df_test rows.
    test_loader = torch.utils.data.DataLoader(
        test_dataset,
        batch_size=test_bs,
        shuffle=False,
        num_workers=4
    )

    # pretrained=None: weights come from the fold checkpoint.
    model = SEResNext50_32x4d(pretrained=None)
    model.load_state_dict(torch.load(os.path.join(model_path, f"model{fold}.bin")))
    model.to(device)

    predictions = Engine.predict(
        test_loader,
        model,
        device=device
    )
    return np.vstack(predictions).ravel()
Beispiel #11
0
def predict(fold):
    """Return ravelled test-set predictions from the saved model for ``fold``."""
    print(f"Predicting fold #{fold}")
    test_data_path = "/mnt/Data/MelanomaClassification/input/kaggle/working/test224/"
    df = pd.read_csv("/mnt/Data/MelanomaClassification/input/test.csv")
    device = "cuda"
    model_path = os.path.join("/mnt/Data/MelanomaClassification/models/",
                              f"model_fold_{fold}.bin")

    # ImageNet normalization statistics.
    aug = albumentations.Compose([
        albumentations.Normalize((0.485, 0.456, 0.406),
                                 (0.229, 0.224, 0.225),
                                 max_pixel_value=255.0,
                                 always_apply=True)
    ])

    image_files = [
        os.path.join(test_data_path, name + ".png")
        for name in df.image_name.values.tolist()
    ]
    dummy_targets = np.zeros(len(image_files))  # unused at inference time

    dataset = ClassificationLoader(
        image_paths=image_files,
        targets=dummy_targets,
        resize=None,
        augmentations=aug,
    )
    loader = torch.utils.data.DataLoader(
        dataset, batch_size=16, shuffle=False, num_workers=4)

    # pretrained=None: weights come from the fold checkpoint.
    model = SEResNext50_32x4d(pretrained=None)
    model.load_state_dict(torch.load(model_path))
    model.to(device)

    preds = Engine.predict(loader, model, device=device)
    return np.vstack(preds).ravel()
Beispiel #12
0
def predict(image_path, fold):
    """Predict on one image with the fold checkpoint, on CPU.

    Fixes:
    - ``torch.load`` now passes ``map_location`` so a checkpoint saved on GPU
      loads on this CPU-only path (the original's own comment anticipated
      exactly this failure).
    - the model is explicitly moved to ``device`` before prediction.
    """
    device = "cpu"
    model_path = "../model/"

    # ImageNet normalization statistics.
    mean = (0.485, 0.456, 0.406)
    std = (0.229, 0.224, 0.225)

    test_aug = albumentations.Compose([
        albumentations.Normalize(mean,
                                 std,
                                 max_pixel_value=255.0,
                                 always_apply=True),
    ])

    test_images = [image_path]
    test_targets = [0]  # dummy target; unused at inference time

    test_dataset = ClassificationLoader(
        image_paths=test_images,
        targets=test_targets,
        resize=None,
        augmentations=test_aug,
    )

    test_loader = torch.utils.data.DataLoader(test_dataset,
                                              batch_size=8,
                                              shuffle=False,
                                              num_workers=4)

    model = SEResNext50_32x4d(pretrained=None)
    model.load_state_dict(
        torch.load(os.path.join(model_path, f"model_fold_{fold}.bin"),
                   map_location=torch.device(device))
    )
    model.to(device)

    predictions = Engine.predict(test_loader, model, device=device)
    predictions = np.vstack(predictions).ravel()
    print(predictions)
    return predictions[0]
Beispiel #13
0
def predict(fold):
    """Predict test-set probabilities with the saved model for ``fold``.

    Fixes:
    - ``pretrained=True`` requested pretrained weights at inference time even
      though the fold checkpoint is loaded immediately after; use
      ``pretrained=None`` (matches the other predict functions and avoids a
      needless download).
    - ``map_location=device`` added so the checkpoint also loads when CUDA is
      unavailable (``device`` falls back to "cpu" above).
    """
    # df load
    test_data_path=img_path+"/test3/"
    df=pd.read_csv(data_path+"/test.csv")
    device="cuda" if torch.cuda.is_available() else "cpu"
    model_path=saved_path+f"/model_fold_{fold}.bin"
    test_bs=32

    # ImageNet normalization statistics
    mean=(0.485, 0.456, 0.406)
    std=(0.229, 0.224, 0.225)
    aug=albumentations.Compose([
        albumentations.Normalize(mean,std,max_pixel_value=255.0,always_apply=True)
    ])

    # dataset (dummy zero targets: unused at inference time)
    images=df.image_name.values.tolist()
    images=[test_data_path+file+".jpg" for file in images]
    targets=np.zeros(len(images))
    test_dataset=ClassificationLoader(
        image_paths=images,
        targets=targets,
        resize=None,
        augmentations=aug,
    )

    # data loader (shuffle=False keeps row alignment with df)
    test_loader=torch.utils.data.DataLoader(
        test_dataset,batch_size=test_bs,shuffle=False,num_workers=4)

    # model
    model=SEResnext50_32x4d(pretrained=None)
    model.load_state_dict(torch.load(model_path, map_location=device))
    model.to(device)

    predictions=Engine.predict(test_loader,model,device)
    predictions=np.vstack(predictions).ravel()

    return predictions
def train(fold):
    """Train SEResNext50 on one cross-validation fold with early stopping.

    Fixes over the original:
    - ``albumentations.Compose`` is given a *list* of transforms, as it expects.
    - ``df_train.targets`` -> ``df_train.target`` (correct column name).
    - valid image filenames were built as ``name + "jpg"`` (missing dot).
    - the validation dataset was built with the *train* targets.
    - the validation loader used ``train_bs`` instead of ``valid_bs``.
    - the train loader now shuffles; evaluation runs on the *valid* loader so
      AUC is scored against ``valid_targets`` from the same data.
    - the model is moved to ``device`` (it never was).
    """
    training_data_path = "/home/sushi/code/Kaggle/Melanoma-Detection-/input/jpeg/train224"
    df = pd.read_csv(
        "/home/sushi/code/Kaggle/Melanoma-Detection-/input/train_folds.csv")
    model_path = "/home/sushi/code/Kaggle/Melanoma-Detection-/models"
    device = "cuda"
    epochs = 50
    train_bs = 32
    valid_bs = 16

    df_train = df[df.kfold != fold].reset_index(drop=True)
    df_valid = df[df.kfold == fold].reset_index(drop=True)

    # Compose requires a list of transforms.
    train_aug = albumentations.Compose(
        [albumentations.Normalize(always_apply=True)])

    valid_aug = albumentations.Compose(
        [albumentations.Normalize(always_apply=True)])

    train_images = df_train.image_name.values.tolist()
    train_images = [
        os.path.join(training_data_path, i + ".jpg") for i in train_images
    ]
    train_targets = df_train.target.values

    valid_images = df_valid.image_name.values.tolist()
    valid_images = [
        os.path.join(training_data_path, i + ".jpg") for i in valid_images
    ]
    valid_targets = df_valid.target.values

    train_dataset = ClassificationLoader(image_paths=train_images,
                                         targets=train_targets,
                                         resize=None,
                                         augmentations=train_aug)

    valid_dataset = ClassificationLoader(image_paths=valid_images,
                                         targets=valid_targets,
                                         resize=None,
                                         augmentations=valid_aug)

    train_loader = torch.utils.data.DataLoader(train_dataset,
                                               batch_size=train_bs,
                                               shuffle=True,
                                               num_workers=4)

    valid_loader = torch.utils.data.DataLoader(valid_dataset,
                                               batch_size=valid_bs,
                                               shuffle=False,
                                               num_workers=4)

    model = SEResNext50_32x4d(pretrained="imagenet")
    model.to(device)

    optimizer = torch.optim.Adam(model.parameters(), lr=1e-4)
    # Maximize AUC; reduce LR when it plateaus.
    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer,
                                                           patience=3,
                                                           mode="max")

    es = EarlyStopping(patience=5, mode="max")

    for epoch in range(epochs):
        training_loss = Engine.train(train_loader,
                                     model,
                                     optimizer,
                                     device,
                                     fp16=True)

        # Evaluate on the validation loader so AUC matches valid_targets.
        predictions, valid_loss = Engine.evaluate(
            valid_loader,
            model,
            optimizer,
            device,
        )

        predictions = np.vstack(predictions).ravel()
        auc = metrics.roc_auc_score(valid_targets, predictions)
        scheduler.step(auc)
        print(f"epoch={epoch}, auc:{auc}")

        es(auc, model, model_path)
        if es.early_stop:
            print("early stopping")
            break
Beispiel #15
0
def train(fold):
    """Train SEResnext50 on one CV fold with ReduceLROnPlateau and early stopping.

    Trains on rows where ``kfold != fold`` and validates on ``kfold == fold``,
    maximizing validation AUC. The best model is checkpointed by
    ``EarlyStopping`` to ``model_fold_{fold}.bin``.
    """
    training_data_path = "../input/siic-isic-224x224-images/train/"
    df = pd.read_csv("/kaggle/working/train_folds.csv")
    device = "cuda"
    epochs = 50
    train_bs = 32
    valid_bs = 16

    df_train = df[df.kfold != fold].reset_index(drop=True)
    df_valid = df[df.kfold == fold].reset_index(drop=True)

    model = SEResnext50_32x4d(pretrained="imagenet")
    model.to(device)

    # ImageNet normalization statistics.
    mean = (0.485, 0.456, 0.406)
    std = (0.229, 0.224, 0.225)
    # Train-time augmentation adds geometric jitter; validation only normalizes.
    train_aug = albumentations.Compose(
        [
            albumentations.Normalize(mean, std, max_pixel_value=255.0, always_apply=True),
            albumentations.ShiftScaleRotate(shift_limit=0.0625, scale_limit=0.1, rotate_limit=15),
            albumentations.Flip(p=0.5)
        ]
    )

    valid_aug = albumentations.Compose(
        [
            albumentations.Normalize(mean, std, max_pixel_value=255.0, always_apply=True)
        ]
    )

    train_images = df_train.image_name.values.tolist()
    train_images = [os.path.join(training_data_path, i + ".png") for i in train_images]
    train_targets = df_train.target.values

    valid_images = df_valid.image_name.values.tolist()
    valid_images = [os.path.join(training_data_path, i + ".png") for i in valid_images]
    valid_targets = df_valid.target.values

    train_dataset = ClassificationLoader(
        image_paths=train_images,
        targets=train_targets,
        resize=None,
        augmentations=train_aug,
    )

    train_loader = torch.utils.data.DataLoader(
        train_dataset, batch_size=train_bs, shuffle=True, num_workers=4
    )

    valid_dataset = ClassificationLoader(
        image_paths=valid_images,
        targets=valid_targets,
        resize=None,
        augmentations=valid_aug,
    )

    # shuffle=False: predictions must stay aligned with valid_targets for AUC.
    valid_loader = torch.utils.data.DataLoader(
        valid_dataset, batch_size=valid_bs, shuffle=False, num_workers=4
    )

    optimizer = torch.optim.Adam(model.parameters(), lr=1e-4)
    # mode="max": we step the scheduler with AUC, which we want to maximize.
    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
        optimizer,
        patience=3,
        threshold=0.001,
        mode="max"
    )

    es = EarlyStopping(patience=5, mode="max")

    for epoch in range(epochs):
        train_loss = Engine.train(train_loader, model, optimizer, device=device)
        predictions, valid_loss = Engine.evaluate(
            valid_loader, model, device=device
        )
        predictions = np.vstack((predictions)).ravel()
        auc = metrics.roc_auc_score(valid_targets, predictions)
        print(f"Epoch = {epoch}, AUC = {auc}")
        scheduler.step(auc)

        # EarlyStopping checkpoints on AUC improvement and flags stagnation.
        es(auc, model, model_path=f"model_fold_{fold}.bin")
        if es.early_stop:
            print("Early stopping")
            break
Beispiel #16
0
def train(fold):
    """Train SEResNext50 on one CV fold with early stopping on validation AUC.

    Fixes over the original:
    - ``val_targets`` was taken from ``df_train`` instead of ``df_val``, so
      AUC was computed against the wrong (and wrong-sized) labels.
    - the evaluation calls used a lowercase ``engine`` (undefined here) and a
      wrong signature; use ``Engine.evaluate``, which returns
      ``(predictions, loss)`` in one pass.
    - the train loader now shuffles (it was ``shuffle=False``).
    - unused ``predictions = []`` initialization removed;
      ``train_loder``/``val_loder`` typos renamed.
    """
    training_data_path = "/content/train_images/"
    df = pd.read_csv("train_folds.csv")
    model_path = "/content/checkpoints/"
    device = "cpu"
    epochs = 10
    train_bs = 32
    val_bs = 16
    # ImageNet normalization statistics.
    mean = (0.485, 0.456, 0.406)
    std = (0.229, 0.224, 0.225)

    df_train = df[df.kfold != fold].reset_index(drop=True)
    df_val = df[df.kfold == fold].reset_index(drop=True)

    train_aug = albumentations.Compose(
        [
            albumentations.Normalize(mean, std, max_pixel_value=255, always_apply=True)
        ]
    )

    val_aug = albumentations.Compose(
        [
            albumentations.Normalize(mean, std, max_pixel_value=255, always_apply=True)
        ]
    )

    train_images = df_train.image_name.values.tolist()
    train_images = [os.path.join(training_data_path, i + ".jpg") for i in train_images]
    train_targets = df_train.target.values

    val_images = df_val.image_name.values.tolist()
    val_images = [os.path.join(training_data_path, i + ".jpg") for i in val_images]
    # Labels must come from the validation split.
    val_targets = df_val.target.values

    train_dataset = ClassificationLoader(
        image_paths=train_images,
        targets=train_targets,
        resize=None,
        augmentations=train_aug
    )
    train_loader = torch.utils.data.DataLoader(
        train_dataset,
        batch_size=train_bs,
        shuffle=True,
        num_workers=4
    )

    val_dataset = ClassificationLoader(
        image_paths=val_images,
        targets=val_targets,
        resize=None,
        augmentations=val_aug
    )

    # shuffle defaults to False: keeps predictions aligned with val_targets.
    val_loader = torch.utils.data.DataLoader(
        val_dataset,
        batch_size=val_bs,
        num_workers=4
    )

    model = SEResNext50_32x4d(pretrained='imagenet')
    model.to(device)
    optimizer = torch.optim.Adam(model.parameters(), lr=1e-4)
    # Maximize AUC; reduce LR when it plateaus.
    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
        optimizer,
        patience=3,
        mode="max"
    )
    es = EarlyStopping(patience=5, mode="max")
    for epoch in range(epochs):
        training_loss = Engine.train(train_loader, model, optimizer, device=device)
        predictions, val_loss = Engine.evaluate(val_loader, model, device=device)
        predictions = np.vstack(predictions).ravel()
        auc = metrics.roc_auc_score(val_targets, predictions)
        scheduler.step(auc)
        print(f"epoch = {epoch}, auc = {auc}")
        es(auc, model, model_path)
        if es.early_stop:
            print("early stopping")
            break
Beispiel #17
0
def train(fold):
    """Train SEResNext50 on one CV fold with apex mixed precision.

    Fix: ``Engine.evaluate`` was called on ``train_loader`` while the AUC was
    scored against ``valid_targets`` — wrong data and a length mismatch.
    Evaluation now runs on ``valid_loader``.
    """
    training_data_path = "/home/abhishek/workspace/melanoma/input/jpeg/train224/"
    model_path = "/home/abhishek/workspace/melanoma-deep-learning"
    df = pd.read_csv("/home/abhishek/workspace/melanoma/input/train_folds.csv")
    device = "cuda"
    epochs = 50
    train_bs = 32
    valid_bs = 16
    # ImageNet normalization statistics.
    mean = (0.485, 0.456, 0.406)
    std = (0.229, 0.224, 0.225)

    df_train = df[df.kfold != fold].reset_index(drop=True)
    df_valid = df[df.kfold == fold].reset_index(drop=True)

    train_aug = albumentations.Compose([
        albumentations.Normalize(mean,
                                 std,
                                 max_pixel_value=255.0,
                                 always_apply=True),
    ])

    valid_aug = albumentations.Compose([
        albumentations.Normalize(mean,
                                 std,
                                 max_pixel_value=255.0,
                                 always_apply=True),
    ])

    train_images = df_train.image_name.values.tolist()
    train_images = [
        os.path.join(training_data_path, i + ".jpg") for i in train_images
    ]
    train_targets = df_train.target.values

    valid_images = df_valid.image_name.values.tolist()
    valid_images = [
        os.path.join(training_data_path, i + ".jpg") for i in valid_images
    ]
    valid_targets = df_valid.target.values

    train_dataset = ClassificationLoader(image_paths=train_images,
                                         targets=train_targets,
                                         resize=None,
                                         augmentations=train_aug)

    train_loader = torch.utils.data.DataLoader(train_dataset,
                                               batch_size=train_bs,
                                               shuffle=True,
                                               num_workers=4)

    valid_dataset = ClassificationLoader(image_paths=valid_images,
                                         targets=valid_targets,
                                         resize=None,
                                         augmentations=valid_aug)

    # shuffle=False: predictions must stay aligned with valid_targets for AUC.
    valid_loader = torch.utils.data.DataLoader(valid_dataset,
                                               batch_size=valid_bs,
                                               shuffle=False,
                                               num_workers=4)

    model = SEResNext50_32x4d(pretrained="imagenet")
    model.to(device)

    optimizer = torch.optim.Adam(model.parameters(), lr=1e-4)
    # Maximize AUC; reduce LR when it plateaus.
    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer,
                                                           patience=3,
                                                           mode="max")

    # apex mixed-precision wrapping (opt_level "O1" = mixed precision).
    model, optimizer = amp.initialize(model,
                                      optimizer,
                                      opt_level="O1",
                                      verbosity=0)

    es = EarlyStopping(patience=5, mode="max")
    for epoch in range(epochs):
        training_loss = Engine.train(train_loader,
                                     model,
                                     optimizer,
                                     device,
                                     fp16=True)
        # Evaluate on the validation loader, not the training loader.
        predictions, valid_loss = Engine.evaluate(valid_loader, model,
                                                  optimizer, device)
        predictions = np.vstack(predictions).ravel()
        auc = metrics.roc_auc_score(valid_targets, predictions)
        scheduler.step(auc)
        print(f"epoch={epoch}, auc={auc}")
        es(auc, model, os.path.join(model_path, f"model{fold}.bin"))
        if es.early_stop:
            print("early stopping")
            break
Beispiel #18
0
def train(fold):
    """Train SEResNext on one CV fold with apex mixed precision.

    Fixes over the original:
    - ``valid_images`` was built from ``df_train`` instead of ``df_valid``,
      so validation ran on training images against validation labels.
    - apex ``opt_level='01'`` (zero-one) corrected to ``'O1'`` (letter O).
    - ``np.vstack((predictions).ravel())`` mis-placed the paren — ``.ravel()``
      was called on the raw prediction list; now ``np.vstack(...).ravel()``.
    - evaluation runs on ``valid_loader`` (was ``train_loader``) so AUC is
      scored on the validation split it is compared against.
    """
    # train image path (resized images)
    training_data_path = input("Enter the train data path (resized image): ")
    # csv data path that was created from folds
    fold_csv_path = input("Enter the train_fold.csv file path: ")
    df = pd.read_csv(fold_csv_path)
    model_path = "model/"
    device = "cuda"
    epochs = 30
    train_batch_size = 32
    valid_batch_size = 16
    # NOTE(review): mean[2]=0.225 matches this project's inference code, but
    # the standard ImageNet mean is (0.485, 0.456, 0.406) — confirm intent.
    mean = (0.485, 0.456, 0.225)
    standard_deviation = (0.229, 0.224, 0.225)

    df_train = df[df.kfold != fold].reset_index(drop=True)
    df_valid = df[df.kfold == fold].reset_index(drop=True)

    # normalize images
    train_aug = albumentations.Compose([
        albumentations.Normalize(mean,
                                 std=standard_deviation,
                                 max_pixel_value=255.0,
                                 always_apply=True)
    ])

    valid_aug = albumentations.Compose([
        albumentations.Normalize(mean,
                                 std=standard_deviation,
                                 max_pixel_value=255.0,
                                 always_apply=True)
    ])

    # train image mapping
    train_images = df_train.image_name.values.tolist()
    train_images = [
        os.path.join(training_data_path, i + ".jpg") for i in train_images
    ]
    train_targets = df_train.target.values

    # validation image mapping (must come from the validation split)
    valid_images = df_valid.image_name.values.tolist()
    valid_images = [
        os.path.join(training_data_path, i + ".jpg") for i in valid_images
    ]
    valid_targets = df_valid.target.values

    # create train dataset
    train_dataset = ClassificationLoader(image_paths=train_images,
                                         targets=train_targets,
                                         resize=None,
                                         augmentations=train_aug)
    # train loader
    train_loader = torch.utils.data.DataLoader(train_dataset,
                                               batch_size=train_batch_size,
                                               shuffle=True,
                                               num_workers=4)

    # create valid dataset
    valid_dataset = ClassificationLoader(image_paths=valid_images,
                                         targets=valid_targets,
                                         resize=None,
                                         augmentations=valid_aug)
    # validation data loader (shuffle=False keeps alignment with valid_targets)
    valid_loader = torch.utils.data.DataLoader(valid_dataset,
                                               batch_size=valid_batch_size,
                                               shuffle=False,
                                               num_workers=4)

    # import model
    model = SEResNext(pretrained='imagenet')
    model.to(device)

    optimizer = torch.optim.Adam(model.parameters(), lr=1e-4)
    # dynamic learning rate reducing based on validation measurements.
    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
        # https://pytorch.org/docs/master/optim.html#torch.optim.lr_scheduler.ReduceLROnPlateau
        optimizer,
        patience=4,
        mode='max',
    )
    # use apex for mixed precision training
    # amp: Automatic Mixed Precision; "O1" (letter O) = mixed precision
    model, optimizer = amp.initialize(model,
                                      optimizer,
                                      opt_level='O1',
                                      verbosity=0)
    # earlystopping
    es = EarlyStopping(patience=5, mode='max')
    # train loop: Engine handles per-batch loss and evaluation
    for epoch in range(epochs):
        training_loss = Engine.train(train_loader,
                                     model,
                                     optimizer,
                                     device,
                                     fp16=True)
        predictions, valid_loss = Engine.evaluate(valid_loader, model,
                                                  optimizer, device)
        predictions = np.vstack(predictions).ravel()
        auc = metrics.roc_auc_score(valid_targets, predictions)
        scheduler.step(auc)
        print(f"epoch = {epoch}, auc= {auc}")
        es(auc, model, os.path.join(model_path, f"model{fold}.bin"))

        if es.early_stop:
            print("Early Stopping")
            break
Beispiel #19
0
def train(fold):
    """Train one cross-validation fold of the melanoma classifier.

    Trains SEResNext50_32x4d on the rows of the fold CSV whose ``kfold``
    column differs from ``fold``, validates on the rows where it equals
    ``fold``, schedules the learning rate on validation AUC, and
    early-stops / checkpoints via ``EarlyStopping``.

    Args:
        fold: integer fold index used to split train/validation rows.
    """
    training_data_path = ''
    model_path = ""
    df = pd.read_csv("/.../train_folds.csv")
    device = "cuda"
    epochs = 50
    train_bs = 32  # train batch size
    valid_bs = 16

    # Normalization constants for ImageNet-pretrained backbones.
    mean = (0.485, 0.456, 0.406)
    std = (0.229, 0.224, 0.225)

    df_train = df[df.kfold != fold].reset_index(
        drop=True)  # drop=True discards the previous index entirely
    df_valid = df[df.kfold == fold].reset_index(drop=True)

    # Image augmentation pipelines (normalization only).
    train_aug = albumentations.Compose([
        albumentations.Normalize(mean,
                                 std,
                                 max_pixel_value=255.0,
                                 always_apply=True),
    ])

    valid_aug = albumentations.Compose([
        albumentations.Normalize(mean,
                                 std,
                                 max_pixel_value=255.0,
                                 always_apply=True),
    ])

    train_images = df_train.image_name.values.tolist()
    train_images = [
        os.path.join(training_data_path, i + '.jpg') for i in train_images
    ]
    train_targets = df_train.target.values

    valid_images = df_valid.image_name.values.tolist()
    valid_images = [
        os.path.join(training_data_path, i + '.jpg') for i in valid_images
    ]
    valid_targets = df_valid.target.values

    # FIX: keyword is `augmentations` (plural), matching ClassificationLoader's
    # signature as used everywhere else in this file.
    train_dataset = ClassificationLoader(image_paths=train_images,
                                         targets=train_targets,
                                         resize=None,
                                         augmentations=train_aug)

    train_loader = torch.utils.data.DataLoader(train_dataset,
                                               batch_size=train_bs,
                                               shuffle=True,
                                               num_workers=4)

    valid_dataset = ClassificationLoader(image_paths=valid_images,
                                         targets=valid_targets,
                                         resize=None,
                                         augmentations=valid_aug)

    # FIX: shuffle=False — predictions are compared element-wise against
    # valid_targets, so the validation order must be preserved.
    valid_loader = torch.utils.data.DataLoader(valid_dataset,
                                               batch_size=valid_bs,
                                               shuffle=False,
                                               num_workers=4)

    model = SEResNext50_32x4d(pretrained='imagenet')
    model.to(device)

    optimizer = torch.optim.Adam(model.parameters(), lr=1e-4)
    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(  # reduce learning rate if it plateaus at any level
        optimizer,
        patience=3,
        mode=
        "max"  # max because we'll be using the scheduler on AUC (area under ROC curve)
    )

    # apex is used for mixed precision training: trains faster with less memory.
    # FIX: opt_level is "O1" (letter O), not "01" (zero-one) — apex rejects "01".
    model, optimizer = amp.initialize(
        model,
        optimizer,
        opt_level="O1",
        verbosity=0)

    es = EarlyStopping(patience=5, mode="max")
    for epoch in range(epochs):
        training_loss = Engine.train(train_loader,
                                     model,
                                     optimizer,
                                     device,
                                     fp16=True)
        # FIX: evaluate on the VALIDATION loader, not the training loader —
        # the AUC below is computed against valid_targets.
        predictions, valid_loss = Engine.evaluate(valid_loader, model,
                                                  optimizer, device)
        predictions = np.vstack((predictions)).ravel()
        auc = metrics.roc_auc_score(
            valid_targets, predictions
        )  # this is why valid data must not be shuffled, as opposed to training data
        scheduler.step(auc)
        print("epoch={}, auc={}".format(epoch, auc))
        es(auc, model, os.path.join(model_path, f"model{fold}.bin"))
        if es.early_stop:
            print('early stopping')
            break
Beispiel #20
0
    mean = (0.485, 0.456, 0.406)
    std = (0.229, 0.224, 0.225)
    aug = albumentations.Compose([
        albumentations.Normalize(mean,
                                 std,
                                 max_pixel_value=255.0,
                                 always_apply=True)
    ])

    train_images, valid_images, train_targets, valid_targets = train_test_split(
        images, targets)

    train_dataset = ClassificationLoader(
        image_paths=train_images,
        targets=train_targets,
        resize=(128, 128),
        augmentations=aug,
    )

    train_loader = torch.utils.data.DataLoader(train_dataset,
                                               batch_size=16,
                                               shuffle=True,
                                               num_workers=4)

    valid_dataset = ClassificationLoader(
        image_paths=valid_images,
        targets=valid_targets,
        resize=(128, 128),
        augmentations=aug,
    )