def get_datasets(data):
    X_train, y_train, X_val, y_val = data
    datasets = {}
    datasets["train"] = MelanomaDataset(
        X_train, y_train, istrain=True, transforms=get_train_transforms()
    )
    datasets["valid"] = MelanomaDataset(
        X_val, y_val, istrain=False, transforms=get_valid_transforms()
    )
    return datasets
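The transform helpers used above are not shown in this snippet; a minimal torchvision sketch under that assumption (the exact augmentations are hypothetical, only the ImageNet normalization is standard):

import torchvision.transforms as T

def get_train_transforms():
    return T.Compose([
        T.RandomHorizontalFlip(p=0.5),
        T.RandomVerticalFlip(p=0.5),
        T.ToTensor(),
        T.Normalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)),
    ])

def get_valid_transforms():
    # validation stays deterministic: no augmentation, just tensor + normalize
    return T.Compose([
        T.ToTensor(),
        T.Normalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)),
    ])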
def GetDataLoader():

    df = pd.read_csv('train.csv')
    train_df , valid_df = get_train_val_split(df)

    if parser['augmentations']:
        train_transforms = transforms.Compose([
            transforms.ColorJitter(brightness=0.7, contrast=0.3),
            transforms.RandomRotation(degrees=75),
            transforms.RandomHorizontalFlip(p=0.6),
            transforms.RandomVerticalFlip(p=0.7),
            transforms.ToTensor(),
            transforms.Normalize(mean=(0.485, 0.456, 0.406),
                                 std=(0.229, 0.224, 0.225))])
    else:
        train_transforms = transforms.Compose([
            transforms.ToTensor(),
            transforms.Normalize(mean=(0.485, 0.456, 0.406),
                                 std=(0.229, 0.224, 0.225))])

    train_dataset = MelanomaDataset(df=train_df,
                                    path=parser['train_path'],
                                    transformations=train_transforms,
                                    is_train=True)
    trainloader = torch.utils.data.DataLoader(train_dataset,
                                              batch_size=parser['bs'],
                                              shuffle=True)

    valid_transforms = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize(mean=(0.485, 0.456, 0.406),
                             std=(0.229, 0.224, 0.225))])

    # validation rows also come from train.csv and carry labels,
    # hence is_train=True here as well
    valid_dataset = MelanomaDataset(df=valid_df,
                                    path=parser['train_path'],
                                    transformations=valid_transforms,
                                    is_train=True)

    validloader = torch.utils.data.DataLoader(valid_dataset,
                                              batch_size=parser['bs'],
                                              shuffle=False)

    return trainloader , validloader
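A hypothetical call site for GetDataLoader: `parser` is assumed to be a module-level dict-like config whose keys mirror the lookups inside the function.

# hypothetical config; keys match the lookups inside GetDataLoader
parser = {
    'augmentations': True,       # enable the ColorJitter/rotation/flip pipeline
    'train_path': 'data/train',  # image folder (illustrative path)
    'bs': 32,                    # batch size used by both loaders
}
trainloader, validloader = GetDataLoader()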
def GetPreds(model_path, img_path, backbone):

    df = pd.read_csv('test.csv')
    test_transforms = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize(mean=(0.485, 0.456, 0.406),
                             std=(0.229, 0.224, 0.225))
    ])

    test_data = MelanomaDataset(df=df,
                                path=img_path,
                                transformations=test_transforms,
                                is_train=False)
    test_loader = torch.utils.data.DataLoader(test_data, batch_size=16)

    net = MelanomaModel(backbone=backbone)
    net.load_state_dict(torch.load(model_path))
    if torch.cuda.is_available():
        net = net.cuda()  # the inputs are moved to the GPU below, so the model must be too
    net.eval()  # inference mode: disables dropout and batch-norm updates

    preds = []

    with torch.no_grad():
        for img in test_loader:
            if torch.cuda.is_available():
                img = img.cuda()
            logits = net(img)
            preds.extend(logits.cpu().numpy().tolist())

    preds = np.array(preds)
    preds = sigmoid(preds)

    return preds
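The `sigmoid` applied to the stacked logits is assumed to be defined elsewhere; a minimal NumPy version (scipy.special.expit is a numerically robust drop-in):

import numpy as np

def sigmoid(x):
    # maps raw logits to probabilities in (0, 1)
    return 1.0 / (1.0 + np.exp(-x))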
Example #4
def main():
    df, df_test, meta_features, n_meta_features, mel_idx = get_df(
        args.kernel_type, args.out_dim, args.data_dir_2020, args.data_dir_2019,
        args.data_dir_2018, args.use_meta)

    transforms_train, transforms_val = get_transforms(args.image_size)

    if args.DEBUG:
        df_test = df_test.sample(args.batch_size * 3)
    dataset_test = MelanomaDataset(df_test,
                                   'test',
                                   meta_features,
                                   transform=transforms_val)
    test_loader = torch.utils.data.DataLoader(dataset_test,
                                              batch_size=args.batch_size,
                                              num_workers=args.num_workers)

    # load model
    models = []
    for fold in range(1):  # only a single fold's model is loaded here

        if args.eval == 'best':
            model_file = os.path.join(
                args.model_dir, f'{args.kernel_type}_best_fold{fold}.pth')
        elif args.eval == 'best_20':
            model_file = os.path.join(
                args.model_dir, f'{args.kernel_type}_best_20_fold{fold}.pth')
        elif args.eval == 'final':
            model_file = os.path.join(
                args.model_dir, f'{args.kernel_type}_final_fold{fold}.pth')

        model = ModelClass(
            args.enet_type,
            n_meta_features=n_meta_features,
            n_meta_dim=[int(nd) for nd in args.n_meta_dim.split(',')],
            out_dim=args.out_dim,
            pretrained=True,
            # meta_model=args.meta_model
        )
        model = model.to(device)

        try:  # state dict saved from a single-GPU model
            model.load_state_dict(torch.load(model_file), strict=True)
        except Exception:  # state dict saved from a DataParallel model: strip 'module.' prefix
            state_dict = torch.load(model_file)
            state_dict = {
                k[7:] if k.startswith('module.') else k: state_dict[k]
                for k in state_dict.keys()
            }
            model.load_state_dict(state_dict, strict=True)

        if len(os.environ['CUDA_VISIBLE_DEVICES']) > 1:
            model = torch.nn.DataParallel(model)

        model.eval()
        models.append(model)

    # predict
    PROBS = []
    with torch.no_grad():
        for data in tqdm(test_loader):
            if args.use_meta:
                data, meta = data
                data, meta = data.to(device), meta.to(device)
                probs = torch.zeros((data.shape[0], args.out_dim)).to(device)
                for model in models:
                    for I in range(args.n_test):
                        l = model(get_trans(data, I), meta)
                        probs += l.softmax(1)
            else:
                data = data.to(device)
                probs = torch.zeros((data.shape[0], args.out_dim)).to(device)
                for model in models:
                    for I in range(args.n_test):
                        l = model(get_trans(data, I))
                        probs += l.softmax(1)

            probs /= args.n_test
            probs /= len(models)

            PROBS.append(probs.detach().cpu())

    PROBS = torch.cat(PROBS).numpy()

    # save csv
    df_test['target'] = PROBS[:, mel_idx]
    df_test['image_name'] = df_test['image']
    df_test[['image_name', 'target'
             ]].to_csv(os.path.join(args.sub_dir,
                                    f'sub_{args.kernel_type}_{args.eval}.csv'),
                       index=False)
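`get_trans` comes from the original kernel and is not shown here; with n_test up to 8 it is commonly the dihedral test-time-augmentation helper, roughly:

def get_trans(img, I):
    # I in [0, 8): the 8 flip/transpose variants of an NCHW image batch
    if I >= 4:
        img = img.transpose(2, 3)   # swap H and W
    if I % 4 == 1:
        img = img.flip(2)           # vertical flip
    elif I % 4 == 2:
        img = img.flip(3)           # horizontal flip
    elif I % 4 == 3:
        img = img.flip(2).flip(3)   # 180-degree rotation
    return img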
Example #5
def main():

    df, df_test, meta_features, n_meta_features, mel_idx = get_df(
        args.kernel_type,
        args.out_dim,
        args.data_dir,
        args.data_folder,
        args.use_meta
    )

    transforms_train, transforms_val = get_transforms(args.image_size)

    LOGITS = []
    PROBS = []
    dfs = []
    for fold in range(5):

        df_valid = df[df['fold'] == fold]
        if args.DEBUG:
            df_valid = pd.concat([
                df_valid[df_valid['target'] == mel_idx].sample(args.batch_size * 3),
                df_valid[df_valid['target'] != mel_idx].sample(args.batch_size * 3)
            ])

        dataset_valid = MelanomaDataset(df_valid, 'valid', meta_features, transform=transforms_val)
        valid_loader = torch.utils.data.DataLoader(dataset_valid, batch_size=args.batch_size, num_workers=args.num_workers)

        if args.eval == 'best':
            model_file = os.path.join(args.model_dir, f'{args.kernel_type}_best_fold{fold}.pth')
        elif args.eval == 'best_20':
            model_file = os.path.join(args.model_dir, f'{args.kernel_type}_best_20_fold{fold}.pth')
        elif args.eval == 'final':
            model_file = os.path.join(args.model_dir, f'{args.kernel_type}_final_fold{fold}.pth')

        model = ModelClass(
            args.enet_type,
            n_meta_features=n_meta_features,
            n_meta_dim=[int(nd) for nd in args.n_meta_dim.split(',')],
            out_dim=args.out_dim
        )
        model = model.to(device)

        try:  # state dict saved from a single-GPU model
            model.load_state_dict(torch.load(model_file), strict=True)
        except Exception:  # state dict saved from a DataParallel model: strip 'module.' prefix
            state_dict = torch.load(model_file)
            state_dict = {k[7:] if k.startswith('module.') else k: state_dict[k] for k in state_dict.keys()}
            model.load_state_dict(state_dict, strict=True)
        
        if len(os.environ['CUDA_VISIBLE_DEVICES']) > 1:
            model = torch.nn.DataParallel(model)

        model.eval()

        this_LOGITS, this_PROBS = val_epoch(model, valid_loader, mel_idx, is_ext=df_valid['is_ext'].values, n_test=8, get_output=True)
        LOGITS.append(this_LOGITS)
        PROBS.append(this_PROBS)
        dfs.append(df_valid)

    dfs = pd.concat(dfs).reset_index(drop=True)
    dfs['pred'] = np.concatenate(PROBS).squeeze()[:, mel_idx]

    auc_all_raw = roc_auc_score(dfs['target'] == mel_idx, dfs['pred'])

    dfs2 = dfs.copy()
    for i in range(5):
        dfs2.loc[dfs2['fold'] == i, 'pred'] = dfs2.loc[dfs2['fold'] == i, 'pred'].rank(pct=True)
    auc_all_rank = roc_auc_score(dfs2['target'] == mel_idx, dfs2['pred'])

    dfs3 = dfs[dfs.is_ext == 0].copy().reset_index(drop=True)
    auc_20_raw = roc_auc_score(dfs3['target'] == mel_idx, dfs3['pred'])

    for i in range(5):
        dfs3.loc[dfs3['fold'] == i, 'pred'] = dfs3.loc[dfs3['fold'] == i, 'pred'].rank(pct=True)
    auc_20_rank = roc_auc_score(dfs3['target'] == mel_idx, dfs3['pred'])

    content = f'Eval {args.eval}:\nauc_all_raw : {auc_all_raw:.5f}\nauc_all_rank : {auc_all_rank:.5f}\nauc_20_raw : {auc_20_raw:.5f}\nauc_20_rank : {auc_20_rank:.5f}\n'
    print(content)
    with open(os.path.join(args.log_dir, f'log_{args.kernel_type}.txt'), 'a') as appender:
        appender.write(content + '\n')

    np.save(os.path.join(args.oof_dir, f'{args.kernel_type}_{args.eval}_oof.npy'), dfs['pred'].values)
Example #6
def run(fold, df, meta_features, n_meta_features, transforms_train,
        transforms_val, mel_idx):
    if args.DEBUG:
        args.n_epochs = 5
        df_train = df[df['fold'] != fold].sample(args.batch_size * 5)
        df_valid = df[df['fold'] == fold].sample(args.batch_size * 5)
    else:
        df_train = df[df['fold'] != fold]
        df_valid = df[df['fold'] == fold]

    dataset_train = MelanomaDataset(df_train,
                                    'train',
                                    meta_features,
                                    transform=transforms_train)
    dataset_valid = MelanomaDataset(df_valid,
                                    'valid',
                                    meta_features,
                                    transform=transforms_val)
    train_loader = torch.utils.data.DataLoader(
        dataset_train,
        batch_size=args.batch_size,
        sampler=RandomSampler(dataset_train),
        num_workers=args.num_workers)  # random sampling without replacement
    valid_loader = torch.utils.data.DataLoader(dataset_valid,
                                               batch_size=args.batch_size,
                                               num_workers=args.num_workers)
    model = ModelClass(
        args.enet_type,
        n_meta_features=n_meta_features,
        n_meta_dim=[int(nd) for nd in args.n_meta_dim.split(',')],
        out_dim=args.out_dim,
        pretrained=True)
    if DP:
        model = apex.parallel.convert_syncbn_model(model)
    model = model.to(device)

    auc_max = 0.
    auc_20_max = 0.
    model_file = os.path.join(args.model_dir,
                              f'{args.kernel_type}_best_fold{fold}.pth')
    model_file2 = os.path.join(args.model_dir,
                               f'{args.kernel_type}_best_20_fold{fold}.pth')
    model_file3 = os.path.join(args.model_dir,
                               f'{args.kernel_type}_final_fold{fold}.pth')

    optimizer = optim.Adam(model.parameters(), lr=args.init_lr)
    if args.use_amp:
        model, optimizer = amp.initialize(model, optimizer, opt_level="O1")
    if DP:
        model = nn.DataParallel(model)
    #     scheduler_cosine = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, args.n_epochs - 1)
    scheduler_cosine = torch.optim.lr_scheduler.CosineAnnealingWarmRestarts(
        optimizer, args.n_epochs - 1)
    scheduler_warmup = GradualWarmupSchedulerV2(
        optimizer,
        multiplier=10,
        total_epoch=1,
        after_scheduler=scheduler_cosine)

    print(len(dataset_train), len(dataset_valid))
    for epoch in range(1, args.n_epochs + 1):
        print(time.ctime(), f'Epoch {epoch}', f'Fold {fold}')
        #         scheduler_warmup.step(epoch - 1)

        train_loss = train_epoch(model, train_loader, optimizer)
        val_loss, acc, auc, auc_20 = val_epoch(
            model, valid_loader, mel_idx, is_ext=df_valid['is_ext'].values)

        content = time.ctime() + ' ' + f'Epoch {epoch}, lr: {optimizer.param_groups[0]["lr"]:.7f}, train loss: {train_loss:.5f}, valid loss: {val_loss:.5f}, acc: {acc:.4f}, auc: {auc:.6f}, auc_20: {auc_20:.6f}.'
        print(content)
        with open(os.path.join(args.log_dir, f'log_{args.kernel_type}.txt'),
                  'a') as appender:
            appender.write(content + '\n')

        scheduler_warmup.step()
        if epoch == 2: scheduler_warmup.step()  # bug workaround

        if auc > auc_max:
            print('auc_max ({:.6f} --> {:.6f}). Saving model ...'.format(
                auc_max, auc))
            torch.save(model.state_dict(), model_file)
            auc_max = auc
        if auc_20 > auc_20_max:
            print('auc_20_max ({:.6f} --> {:.6f}). Saving model ...'.format(
                auc_20_max, auc_20))
            torch.save(model.state_dict(), model_file2)
            auc_20_max = auc_20

    torch.save(model.state_dict(), model_file3)
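The `if epoch == 2: scheduler_warmup.step()` line above works around a known off-by-one in GradualWarmupScheduler. A sketch of a similar one-epoch-warmup-then-cosine schedule built from stock PyTorch (>= 1.10) schedulers, avoiding the workaround; GradualWarmupScheduler with multiplier=10 peaks at 10x the base lr, so here the optimizer starts at the peak lr and is warmed up from a tenth of it:

optimizer = optim.Adam(model.parameters(), lr=args.init_lr * 10)
warmup = torch.optim.lr_scheduler.LinearLR(optimizer, start_factor=0.1, total_iters=1)
cosine = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=args.n_epochs - 1)
scheduler = torch.optim.lr_scheduler.SequentialLR(optimizer, [warmup, cosine], milestones=[1])
# call scheduler.step() once per epoch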
def main(model_type='resnet', n_epochs=20, lr=0.0005, batch_size=32):
    """ The main function. """

    # set file paths
    train_img_path = '/Users/emmarydholm/Documents/code/melanoma_classification/data_added_melanoma/train/train_resized'  # path to resized train images
    test_img_path = '/Users/emmarydholm/Documents/code/melanoma_classification/data_added_melanoma/test/test_resized'  # path to resized test images
    data_train = pd.read_csv('data/train_processed.csv')  # processed csv file for train data
    data_test = pd.read_csv('data/test_processed.csv')  # processed csv file for test data

    #split data_train into train and validation
    n_data_train = len(data_train)
    split = int(0.2 * n_data_train)
    data_train, data_valid = data_train.iloc[split:], data_train.iloc[0:split]

    # transformations for validation and test data
    transform_valid = Compose([
        CenterCrop(224),  # crop out the center; resulting image shape is 224x224
        ToTensor(),
        Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
    ])

    #augmentations for the training data
    transform_train = Compose([
        CenterCrop(224),
        RandomPerspective(distortion_scale=0.5, p=0.5, interpolation=3,
                          fill=0),
        RandomVerticalFlip(p=0.5),
        RandomHorizontalFlip(p=0.5),
        ToTensor(),
        Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
    ])

    #create the datasets
    dataset_train = MelanomaDataset(data_train,
                                    train_img_path,
                                    transform=transform_train)
    dataset_valid = MelanomaDataset(data_valid,
                                    train_img_path,
                                    transform=transform_valid)
    dataset_test = MelanomaTestDataset(data_test,
                                       test_img_path,
                                       transform=transform_valid)

    #create the batches with dataloader
    training_loader = DataLoader(dataset_train,
                                 batch_size=batch_size,
                                 shuffle=True)
    validation_loader = DataLoader(dataset_valid,
                                   batch_size=batch_size,
                                   shuffle=False)  # no need to shuffle validation data
    #test_loader = DataLoader(dataset_test, batch_size=32, shuffle=False)

    print('There are', len(dataset_train), 'images in the train set and',
          len(dataset_valid), 'in the dev set.')

    #define device
    if torch.cuda.is_available():
        device = 'cuda'
    else:
        device = 'cpu'

    # define model and freeze the deepest layers
    if model_type == 'resnet':
        model = ResnetModel(9)
        no_train_layers = [
            model.cnn.layer1, model.cnn.layer2, model.cnn.layer3
        ]
        for layer in no_train_layers:
            for param in layer.parameters():  # freeze the parameters, not the child modules
                param.requires_grad = False

    elif model_type == 'efficientnet':
        model = EfficientNetModel(9)
        model.cnn._conv_stem.requires_grad_(False)

        no_train_layers = model.cnn._blocks[:28]
        for layer in no_train_layers:
            layer.requires_grad_(False)  # recursively freezes every parameter in the block

    model = model.to(device)

    #define loss function
    loss_function = torch.nn.BCEWithLogitsLoss()

    #define optimizer
    optimizer = optim.Adam(model.parameters(), lr=lr)

    # define scheduler; mode='max' because it is stepped with the validation AUC
    scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer,
                                                     mode='max',
                                                     factor=0.5,
                                                     patience=1)

    train_loss = []
    validation_loss = []
    train_auc = []
    val_auc = []
    best_auc = 0.0

    #training loop
    for i in range(n_epochs):
        t1, v1, t_auc, v_auc = train_epoch(training_loader, validation_loader,
                                           model, loss_function, optimizer,
                                           device)

        print(f"\r Epoch {i+1}: Training loss = {t1}, Validation loss = {v1}, \
             \n Train auc = {t_auc},  Validation_auc = {v_auc}")
        print('lr = ', optimizer.param_groups[0]['lr'])

        train_loss.append(t1)
        validation_loss.append(v1)
        train_auc.append(t_auc)
        val_auc.append(v_auc)

        scheduler.step(v_auc)

        # save best model
        if v_auc > best_auc:
            torch.save(model, 'best_model.pt')
            best_auc = v_auc
            print('model saved')

    #plot the result
    epochs = np.arange(n_epochs)
    fig, ax = plt.subplots()
    ax.set_title('Training and Validation losses')
    ax.plot(epochs, train_loss, label='Train')
    ax.plot(epochs, validation_loss, label='Dev')
    plt.legend()

    fig, ax = plt.subplots()
    ax.set_title('Training and Validation ROC AUC')
    ax.plot(epochs, train_auc, label='Train')
    ax.plot(epochs, val_auc, label='Dev')
    plt.legend()
    plt.show()
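Note that `torch.save(model, ...)` above pickles the entire module, which ties the checkpoint to the exact class definitions. A sketch of the more portable state_dict pattern, using this example's ResnetModel:

torch.save(model.state_dict(), 'best_model.pt')   # save only the weights
# ...later: rebuild the architecture, then load the weights into it
model = ResnetModel(9)
model.load_state_dict(torch.load('best_model.pt', map_location=device))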
Example #8
def predict_image(image_path):

    OUTPUTS = []
    n_test = 8
    transforms_train, transforms_val = get_transforms(config.image_size)

    dataset_test = MelanomaDataset(None,
                                   'test',
                                   None,
                                   transform=transforms_val,
                                   image_path=image_path)
    test_loader = torch.utils.data.DataLoader(dataset_test,
                                              batch_size=1,
                                              num_workers=0)

    for fold in range(5):
        model_file = os.path.join(
            config.model_dir, f'{config.kernel_type}_best_o_fold{fold}.pth')

        ModelClass = Effnet_Melanoma
        model = ModelClass(config.enet_type, out_dim=config.out_dim)
        model = model.to(config.device)

        try:  # single GPU model_file
            model.load_state_dict(torch.load(model_file,
                                             map_location=config.device),
                                  strict=True)
        except Exception:  # multi GPU model_file
            state_dict = torch.load(model_file, map_location=config.device)
            state_dict = {
                k[7:] if k.startswith('module.') else k: state_dict[k]
                for k in state_dict.keys()
            }
            model.load_state_dict(state_dict, strict=True)

        model.eval()

        LOGITS = []
        PROBS = []

        with torch.no_grad():
            for data in tqdm(test_loader):

                if config.use_meta:
                    data, meta = data
                    data, meta = data.to(config.device), meta.to(config.device)
                    logits = torch.zeros(
                        (data.shape[0], config.out_dim)).to(config.device)
                    probs = torch.zeros(
                        (data.shape[0], config.out_dim)).to(config.device)
                    for I in range(n_test):
                        l = model(get_trans(data, I), meta)
                        logits += l
                        probs += l.softmax(1)
                else:
                    data = data.to(config.device)
                    logits = torch.zeros(
                        (data.shape[0], config.out_dim)).to(config.device)
                    probs = torch.zeros(
                        (data.shape[0], config.out_dim)).to(config.device)
                    for I in range(n_test):
                        l = model(get_trans(data, I))
                        logits += l
                        probs += l.softmax(1)
                logits /= n_test
                probs /= n_test

                LOGITS.append(logits.detach().cpu())
                PROBS.append(probs.detach().cpu())

        LOGITS = torch.cat(LOGITS).numpy()
        PROBS = torch.cat(PROBS).numpy()

        OUTPUTS.append(PROBS[:, config.mel_idx])

    # If you are predicting on your own moles, you don't need to rank the probability
    pred = np.zeros(OUTPUTS[0].shape[0])
    for probs in OUTPUTS:
        pred += probs
    pred /= len(OUTPUTS)

    return round(pred[0], 8)
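A hypothetical call (the image path is illustrative):

prob = predict_image('data/my_mole.jpg')
print(f'ensembled melanoma probability: {prob}')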
Example #9
def main():

    df, df_test, meta_features, n_meta_features, mel_idx = get_df(
        args.kernel_type, args.out_dim, args.data_dir, args.data_folder,
        args.use_meta)

    transforms_train, transforms_val = get_transforms(args.image_size)

    if args.DEBUG:
        df_test = df_test.sample(args.batch_size * 3)
    dataset_test = MelanomaDataset(df_test,
                                   'test',
                                   meta_features,
                                   transform=transforms_val)
    test_loader = torch.utils.data.DataLoader(dataset_test,
                                              batch_size=args.batch_size,
                                              num_workers=args.num_workers)

    print(f'\nPredicting test set using {args.enet_type} ...')

    OUTPUTS = []
    for fold in range(5):

        if args.eval == 'best':
            model_file = os.path.join(
                args.model_dir, f'{args.kernel_type}_best_fold{fold}.pth')
        elif args.eval == 'best_20':
            model_file = os.path.join(
                args.model_dir, f'{args.kernel_type}_best_20_fold{fold}.pth')
        elif args.eval == 'final':
            model_file = os.path.join(
                args.model_dir, f'{args.kernel_type}_final_fold{fold}.pth')

        model = ModelClass(
            args.enet_type,
            n_meta_features=n_meta_features,
            n_meta_dim=[int(nd) for nd in args.n_meta_dim.split(',')],
            out_dim=args.out_dim)
        model = model.to(device)

        try:  # single GPU model_file
            model.load_state_dict(torch.load(model_file), strict=True)
        except Exception:  # multi GPU model_file
            state_dict = torch.load(model_file)
            state_dict = {
                k[7:] if k.startswith('module.') else k: state_dict[k]
                for k in state_dict.keys()
            }
            model.load_state_dict(state_dict, strict=True)

        if len(os.environ['CUDA_VISIBLE_DEVICES']) > 1:
            model = torch.nn.DataParallel(model)

        model.eval()

        PROBS = []

        with torch.no_grad():
            for data in tqdm(test_loader):

                if args.use_meta:
                    data, meta = data
                    data, meta = data.to(device), meta.to(device)
                    probs = torch.zeros(
                        (data.shape[0], args.out_dim)).to(device)
                    for I in range(args.n_test):
                        l = model(get_trans(data, I), meta)
                        probs += l.softmax(1)
                else:
                    data = data.to(device)
                    probs = torch.zeros(
                        (data.shape[0], args.out_dim)).to(device)
                    for I in range(args.n_test):
                        l = model(get_trans(data, I))
                        probs += l.softmax(1)

                probs /= args.n_test

                PROBS.append(probs.detach().cpu())

        PROBS = torch.cat(PROBS).numpy()
        OUTPUTS.append(PROBS[:, mel_idx])

    # Rank per fold (if you are predicting on your own moles, you don't need to rank the probability)
    pred = np.zeros(OUTPUTS[0].shape[0])
    for probs in OUTPUTS:
        pred += pd.Series(probs).rank(pct=True).values
    pred /= len(OUTPUTS)

    df_test['target'] = pred
    df_test[['image_name', 'target'
             ]].to_csv(os.path.join(args.sub_dir,
                                    f'sub_{args.kernel_type}_{args.eval}.csv'),
                       index=False)
    print('\nSaved submission in -> ./subs')
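Why rank(pct=True) before averaging: percentile ranks discard each fold's calibration and keep only its ordering, so folds with shifted probability scales contribute equally. A tiny demonstration:

import pandas as pd

fold_a = pd.Series([0.10, 0.90, 0.30, 0.70])
fold_b = pd.Series([0.55, 0.99, 0.60, 0.95])  # same ordering, shifted scale
print(fold_a.rank(pct=True).values)  # [0.25 1.   0.5  0.75]
print(fold_b.rank(pct=True).values)  # [0.25 1.   0.5  0.75] -- identical after ranking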
Example #10
def main():

    df, df_test, meta_features, n_meta_features, mel_idx = get_df(
        args.kernel_type,
        args.out_dim,
        args.data_dir,
        args.data_folder,
        args.use_meta
    )

    transforms_train, transforms_val = get_transforms(args.image_size)

    if args.DEBUG:
        df_test = df_test.sample(args.batch_size * 3)
    dataset_test = MelanomaDataset(df_test, 'test', meta_features, transform=transforms_val)
    test_loader = torch.utils.data.DataLoader(dataset_test, batch_size=args.batch_size, num_workers=args.num_workers)

    PROBS = []  # NOTE: shadowed inside the fold loop below, so only the last fold's predictions survive
    for fold in range(5):  # use the model trained on each fold

        if args.eval == 'best':  # default
            model_file = os.path.join(args.model_dir, f'{args.kernel_type}_best_fold{fold}.pth')
        elif args.eval == 'best_20':
            model_file = os.path.join(args.model_dir, f'{args.kernel_type}_best_20_fold{fold}.pth')
        elif args.eval == 'final':
            model_file = os.path.join(args.model_dir, f'{args.kernel_type}_final_fold{fold}.pth')

        model = ModelClass(
            args.enet_type,
            n_meta_features=n_meta_features,
            n_meta_dim=[int(nd) for nd in args.n_meta_dim.split(',')],
            out_dim=args.out_dim
        )
        model = model.to(device)

        try:  # single GPU model_file
            model.load_state_dict(torch.load(model_file), strict=True)
        except Exception:  # multi GPU model_file
            state_dict = torch.load(model_file)
            state_dict = {k[7:] if k.startswith('module.') else k: state_dict[k] for k in state_dict.keys()}
            model.load_state_dict(state_dict, strict=True)
        
        if len(os.environ['CUDA_VISIBLE_DEVICES']) > 1:
            model = torch.nn.DataParallel(model)

        model.eval()

        PROBS = []  # this re-initialization shadows the outer PROBS defined before the fold loop
        with torch.no_grad():
            for data in tqdm(test_loader):

                if args.use_meta:
                    data, meta = data
                    data, meta = data.to(device), meta.to(device)
                    probs = torch.zeros((data.shape[0], args.out_dim)).to(device) # batch x label 
                    for I in range(args.n_test):  # TTA: flipping/transposing the images 8 ways
                        l = model(get_trans(data, I), meta)
                        probs += l.softmax(1)
                else:   
                    data = data.to(device)
                    probs = torch.zeros((data.shape[0], args.out_dim)).to(device)
                    for I in range(args.n_test):
                        l = model(get_trans(data, I))
                        probs += l.softmax(1)

                probs /= args.n_test # ! average over all the flips

                PROBS.append(probs.detach().cpu()) ## append prediction for this batch

        PROBS = torch.cat(PROBS).numpy() ## put in numpy format, PROBS is total_obs_size x num_labels

    df_test['target'] = PROBS[:, mel_idx] # ! takes @mel_idx column
    df_test[['image_name', 'target']].to_csv(os.path.join(args.sub_dir, f'sub_{args.kernel_type}_{args.eval}.csv'), index=False)
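As the inline notes point out, PROBS is re-created inside the fold loop, so only the last fold's predictions reach the submission file. A sketch of the fix, mirroring the previous example: accumulate each fold's melanoma column and average after the loop.

OUTPUTS = []
# ...inside the fold loop, after PROBS = torch.cat(PROBS).numpy():
#     OUTPUTS.append(PROBS[:, mel_idx])
# ...after the loop:
df_test['target'] = np.mean(OUTPUTS, axis=0)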
def run(fold, df, meta_features, n_meta_features, transforms_train,
        transforms_val, mel_idx):
    # k-fold scheme: the fold whose id equals `fold` is used for
    # validation; the remaining folds are used for training
    if args.DEBUG:
        args.n_epochs = 5
        df_train = df[df['fold'] != fold].sample(args.batch_size * 5)
        df_valid = df[df['fold'] == fold].sample(args.batch_size * 5)
    else:
        df_train = df[df['fold'] != fold]
        df_valid = df[df['fold'] == fold]

    # instantiate the dataset objects (training + validation)
    dataset_train = MelanomaDataset(df_train,
                                    'train',
                                    meta_features,
                                    transform=transforms_train)
    dataset_valid = MelanomaDataset(df_valid,
                                    'valid',
                                    meta_features,
                                    transform=transforms_val)
    # instantiate the data loaders (training + validation)
    train_loader = torch.utils.data.DataLoader(
        dataset_train,
        batch_size=args.batch_size,
        sampler=RandomSampler(dataset_train),
        num_workers=args.num_workers)
    valid_loader = torch.utils.data.DataLoader(dataset_valid,
                                               batch_size=args.batch_size,
                                               num_workers=args.num_workers)

    # instantiate the model
    model = ModelClass(
        args.enet_type,  # e.g. a ResNet or EfficientNet variant
        n_meta_features=n_meta_features,  # e.g. ['sex', 'age_approx', 'n_images', 'image_size']
        n_meta_dim=[int(nd) for nd in args.n_meta_dim.split(',')],
        out_dim=args.out_dim,
        pretrained=True)
    if DP:
        model = apex.parallel.convert_syncbn_model(model)
    model = model.to(device)

    # trackers for the best validation metrics
    auc_max = 0.
    auc_20_max = 0.
    # define the files in which the model weights are stored
    model_file = os.path.join(args.model_dir,
                              f'{args.kernel_type}_best_fold{fold}.pth')
    model_file2 = os.path.join(args.model_dir,
                               f'{args.kernel_type}_best_20_fold{fold}.pth')
    model_file3 = os.path.join(args.model_dir,
                               f'{args.kernel_type}_final_fold{fold}.pth')

    optimizer = optim.Adam(model.parameters(), lr=args.init_lr)
    if args.use_amp:
        model, optimizer = amp.initialize(model, optimizer, opt_level="O1")
    if DP:
        model = nn.DataParallel(model)


    #     scheduler_cosine = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, args.n_epochs - 1)
    scheduler_cosine = torch.optim.lr_scheduler.CosineAnnealingWarmRestarts(
        optimizer, args.n_epochs - 1)
    scheduler_warmup = GradualWarmupSchedulerV2(
        optimizer,
        multiplier=10,
        total_epoch=1,
        after_scheduler=scheduler_cosine)

    print(len(dataset_train), len(dataset_valid))

    for epoch in range(1, args.n_epochs + 1):
        print(time.ctime(), f'Fold {fold}, Epoch {epoch}')
        #         scheduler_warmup.step(epoch - 1)

        # train loss
        train_loss = train_epoch(model, train_loader, optimizer)
        # validation loss
        val_loss, acc, auc, auc_20 = val_epoch(
            model, valid_loader, mel_idx, is_ext=df_valid['is_ext'].values)

        content = time.ctime() + ' ' + f'Fold {fold}, Epoch {epoch}, lr: {optimizer.param_groups[0]["lr"]:.7f}, train loss: {train_loss:.5f}, valid loss: {val_loss:.5f}, acc: {acc:.4f}, auc: {auc:.6f}, auc_20: {auc_20:.6f}.'
        print(content)
        with open(os.path.join(args.log_dir, f'log_{args.kernel_type}.txt'),
                  'a') as appender:
            appender.write(content + '\n')

        scheduler_warmup.step()
        if epoch == 2: scheduler_warmup.step()  # bug workaround

        # save the model weights to the corresponding files
        if auc > auc_max:
            print('auc_max ({:.6f} --> {:.6f}). Saving model ...'.format(
                auc_max, auc))
            torch.save(model.state_dict(), model_file)
            auc_max = auc
        if auc_20 > auc_20_max:
            print('auc_20_max ({:.6f} --> {:.6f}). Saving model ...'.format(
                auc_20_max, auc_20))
            torch.save(model.state_dict(), model_file2)
            auc_20_max = auc_20
    # save the final-epoch model weights to model_file3
    torch.save(model.state_dict(), model_file3)

# note: this "test" transform keeps random crops/flips, i.e. it is intended for
# test-time augmentation rather than a single deterministic pass
test_transform = A.Compose([
    A.JpegCompression(p=0.5),
    A.RandomSizedCrop(min_max_height=(int(resolution * 0.9), int(resolution * 1.1)),
                      height=resolution, width=resolution, p=1.0),
    A.HorizontalFlip(p=0.5),
    A.VerticalFlip(p=0.5),
    A.Transpose(p=0.5),
    A.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ToTensorV2(),
], p=1.0)

t_dataset = MelanomaDataset(df=df, imfolder=test,
                            train=False, transforms=test_transform, meta_features=meta_features)

print('Length of test set is {}'.format(len(t_dataset)))

testloader = DataLoader(t_dataset, batch_size=8, shuffle=False, num_workers=8)

"""Testing"""
# model = ResNetModel()()
# model = EfficientModel()
# model = EfficientModel(n_meta_features=len(meta_features))
model = Model(arch='efficientnet-b1')
# model.load_state_dict(torch.load("../checkpoint/fold_1/efficient_256/efficientb0_256_14_0.9212.pth", map_location=torch.device(device)))
model.load_state_dict(torch.load("..//checkpoint/fold_1/efficient_320/efficientb1_320_14_0.9293.pth", map_location=torch.device(device)))
model.to(device)

model.eval()
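Because test_transform above keeps random crops and flips, each pass over the loader yields a different augmented view. A minimal test-time-augmentation sketch under two assumptions: the model emits one melanoma logit per image, and the loader yields plain image tensors (adapt if it returns (image, meta) tuples):

n_tta = 4
preds = torch.zeros(len(t_dataset))
with torch.no_grad():
    for _ in range(n_tta):  # each pass sees different random augmentations
        offset = 0
        for x in testloader:
            x = x.to(device)
            out = model(x).squeeze(1)  # assumed output shape: [batch, 1]
            preds[offset:offset + x.shape[0]] += torch.sigmoid(out).cpu()
            offset += x.shape[0]
preds /= n_tta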
def run(fold, df, meta_features, n_meta_features, transforms_train, transforms_val, mel_idx):
    if args.DEBUG:
        args.n_epochs = 5
        df_train = df[df['fold'] != fold].sample(args.batch_size * 5)
        df_valid = df[df['fold'] == fold].sample(args.batch_size * 5)
    else:
        df_train = df[df['fold'] != fold]
        df_valid = df[df['fold'] == fold]

    dataset_train = MelanomaDataset(df_train, 'train', meta_features, transform=transforms_train)
    dataset_valid = MelanomaDataset(df_valid, 'valid', meta_features, transform=transforms_val)
    train_loader = torch.utils.data.DataLoader(dataset_train, batch_size=args.batch_size,
                                               sampler=RandomSampler(dataset_train), num_workers=args.num_workers)
    valid_loader = torch.utils.data.DataLoader(dataset_valid, batch_size=args.batch_size, num_workers=args.num_workers)

    model = ModelClass()
    model = model.to(device)

    auc_max = 0.
    auc_20_max = 0.
    model_file = os.path.join(args.model_dir, f'{args.kernel_type}_best_fold{fold}.pth')
    model_file2 = os.path.join(args.model_dir, f'{args.kernel_type}_best_20_fold{fold}.pth')
    model_file3 = os.path.join(args.model_dir, f'{args.kernel_type}_final_fold{fold}.pth')

    optimizer = optim.AdamW(model.parameters(), lr=args.init_lr, weight_decay=args.weight_decay)
    scheduler_cosine = torch.optim.lr_scheduler.CosineAnnealingWarmRestarts(optimizer, args.n_epochs - 1)
    scheduler_warmup = GradualWarmupSchedulerV2(optimizer, multiplier=10, total_epoch=1,
                                                after_scheduler=scheduler_cosine)

    print(len(dataset_train), len(dataset_valid))

    print('Continuing with model from ' + model_file)
    try:
        checkpoint = torch.load(model_file)
        model.load_state_dict(checkpoint, strict=False)
    except Exception as e:
        print('could not load checkpoint:', e)

    for epoch in range(1, args.n_epochs + 1):
        print(time.ctime(), f'Fold {fold}, Epoch {epoch}')
        #         scheduler_warmup.step(epoch - 1)

        train_loss = train_epoch(model, train_loader, optimizer)
        val_loss, acc, auc, auc_20 = val_epoch(model, valid_loader, mel_idx, is_ext=df_valid['is_ext'].values)

        content = time.ctime() + ' ' + f'Fold {fold}, Epoch {epoch}, lr: {optimizer.param_groups[0]["lr"]:.7f}, train loss: {train_loss:.5f}, valid loss: {(val_loss):.5f}, acc: {(acc):.4f}, auc: {(auc):.6f}, auc_20: {(auc_20):.6f}.'
        print(content)
        with open(os.path.join(args.log_dir, f'log_{args.kernel_type}.txt'), 'a') as appender:
            appender.write(content + '\n')

        scheduler_warmup.step()
        if epoch == 2: scheduler_warmup.step()  # bug workaround

        if auc > auc_max:
            print('auc_max ({:.6f} --> {:.6f}). Saving model ...'.format(auc_max, auc))
            torch.save(model.state_dict(), model_file)
            auc_max = auc
        if auc_20 > auc_20_max:
            print('auc_20_max ({:.6f} --> {:.6f}). Saving model ...'.format(auc_20_max, auc_20))
            torch.save(model.state_dict(), model_file2)
            auc_20_max = auc_20
        torch.save({
            'net': model.state_dict(),
            'optimizer': optimizer.state_dict(),
        }, model_file3)
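The final checkpoint above is a dict, so resuming should unpack it rather than passing it straight to load_state_dict (the load earlier in this function targets the plain best-model file instead):

ckpt = torch.load(model_file3, map_location=device)
model.load_state_dict(ckpt['net'])
optimizer.load_state_dict(ckpt['optimizer'])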
Example #14


df_train = df[df['fold'] != fold]
df_valid = df[df['fold'] == fold]

# number of samples per class in the training fold
class_sample_count = np.array([len(np.where(df_train["target"] == t)[0]) for t in np.unique(df_train["target"])])
print(class_sample_count)

# weight = 1. / class_sample_count
# samples_weight = np.array([weight[t] for t in df_train["target"]])
# samples_weight = torch.from_numpy(samples_weight)
# sampler = WeightedRandomSampler(samples_weight.type('torch.DoubleTensor'), len(samples_weight))
# print(samples_weight)

t_dataset = MelanomaDataset(df=df_train, imfolder=train,
                            train=True, transforms=train_transform, meta_features=meta_features)
v_dataset = MelanomaDataset(df=df_valid, imfolder=train,
                            train=True, transforms=valid_transform, meta_features=meta_features)

print('Length of training and validation set are {} {}'.format(
    len(t_dataset), len(v_dataset)))

trainloader = DataLoader(t_dataset, batch_size=32, shuffle=True, num_workers=8)
validloader = DataLoader(v_dataset, batch_size=32, shuffle=False, num_workers=8)
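Wiring in the commented-out WeightedRandomSampler above (a sketch, assuming integer class targets 0..K-1): inverse class-frequency weights give each class equal expected representation per epoch.

from torch.utils.data import WeightedRandomSampler

weight = 1.0 / class_sample_count
samples_weight = torch.from_numpy(np.array([weight[int(t)] for t in df_train["target"]])).double()
sampler = WeightedRandomSampler(samples_weight, len(samples_weight))
# a sampler replaces shuffle=True
trainloader = DataLoader(t_dataset, batch_size=32, sampler=sampler, num_workers=8)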

""" Training """
# model = ResNetModel()
# model = EfficientModelwithoutMeta()
model = Model(arch='efficientnet-b2')
# model = EfficientModel(n_meta_features=len(meta_features))
model.to(device)
Example #15
# generate the meta data features for the train and tests sets
train_feat, test_feat = gen_train_test_feat(train_csv, test_csv)

# generate stratified splits using fixed random seed
skf = StratifiedKFold(n_splits=args.folds, shuffle=True, random_state=408)

# StratifiedKFold only uses y for stratification, so X can be a placeholder
dummy_X = np.zeros(len(train_csv))
train_y = train_csv['target']

# if this isn't a cropped experiment we can create a single test loader
# but in the case of a cropped experiment the test set changes for each fold
if not args.cropped:
    test_dset = MelanomaDataset(test_csv,
                                test_imgs,
                                test_feat,
                                train=False,
                                labels=False,
                                transform=test_transform,
                                chip=args.chipped)
    test_loader = DataLoader(dataset=test_dset,
                             batch_size=args.test_batch_size,
                             shuffle=False,
                             num_workers=args.num_workers)

# create arrays for final test set predictions and best fold performances
final_preds = torch.zeros(len(test_csv))
fold_aucs = []

for fold, (train_idx, valid_idx) in enumerate(skf.split(X=dummy_X, y=train_y), 1):