Example #1
def main():
    df, _ = get_df(args.kernel_type, args.out_dim, args.data_dir)

    transforms_train, transforms_val = get_transforms(args.image_size)

    folds = [int(i) for i in args.train_fold.split(',')]
    run(folds, df, transforms_train, transforms_val)
Example #2
def main():

    df, df_test, meta_features, n_meta_features, mel_idx = get_df(
        args.kernel_type, args.out_dim, args.data_dir, args.data_folder,
        args.use_meta)

    transforms_train, transforms_val = get_transforms(args.image_size)

    folds = [int(i) for i in args.fold.split(',')]
    for fold in folds:
        run(fold, df, meta_features, n_meta_features, transforms_train,
            transforms_val, mel_idx)
Example #3
def main():
    # Retrieve the data
    df, df_test, meta_features, n_meta_features, mel_idx = get_df(
        args.kernel_type, args.out_dim, args.data_dir, args.data_folder,
        args.use_meta)
    # Get the augmentations we need to apply to the images
    transforms_train, transforms_val = get_transforms(args.image_size)

    # Train and validate our neural network using the K-fold method
    folds = [int(i) for i in args.fold.split(',')]
    for fold in folds:
        run(fold, df, meta_features, n_meta_features, transforms_train,
            transforms_val, mel_idx)
Example #4
def main():
    df, df_test, meta_features, n_meta_features, mel_idx = get_df(
        args.kernel_type, args.out_dim, args.data_dir_2020, args.data_dir_2019,
        args.data_dir_2018, args.use_meta)

    transforms_train, transforms_val = get_transforms(args.image_size)

    if args.DEBUG:
        df_test = df_test.sample(args.batch_size * 3)
    dataset_test = MelanomaDataset(df_test,
                                   'test',
                                   meta_features,
                                   transform=transforms_val)
    test_loader = torch.utils.data.DataLoader(dataset_test,
                                              batch_size=args.batch_size,
                                              num_workers=args.num_workers)

    # load model
    models = []
    for fold in range(1):

        if args.eval == 'best':
            model_file = os.path.join(
                args.model_dir, f'{args.kernel_type}_best_fold{fold}.pth')
        elif args.eval == 'best_20':
            model_file = os.path.join(
                args.model_dir, f'{args.kernel_type}_best_20_fold{fold}.pth')
        elif args.eval == 'final':
            model_file = os.path.join(
                args.model_dir, f'{args.kernel_type}_final_fold{fold}.pth')

        model = ModelClass(
            args.enet_type,
            n_meta_features=n_meta_features,
            n_meta_dim=[int(nd) for nd in args.n_meta_dim.split(',')],
            out_dim=args.out_dim,
            pretrained=True,
            # meta_model=args.meta_model
        )
        model = model.to(device)

        try:  # single GPU model_file
            model.load_state_dict(torch.load(model_file), strict=True)
        except:  # multi GPU model_file
            state_dict = torch.load(model_file)
            state_dict = {
                k[7:] if k.startswith('module.') else k: state_dict[k]
                for k in state_dict.keys()
            }
            model.load_state_dict(state_dict, strict=True)

        if len(os.environ['CUDA_VISIBLE_DEVICES']) > 1:
            model = torch.nn.DataParallel(model)

        model.eval()
        models.append(model)

    # predict
    PROBS = []
    with torch.no_grad():
        for (data) in tqdm(test_loader):
            if args.use_meta:
                data, meta = data
                data, meta = data.to(device), meta.to(device)
                probs = torch.zeros((data.shape[0], args.out_dim)).to(device)
                for model in models:
                    for I in range(args.n_test):
                        l = model(get_trans(data, I), meta)
                        probs += l.softmax(1)
            else:
                data = data.to(device)
                probs = torch.zeros((data.shape[0], args.out_dim)).to(device)
                for model in models:
                    for I in range(args.n_test):
                        l = model(get_trans(data, I))
                        probs += l.softmax(1)

            probs /= args.n_test
            probs /= len(models)

            PROBS.append(probs.detach().cpu())

    PROBS = torch.cat(PROBS).numpy()

    # save csv
    df_test['target'] = PROBS[:, mel_idx]
    df_test['image_name'] = df_test['image']
    df_test[['image_name', 'target']].to_csv(
        os.path.join(args.sub_dir, f'sub_{args.kernel_type}_{args.eval}.csv'),
        index=False)
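The try/except fallback used to load model_file above handles checkpoints saved from a torch.nn.DataParallel-wrapped model, whose state-dict keys all carry a 'module.' prefix. A minimal, self-contained sketch of that key renaming (the helper name is illustrative, not from the repository):

def strip_module_prefix(state_dict):
    # Checkpoints saved from a DataParallel model prefix every key with
    # 'module.'; drop it so the weights load into a bare single-GPU model.
    return {
        (k[len('module.'):] if k.startswith('module.') else k): v
        for k, v in state_dict.items()
    }

# A DataParallel key becomes loadable by the unwrapped model:
print(strip_module_prefix({'module.fc.weight': 1.0, 'fc.bias': 0.0}))
# -> {'fc.weight': 1.0, 'fc.bias': 0.0}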
Example #5
def main():

    df, df_test, meta_features, n_meta_features, mel_idx = get_df(
        args.kernel_type,
        args.out_dim,
        args.data_dir,
        args.data_folder,
        args.use_meta
    )

    transforms_train, transforms_val = get_transforms(args.image_size)

    LOGITS = []
    PROBS = []
    dfs = []
    for fold in range(5):

        df_valid = df[df['fold'] == fold]
        if args.DEBUG:
            df_valid = pd.concat([
                df_valid[df_valid['target'] == mel_idx].sample(args.batch_size * 3),
                df_valid[df_valid['target'] != mel_idx].sample(args.batch_size * 3)
            ])

        dataset_valid = MelanomaDataset(df_valid, 'valid', meta_features, transform=transforms_val)
        valid_loader = torch.utils.data.DataLoader(dataset_valid, batch_size=args.batch_size, num_workers=args.num_workers)

        if args.eval == 'best':
            model_file = os.path.join(args.model_dir, f'{args.kernel_type}_best_fold{fold}.pth')
        elif args.eval == 'best_20':
            model_file = os.path.join(args.model_dir, f'{args.kernel_type}_best_20_fold{fold}.pth')
        elif args.eval == 'final':
            model_file = os.path.join(args.model_dir, f'{args.kernel_type}_final_fold{fold}.pth')

        model = ModelClass(
            args.enet_type,
            n_meta_features=n_meta_features,
            n_meta_dim=[int(nd) for nd in args.n_meta_dim.split(',')],
            out_dim=args.out_dim
        )
        model = model.to(device)

        try:  # single GPU model_file
            model.load_state_dict(torch.load(model_file), strict=True)
        except:  # multi GPU model_file
            state_dict = torch.load(model_file)
            state_dict = {k[7:] if k.startswith('module.') else k: state_dict[k] for k in state_dict.keys()}
            model.load_state_dict(state_dict, strict=True)
        
        if len(os.environ['CUDA_VISIBLE_DEVICES']) > 1:
            model = torch.nn.DataParallel(model)

        model.eval()

        this_LOGITS, this_PROBS = val_epoch(model, valid_loader, mel_idx, is_ext=df_valid['is_ext'].values, n_test=8, get_output=True)
        LOGITS.append(this_LOGITS)
        PROBS.append(this_PROBS)
        dfs.append(df_valid)

    dfs = pd.concat(dfs).reset_index(drop=True)
    dfs['pred'] = np.concatenate(PROBS).squeeze()[:, mel_idx]

    auc_all_raw = roc_auc_score(dfs['target'] == mel_idx, dfs['pred'])

    dfs2 = dfs.copy()
    for i in range(5):
        dfs2.loc[dfs2['fold'] == i, 'pred'] = dfs2.loc[dfs2['fold'] == i, 'pred'].rank(pct=True)
    auc_all_rank = roc_auc_score(dfs2['target'] == mel_idx, dfs2['pred'])

    dfs3 = dfs[dfs.is_ext == 0].copy().reset_index(drop=True)
    auc_20_raw = roc_auc_score(dfs3['target'] == mel_idx, dfs3['pred'])

    for i in range(5):
        dfs3.loc[dfs3['fold'] == i, 'pred'] = dfs3.loc[dfs3['fold'] == i, 'pred'].rank(pct=True)
    auc_20_rank = roc_auc_score(dfs3['target'] == mel_idx, dfs3['pred'])

    content = f'Eval {args.eval}:\nauc_all_raw : {auc_all_raw:.5f}\nauc_all_rank : {auc_all_rank:.5f}\nauc_20_raw : {auc_20_raw:.5f}\nauc_20_rank : {auc_20_rank:.5f}\n'
    print(content)
    with open(os.path.join(args.log_dir, f'log_{args.kernel_type}.txt'), 'a') as appender:
        appender.write(content + '\n')

    np.save(os.path.join(args.oof_dir, f'{args.kernel_type}_{args.eval}_oof.npy'), dfs['pred'].values)
Example #6
def main():

    df, df_test, meta_features, n_meta_features, mel_idx = get_df(
        args.kernel_type, args.out_dim, args.data_dir, args.data_folder,
        args.use_meta)

    transforms_train, transforms_val = get_transforms(args.image_size)

    if args.DEBUG:
        df_test = df_test.sample(args.batch_size * 3)
    dataset_test = MelanomaDataset(df_test,
                                   'test',
                                   meta_features,
                                   transform=transforms_val)
    test_loader = torch.utils.data.DataLoader(dataset_test,
                                              batch_size=args.batch_size,
                                              num_workers=args.num_workers)

    print(f'\nPredicting test set using {args.enet_type} ...')

    OUTPUTS = []
    for fold in range(5):

        if args.eval == 'best':
            model_file = os.path.join(
                args.model_dir, f'{args.kernel_type}_best_fold{fold}.pth')
        elif args.eval == 'best_20':
            model_file = os.path.join(
                args.model_dir, f'{args.kernel_type}_best_20_fold{fold}.pth')
        elif args.eval == 'final':
            model_file = os.path.join(
                args.model_dir, f'{args.kernel_type}_final_fold{fold}.pth')

        model = ModelClass(
            args.enet_type,
            n_meta_features=n_meta_features,
            n_meta_dim=[int(nd) for nd in args.n_meta_dim.split(',')],
            out_dim=args.out_dim)
        model = model.to(device)

        try:  # single GPU model_file
            model.load_state_dict(torch.load(model_file), strict=True)
        except:  # multi GPU model_file
            state_dict = torch.load(model_file)
            state_dict = {
                k[7:] if k.startswith('module.') else k: state_dict[k]
                for k in state_dict.keys()
            }
            model.load_state_dict(state_dict, strict=True)

        if len(os.environ['CUDA_VISIBLE_DEVICES']) > 1:
            model = torch.nn.DataParallel(model)

        model.eval()

        PROBS = []

        with torch.no_grad():
            for (data) in tqdm(test_loader):

                if args.use_meta:
                    data, meta = data
                    data, meta = data.to(device), meta.to(device)
                    probs = torch.zeros(
                        (data.shape[0], args.out_dim)).to(device)
                    for I in range(args.n_test):
                        l = model(get_trans(data, I), meta)
                        probs += l.softmax(1)
                else:
                    data = data.to(device)
                    probs = torch.zeros(
                        (data.shape[0], args.out_dim)).to(device)
                    for I in range(args.n_test):
                        l = model(get_trans(data, I))
                        probs += l.softmax(1)

                probs /= args.n_test

                PROBS.append(probs.detach().cpu())

        PROBS = torch.cat(PROBS).numpy()
        OUTPUTS.append(PROBS[:, mel_idx])

    # Rank per fold (if you are predicting on your own moles, you don't need to rank the probability)
    pred = np.zeros(OUTPUTS[0].shape[0])
    for probs in OUTPUTS:
        pred += pd.Series(probs).rank(pct=True).values
    pred /= len(OUTPUTS)

    df_test['target'] = pred
    df_test[['image_name', 'target']].to_csv(
        os.path.join(args.sub_dir, f'sub_{args.kernel_type}_{args.eval}.csv'),
        index=False)
    print('\nSaved submission in -> ./subs')
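The ranking step above converts each fold's probabilities to percentile ranks before averaging, so folds whose outputs are calibrated differently still contribute equally to the final ordering. A minimal sketch with made-up numbers:

import numpy as np
import pandas as pd

fold_probs = [np.array([0.10, 0.80, 0.30]),   # fold 0 probabilities
              np.array([0.02, 0.95, 0.40])]   # fold 1 probabilities
pred = np.zeros_like(fold_probs[0])
for probs in fold_probs:
    pred += pd.Series(probs).rank(pct=True).values  # percentile rank per fold
pred /= len(fold_probs)
print(pred)  # both folds agree on the ordering -> approx. [0.333, 1.0, 0.667]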
Example #7
def main():
    torch.cuda.set_device(1)
    # get dataframe
    df, out_dim = get_df(args.kernel_type, args.data_dir, args.train_step)
    print(f"out_dim = {out_dim}")

    # get adaptive margin
    tmp = np.sqrt(1 / np.sqrt(df['landmark_id'].value_counts().sort_index().values))
    margins = (tmp - tmp.min()) / (tmp.max() - tmp.min()) * 0.45 + 0.05

    # get augmentations
    transforms_train, transforms_val = get_transforms(args.image_size)
    print("\ndata augmentation is done!\n")

    # extract images in folder 0 as a demo
    df_demo_0 = df[df['filepath'].str.startswith('/mnt/data/sjx/CS498_DL_Project/data/train/0/0')]
    df_demo_1 = df[df['filepath'].str.startswith('/mnt/data/sjx/CS498_DL_Project/data/train/0/1')]
    df_demo_2 = df[df['filepath'].str.startswith('/mnt/data/sjx/CS498_DL_Project/data/train/0/2')]
    df_demo_3 = df[df['filepath'].str.startswith('/mnt/data/sjx/CS498_DL_Project/data/train/0/3')]

    df_demo = pd.concat([df_demo_0, df_demo_1, df_demo_2, df_demo_3])
    # get train and valid dataset
    df = df_demo

    df_train = df[df['fold'] != args.fold]
    df_valid = df[df['fold'] == args.fold].reset_index(drop=True).query("index % 15==0")

    dataset_train = LandmarkDataset(df_train, 'train', 'train', transform=transforms_train)
    dataset_valid = LandmarkDataset(df_valid, 'train', 'val', transform=transforms_val)
    valid_loader = torch.utils.data.DataLoader(dataset_valid, batch_size=args.batch_size, num_workers=args.num_workers, drop_last=True)

    print("dataset has been prepared!\n")
    # model
    print(torch.cuda.current_device())
    model = ModelClass(args.enet_type, out_dim=out_dim)
    model = nn.DataParallel(model, device_ids=[1, 3]).to("cuda:1")  # module must live on device_ids[0]

    # loss func
    def criterion(logits_m, target):
        arc = ArcFaceLossAdaptiveMargin(margins=margins, s=80)
        loss_m = arc(logits_m, target, out_dim)
        return loss_m

    # optimizer
    optimizer = optim.Adam(model.parameters(), lr=args.init_lr)

    # load pretrained
    if len(args.load_from) > 0:
        # Todo:
        checkpoint = torch.load(args.load_from, map_location=lambda storage, loc: storage.cuda(3))
        state_dict = checkpoint['model_state_dict']
        state_dict = {k[7:] if k.startswith('module.') else k: state_dict[k] for k in state_dict.keys()}    
        if args.train_step == 1:
            del state_dict['metric_classify.weight']
            model.load_state_dict(state_dict, strict=False)
        else:
            model.load_state_dict(state_dict, strict=True)        
#             if 'optimizer_state_dict' in checkpoint:
#                 optimizer.load_state_dict(checkpoint['optimizer_state_dict'])   
        del checkpoint, state_dict
        torch.cuda.empty_cache()
        import gc
        gc.collect()


    # lr scheduler
    scheduler_cosine = torch.optim.lr_scheduler.CosineAnnealingWarmRestarts(optimizer, args.n_epochs-1)
    scheduler_warmup = GradualWarmupSchedulerV2(optimizer, multiplier=10, total_epoch=1, after_scheduler=scheduler_cosine)

    # train & valid loop
    gap_m_max = 0.
    model_file = os.path.join(args.model_dir, f'{args.kernel_type}_fold{args.fold}.pth')
    for epoch in range(args.start_from_epoch, args.n_epochs+1):

        print(time.ctime(), 'Epoch:', epoch)
        train_loader = torch.utils.data.DataLoader(dataset_train, batch_size=args.batch_size,
                                                   num_workers=args.num_workers,
                                                   shuffle=True, drop_last=True)

        train_loss = train_epoch(model, train_loader, optimizer, criterion)
        val_loss, acc_m, gap_m = val_epoch(model, valid_loader, criterion)
        scheduler_warmup.step(epoch-1)
        if args.local_rank == 0:
            content = time.ctime() + ' ' + f'Fold {args.fold}, Epoch {epoch}, lr: {optimizer.param_groups[0]["lr"]:.7f}, train loss: {np.mean(train_loss):.5f}, valid loss: {(val_loss):.5f}, acc_m: {(acc_m):.6f}, gap_m: {(gap_m):.6f}.'
            print(content)
            with open(os.path.join(args.log_dir, f'{args.kernel_type}.txt'), 'a') as appender:
                appender.write(content + '\n')

            print('gap_m_max ({:.6f} --> {:.6f}). Saving model ...'.format(gap_m_max, gap_m))
            torch.save({
                        'epoch': epoch,
                        'model_state_dict': model.state_dict(),
                        'optimizer_state_dict': optimizer.state_dict(),
                        }, model_file)            
            gap_m_max = gap_m

        if epoch == args.stop_at_epoch:
            print(time.ctime(), 'Training Finished!')
            break

    torch.save({
        'epoch': epoch,
        'model_state_dict': model.state_dict(),
        'optimizer_state_dict': optimizer.state_dict(),
    }, os.path.join(args.model_dir, f'{args.kernel_type}_fold{args.fold}_final.pth'))
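The adaptive-margin block near the top of this example gives rarer classes larger ArcFace margins: the inverse fourth root of each class count is rescaled into the range [0.05, 0.5]. A minimal numeric sketch (the class counts are made up):

import numpy as np

counts = np.array([1000, 100, 10])   # samples per landmark_id
tmp = np.sqrt(1 / np.sqrt(counts))   # inverse fourth root of the class count
margins = (tmp - tmp.min()) / (tmp.max() - tmp.min()) * 0.45 + 0.05
print(margins)  # most frequent class -> 0.05, rarest class -> 0.50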
Example #8
def main():

    df, df_test, meta_features, n_meta_features, mel_idx = get_df(
        args.kernel_type,
        args.out_dim,
        args.data_dir,
        args.data_folder,
        args.use_meta
    )

    transforms_train, transforms_val = get_transforms(args.image_size)

    if args.DEBUG:
        df_test = df_test.sample(args.batch_size * 3)
    dataset_test = MelanomaDataset(df_test, 'test', meta_features, transform=transforms_val)
    test_loader = torch.utils.data.DataLoader(dataset_test, batch_size=args.batch_size, num_workers=args.num_workers)

    PROBS = []  ## ! defined again inside the fold loop below, so only the last fold's predictions are used
    for fold in range(5): # ! use model built from each fold

        if args.eval == 'best': # ! default
            model_file = os.path.join(args.model_dir, f'{args.kernel_type}_best_fold{fold}.pth')
        elif args.eval == 'best_20':
            model_file = os.path.join(args.model_dir, f'{args.kernel_type}_best_20_fold{fold}.pth')
        elif args.eval == 'final':
            model_file = os.path.join(args.model_dir, f'{args.kernel_type}_final_fold{fold}.pth')

        model = ModelClass(
            args.enet_type,
            n_meta_features=n_meta_features,
            n_meta_dim=[int(nd) for nd in args.n_meta_dim.split(',')],
            out_dim=args.out_dim
        )
        model = model.to(device)

        try:  # single GPU model_file
            model.load_state_dict(torch.load(model_file), strict=True)
        except:  # multi GPU model_file
            state_dict = torch.load(model_file)
            state_dict = {k[7:] if k.startswith('module.') else k: state_dict[k] for k in state_dict.keys()}
            model.load_state_dict(state_dict, strict=True)
        
        if len(os.environ['CUDA_VISIBLE_DEVICES']) > 1:
            model = torch.nn.DataParallel(model)

        model.eval()

        PROBS = []  ## ! re-initialized for each fold, overwriting the list defined above
        with torch.no_grad():
            for (data) in tqdm(test_loader):

                if args.use_meta:
                    data, meta = data
                    data, meta = data.to(device), meta.to(device)
                    probs = torch.zeros((data.shape[0], args.out_dim)).to(device) # batch x label 
                    for I in range(args.n_test): # ! flipping the images 8 times.
                        l = model(get_trans(data, I), meta)
                        probs += l.softmax(1)
                else:   
                    data = data.to(device)
                    probs = torch.zeros((data.shape[0], args.out_dim)).to(device)
                    for I in range(args.n_test):
                        l = model(get_trans(data, I))
                        probs += l.softmax(1)

                probs /= args.n_test # ! average over all the flips

                PROBS.append(probs.detach().cpu()) ## append prediction for this batch

        PROBS = torch.cat(PROBS).numpy() ## put in numpy format, PROBS is total_obs_size x num_labels

    df_test['target'] = PROBS[:, mel_idx] # ! takes @mel_idx column
    df_test[['image_name', 'target']].to_csv(os.path.join(args.sub_dir, f'sub_{args.kernel_type}_{args.eval}.csv'), index=False)
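The inner loops above average softmax outputs over args.n_test views produced by get_trans(data, I). get_trans itself is not shown in these examples; a plausible 8-way flip/transpose version, offered only as an illustrative sketch and not as the repository's implementation, could look like this:

import torch

def tta_transform(img, i):
    # img: (N, C, H, W) batch of images; i in [0, 7] picks one dihedral view
    if i >= 4:
        img = img.transpose(2, 3)      # swap height and width
    if i % 4 == 1:
        img = img.flip(2)              # vertical flip
    elif i % 4 == 2:
        img = img.flip(3)              # horizontal flip
    elif i % 4 == 3:
        img = img.flip(2).flip(3)      # flip both axes
    return img

# Averaging predictions over all 8 views mirrors the loops above:
views = [tta_transform(torch.randn(2, 3, 8, 8), i) for i in range(8)]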
Example #9
def main():

    # get dataframe
    df, out_dim = get_df(args.kernel_type, args.data_dir, args.train_step)
    print(f"out_dim = {out_dim}")

    # get adaptive margin
    tmp = np.sqrt(
        1 / np.sqrt(df['landmark_id'].value_counts().sort_index().values))
    margins = (tmp - tmp.min()) / (tmp.max() - tmp.min()) * 0.45 + 0.05

    # get augmentations
    transforms_train, transforms_val = get_transforms(args.image_size)

    # get train and valid dataset
    df_train = df[df['fold'] != args.fold]
    df_valid = df[df['fold'] == args.fold].reset_index(
        drop=True).query("index % 15==0")

    dataset_train = LandmarkDataset(df_train,
                                    'train',
                                    'train',
                                    transform=transforms_train)
    dataset_valid = LandmarkDataset(df_valid,
                                    'train',
                                    'val',
                                    transform=transforms_val)
    valid_loader = torch.utils.data.DataLoader(dataset_valid,
                                               batch_size=args.batch_size,
                                               num_workers=args.num_workers)

    # model
    model = ModelClass(args.enet_type, out_dim=out_dim)
    model = model.cuda()
    model = apex.parallel.convert_syncbn_model(model)

    # loss func
    def criterion(logits_m, target):
        arc = ArcFaceLossAdaptiveMargin(margins=margins, s=80)
        loss_m = arc(logits_m, target, out_dim)
        return loss_m

    # optimizer
    optimizer = optim.Adam(model.parameters(), lr=args.init_lr)
    if args.use_amp:
        model, optimizer = amp.initialize(model, optimizer, opt_level="O1")

    # load pretrained
    if len(args.load_from) > 0:
        checkpoint = torch.load(args.load_from,
                                map_location='cuda:{}'.format(args.local_rank))
        state_dict = checkpoint['model_state_dict']
        state_dict = {
            k[7:] if k.startswith('module.') else k: state_dict[k]
            for k in state_dict.keys()
        }
        if args.train_step == 1:
            del state_dict['metric_classify.weight']
            model.load_state_dict(state_dict, strict=False)
        else:
            model.load_state_dict(state_dict, strict=True)


#             if 'optimizer_state_dict' in checkpoint:
#                 optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
        del checkpoint, state_dict
        torch.cuda.empty_cache()
        import gc
        gc.collect()

    model = DistributedDataParallel(model, delay_allreduce=True)

    # lr scheduler
    scheduler_cosine = torch.optim.lr_scheduler.CosineAnnealingWarmRestarts(
        optimizer, args.n_epochs - 1)
    scheduler_warmup = GradualWarmupSchedulerV2(
        optimizer,
        multiplier=10,
        total_epoch=1,
        after_scheduler=scheduler_cosine)

    # train & valid loop
    gap_m_max = 0.
    model_file = os.path.join(args.model_dir,
                              f'{args.kernel_type}_fold{args.fold}.pth')
    for epoch in range(args.start_from_epoch, args.n_epochs + 1):

        print(time.ctime(), 'Epoch:', epoch)
        scheduler_warmup.step(epoch - 1)

        train_sampler = torch.utils.data.distributed.DistributedSampler(
            dataset_train)
        train_sampler.set_epoch(epoch)

        train_loader = torch.utils.data.DataLoader(
            dataset_train,
            batch_size=args.batch_size,
            num_workers=args.num_workers,
            shuffle=train_sampler is None,
            sampler=train_sampler,
            drop_last=True)

        train_loss = train_epoch(model, train_loader, optimizer, criterion)
        val_loss, acc_m, gap_m = val_epoch(model, valid_loader, criterion)

        if args.local_rank == 0:
            content = time.ctime() + ' ' + f'Fold {args.fold}, Epoch {epoch}, lr: {optimizer.param_groups[0]["lr"]:.7f}, train loss: {np.mean(train_loss):.5f}, valid loss: {(val_loss):.5f}, acc_m: {(acc_m):.6f}, gap_m: {(gap_m):.6f}.'
            print(content)
            with open(os.path.join(args.log_dir, f'{args.kernel_type}.txt'),
                      'a') as appender:
                appender.write(content + '\n')

            print('gap_m_max ({:.6f} --> {:.6f}). Saving model ...'.format(
                gap_m_max, gap_m))
            torch.save(
                {
                    'epoch': epoch,
                    'model_state_dict': model.state_dict(),
                    'optimizer_state_dict': optimizer.state_dict(),
                }, model_file)
            gap_m_max = gap_m

        if epoch == args.stop_at_epoch:
            print(time.ctime(), 'Training Finished!')
            break

    torch.save(
        {
            'epoch': epoch,
            'model_state_dict': model.state_dict(),
            'optimizer_state_dict': optimizer.state_dict(),
        },
        os.path.join(args.model_dir,
                     f'{args.kernel_type}_fold{args.fold}_final.pth'))
Example #10
def main():

    df_train, df_test = get_df(args.kernel_type, args.out_dim, args.data_dir)
    folds = [int(i) for i in args.val_fold.split(',')]
    if args.mode == 'val':
        df_test = df_train[df_train['fold'].isin(folds)]
    _, transforms_val = get_transforms(args.image_size)

    if args.DEBUG:
        df_test = df_test.sample(args.batch_size * 3)
    dataset_test = RetinalDataset(df_test, 'val', transform=transforms_val)
    test_loader = torch.utils.data.DataLoader(dataset_test,
                                              batch_size=args.batch_size,
                                              num_workers=args.num_workers)
    # nums = dataset_test.get_num()
    # content = f'total num of test:{len(dataset_test)},class nums:{nums}'
    # print(content)
    # load model
    models = []
    for fold in range(1):

        if args.eval == 'best':
            model_file = os.path.join(args.model_dir,
                                      f'{args.kernel_type}_best.pth')
        elif args.eval == 'final':
            model_file = os.path.join(args.model_dir,
                                      f'{args.kernel_type}_final.pth')

        model = ModelClass(args.enet_type,
                           out_dim=args.out_dim,
                           pretrained=True,
                           freeze_cnn=args.freeze_cnn)
        model = model.to(device)

        try:  # single GPU model_file
            model.load_state_dict(torch.load(model_file), strict=True)
        except:  # multi GPU model_file
            state_dict = torch.load(model_file)
            state_dict = {
                k[7:] if k.startswith('module.') else k: state_dict[k]
                for k in state_dict.keys()
            }
            model.load_state_dict(state_dict, strict=True)

        # if len(os.environ['CUDA_VISIBLE_DEVICES']) > 1:
        #     model = torch.nn.DataParallel(model)

        model.eval()
        models.append(model)

    LOGITS = []
    with torch.no_grad():
        for (data, target) in tqdm(test_loader):
            data, target = data.to(device), target.to(device)
            logits = torch.zeros((data.shape[0], args.out_dim)).to(device)
            for model in models:
                for I in range(args.n_test):
                    l = model(get_trans(data, I))
                    logits += l

            logits /= args.n_test
            logits /= len(models)
            logits = torch.sigmoid(logits)
            LOGITS.append(logits.detach().cpu())

    LOGITS = torch.cat(LOGITS).numpy()
    PREDICT = []
    for logit in LOGITS:

        predicted_label = []
        for i in range(logit.size):
            if logit[i].item() > 0.5:  # Threshold is 0.5
                predicted_label.append(i)
        if len(predicted_label) == 0:
            predicted_label.append(0)
        PREDICT.append(" ".join([str(label) for label in predicted_label]))
    df_test['predicted'] = PREDICT
    df_test[['filename', 'predicted']].to_csv(
        os.path.join(args.sub_dir, f'sub_{args.kernel_type}_{args.eval}.csv'),
        index=False)
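The decoding loop above keeps every class whose sigmoid output exceeds 0.5 and falls back to class 0 when nothing crosses the threshold. The same logic as a compact sketch with made-up scores:

probs = [0.10, 0.72, 0.64, 0.20]                             # per-class scores
labels = [i for i, p in enumerate(probs) if p > 0.5] or [0]  # threshold is 0.5
print(" ".join(str(label) for label in labels))              # prints "1 2"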
Example #11
def main(args):

    # get dataframe
    df = get_df(args.groups)

    # get adaptive margin
    tmp = np.sqrt(
        1 / np.sqrt(df['label_group'].value_counts().sort_index().values))
    margins = (tmp - tmp.min()) / (tmp.max() - tmp.min()) * 0.45 + 0.05

    # get augmentations
    transforms_train, transforms_val = get_transforms(args.image_size,
                                                      args.stage)

    # get train and valid dataset
    df_train = df[df['fold'] != args.fold] if not args.full else df
    df_train['label_group'] = LabelEncoder().fit_transform(
        df_train.label_group)

    df_valid = df[df['fold'] == args.fold]

    out_dim = df_train.label_group.nunique()
    print(f"out_dim = {out_dim}")

    dataset_train = ShoppeDataset(df_train,
                                  'train',
                                  transform=transforms_train)
    dataset_valid = ShoppeDataset(df_valid, 'val', transform=transforms_val)

    print(
        f'Train on {len(df_train)} images, validate on {len(df_valid)} images')

    train_loader = torch.utils.data.DataLoader(dataset_train,
                                               batch_size=args.batch_size,
                                               num_workers=args.num_workers,
                                               pin_memory=True,
                                               shuffle=True,
                                               drop_last=True)
    valid_loader = torch.utils.data.DataLoader(dataset_valid,
                                               batch_size=args.batch_size,
                                               num_workers=args.num_workers)

    loss_config = decode_config(args.loss_config)
    # model
    if args.enet_type == 'resnest50':
        model = Resnest50(out_dim=out_dim, loss_config=loss_config, args=args)
    else:
        model = Model(args.enet_type,
                      out_dim=out_dim,
                      loss_config=loss_config,
                      args=args)
    model = model.cuda()

    # loss func
    criterion = get_criterion(args, out_dim, margins)

    # optimizer
    optimizer = optim.AdamW(model.parameters(), lr=args.init_lr)

    # load pretrained
    if args.load_from and args.load_from != 'none':
        checkpoint = torch.load(args.load_from, map_location='cuda:0')
        state_dict = checkpoint['model_state_dict']
        state_dict = {
            k[7:] if k.startswith('module.') else k: state_dict[k]
            for k in state_dict.keys()
        }
        model.load_state_dict(state_dict, strict=True)
        del checkpoint, state_dict
        torch.cuda.empty_cache()
        gc.collect()
        print(f"Loaded weight from {args.load_from}")

    # lr scheduler
    scheduler_cosine = torch.optim.lr_scheduler.CosineAnnealingWarmRestarts(
        optimizer, args.n_epochs - 1)
    warmup_epochs = args.warmup_epochs if args.stage == 1 else 1
    print(warmup_epochs)
    scheduler_warmup = GradualWarmupSchedulerV2(
        optimizer,
        multiplier=10,
        total_epoch=warmup_epochs,
        after_scheduler=scheduler_cosine)

    # train & valid loop
    best_score = -1
    model_file = os.path.join(
        args.model_dir,
        weight_file(args.kernel_type, args.fold, args.stage,
                    loss_config.loss_type, out_dim))
    for epoch in range(args.start_from_epoch, args.n_epochs + 1):

        print(time.ctime(), f'Epoch: {epoch}/{args.n_epochs}')
        scheduler_warmup.step(epoch - 1)

        train_loss, acc_list = train_epoch(model, train_loader, optimizer,
                                           criterion)
        f1score = val_epoch(model, valid_loader, criterion, df_valid, args)

        content = time.ctime() + ' ' + \
            (
                f'Fold {args.fold}, Epoch {epoch}, lr: {optimizer.param_groups[0]["lr"]:.7f}, train loss: {np.mean(train_loss):.5f},'
                f' train acc {np.mean(acc_list):.5f}, f1score: {(f1score):.6f}.')

        print(content)
        with open(os.path.join(args.log_dir, f'{args.kernel_type}.txt'),
                  'a') as appender:
            appender.write(content + '\n')

        if f1score > best_score:
            print('best f1 score ({:.6f} --> {:.6f}). Saving model ...'.format(
                best_score, f1score))
            torch.save(
                {
                    'epoch': epoch,
                    'model_state_dict': model.state_dict(),
                    'optimizer_state_dict': optimizer.state_dict(),
                }, model_file)
            best_score = f1score

        if epoch == args.stop_at_epoch:
            print(time.ctime(), 'Training Finished!')
            break

    torch.save(
        {
            'epoch': epoch,
            'model_state_dict': model.state_dict(),
            'optimizer_state_dict': optimizer.state_dict(),
        }, model_file)
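In this last example out_dim is derived by re-encoding label_group into contiguous class ids with LabelEncoder, so the classification head size matches the number of groups present in the training folds. A minimal sketch of that remapping (the group values are made up):

from sklearn.preprocessing import LabelEncoder

groups = ['a', 'c', 'a', 'b']
encoded = LabelEncoder().fit_transform(groups)  # contiguous ids 0..n_classes-1
print(encoded, len(set(encoded)))               # [0 2 0 1] 3  -> out_dim = 3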