Example #1
0
    def __call__(self):
        """Run inference over the test set with every (seed, fold) checkpoint.

        Saves one CSV of class probabilities per checkpoint, then writes the
        ensemble mean over all checkpoints to ``pred.csv``.
        """
        print("Inference")
        if self.get("infer_flag"):
            test_df = pd.DataFrame()
            test_df["image_id"] = list(os.listdir(osp.join(self.data_path, "test_images")))
            # A single-row test set can break batching; replicate it to 2**7 = 128 rows.
            # (pd.concat replaces the deprecated/removed DataFrame.append doubling loop.)
            if test_df.shape[0] == 1:
                test_df = pd.concat([test_df] * 128, ignore_index=True)

            test_dataset = TestDataset(test_df, self.data_path, get_transforms('valid', self.get("val_transform_params")))
            testloader = DataLoader(test_dataset, batch_size=self.get("batch_size"), num_workers=self.get("num_workers"), shuffle=False, pin_memory=False)
            pred_cols = [f"pred_{n}" for n in range(self.params["output_size"])]
            oof_preds = []
            for seed in self.seeds:
                for fold in range(self.get("n_splits")):
                    self.params["seed"] = seed
                    self.params["fold"] = fold
                    self.params["pretrained"] = False  # weights come from the saved checkpoint
                    model = CassavaClassifierModel(self.params)
                    model.read_weight()
                    preds = model.predict(testloader)
                    pd.DataFrame(preds, columns=pred_cols).to_csv(osp.join(self.preds_path, f"pred_{seed}_{fold}.csv"), index=False)
                    oof_preds.append(preds)
            # BUG FIX: the averaged ensemble was previously discarded — the final CSV
            # was built from `preds` (last checkpoint only) instead of `oof_preds`.
            oof_preds = np.mean(np.array(oof_preds), axis=0)
            oof_preds = pd.DataFrame(oof_preds, columns=pred_cols)
            oof_preds.to_csv(osp.join(self.preds_path, "pred.csv"), index=False)
Example #2
0
    def test_model(self, img):
        """Classify a BGR image and return (labels, probabilities).

        Args:
            img: BGR image array (as produced by cv2.imread).

        Returns:
            Tuple of (zero-padded 5-digit label strings, probabilities rounded
            to 2 decimals), both sorted by descending probability.
        """
        # Load the checkpoint on CPU when CUDA is unavailable.
        if self.is_cpu:
            checkpoint = torch.load(self.model_path,
                                    map_location=torch.device("cpu"))
        else:
            checkpoint = torch.load(self.model_path)
        model = checkpoint["model"]
        model.eval()

        # The model expects RGB input.
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

        # ImageNet mean/std normalization.
        augmented = get_transforms((0.485, 0.456, 0.406),
                                   (0.229, 0.224, 0.225))(image=img)
        x = augmented["image"][np.newaxis, ...]  # add batch dimension
        # NOTE(review): `device` is a module-level global here — confirm it is set.
        x = x.to(device)
        pred = model(x)
        y = pred[0].detach().cpu().numpy()
        y = np.exp(y) / sum(np.exp(y))  # softmax over the logits

        # Class indices sorted by descending probability.
        order = list(np.argsort(y))[::-1]

        probs = [float(p) for p in np.round(y[order], 2)]
        labels = [str(int(i)).zfill(5) for i in order]

        return labels, probs
Example #3
0
def load_prepare_data(train_path, val_path, batch_size, shuffle, verbose=True):
    """Build QuickDraw datasets and loaders for training and optional validation."""
    assert CLASSES

    train_tfm, val_tfm = get_transforms()

    train_ds = QuickDrawDataset(csv_file=train_path, transform=train_tfm)
    train_dl = DataLoader(train_ds, batch_size=batch_size, shuffle=shuffle,
                          num_workers=0)

    # Validation pieces only exist when a path is supplied.
    val_ds, val_dl = None, None
    if val_path:
        val_ds = QuickDrawDataset(csv_file=val_path, transform=val_tfm)
        val_dl = DataLoader(val_ds, batch_size=batch_size, shuffle=shuffle,
                            num_workers=0)

    if verbose:
        n_valid = len(val_ds) if val_ds else 0
        print('Number of classes:', len(CLASSES))
        print('Train: %d, Valid: %d' % (len(train_ds), n_valid))

    return train_ds, train_dl, val_ds, val_dl
def main():
    """Train the requested folds with the configured transforms."""
    df, _ = get_df(args.kernel_type, args.out_dim, args.data_dir)

    transforms_train, transforms_val = get_transforms(args.image_size)

    fold_ids = [int(f) for f in args.train_fold.split(',')]
    run(fold_ids, df, transforms_train, transforms_val)
Example #5
0
def main():
    """Train the encoder/decoder captioning model and predict on validation data."""
    train_df, valid_df = make_train_valid_dfs()

    train_loader = make_loaders(
        dataframe=train_df,
        vocabulary=Vocabulary(freq_threshold=config.FREQ_THRESHOLD),
        transforms=get_transforms(mode="train"),
        mode="train",
    )
    # Reuse the vocabulary built from the training captions for validation.
    vocab = train_loader.dataset.vocab
    valid_loader = make_loaders(
        dataframe=valid_df,
        vocabulary=vocab,
        transforms=get_transforms(mode="valid"),
        mode="valid",
    )

    encoder, decoder, encoder_optimizer, decoder_optimizer = build_model(
        vocab_size=vocab.vocab_size)
    criterion = nn.CrossEntropyLoss(ignore_index=vocab.stoi["<PAD>"])

    # Plateau schedulers are created but not currently stepped (see loop below).
    encoder_scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
        encoder_optimizer, factor=config.FACTOR, patience=config.PATIENCE,
        verbose=True)
    decoder_scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
        decoder_optimizer, factor=config.FACTOR, patience=config.PATIENCE,
        verbose=True)

    for _epoch in range(config.EPOCHS):
        train_loss = train_one_epoch(
            train_loader,
            encoder,
            decoder,
            criterion,
            encoder_optimizer,
            decoder_optimizer,
            config.DEVICE,
        )

        # encoder_scheduler.step(valid_loss.avg)
        # decoder_scheduler.step(valid_loss.avg)

        predict(valid_loader, encoder, decoder, config.DEVICE)
Example #6
0
def main():
    """Train every requested fold of the melanoma classifier."""
    df, df_test, meta_features, n_meta_features, mel_idx = get_df(
        args.kernel_type, args.out_dim, args.data_dir, args.data_folder,
        args.use_meta)

    transforms_train, transforms_val = get_transforms(args.image_size)

    for fold in (int(f) for f in args.fold.split(',')):
        run(fold, df, meta_features, n_meta_features, transforms_train,
            transforms_val, mel_idx)
Example #7
0
    def __init__(self, model_path):
        """Set up the server: load the trained model and the evaluation transform."""
        super(Server, self).__init__()
        self.my_socket = None
        self.n_clients = 0
        self.guesses = []

        print(model_path)
        self.model = QuickDrawNet.load_from_file(model_path)
        self.model.eval()  # inference only

        _, self.transform = get_transforms()
def main():
    """Run K-fold training and validation of the melanoma network."""
    # Fetch the data.
    df, df_test, meta_features, n_meta_features, mel_idx = get_df(
        args.kernel_type, args.out_dim, args.data_dir, args.data_folder,
        args.use_meta)
    # Fetch the augmentations to apply to the images.
    transforms_train, transforms_val = get_transforms(args.image_size)

    # Train and validate the network using K-fold cross-validation.
    for fold in (int(f) for f in args.fold.split(',')):
        run(fold, df, meta_features, n_meta_features, transforms_train,
            transforms_val, mel_idx)
def load_prepare_data(test_path, verbose=True):
    """Build the QuickDraw test dataset and its DataLoader."""
    assert CLASSES

    _, test_tfm = get_transforms()

    test_ds = QuickDrawDataset(csv_file=test_path, transform=test_tfm)
    test_dl = DataLoader(test_ds, batch_size=64, shuffle=False, num_workers=0)

    if verbose:
        print('Number of classes:', len(CLASSES))
        print('Test: %d' % len(test_ds))

    return test_ds, test_dl
Example #10
0
def main():
    """Train and evaluate the transformer captioning model."""
    train_df, valid_df = make_train_valid_dfs()

    train_loader = make_loaders(
        dataframe=train_df,
        vocabulary=Vocabulary(freq_threshold=config.FREQ_THRESHOLD),
        transforms=get_transforms(mode="train"),
        mode="train",
    )
    # Share the training vocabulary with the validation loader.
    vocab = train_loader.dataset.vocab
    valid_loader = make_loaders(
        dataframe=valid_df,
        vocabulary=vocab,
        transforms=get_transforms(mode="valid"),
        mode="valid",
    )

    # model = CaptioningTransformer(vocab_size=vocab.vocab_size, d_model=config.D_MODEL).to(config.DEVICE)
    model = TransformerCaptioning(vocab_size=vocab.vocab_size).to(
        config.DEVICE)
    optimizer = torch.optim.Adam(model.parameters(), lr=config.LR)
    lr_scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
        optimizer, factor=0.8, patience=3)
    criterion = nn.CrossEntropyLoss(ignore_index=vocab.stoi["<PAD>"])

    train_eval(
        config.EPOCHS,
        model,
        train_loader,
        valid_loader,
        criterion,
        optimizer,
        config.DEVICE,
        config,
        lr_scheduler,
    )
Example #11
0
def build_loaders(dataframe, tokenizer, mode):
    """Create a DataLoader over CLIP (image, caption) pairs.

    Args:
        dataframe: frame with "image" and "caption" columns.
        tokenizer: caption tokenizer, passed through to the dataset.
        mode: "train" enables shuffling; anything else keeps order.

    Returns:
        A torch DataLoader over a CLIPDataset.
    """
    transforms = get_transforms(mode=mode)
    dataset = CLIPDataset(
        dataframe["image"].values,
        dataframe["caption"].values,
        tokenizer=tokenizer,
        transforms=transforms,
    )
    dataloader = torch.utils.data.DataLoader(
        dataset,
        batch_size=CFG.batch_size,
        num_workers=CFG.num_workers,
        shuffle=(mode == "train"),  # idiom: plain boolean, not `True if ... else False`
    )
    return dataloader
Example #12
0
    def train(self, train_df, val_df, seed, fold):
        """Train one (seed, fold) model and persist its validation predictions."""
        if self.debug:
            # Shrink to just over one batch and a single epoch for a quick smoke run.
            limit = self.get("batch_size") + 1
            train_df = train_df[:limit]
            val_df = val_df[:limit]
            self.params["epochs"] = 1
        if self.raw_dirname == "cassava-leaf-disease-classification-merged":
            # Validate only on the 2021 competition rows of the merged dataset.
            val_df = val_df[val_df["source"] == 2021]

        train_dataset = TrainDataset(train_df, self.data_path, get_transforms('train', self.get("tr_transform_params"), self.get("tr_transforms")))
        val_dataset = TrainDataset(val_df, self.data_path, get_transforms('valid', self.get("val_transform_params")))
        trainloader, validloader = get_dataloader(train_dataset, val_dataset, self.get("batch_size"), self.get("num_workers"))

        self.params["seed"] = seed
        self.params["fold"] = fold
        self.params["pretrained"] = True

        model = CassavaClassifierModel(self.params)
        if self.get("do_retrain"):
            model.read_weight()
        model.fit(trainloader, validloader)

        # Persist out-of-fold validation predictions for later ensembling.
        pred_cols = [f"pred_{n}" for n in range(self.params["output_size"])]
        pd.DataFrame(model.val_preds, columns=pred_cols).to_csv(
            osp.join(self.val_preds_path, f"preds_{seed}_{fold}.csv"), index=False)
Example #13
0
def Loader(img_path=None,
           uploaded_image=None,
           upload_state=False,
           demo_state=True):
    """Build the test DataLoader for a demo or uploaded cassava image."""
    dataset = CassavaDataset(test,
                             img_path,
                             uploaded_image=uploaded_image,
                             transform=get_transforms(data='valid'),
                             uploaded_state=upload_state,
                             demo_state=demo_state)
    return DataLoader(dataset,
                      batch_size=CFG.batch_size,
                      shuffle=False,
                      num_workers=CFG.num_workers,
                      pin_memory=True)
    def setup(self, stage=None):
        """Map label groups to contiguous class ids, build datasets and adaptive margins."""
        tfms_trn, tfms_val = get_transforms(self.imgsz)

        # Re-index label groups to 0..num_classes-1 (consistently across train/val).
        self.num_classes = self.dftrn.label_group.nunique()
        self.i2grp = sorted(self.dftrn.label_group.unique())
        self.grp2i = {grp: idx for idx, grp in enumerate(self.i2grp)}
        self.dftrn.label_group = self.dftrn.label_group.map(self.grp2i)
        self.dfval.label_group = self.dfval.label_group.map(self.grp2i)

        self.train_dataset = ShopeeDataset(self.dftrn, transform=tfms_trn)
        self.eval_dataset = ShopeeDataset(self.dfval, transform=tfms_val)

        # Adaptive margin: rarer classes get a larger margin, scaled into [0.05, 0.5].
        counts = self.dftrn.label_group.value_counts().sort_index().values
        tmp = np.sqrt(1 / np.sqrt(counts))
        self.margins = (tmp - tmp.min()) / (tmp.max() - tmp.min()) * 0.45 + 0.05
def plot_top_loss(k=10):
    """Plot the k misclassified validation images with the highest predicted score.

    Runs the best checkpoint over new_valid.csv on GPU, collects images whose
    predicted label disagrees with the ground truth, ranks them by the logit of
    the (wrong) predicted class, and shows the top-k in a 2x5 grid.

    Args:
        k: number of top-loss images to plot. The grid is hard-coded as 2x5,
           so k is expected to be 10.
    """
    df = pd.read_csv(os.path.join(cfg.ds_folder, 'new_valid.csv'),
                     encoding='utf8')
    folder = os.path.join(cfg.ds_folder, 'train')
    _, tfms = get_transforms(cfg.img_size)

    model = get_model_by_name(cfg.model_name)
    model.load_state_dict(
        torch.load(glob("../runs/exp15/best*")[0])['state_dict'])
    model.to("cuda:0")
    model.eval()

    error_list = []   # logit assigned to the wrongly-predicted class, per error
    error_index = []  # dataframe row index of each misclassified image
    for i in tqdm(range(len(df))):
        with torch.no_grad():
            filename = os.path.join(folder, str(df['id'][i]) + ".jpg")
            label = df['label'][i]
            # Read as RGB, apply eval transforms, reorder to channels-first (CHW).
            img = cv2.cvtColor(cv2.imread(filename), cv2.COLOR_BGR2RGB)
            img = tfms(image=img)['image'].astype('float32').transpose(2, 0, 1)
            img = torch.from_numpy(
                img.reshape(-1, 3, cfg.img_size, cfg.img_size)).to("cuda:0")
            pred = model(img)
            _, pred_label = torch.max(pred, dim=1)
            yes = (pred_label.cpu() == label).item()
            if yes:
                continue
            else:
                # Confidence (raw logit) the model gave the wrong prediction.
                error_score = pred.cpu().numpy().squeeze()[pred_label]
                error_list.append(error_score)
                error_index.append(i)
    # Indices of the k largest error scores (argpartition: unordered within the top-k).
    ind = np.argpartition(np.array(error_list), -k)[-k:]
    error_img_index = np.array(error_index)[ind]

    # plot
    plt.figure(figsize=(12, 6))
    for i in range(k):
        plt.subplot(2, 5, i + 1)
        plt.imshow(
            Image.open(
                os.path.join(folder,
                             str(df['id'][error_img_index[i]]) +
                             ".jpg")).convert('RGB'))
        print("filename is", str(df['id'][error_img_index[i]]) + ".jpg")
        plt.title("label {}".format(df['label'][error_img_index[i]]))
    plt.show()
Example #16
0
def main():
    """Train each fold of the stone classifier.

    Dataset comes from dataset.get_df_stone.
    """
    df_train, df_test, meta_features, n_meta_features, target_idx = get_df_stone(
        k_fold=args.k_fold,
        out_dim=args.out_dim,
        data_dir=args.data_dir,
        data_folder=args.data_folder,
        use_meta=args.use_meta,
        use_ext=args.use_ext)

    # Fetch the model's image transforms.
    transforms_train, transforms_val = get_transforms(args.image_size)

    # Train every fold of the K-fold split.
    for fold in range(args.k_fold):
        run(fold, df_train, meta_features, n_meta_features, transforms_train,
            transforms_val, target_idx)
def main():
    """Sweep blur attack strengths on fold 0 and log/plot the resulting AUCs.

    Dataset comes from dataset.get_df_stone. For each strength, transforms are
    rebuilt, fold 0 is trained/evaluated, and the best AUC is appended to a log.
    """
    df_train, df_test, meta_features, n_meta_features, target_idx = get_df_stone(
        k_fold=args.k_fold,
        out_dim=args.out_dim,
        data_dir=args.data_dir,
        data_folder=args.data_folder,
        use_meta=args.use_meta,
        use_ext=args.use_ext)

    # Customized image transform per attack strength, e.g.
    # albumentations.MotionBlur(blur_limit=attack_strength, p=1.0)
    auc_save = []
    # Earlier sweep used [0, 3, 5, 7, 9, 11].
    attack_strengths = [13, 17, 21, 25, 29]

    for attack_strength in attack_strengths:
        transforms_train, transforms_val = get_transforms(
            args.image_size, attack_strength)
        auc_max = run(0, df_train, meta_features, n_meta_features,
                      transforms_train, transforms_val, target_idx)
        print(auc_max)
        auc_save.append(auc_max)
        with open(os.path.join("./noise_watch", f'log_{args.kernel_type}.txt'),
                  'a') as appender:
            appender.write('attack_strengths   :   ' + str(attack_strength) +
                           '      auc   :   ' + str(auc_max) + '\n')

    plt.plot(auc_save)
    plt.show()
from utils import seed_everything
from dataset import ImageDataset, get_transforms
from trainer import CycleGAN
from visualize import unnorm
# End-to-end CycleGAN training script: seed, config, data, train, visualize.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
''' Seeding all random parameters for reproducibility '''
seed_everything(42)
'''Default configuration for training CycleGAN'''
size = 256
batch_size = 1
# Per-channel normalization to [-1, 1]: (x - 0.5) / 0.5.
mean = [0.5, 0.5, 0.5]
std = [0.5, 0.5, 0.5]
'''Reading data and creating Dataloader'''
monet_dir = 'monet_jpg/'
photo_dir = 'photo_jpg/'
transform = get_transforms(size, mean, std)
img_dataset = ImageDataset(monet_dir=monet_dir,
                           photo_dir=photo_dir,
                           transform=transform)
img_dl = DataLoader(img_dataset, batch_size=batch_size, pin_memory=True)
'''Fixed set of monet and photos for visualizing them throughout the training process'''
# Five random (photo, monet) pairs held fixed across epochs for progress snapshots.
idx = np.random.randint(0, len(img_dataset), 5)
fixed_photo = torch.cat([img_dataset[i][0].unsqueeze(0) for i in idx], 0)
fixed_monet = torch.cat([img_dataset[i][1].unsqueeze(0) for i in idx], 0)
''' Creating an instance of the trainer class '''
# NOTE(review): positional args presumably (in_ch, out_ch, epochs) — confirm against trainer.CycleGAN.
gan = CycleGAN(3, 3, 100, device, (fixed_photo, fixed_monet), decay_epoch=50)
gan.train(img_dl)
'''Finally visualising photos against their generated monet-style paintings'''
fig, ax = plt.subplots(5, 2, figsize=(12, 8))
# NOTE(review): loop body appears truncated in this view — only fetches one batch per iteration.
for i in range(5):
    photo_img, monet_img = next(iter(img_dl))
Example #19
0
                       shuffle=True,
                       random_state=CFG.seed)
# Assign a fold id to every row using the CV splitter (keyed on InChI length).
for n, (train_index,
        val_index) in enumerate(Fold.split(folds, folds['InChI_length'])):
    folds.loc[val_index, 'fold'] = int(n)

# Hold out fold 0 for validation (capped at 60k rows); train on all other folds.
fold = 0
trn_idx = folds[folds['fold'] != fold].index
val_idx = folds[folds['fold'] == fold].index[:60000]

train_folds = folds.loc[trn_idx].reset_index(drop=True)
valid_folds = folds.loc[val_idx].reset_index(drop=True)

train_dataset = TrainDataset(train_folds,
                             tokenizer,
                             transform=get_transforms(data='train'))
valid_dataset = TrainDataset(valid_folds,
                             tokenizer,
                             transform=get_transforms(data='valid'))

# Training loader drops the last partial batch and pads sequences via bms_collate.
train_loader = DataLoader(train_dataset,
                          batch_size=CFG.batch_size,
                          shuffle=True,
                          num_workers=CFG.num_workers,
                          pin_memory=True,
                          drop_last=True,
                          collate_fn=bms_collate)
valid_loader = DataLoader(
    valid_dataset,
    batch_size=CFG.batch_size,
    shuffle=False,
Example #20
0
def main():
    """Evaluate every fold's checkpoint on its validation split and report OOF AUCs.

    Computes raw and per-fold rank-percentile AUC over all data and over the
    non-external subset, logs the summary, and saves the OOF predictions.
    """
    df, df_test, meta_features, n_meta_features, mel_idx = get_df(
        args.kernel_type,
        args.out_dim,
        args.data_dir,
        args.data_folder,
        args.use_meta
    )

    transforms_train, transforms_val = get_transforms(args.image_size)

    LOGITS = []
    PROBS = []
    dfs = []
    for fold in range(5):

        df_valid = df[df['fold'] == fold]
        if args.DEBUG:
            # Balanced mini-sample for a quick smoke run.
            df_valid = pd.concat([
                df_valid[df_valid['target'] == mel_idx].sample(args.batch_size * 3),
                df_valid[df_valid['target'] != mel_idx].sample(args.batch_size * 3)
            ])

        dataset_valid = MelanomaDataset(df_valid, 'valid', meta_features, transform=transforms_val)
        valid_loader = torch.utils.data.DataLoader(dataset_valid, batch_size=args.batch_size, num_workers=args.num_workers)

        # BUG FIX: the 'final' branch was a separate `if`, so an unrecognized
        # --eval left model_file undefined (NameError). One chain, fail fast.
        if args.eval == 'best':
            model_file = os.path.join(args.model_dir, f'{args.kernel_type}_best_fold{fold}.pth')
        elif args.eval == 'best_20':
            model_file = os.path.join(args.model_dir, f'{args.kernel_type}_best_20_fold{fold}.pth')
        elif args.eval == 'final':
            model_file = os.path.join(args.model_dir, f'{args.kernel_type}_final_fold{fold}.pth')
        else:
            raise ValueError(f'unknown eval mode: {args.eval}')

        model = ModelClass(
            args.enet_type,
            n_meta_features=n_meta_features,
            n_meta_dim=[int(nd) for nd in args.n_meta_dim.split(',')],
            out_dim=args.out_dim
        )
        model = model.to(device)

        try:  # single GPU model_file
            model.load_state_dict(torch.load(model_file), strict=True)
        except Exception:  # multi GPU model_file: strip the DataParallel 'module.' prefix
            state_dict = torch.load(model_file)
            state_dict = {k[7:] if k.startswith('module.') else k: state_dict[k] for k in state_dict.keys()}
            model.load_state_dict(state_dict, strict=True)

        if len(os.environ['CUDA_VISIBLE_DEVICES']) > 1:
            model = torch.nn.DataParallel(model)

        model.eval()

        this_LOGITS, this_PROBS = val_epoch(model, valid_loader, mel_idx, is_ext=df_valid['is_ext'].values, n_test=8, get_output=True)
        LOGITS.append(this_LOGITS)
        PROBS.append(this_PROBS)
        dfs.append(df_valid)

    dfs = pd.concat(dfs).reset_index(drop=True)
    dfs['pred'] = np.concatenate(PROBS).squeeze()[:, mel_idx]

    auc_all_raw = roc_auc_score(dfs['target'] == mel_idx, dfs['pred'])

    # Rank-normalize predictions within each fold before re-computing AUC.
    dfs2 = dfs.copy()
    for i in range(5):
        dfs2.loc[dfs2['fold'] == i, 'pred'] = dfs2.loc[dfs2['fold'] == i, 'pred'].rank(pct=True)
    auc_all_rank = roc_auc_score(dfs2['target'] == mel_idx, dfs2['pred'])

    # Restrict to the non-external (is_ext == 0) subset.
    dfs3 = dfs[dfs.is_ext == 0].copy().reset_index(drop=True)
    auc_20_raw = roc_auc_score(dfs3['target'] == mel_idx, dfs3['pred'])

    for i in range(5):
        dfs3.loc[dfs3['fold'] == i, 'pred'] = dfs3.loc[dfs3['fold'] == i, 'pred'].rank(pct=True)
    auc_20_rank = roc_auc_score(dfs3['target'] == mel_idx, dfs3['pred'])

    content = f'Eval {args.eval}:\nauc_all_raw : {auc_all_raw:.5f}\nauc_all_rank : {auc_all_rank:.5f}\nauc_20_raw : {auc_20_raw:.5f}\nauc_20_rank : {auc_20_rank:.5f}\n'
    print(content)
    with open(os.path.join(args.log_dir, f'log_{args.kernel_type}.txt'), 'a') as appender:
        appender.write(content + '\n')

    np.save(os.path.join(args.oof_dir, f'{args.kernel_type}_{args.eval}_oof.npy'), dfs['pred'].values)
Example #21
0
def main():
    """Run test-set inference for every fold of the stone classifier.

    Dataset comes from dataset.get_df_stone. For each fold, loads the selected
    checkpoint, averages softmax probabilities over args.n_test test-time
    augmentations, and writes a per-fold submission CSV.
    """
    df_train, df_test, meta_features, n_meta_features, target_idx = get_df_stone(
        k_fold=args.k_fold,
        out_dim=args.out_dim,
        data_dir=args.data_dir,
        data_folder=args.data_folder,
        use_meta=args.use_meta,
        use_ext=args.use_ext)

    transforms_train, transforms_val = get_transforms(args.image_size)

    # https://discuss.pytorch.org/t/error-expected-more-than-1-value-per-channel-when-training/26274
    # BatchNorm can fail on a batch of size 1; adjust the batch size if that happens.
    if args.DEBUG:
        df_test = df_test.sample(args.batch_size * 3)
    dataset_test = MMC_ClassificationDataset(df_test,
                                             'test',
                                             meta_features,
                                             transform=transforms_val)
    test_loader = torch.utils.data.DataLoader(dataset_test,
                                              batch_size=args.batch_size,
                                              num_workers=args.num_workers)

    # (The old outer `PROBS = []` was dead — it was rebuilt inside the loop.)
    for fold in range(args.k_fold):

        # BUG FIX: the 'final' branch was a separate `if`, so an unrecognized
        # --eval left model_file undefined (NameError). One chain, fail fast.
        if args.eval == 'best':
            model_file = os.path.join(
                args.model_dir, f'{args.kernel_type}_best_fold{fold}.pth')
        elif args.eval == 'best_no_ext':
            model_file = os.path.join(
                args.model_dir,
                f'{args.kernel_type}_best_no_ext_fold{fold}.pth')
        elif args.eval == 'final':
            model_file = os.path.join(
                args.model_dir, f'{args.kernel_type}_final_fold{fold}.pth')
        else:
            raise ValueError(f'unknown eval mode: {args.eval}')

        model = ModelClass(
            args.enet_type,
            n_meta_features=n_meta_features,
            n_meta_dim=[int(nd) for nd in args.n_meta_dim.split(',')],
            out_dim=args.out_dim)
        model = model.to(device)

        try:  # single GPU model_file
            model.load_state_dict(torch.load(model_file), strict=True)
        except Exception:  # multi GPU model_file: strip the DataParallel 'module.' prefix
            state_dict = torch.load(model_file)
            state_dict = {
                k[7:] if k.startswith('module.') else k: state_dict[k]
                for k in state_dict.keys()
            }
            model.load_state_dict(state_dict, strict=True)

        if len(os.environ['CUDA_VISIBLE_DEVICES']) > 1:
            model = torch.nn.DataParallel(model)

        model.eval()

        PROBS = []
        with torch.no_grad():
            for (data) in tqdm(test_loader):

                if args.use_meta:
                    data, meta = data
                    data, meta = data.to(device), meta.to(device)
                    probs = torch.zeros(
                        (data.shape[0], args.out_dim)).to(device)
                    for I in range(args.n_test):
                        l = model(get_trans(data, I), meta)
                        probs += l.softmax(1)
                else:
                    data = data.to(device)
                    probs = torch.zeros(
                        (data.shape[0], args.out_dim)).to(device)
                    for I in range(args.n_test):
                        l = model(get_trans(data, I))
                        probs += l.softmax(1)

                # Average over the test-time augmentations.
                probs /= args.n_test

                PROBS.append(probs.detach().cpu())

        PROBS = torch.cat(PROBS).numpy()

        df_test['target'] = PROBS[:, target_idx]
        # (Removed the never-populated TARGETS list; the metrics below that
        # referenced it remain commented out.)
        #acc = (PROBS.argmax(1) == TARGETS).mean() * 100.
        #auc = roc_auc_score((TARGETS == target_idx).astype(float), PROBS[:, target_idx])

        #df_test[['image_name', 'target']].to_csv(os.path.join(args.sub_dir, f'sub_{args.kernel_type}_{args.eval}_{fold}_{acc:.2f}_{auc:.4f}.csv'), index=False)
        df_test[['image_name', 'target']].to_csv(os.path.join(
            args.sub_dir, f'sub_{args.kernel_type}_{args.eval}_{fold}.csv'),
                                                 index=False)
Example #22
0
def main():
    """Run melanoma test-set inference across the 5 fold checkpoints and save a submission.

    NOTE(review): PROBS is re-initialized inside the fold loop, so only the
    LAST fold's probabilities reach the submission CSV — earlier folds' work is
    discarded. This preserves the original behavior (already flagged by the
    inline "defined twice?" comments); an ensemble would accumulate instead.
    """
    df, df_test, meta_features, n_meta_features, mel_idx = get_df(
        args.kernel_type,
        args.out_dim,
        args.data_dir,
        args.data_folder,
        args.use_meta
    )

    transforms_train, transforms_val = get_transforms(args.image_size)

    if args.DEBUG:
        df_test = df_test.sample(args.batch_size * 3)
    dataset_test = MelanomaDataset(df_test, 'test', meta_features, transform=transforms_val)
    test_loader = torch.utils.data.DataLoader(dataset_test, batch_size=args.batch_size, num_workers=args.num_workers)

    for fold in range(5):  # use the model built from each fold

        # BUG FIX: the 'final' branch was a separate `if`, so an unrecognized
        # --eval left model_file undefined (NameError). One chain, fail fast.
        if args.eval == 'best':  # default
            model_file = os.path.join(args.model_dir, f'{args.kernel_type}_best_fold{fold}.pth')
        elif args.eval == 'best_20':
            model_file = os.path.join(args.model_dir, f'{args.kernel_type}_best_20_fold{fold}.pth')
        elif args.eval == 'final':
            model_file = os.path.join(args.model_dir, f'{args.kernel_type}_final_fold{fold}.pth')
        else:
            raise ValueError(f'unknown eval mode: {args.eval}')

        model = ModelClass(
            args.enet_type,
            n_meta_features=n_meta_features,
            n_meta_dim=[int(nd) for nd in args.n_meta_dim.split(',')],
            out_dim=args.out_dim
        )
        model = model.to(device)

        try:  # single GPU model_file
            model.load_state_dict(torch.load(model_file), strict=True)
        except Exception:  # multi GPU model_file: strip the DataParallel 'module.' prefix
            state_dict = torch.load(model_file)
            state_dict = {k[7:] if k.startswith('module.') else k: state_dict[k] for k in state_dict.keys()}
            model.load_state_dict(state_dict, strict=True)

        if len(os.environ['CUDA_VISIBLE_DEVICES']) > 1:
            model = torch.nn.DataParallel(model)

        model.eval()

        PROBS = []
        with torch.no_grad():
            for (data) in tqdm(test_loader):

                if args.use_meta:
                    data, meta = data
                    data, meta = data.to(device), meta.to(device)
                    probs = torch.zeros((data.shape[0], args.out_dim)).to(device)  # batch x label
                    for I in range(args.n_test):  # flipping images 8 times
                        l = model(get_trans(data, I), meta)
                        probs += l.softmax(1)
                else:
                    data = data.to(device)
                    probs = torch.zeros((data.shape[0], args.out_dim)).to(device)
                    for I in range(args.n_test):
                        l = model(get_trans(data, I))
                        probs += l.softmax(1)

                probs /= args.n_test  # average over all the flips

                PROBS.append(probs.detach().cpu())  # append prediction for this batch

        PROBS = torch.cat(PROBS).numpy()  # total_obs_size x num_labels

    df_test['target'] = PROBS[:, mel_idx]  # takes the @mel_idx column
    df_test[['image_name', 'target']].to_csv(os.path.join(args.sub_dir, f'sub_{args.kernel_type}_{args.eval}.csv'), index=False)
Example #23
0
def main(fold):
    """Train the Shopee embedding model on one fold with an adaptive-margin loss.

    Splits the pre-folded training CSV into train/valid for `fold`, builds the
    loaders, computes per-class adaptive margins, trains with warmup-then-cosine
    LR, logs per-epoch losses to CSV, and checkpoints the best validation loss.

    Args:
        fold: index of the fold to hold out for validation.
    """
    COMPUTE_CV = True  # NOTE(review): unused in this function
    DEVICE = "cuda" if torch.cuda.is_available() else "cpu"

    data = pd.read_csv('../train_fold.csv')
    data['filepath'] = data['image'].apply(
        lambda x: os.path.join('../', 'train_images', x))

    # Encode label groups to contiguous integer class ids.
    target_encoder = LabelEncoder()

    data['label_group'] = target_encoder.fit_transform(data['label_group'])

    train = data[data['fold'] != fold].reset_index(drop=True)
    valid = data[data['fold'] == fold].reset_index(drop=True)
    # Defining DataSet
    train_dataset = ShopeeDataset(
        csv=train,
        transforms=get_transforms(img_size=DIM[0], trans_type='train'),
        mode='train',
    )

    valid_dataset = ShopeeDataset(
        csv=valid,
        transforms=get_transforms(img_size=DIM[0], trans_type='valid'),
        mode='train',
    )

    train_loader = torch.utils.data.DataLoader(train_dataset,
                                               batch_size=TRAIN_BATCH_SIZE,
                                               pin_memory=True,
                                               drop_last=True,
                                               num_workers=NUM_WORKERS)

    valid_loader = torch.utils.data.DataLoader(
        valid_dataset,
        batch_size=VALID_BATCH_SIZE,
        num_workers=NUM_WORKERS,
        shuffle=False,
        pin_memory=True,
        drop_last=False,
    )
    # get adaptive margin: rarer classes receive a larger margin, scaled into [0.05, 0.5]
    tmp = np.sqrt(
        1 / np.sqrt(data['label_group'].value_counts().sort_index().values))
    margins = (tmp - tmp.min()) / (tmp.max() - tmp.min()) * 0.45 + 0.05

    # Defining Model for specific fold
    if model_version == "V1":
        model = ShopeeNet(**model_params)
    elif model_version == "V2":
        model = ShopeeNetV2(**model_params)
    else:
        model = ShopeeNetV3(**model_params)
    model.to(DEVICE)

    def fetch_loss(loss_type=None):
        # Select the training criterion; default is plain cross-entropy.
        # NOTE(review): an unrecognized loss_type would leave `loss` unbound.
        if loss_type is None:
            loss = nn.CrossEntropyLoss()
        elif loss_type == 'arcface':
            loss = ArcFaceLossAdaptiveMargin(margins=margins,
                                             out_dim=model_params['n_classes'],
                                             s=80)
        return loss

    criterion = fetch_loss()
    criterion.to(DEVICE)

    # Warmup for one epoch (x10 multiplier), then cosine annealing with restarts.
    optimizer = torch.optim.Adam(model.parameters(),
                                 lr=scheduler_params['lr_start'])
    scheduler_cosine = torch.optim.lr_scheduler.CosineAnnealingWarmRestarts(
        optimizer, EPOCHS)
    scheduler_warmup = GradualWarmupSchedulerV2(
        optimizer,
        multiplier=10,
        total_epoch=1,
        after_scheduler=scheduler_cosine)

    #Defining LR SChe
    # No per-step scheduler is passed into train_fn; epoch stepping happens below.
    scheduler = None

    # THE ENGINE LOOP
    best_loss = 2 << 13  # 16384 — effectively "infinity" for the first comparison

    for epoch in range(EPOCHS):
        scheduler_warmup.step(epoch - 1)
        train_loss = train_fn(train_loader,
                              model,
                              criterion,
                              optimizer,
                              DEVICE,
                              epoch_th=epoch,
                              scheduler=scheduler)
        valid_loss = eval_fn(valid_loader, model, criterion, DEVICE)

        print(
            'Fold {} | Epoch {}/{} | Training | Loss: {:.4f} | Valid | Loss: {:.4f}'
            .format(fold, epoch + 1, EPOCHS, train_loss['loss'].avg,
                    valid_loss['loss'].avg))
        with open(log_name, 'a') as csvfile:
            writer = csv.writer(csvfile)
            writer.writerow([
                fold, epoch + 1, train_loss['loss'].avg, valid_loss['loss'].avg
            ])

        # Checkpoint whenever validation loss improves.
        if valid_loss['loss'].avg < best_loss:
            best_loss = valid_loss['loss'].avg
            torch.save(
                model.state_dict(),
                os.path.join(
                    "./models", model_name,
                    f'{model_version}_fold_{fold}_model_{model_params["model_name"]}_IMG_SIZE_{DIM[0]}_{model_params["loss_module"]}.bin'
                ))
            print('best model found for epoch {}'.format(epoch))
Example #24
0
import numpy as np
from tqdm import tqdm
from scipy import stats
import ttach as tta

from dataset import TestDataset, get_transforms, get_tta_transforms
from utils import get_kfold_model, get_model_by_name, Config

# ---------------------------------------------------------------------------
# Inference-time setup: parse CLI options, build the test DataLoader, and load
# one model per k-fold checkpoint (optionally wrapped for test-time augmentation).
# NOTE(review): ArgumentParser, os, torch and DataLoader are not imported in
# this chunk — presumably brought in earlier in the file; verify.
parser = ArgumentParser()
parser.add_argument("--weights", type=str, default="../runs/exp12/")  # directory holding per-fold checkpoints
parser.add_argument("--tta", type=str, default='no')  # 'yes' enables test-time augmentation
opt = parser.parse_args()
cfg = Config()

# Test-split description CSV and the deterministic (validation-style) transform.
desc_test = os.path.join(cfg.ds_folder, 'test.csv')
_, transform_test = get_transforms(cfg.img_size)
valid_data = TestDataset(desc_test,
                         data_folder=os.path.join(cfg.ds_folder, "test"),
                         transform=transform_test)
test_loader = DataLoader(dataset=valid_data, batch_size=cfg.bs, shuffle=False)

# Load every fold checkpoint into its own model instance for ensembling.
models = []
for path in get_kfold_model(opt.weights):
    model = get_model_by_name(cfg.model_name)
    model.load_state_dict(torch.load(path)['state_dict'])
    if opt.tta == 'yes':
        # Average predictions over the TTA transform set.
        model = tta.ClassificationTTAWrapper(model,
                                             get_tta_transforms(),
                                             merge_mode='mean')
    models.append(model)
Example #25
0
def main():
    """Train one fold of the landmark model.

    Builds the (demo-subset) dataframe, datasets and loaders, an ArcFace loss
    with per-class adaptive margins, an Adam optimizer with warmup + cosine
    annealing, then runs the train/validation loop while checkpointing the
    best model by the GAP metric and a final snapshot at the end.

    Relies on module-level ``args``, ``ModelClass``, ``get_df``,
    ``get_transforms``, ``LandmarkDataset``, ``ArcFaceLossAdaptiveMargin``,
    ``GradualWarmupSchedulerV2``, ``train_epoch`` and ``val_epoch``.
    """
    torch.cuda.set_device(1)  # hard-coded primary GPU; must match device_ids below

    # get dataframe
    df, out_dim = get_df(args.kernel_type, args.data_dir, args.train_step)
    print(f"out_dim = {out_dim}")

    # Adaptive ArcFace margins: rarer classes get a larger margin
    # (inverse-frequency heuristic rescaled into [0.05, 0.5]).
    tmp = np.sqrt(1 / np.sqrt(df['landmark_id'].value_counts().sort_index().values))
    margins = (tmp - tmp.min()) / (tmp.max() - tmp.min()) * 0.45 + 0.05

    # get augmentations
    transforms_train, transforms_val = get_transforms(args.image_size)
    print("\ndata augmentation is done!\n")

    # Extract images in folder 0 as a small demo subset.
    df_demo_0 = df[df['filepath'].str.startswith('/mnt/data/sjx/CS498_DL_Project/data/train/0/0')]
    df_demo_1 = df[df['filepath'].str.startswith('/mnt/data/sjx/CS498_DL_Project/data/train/0/1')]
    df_demo_2 = df[df['filepath'].str.startswith('/mnt/data/sjx/CS498_DL_Project/data/train/0/2')]
    df_demo_3 = df[df['filepath'].str.startswith('/mnt/data/sjx/CS498_DL_Project/data/train/0/3')]
    df = df_demo_0.append([df_demo_1, df_demo_2, df_demo_3])

    # Train/valid split by fold; keep only every 15th validation row to speed
    # up per-epoch evaluation.
    df_train = df[df['fold'] != args.fold]
    df_valid = df[df['fold'] == args.fold].reset_index(drop=True).query("index % 15==0")

    dataset_train = LandmarkDataset(df_train, 'train', 'train', transform=transforms_train)
    dataset_valid = LandmarkDataset(df_valid, 'train', 'val', transform=transforms_val)
    valid_loader = torch.utils.data.DataLoader(dataset_valid, batch_size=args.batch_size, num_workers=args.num_workers, drop_last=True)

    print("dataset has been prepared!\n")

    # model: data-parallel over the two hard-coded GPUs.
    print(torch.cuda.current_device())
    model = ModelClass(args.enet_type, out_dim=out_dim)
    # BUG FIX: "cuda:1, 3" is not a valid torch device string and raises at
    # runtime; the module must live on the first device of device_ids.
    model = nn.DataParallel(model, device_ids=[1, 3]).to("cuda:1")

    # loss func
    def criterion(logits_m, target):
        """ArcFace loss with the per-class adaptive margins computed above."""
        arc = ArcFaceLossAdaptiveMargin(margins=margins, s=80)
        loss_m = arc(logits_m, target, out_dim)
        return loss_m

    # optimizer
    optimizer = optim.Adam(model.parameters(), lr=args.init_lr)

    # load pretrained
    if len(args.load_from) > 0:
        checkpoint = torch.load(args.load_from, map_location=lambda storage, loc: storage.cuda(3))
        state_dict = checkpoint['model_state_dict']
        # Strip a possible 'module.' prefix left by DataParallel checkpoints.
        state_dict = {k[7:] if k.startswith('module.') else k: state_dict[k] for k in state_dict.keys()}
        if args.train_step == 1:
            # The classifier head's shape depends on out_dim; drop it and
            # load the remaining weights non-strictly.
            del state_dict['metric_classify.weight']
            model.load_state_dict(state_dict, strict=False)
        else:
            model.load_state_dict(state_dict, strict=True)
        del checkpoint, state_dict
        torch.cuda.empty_cache()
        import gc
        gc.collect()

    # lr scheduler: one epoch of x10 warmup, then cosine annealing restarts.
    scheduler_cosine = torch.optim.lr_scheduler.CosineAnnealingWarmRestarts(optimizer, args.n_epochs - 1)
    scheduler_warmup = GradualWarmupSchedulerV2(optimizer, multiplier=10, total_epoch=1, after_scheduler=scheduler_cosine)

    # train & valid loop
    gap_m_max = 0.
    model_file = os.path.join(args.model_dir, f'{args.kernel_type}_fold{args.fold}.pth')
    for epoch in range(args.start_from_epoch, args.n_epochs + 1):

        print(time.ctime(), 'Epoch:', epoch)
        train_loader = torch.utils.data.DataLoader(dataset_train, batch_size=args.batch_size,
                                                   num_workers=args.num_workers,
                                                   shuffle=True, drop_last=True)

        train_loss = train_epoch(model, train_loader, optimizer, criterion)
        val_loss, acc_m, gap_m = val_epoch(model, valid_loader, criterion)
        scheduler_warmup.step(epoch - 1)
        if args.local_rank == 0:
            content = time.ctime() + ' ' + f'Fold {args.fold}, Epoch {epoch}, lr: {optimizer.param_groups[0]["lr"]:.7f}, train loss: {np.mean(train_loss):.5f}, valid loss: {(val_loss):.5f}, acc_m: {(acc_m):.6f}, gap_m: {(gap_m):.6f}.'
            print(content)
            with open(os.path.join(args.log_dir, f'{args.kernel_type}.txt'), 'a') as appender:
                appender.write(content + '\n')

            # BUG FIX: the checkpoint was previously overwritten every epoch
            # and gap_m_max updated unconditionally, so the "best" file was
            # really just the latest. Save only on an actual improvement,
            # matching what the log message claims.
            if gap_m > gap_m_max:
                print('gap_m_max ({:.6f} --> {:.6f}). Saving model ...'.format(gap_m_max, gap_m))
                torch.save({
                            'epoch': epoch,
                            'model_state_dict': model.state_dict(),
                            'optimizer_state_dict': optimizer.state_dict(),
                            }, model_file)
                gap_m_max = gap_m

        if epoch == args.stop_at_epoch:
            print(time.ctime(), 'Training Finished!')
            break

    # Always save a final snapshot regardless of the metric.
    torch.save({
        'epoch': epoch,
        'model_state_dict': model.state_dict(),
        'optimizer_state_dict': optimizer.state_dict(),
    }, os.path.join(args.model_dir, f'{args.kernel_type}_fold{args.fold}_final.pth'))
Example #26
0
def main():
    """Out-of-fold evaluation on the stone dataset.

    For each fold, loads the requested checkpoint ('best', 'best_no_ext' or
    'final'), evaluates the fold's validation split, then aggregates all
    folds to report accuracy plus raw and per-fold-rank AUCs, and persists
    the OOF predictions as both .npy and .csv.

    Relies on module-level ``args``, ``device``, ``ModelClass`` and helpers
    imported at file scope (``get_df_stone``, ``get_transforms``,
    ``MMC_ClassificationDataset``, ``val_epoch_stonedata``, ``summary``).
    """
    # stone dataset loader: dataset.get_df_stone
    df_train, df_test, meta_features, n_meta_features, target_idx = get_df_stone(
        k_fold=args.k_fold,
        out_dim=args.out_dim,
        data_dir=args.data_dir,
        data_folder=args.data_folder,
        use_meta=args.use_meta,
        use_ext=args.use_ext)

    transforms_train, transforms_val = get_transforms(args.image_size)

    LOGITS = []
    PROBS = []
    TARGETS = []
    dfs = []

    folds = range(args.k_fold)
    for fold in folds:
        print(f'Evaluate data fold{str(fold)}')
        df_valid = df_train[df_train['fold'] == fold]

        # Batch norm can fail on a batch of size 1, so drop one sample if the
        # last batch would be a singleton.
        if len(df_valid) % args.batch_size == 1:
            df_valid = df_valid.sample(len(df_valid) - 1)

        if args.DEBUG:
            df_valid = pd.concat([
                df_valid[df_valid['target'] == target_idx].sample(
                    args.batch_size * 3),
                df_valid[df_valid['target'] != target_idx].sample(
                    args.batch_size * 3)
            ])

        dataset_valid = MMC_ClassificationDataset(df_valid,
                                                  'valid',
                                                  meta_features,
                                                  transform=transforms_val)
        valid_loader = torch.utils.data.DataLoader(
            dataset_valid,
            batch_size=args.batch_size,
            num_workers=args.num_workers)

        # BUG FIX: the 'final' branch was a separate `if`, so an unrecognized
        # --eval value left model_file unbound (NameError later). Use a full
        # elif chain and fail fast on unknown values.
        if args.eval == 'best':
            model_file = os.path.join(
                args.model_dir, f'{args.kernel_type}_best_fold{fold}.pth')
        elif args.eval == 'best_no_ext':
            model_file = os.path.join(
                args.model_dir,
                f'{args.kernel_type}_best_no_ext_fold{fold}.pth')
        elif args.eval == 'final':
            model_file = os.path.join(
                args.model_dir, f'{args.kernel_type}_final_fold{fold}.pth')
        else:
            raise ValueError(f'unknown --eval value: {args.eval}')

        model = ModelClass(
            args.enet_type,
            n_meta_features=n_meta_features,
            n_meta_dim=[int(nd) for nd in args.n_meta_dim.split(',')],
            out_dim=args.out_dim)
        model = model.to(device)

        # model summary
        if args.use_meta:
            pass
            # needs verification before enabling:
            # summary(model, [(3, args.image_size, args.image_size), n_meta_features])
        else:
            if fold == 0:  # print only once
                summary(model, (3, args.image_size, args.image_size))

        try:  # checkpoint saved from a single-GPU model
            model.load_state_dict(torch.load(model_file), strict=True)
        except RuntimeError:  # multi-GPU checkpoint: strip the 'module.' prefix
            state_dict = torch.load(model_file)
            state_dict = {
                k[7:] if k.startswith('module.') else k: state_dict[k]
                for k in state_dict.keys()
            }
            model.load_state_dict(state_dict, strict=True)

        if len(os.environ['CUDA_VISIBLE_DEVICES']) > 1:
            model = torch.nn.DataParallel(model)

        model.eval()

        # stone-data evaluation function: val_epoch_stonedata
        this_LOGITS, this_PROBS, this_TARGETS = val_epoch_stonedata(
            model,
            valid_loader,
            target_idx,
            is_ext=df_valid['is_ext'].values,
            n_test=8,
            get_output=True)
        LOGITS.append(this_LOGITS)
        PROBS.append(this_PROBS)
        TARGETS.append(this_TARGETS)
        dfs.append(df_valid)

    dfs = pd.concat(dfs).reset_index(drop=True)
    dfs['pred'] = np.concatenate(PROBS).squeeze()[:, target_idx]

    Accuracy = (round(dfs['pred']) == dfs['target']).mean() * 100.
    auc_all_raw = roc_auc_score(dfs['target'] == target_idx, dfs['pred'])

    # Rank-normalize predictions within each fold before the rank AUC.
    dfs2 = dfs.copy()
    for i in folds:
        dfs2.loc[dfs2['fold'] == i, 'pred'] = dfs2.loc[dfs2['fold'] == i,
                                                       'pred'].rank(pct=True)
    auc_all_rank = roc_auc_score(dfs2['target'] == target_idx, dfs2['pred'])

    if args.use_ext:
        # When external data was used, also evaluate with it excluded.
        dfs3 = dfs[dfs.is_ext == 0].copy().reset_index(drop=True)
        auc_no_ext_raw = roc_auc_score(dfs3['target'] == target_idx,
                                       dfs3['pred'])

        for i in folds:
            dfs3.loc[dfs3['fold'] == i,
                     'pred'] = dfs3.loc[dfs3['fold'] == i,
                                        'pred'].rank(pct=True)
        auc_no_ext_rank = roc_auc_score(dfs3['target'] == target_idx,
                                        dfs3['pred'])

        content = time.ctime() + ' ' + f'Eval {args.eval}:\nAccuracy : {Accuracy:.5f}\n' \
                                       f'auc_all_raw : {auc_all_raw:.5f}\nauc_all_rank : {auc_all_rank:.5f}\n' \
                                       f'auc_no_ext_raw : {auc_no_ext_raw:.5f}\nauc_no_ext_rank : {auc_no_ext_rank:.5f}\n'
    else:
        content = time.ctime() + ' ' + f'Eval {args.eval}:\nAccuracy : {Accuracy:.5f}\n' \
                  f'AUC_all_raw : {auc_all_raw:.5f}\nAUC_all_rank : {auc_all_rank:.5f}\n'

    # Append the results to the end of the log file.
    print(content)
    with open(os.path.join(args.log_dir, f'log_{args.kernel_type}.txt'),
              'a') as appender:
        appender.write(content + '\n')

    np.save(
        os.path.join(args.oof_dir, f'{args.kernel_type}_{args.eval}_oof.npy'),
        dfs['pred'].values)

    # Save the OOF predictions as a csv.
    dfs[['filepath', 'patient_id', 'target', 'pred'
         ]].to_csv(os.path.join(args.oof_dir,
                                f'{args.kernel_type}_{args.eval}_oof.csv'),
                   index=True)
Example #27
0
def predict_image(image_path):
    """Run the 5-fold melanoma ensemble on a single image.

    Each fold's model averages logits and softmax probabilities over
    ``n_test`` test-time-augmentation variants (via ``get_trans``); the
    per-fold melanoma probabilities are then averaged into one score.

    Args:
        image_path: path to the image to classify.

    Returns:
        The ensemble melanoma probability, rounded to 8 decimal places.
    """
    OUTPUTS = []
    n_test = 8  # number of TTA variants fed through get_trans()
    # Only the deterministic validation transform is needed for inference.
    _, transforms_val = get_transforms(config.image_size)

    dataset_test = MelanomaDataset(None,
                                   'test',
                                   None,
                                   transform=transforms_val,
                                   image_path=image_path)
    test_loader = torch.utils.data.DataLoader(dataset_test,
                                              batch_size=1,
                                              num_workers=0)

    for fold in range(5):
        model_file = os.path.join(
            config.model_dir, f'{config.kernel_type}_best_o_fold{fold}.pth')

        ModelClass = Effnet_Melanoma
        model = ModelClass(config.enet_type, out_dim=config.out_dim)
        model = model.to(config.device)

        try:  # checkpoint saved from a single-GPU model
            model.load_state_dict(torch.load(model_file,
                                             map_location=config.device),
                                  strict=True)
        except RuntimeError:  # multi-GPU checkpoint: strip the 'module.' prefix
            state_dict = torch.load(model_file, map_location=config.device)
            state_dict = {
                k[7:] if k.startswith('module.') else k: state_dict[k]
                for k in state_dict.keys()
            }
            model.load_state_dict(state_dict, strict=True)

        model.eval()

        LOGITS = []
        PROBS = []

        with torch.no_grad():
            for (data) in tqdm(test_loader):

                if config.use_meta:
                    data, meta = data
                    data, meta = data.to(config.device), meta.to(config.device)
                    logits = torch.zeros(
                        (data.shape[0], config.out_dim)).to(config.device)
                    probs = torch.zeros(
                        (data.shape[0], config.out_dim)).to(config.device)
                    for I in range(n_test):
                        l = model(get_trans(data, I), meta)
                        logits += l
                        probs += l.softmax(1)
                else:
                    data = data.to(config.device)
                    logits = torch.zeros(
                        (data.shape[0], config.out_dim)).to(config.device)
                    probs = torch.zeros(
                        (data.shape[0], config.out_dim)).to(config.device)
                    for I in range(n_test):
                        l = model(get_trans(data, I))
                        logits += l
                        probs += l.softmax(1)
                # Average the accumulated TTA predictions.
                logits /= n_test
                probs /= n_test

                LOGITS.append(logits.detach().cpu())
                PROBS.append(probs.detach().cpu())

        LOGITS = torch.cat(LOGITS).numpy()
        PROBS = torch.cat(PROBS).numpy()

        OUTPUTS.append(PROBS[:, config.mel_idx])

    # If you are predicting on your own moles, you don't need to rank the
    # probability — a plain average over folds is used here.
    pred = np.zeros(OUTPUTS[0].shape[0])
    for probs in OUTPUTS:
        pred += probs
    pred /= len(OUTPUTS)

    return round(pred[0], 8)
    def __init__(self,
                 path,
                 hparams=None,
                 teacher=None,
                 lr=1e-3,
                 bs=128,
                 imsize=128,
                 reg=True,
                 n_patches=9,
                 is_stack=False,
                 epochs=None,
                 norm_output=True,
                 ordinal=False):
        """Distillation student: picks an EffNet patch backbone and loss,
        loads a frozen teacher from checkpoint, and records hyperparameters.

        NOTE(review): the ``hparams`` argument is accepted but never used in
        this constructor; ``self.hparams`` is rebuilt from scratch below.
        """
        super().__init__()
        self.path = path
        self.lr = lr
        self.bs = bs
        self.imsize = imsize
        self.reg = reg
        self.n_patches = n_patches
        self.is_stack = is_stack
        self.epochs = epochs
        self.norm_output = norm_output
        self.ordinal = ordinal

        # Backbone choice is orthogonal to the task: stacked patches use
        # EffNetPatch, otherwise the patches are concatenated.
        net_cls = EffNetPatch if is_stack else EffNetConcat

        if reg:
            # Regression head: a single score, or 5 ordinal logits.
            out_units = 5 if ordinal else 1
            self.model = net_cls(n=out_units, n_patches=n_patches)
            if ordinal:
                self.loss_fn = nn.BCEWithLogitsLoss()
                # Try Focal Loss
            else:
                # self.loss_fn = F.mse_loss
                self.loss_fn = nn.SmoothL1Loss()
                # self.loss_fn = nn.L1Loss()
        else:
            # Classification head over 6 classes.
            self.model = net_cls(n=6, n_patches=n_patches)
            # self.loss_fn = LabelSmoothingLoss(6, 0.1)
            self.loss_fn = F.cross_entropy

        # Load the teacher in eval mode and freeze all of its parameters.
        self.teacher = BaselineModel.load_from_checkpoint(
            teacher,
            map_location='cuda:0',
            path=path,
            bs=bs,
            imsize=imsize,
            lr=lr,
            n_patches=n_patches,
            reg=True,
            is_stack=is_stack,
            norm_output=norm_output,
            ordinal=ordinal).eval()
        self.teacher.apply(lambda m: m.requires_grad_(False))

        # Log the augmentation pipelines plus the training hyperparameters.
        local_tfms = get_transforms(imsize,
                                    train=True,
                                    local=True,
                                    is_stack=is_stack,
                                    n_patches=n_patches)
        global_tfms = get_transforms(imsize,
                                     train=True,
                                     local=False,
                                     is_stack=is_stack,
                                     n_patches=n_patches)
        self.hparams = {
            'lr': lr,
            'bs': bs,
            'patch_size': imsize,
            'tfms': {'local': str(local_tfms), 'global': str(global_tfms)},
            'n_patches': n_patches,
            'loss_fn': self.loss_fn.__class__.__name__,
            'is_stack': is_stack,
            'path': str(self.path),
            'ordinal': ordinal,
            'norm_target': norm_output
        }
Example #29
0
def main():
    """Predict the test set with the (currently single-fold) melanoma model
    and write a submission CSV of raw averaged softmax probabilities.

    Relies on module-level ``args``, ``device``, ``ModelClass`` and helpers
    imported at file scope (``get_df``, ``get_transforms``,
    ``MelanomaDataset``, ``get_trans``).
    """
    df, df_test, meta_features, n_meta_features, mel_idx = get_df(
        args.kernel_type, args.out_dim, args.data_dir_2020, args.data_dir_2019,
        args.data_dir_2018, args.use_meta)

    transforms_train, transforms_val = get_transforms(args.image_size)

    if args.DEBUG:
        df_test = df_test.sample(args.batch_size * 3)
    dataset_test = MelanomaDataset(df_test,
                                   'test',
                                   meta_features,
                                   transform=transforms_val)
    test_loader = torch.utils.data.DataLoader(dataset_test,
                                              batch_size=args.batch_size,
                                              num_workers=args.num_workers)

    # load model(s); currently a single fold
    models = []
    for fold in range(1):

        # BUG FIX: the 'final' branch was a separate `if`, so an unrecognized
        # --eval value left model_file unbound (NameError later). Use a full
        # elif chain and fail fast on unknown values.
        if args.eval == 'best':
            model_file = os.path.join(
                args.model_dir, f'{args.kernel_type}_best_fold{fold}.pth')
        elif args.eval == 'best_20':
            model_file = os.path.join(
                args.model_dir, f'{args.kernel_type}_best_20_fold{fold}.pth')
        elif args.eval == 'final':
            model_file = os.path.join(
                args.model_dir, f'{args.kernel_type}_final_fold{fold}.pth')
        else:
            raise ValueError(f'unknown --eval value: {args.eval}')

        model = ModelClass(
            args.enet_type,
            n_meta_features=n_meta_features,
            n_meta_dim=[int(nd) for nd in args.n_meta_dim.split(',')],
            out_dim=args.out_dim,
            pretrained=True,
            # meta_model=args.meta_model
        )
        model = model.to(device)

        try:  # checkpoint saved from a single-GPU model
            model.load_state_dict(torch.load(model_file), strict=True)
        except RuntimeError:  # multi-GPU checkpoint: strip the 'module.' prefix
            state_dict = torch.load(model_file)
            state_dict = {
                k[7:] if k.startswith('module.') else k: state_dict[k]
                for k in state_dict.keys()
            }
            model.load_state_dict(state_dict, strict=True)

        if len(os.environ['CUDA_VISIBLE_DEVICES']) > 1:
            model = torch.nn.DataParallel(model)

        model.eval()
        models.append(model)

    # predict: average softmax probabilities over all models and TTA variants
    PROBS = []
    with torch.no_grad():
        for (data) in tqdm(test_loader):
            if args.use_meta:
                data, meta = data
                data, meta = data.to(device), meta.to(device)
                probs = torch.zeros((data.shape[0], args.out_dim)).to(device)
                for model in models:
                    for I in range(args.n_test):
                        l = model(get_trans(data, I), meta)
                        probs += l.softmax(1)
            else:
                data = data.to(device)
                probs = torch.zeros((data.shape[0], args.out_dim)).to(device)
                for model in models:
                    for I in range(args.n_test):
                        l = model(get_trans(data, I))
                        probs += l.softmax(1)

            probs /= args.n_test
            probs /= len(models)

            PROBS.append(probs.detach().cpu())

    PROBS = torch.cat(PROBS).numpy()

    # save csv
    df_test['target'] = PROBS[:, mel_idx]
    df_test['image_name'] = df_test['image']
    df_test[['image_name', 'target'
             ]].to_csv(os.path.join(args.sub_dir,
                                    f'sub_{args.kernel_type}_{args.eval}.csv'),
                       index=False)
Example #30
0
def main():
    """Predict the test set with all five fold models, rank-normalize each
    fold's melanoma probabilities, average the ranks, and save a submission.

    Relies on module-level ``args``, ``device``, ``ModelClass`` and helpers
    imported at file scope (``get_df``, ``get_transforms``,
    ``MelanomaDataset``, ``get_trans``).
    """
    df, df_test, meta_features, n_meta_features, mel_idx = get_df(
        args.kernel_type, args.out_dim, args.data_dir, args.data_folder,
        args.use_meta)

    transforms_train, transforms_val = get_transforms(args.image_size)

    if args.DEBUG:
        df_test = df_test.sample(args.batch_size * 3)
    dataset_test = MelanomaDataset(df_test,
                                   'test',
                                   meta_features,
                                   transform=transforms_val)
    test_loader = torch.utils.data.DataLoader(dataset_test,
                                              batch_size=args.batch_size,
                                              num_workers=args.num_workers)

    print(f'\nPredicting test set using {args.enet_type} ...')

    OUTPUTS = []
    for fold in range(5):

        # BUG FIX: the 'final' branch was a separate `if`, so an unrecognized
        # --eval value left model_file unbound (NameError later). Use a full
        # elif chain and fail fast on unknown values.
        if args.eval == 'best':
            model_file = os.path.join(
                args.model_dir, f'{args.kernel_type}_best_fold{fold}.pth')
        elif args.eval == 'best_20':
            model_file = os.path.join(
                args.model_dir, f'{args.kernel_type}_best_20_fold{fold}.pth')
        elif args.eval == 'final':
            model_file = os.path.join(
                args.model_dir, f'{args.kernel_type}_final_fold{fold}.pth')
        else:
            raise ValueError(f'unknown --eval value: {args.eval}')

        model = ModelClass(
            args.enet_type,
            n_meta_features=n_meta_features,
            n_meta_dim=[int(nd) for nd in args.n_meta_dim.split(',')],
            out_dim=args.out_dim)
        model = model.to(device)

        try:  # checkpoint saved from a single-GPU model
            model.load_state_dict(torch.load(model_file), strict=True)
        except RuntimeError:  # multi-GPU checkpoint: strip the 'module.' prefix
            state_dict = torch.load(model_file)
            state_dict = {
                k[7:] if k.startswith('module.') else k: state_dict[k]
                for k in state_dict.keys()
            }
            model.load_state_dict(state_dict, strict=True)

        if len(os.environ['CUDA_VISIBLE_DEVICES']) > 1:
            model = torch.nn.DataParallel(model)

        model.eval()

        PROBS = []

        with torch.no_grad():
            for (data) in tqdm(test_loader):

                if args.use_meta:
                    data, meta = data
                    data, meta = data.to(device), meta.to(device)
                    probs = torch.zeros(
                        (data.shape[0], args.out_dim)).to(device)
                    for I in range(args.n_test):
                        l = model(get_trans(data, I), meta)
                        probs += l.softmax(1)
                else:
                    data = data.to(device)
                    probs = torch.zeros(
                        (data.shape[0], args.out_dim)).to(device)
                    for I in range(args.n_test):
                        l = model(get_trans(data, I))
                        probs += l.softmax(1)

                probs /= args.n_test

                PROBS.append(probs.detach().cpu())

        PROBS = torch.cat(PROBS).numpy()
        OUTPUTS.append(PROBS[:, mel_idx])

    # Rank per fold. (If you are predicting on your own moles, you don't need
    # to rank the probabilities.)
    pred = np.zeros(OUTPUTS[0].shape[0])
    for probs in OUTPUTS:
        pred += pd.Series(probs).rank(pct=True).values
    pred /= len(OUTPUTS)

    df_test['target'] = pred
    df_test[['image_name', 'target'
             ]].to_csv(os.path.join(args.sub_dir,
                                    f'sub_{args.kernel_type}_{args.eval}.csv'),
                       index=False)
    print('\nSaved submission in -> ./subs')