Example No. 1
def get_oof(df_train):
    PROBS = []
    oof = []
    for fold in range(kfold):
        df_valid = df_train[df_train["val_fold"] == fold]
        val_dataset = Albu_Dataset(
            df=df_valid,
            phase="train",
            transforms=Albu_Transform(image_size=image_size),
            aug=val_aug,
            use_meta=use_meta)
        val_loader = DataLoader(dataset=val_dataset,
                                batch_size=batch_size,
                                shuffle=False,
                                num_workers=num_workers)
        model_path_fold_1 = model_path + "_fold{}_1.pth".format(fold)

        n_meta_features = val_dataset.n_meta_features  # shoehorned in via the dataset object
        target_index = val_dataset.target_index  # shoehorned in via the dataset object
        model = Ef_Net(n_meta_features=n_meta_features,
                       out_features=out_features)
        model = model.to(device)
        model.load_state_dict(torch.load(model_path_fold_1))
        model.eval()
        this_PROBS = val_epoch(model,
                               val_loader,
                               is_ext=df_valid['is_ext'].values,
                               n_test=n_val,
                               get_output=True)
        PROBS.append(this_PROBS)
        oof.append(df_valid)
    oof = pd.concat(oof).reset_index(drop=True)
    func5(oof, PROBS, target_index)
    return oof
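get_oof reloads each fold's saved weights and scores only that fold's validation split, so the concatenated frame covers every training row exactly once. A minimal usage sketch, assuming func5 writes the fold probabilities into a hypothetical "pred" column and that the label column is named "target" (neither name is confirmed by the listing):

from sklearn.metrics import roc_auc_score

oof = get_oof(df_train)
# hypothetical column names: "target" for the labels, "pred" for the OOF probabilities
print("OOF roc_auc:", roc_auc_score(oof["target"], oof["pred"]))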
Example No. 2
def main(df_test, imfolder_test):
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    print("device_CPU_GPU:", device)
    predict = torch.zeros((len(df_test), 1),
                          dtype=torch.float32,
                          device=device)
    test_dataset = Albu_Dataset(
        df=df_test,
        imfolder=imfolder_test,
        phase="test",
        transforms=Albu_Transform(image_size=image_size),
        aug=test_aug)
    for fold in range(kfold):
        print('=' * 20, 'Fold', fold, '=' * 20)
        test_loader = DataLoader(dataset=test_dataset,
                                 batch_size=batch_size,
                                 shuffle=False,
                                 num_workers=num_workers)
        model_path_fold = model_path + model_name + "_fold{}.pth".format(fold)
        model = Ef_Net()
        model.load_state_dict(torch.load(model_path_fold))
        model = model.to(device)
        model.eval()
        with torch.no_grad():
            tta_predict = torch.zeros((len(test_dataset), 1),
                                      dtype=torch.float32,
                                      device=device)
            for _ in range(TTA):
                for i, x_test in enumerate(test_loader):
                    # the loader already yields tensors; just move them to the device
                    x_test = x_test.to(device, dtype=torch.float32)
                    output_test = model(x_test)
                    output_test = torch.sigmoid(output_test)
                    tta_predict[i * test_loader.batch_size:i *
                                test_loader.batch_size +
                                x_test.shape[0]] += output_test
            predict += tta_predict / TTA
    predict /= kfold
    return predict
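main averages the sigmoid outputs over the TTA passes within each fold and then over the folds, returning one probability per test row on the device. A minimal usage sketch, assuming a SIIM-ISIC style layout where df_test carries an "image_name" column (both the column name and the folder path are assumptions):

preds = main(df_test, imfolder_test="./data/test")   # tensor of shape (len(df_test), 1)
df_test["target"] = preds.cpu().numpy().squeeze()
df_test[["image_name", "target"]].to_csv("submission.csv", index=False)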
Example No. 3
def get_predict(df_test):
    print("device_CPU_GPU:", device)
    predict = torch.zeros((len(df_test), 1), dtype=torch.float32, device=device) 
    OUTPUTS = []
    test_dataset = Albu_Dataset(df=df_test, phase="test", transforms=Albu_Transform(image_size=image_size), aug=test_aug, use_meta=use_meta)
    # the contents of test_dataset do not change between folds, so it is fine to build it once here
    test_loader = DataLoader(dataset=test_dataset, batch_size=batch_size, shuffle=False, num_workers=num_workers)

    for fold in range(kfold):
        print('=' * 20, 'Fold', fold, '=' * 20)
        model_path_fold_1 = model_path + "_fold{}_1.pth".format(fold)
        print(model_path_fold_1)
        n_meta_features = test_dataset.n_meta_features  # shoehorned in via the dataset object
        target_index = test_dataset.target_index  # shoehorned in via the dataset object
        model = Ef_Net(n_meta_features=n_meta_features, out_features=out_features)
        model = model.to(device)
        model.load_state_dict(torch.load(model_path_fold_1))
        model.eval()

        LOGITS = []
        PROBS = []
        bar = tqdm(test_loader, position=0, leave=True)
        with torch.no_grad():
            for data in bar:
                if use_meta:
                    data, meta = data
                    data, meta = data.to(device), meta.to(device)
                    logits = torch.zeros((data.shape[0], out_features)).to(device)
                    probs = torch.zeros((data.shape[0], out_features)).to(device)
                    for I in range(TTA):
                        l = model(get_trans(data, I), meta)
                        logits += l
                        probs = func1(probs,l)
                else:
                    data = data.to(device)
                    logits = torch.zeros((data.shape[0], out_features)).to(device)
                    probs = torch.zeros((data.shape[0], out_features)).to(device)
                    for I in range(TTA):
                        l = model(get_trans(data, I))
                        logits += l
                        probs = func1(probs,l)
                logits /= TTA
                probs /= TTA
        
                LOGITS.append(logits.detach().cpu())
                PROBS.append(probs.detach().cpu())
                bar.set_description("get_predict")

        LOGITS = torch.cat(LOGITS).numpy()
        PROBS = torch.cat(PROBS).numpy()
        func6(OUTPUTS,PROBS,target_index)
    pred = np.zeros(OUTPUTS[0].shape[0])
    for probs in OUTPUTS:
      probs = np.squeeze(probs)
      # whether or not to convert to percentile rank
      if False:
        pred += pd.Series(probs).rank(pct=True).values
      else:
        pred += pd.Series(probs).values
    pred /= len(OUTPUTS)
    df_test['target'] = pred
    return df_test
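get_predict relies on a get_trans(data, I) helper that is not shown in the listing; in TTA loops of this kind it typically applies a deterministic flip or transpose selected by the index I. A minimal sketch of such a helper (an assumption, not the original implementation):

def get_trans(img, I):
    # deterministic test-time augmentation on an (N, C, H, W) batch, selected by I
    if I >= 4:
        img = img.transpose(2, 3)
    if I % 4 == 1:
        img = img.flip(2)
    elif I % 4 == 2:
        img = img.flip(3)
    elif I % 4 == 3:
        img = img.flip(2).flip(3)
    return img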
Example No. 4
def main(df_train, df_test):
    # set up the logger
    logger = setup_logger(LOG_DIR, LOG_NAME)
    config.log_config(logger)
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    logger.info("device_CPU_GPU:{}".format(device))
    oof = np.zeros((len(df_train), 1))  # Out Of Fold predictions
    preds = torch.zeros((len(df_test), 1), dtype=torch.float32, device=device)
    skf = StratifiedKFold(n_splits=kfold, shuffle=True, random_state=1)

    # skf (n_splits=kfold) yields the train/val index arrays for each fold
    for fold, (train_idx, val_idx) in enumerate(
            skf.split(X=np.zeros(len(df_train)), y=df_train[target])):
        logger.info("{0}Fold{1}{2}".format("=" * 20, fold, "=" * 20))
        model_path_fold = model_path + model_name + "_fold{}.pth".format(
            fold)  # Path and filename to save model to
        best_val = 0  # Best validation score within this fold
        patience = es_patience  # Current patience counter

        # set the loss function
        criterion = nn.BCEWithLogitsLoss()
        logger.info("criterion:{}".format(str(criterion)))

        # load the model class
        model = Ef_Net()
        model = model.to(device)
        logger.info("device_GPU_True:{}".format(
            next(model.parameters()).is_cuda))

        # set up the optimizer and scheduler
        optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
        if USE_AMP:
            model, optimizer = amp.initialize(model, optimizer, opt_level="O1")
        scheduler = ReduceLROnPlateau(optimizer=optimizer,
                                      mode='max',
                                      patience=1,
                                      verbose=True,
                                      factor=0.2)

        # create the dataset instances
        train_dataset = Albu_Dataset(
            df=df_train.iloc[train_idx].reset_index(drop=True),
            phase="train",
            transforms=Albu_Transform(image_size=image_size),
            aug=train_aug)
        val_dataset = Albu_Dataset(
            df=df_train.iloc[val_idx].reset_index(drop=True),
            phase="train",
            transforms=Albu_Transform(image_size=image_size),
            aug=val_aug)
        # wrap them in data loaders
        train_loader = DataLoader(dataset=train_dataset,
                                  batch_size=batch_size,
                                  shuffle=True,
                                  num_workers=num_workers)
        val_loader = DataLoader(dataset=val_dataset,
                                batch_size=batch_size,
                                shuffle=False,
                                num_workers=num_workers)

        # loop 2: per epoch
        for epoch in range(epochs):
            start_time = time.time()
            # initialise counters
            correct = 0
            epoch_loss = 0
            # switch to training mode
            model.train()
            # loop 3: per mini-batch (per loader step)
            for x, y in train_loader:
                # print(x.dtype)
                # move the input x and the label y onto the device
                # x = torch.tensor(x, device=device, dtype=torch.float32)
                x = x.to(device)
                # print("is_cuda_x",x.is_cuda)
                # print(x.dtype)
                # y = torch.tensor(y, device=device, dtype=torch.float32)
                y = y.to(device)
                # print("is_cuda_y",y.is_cuda)

                # forward pass
                optimizer.zero_grad()
                output = model(x)
                # compute the loss
                # for multi-class classification, cast y to long first:
                # y = y.long()
                loss = criterion(output, y.unsqueeze(1))
                # loss = criterion(output, y)

                # backward pass
                if not USE_AMP:
                    loss.backward()
                else:
                    with amp.scale_loss(loss, optimizer) as scaled_loss:
                        scaled_loss.backward()

                optimizer.step()

                # prediction: whether round() is needed here is debatable
                pred = torch.round(torch.sigmoid(output))
                # pred = torch.round(output)
                # pred = output.softmax(1)

                correct += (pred.cpu() == y.cpu().unsqueeze(1)).sum().item(
                )  # tracking number of correctly predicted samples
                epoch_loss += loss.item()
            train_acc = correct / len(train_idx)

            # switch to evaluation mode for validation
            model.eval()  # switch model to the evaluation mode
            val_preds = torch.zeros((len(val_idx), 1),
                                    dtype=torch.float32,
                                    device=device)
            with torch.no_grad(
            ):  # Do not calculate gradient since we are only predicting
                # Predicting on validation set
                for j, (x_val, y_val) in enumerate(val_loader):
                    # the loader already yields tensors; just move them to the device
                    x_val = x_val.to(device, dtype=torch.float32)
                    y_val = y_val.to(device, dtype=torch.float32)
                    output_val = model(x_val)
                    val_pred = torch.sigmoid(output_val)
                    # val_pred = output_val
                    # val_pred = output_val.softmax(1)

                    val_preds[j *
                              val_loader.batch_size:j * val_loader.batch_size +
                              x_val.shape[0]] = val_pred
                val_acc = accuracy_score(df_train.iloc[val_idx][target].values,
                                         torch.round(val_preds.cpu()))
                val_roc = roc_auc_score(df_train.iloc[val_idx][target].values,
                                        val_preds.cpu())

                # log the epoch metrics
                logger.info(
                    'Epoch {:03}: | Loss: {:.3f} | Train acc: {:.3f} | Val acc: {:.3f} | Val roc_auc: {:.3f} | Training time: {}'
                    .format(
                        epoch + 1, epoch_loss, train_acc, val_acc, val_roc,
                        str(
                            datetime.timedelta(seconds=time.time() -
                                               start_time))[:7]))

                # the optimizer steps per batch; the scheduler steps per epoch, hence this timing
                scheduler.step(val_roc)

                if val_roc >= best_val:
                    best_val = val_roc
                    patience = es_patience  # Resetting patience since we have new best validation accuracy
                    torch.save(model.state_dict(),
                               model_path_fold)  # Saving current best model
                else:
                    patience -= 1
                    if patience == 0:
                        print(
                            'Early stopping. Best Val roc_auc: {:.3f}'.format(
                                best_val))
                        logger.info(
                            'Early stopping. Best Val roc_auc: {:.3f}'.format(
                                best_val))
                        break

        model = Ef_Net()
        model.load_state_dict(torch.load(model_path_fold))
        model = model.to(device)
        model.eval()
        val_preds = torch.zeros((len(val_idx), 1),
                                dtype=torch.float32,
                                device=device)
        with torch.no_grad():
            # Predicting on validation set once again to obtain data for OOF
            for j, (x_val, y_val) in enumerate(val_loader):
                x_val = x_val.to(device, dtype=torch.float32)
                y_val = y_val.to(device, dtype=torch.float32)
                output_val = model(x_val)

                val_pred = torch.sigmoid(output_val)
                # val_pred = output_val
                # val_pred = output_val.softmax(1)

                val_preds[j * val_loader.batch_size:j * val_loader.batch_size +
                          x_val.shape[0]] = val_pred
            oof[val_idx] = val_preds.cpu().numpy()
    logger.info("train_finish")
    return oof
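main returns an OOF array aligned row-for-row with df_train, so the cross-validated score can be computed directly. A minimal sketch, assuming the `target` column name and the kfold configuration from the example's own scope:

from sklearn.metrics import roc_auc_score

oof = main(df_train, df_test)
# score the aligned OOF array (assumes `target` names the label column, as in the example)
print("CV roc_auc: {:.4f}".format(roc_auc_score(df_train[target].values, oof)))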
Example No. 5
def run(fold):
    
    i_fold = fold

    if DEBUG:
        df_this = df_train[df_train['fold'] != i_fold].sample(batch_size * 3)
        df_valid = df_train[df_train['fold'] == i_fold].sample(batch_size * 3)
    else:
        df_this = df_train[df_train['fold'] != i_fold]
        df_valid = df_train[df_train['fold'] == i_fold]
    
    # datasets and loaders
    dataset_train = Albu_Dataset(df=df_this,
                                phase="train", 
                                transforms=Albu_Transform(image_size=image_size),
                                aug=train_aug)
    dataset_valid = Albu_Dataset(df_valid,
                                phase="train", 
                                transforms=Albu_Transform(image_size=image_size),
                                aug=train_aug)
    train_loader = torch.utils.data.DataLoader(dataset_train, batch_size=batch_size, shuffle=True, num_workers=num_workers)
    valid_loader = torch.utils.data.DataLoader(dataset_valid, batch_size=batch_size, shuffle=False, num_workers=num_workers)
    
    # load the model
    model = Ef_Net(n_meta_features=n_meta_features, out_features=out_features)
    model = model.to(device)
    logger.info("device_GPU_True:{}".format(next(model.parameters()).is_cuda))

    auc_max = 0.
    auc_20_max = 0.
    model_file = f'{kernel_type}_best_fold{i_fold}.pth'
    model_file2 = f'{kernel_type}_best_o_fold{i_fold}.pth'
    # optimizer
    optimizer = optim.Adam(model.parameters(), lr=init_lr)
    if use_amp:
        model, optimizer = amp.initialize(model, optimizer, opt_level="O1")
        
    # scheduler
    # scheduler_cosine = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, cosine_epo)
    # scheduler_warmup = GradualWarmupSchedulerV2(optimizer, multiplier=10, total_epoch=warmup_epo, after_scheduler=scheduler_cosine)
    scheduler_warmup = ReduceLROnPlateau(optimizer=optimizer, mode='max', patience=1, verbose=True, factor=0.2)

    print(len(dataset_train), len(dataset_valid))

    for epoch in range(1, n_epochs+1):
        print(time.ctime(), 'Epoch:', epoch)

        train_loss = train_epoch(model, train_loader, optimizer)
        val_loss, acc, auc, auc_20 = val_epoch(model, valid_loader, is_ext=df_valid['is_ext'].values)
        # ReduceLROnPlateau monitors a metric, so step it with the validation auc after val_epoch
        scheduler_warmup.step(auc)

        content = time.ctime() + ' ' + f'Fold {fold}, Epoch {epoch}, lr: {optimizer.param_groups[0]["lr"]:.7f}, train loss: {np.mean(train_loss):.5f}, valid loss: {(val_loss):.5f}, acc: {(acc):.4f}, auc: {(auc):.6f}, auc_20: {(auc_20):.6f}.'
        print(content)
        with open(f'log_{kernel_type}.txt', 'a') as appender:
            appender.write(content + '\n')

        if auc > auc_max:
            print('auc_max ({:.6f} --> {:.6f}). Saving model ...'.format(auc_max, auc))
            torch.save(model.state_dict(), model_file)
            auc_max = auc
        if auc_20 > auc_20_max:
            print('auc_20_max ({:.6f} --> {:.6f}). Saving model ...'.format(auc_20_max, auc_20))
            torch.save(model.state_dict(), model_file2)
            auc_20_max = auc_20

    scores.append(auc_max)
    scores_20.append(auc_20_max)
    torch.save(model.state_dict(), f'{kernel_type}_model_fold{i_fold}.pth')
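run(fold) appends each fold's best AUCs to module-level lists, so a driver loop is expected to call it once per fold. A minimal driver sketch, assuming kfold, scores and scores_20 live at module level as the function implies:

import numpy as np

scores, scores_20 = [], []
for fold in range(kfold):
    run(fold)
print("mean auc: {:.6f}, mean auc_20: {:.6f}".format(np.mean(scores), np.mean(scores_20)))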
Example No. 6
def get_fold(df_train):
    # set up the logger
    logger = setup_logger(LOG_DIR, LOG_NAME)
    config.log_config(logger)
    # initialise
    scores = []
    scores_20 = []
    PROBS = []
    dfs = []

    for fold in range(kfold):
        if fold not in fold_list:
            logger.info("Fold{0}はスキップされました".format(fold))
            continue
        logger.info("{0}Fold{1}{2}".format("=" * 20, fold, "=" * 20))
        model_path_fold_1 = model_path + "_fold{}_1.pth".format(
            fold)  # Path and filename to save model to
        model_path_fold_2 = model_path + "_fold{}_2.pth".format(
            fold)  # Path and filename to save model to
        best_val = 0  # Best validation score within this fold
        patience = es_patience  # Current patience counter
        val_auc_max = 0.
        val_auc_20_max = 0.

        # split rows by their assigned fold number
        df_this = df_train[df_train["val_fold"] != fold]
        df_valid = df_train[df_train["val_fold"] == fold]
        # create the dataset instances
        train_dataset = Albu_Dataset(
            df=df_this,
            phase="train",
            transforms=Albu_Transform(image_size=image_size),
            aug=train_aug,
            use_meta=use_meta)
        val_dataset = Albu_Dataset(
            df=df_valid,
            phase="train",
            transforms=Albu_Transform(image_size=image_size),
            aug=val_aug,
            use_meta=use_meta)
        # wrap them in data loaders
        train_loader = DataLoader(dataset=train_dataset,
                                  batch_size=batch_size,
                                  shuffle=True,
                                  num_workers=num_workers)
        val_loader = DataLoader(dataset=val_dataset,
                                batch_size=batch_size,
                                shuffle=False,
                                num_workers=num_workers)
        # load the model class
        n_meta_features = train_dataset.n_meta_features  # shoehorned in via the dataset object
        target_index = train_dataset.target_index  # shoehorned in via the dataset object
        model = Ef_Net(n_meta_features=n_meta_features,
                       out_features=out_features)
        model = model.to(device)
        logger.info("device_GPU_True:{}".format(
            next(model.parameters()).is_cuda))

        # set up the optimizer and scheduler
        optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
        if USE_AMP:
            model, optimizer = amp.initialize(model, optimizer, opt_level="O1")
        scheduler = ReduceLROnPlateau(optimizer=optimizer,
                                      mode='max',
                                      patience=1,
                                      verbose=True,
                                      factor=0.2)

        # epoch loop
        for epoch in range(epochs):
            logger.info("{0} Epoch:{1}".format(time.ctime(), epoch))
            train_loss = train_epoch(model, train_loader, optimizer, logger)
            val_loss, acc, val_auc, val_auc_20 = val_epoch(
                model,
                val_loader,
                is_ext=df_valid['is_ext'].values,
                target_index=target_index)
            content = time.ctime(
            ) + ' ' + f'Fold {fold}, Epoch {epoch}, lr: {optimizer.param_groups[0]["lr"]:.7f}, train loss: {np.mean(train_loss):.5f}, valid loss: {(val_loss):.5f}, acc: {(acc):.4f}, val_auc: {(val_auc):.6f}, val_auc_20: {(val_auc_20):.6f}.'
            logger.info(content)

            # the optimizer steps per batch; the scheduler steps per epoch, hence this timing
            scheduler.step(val_auc)
            if val_auc <= val_auc_max and val_auc_20 <= val_auc_20_max:
                patience -= 1
                if patience == 0:
                    logger.info(
                        'Early stopping. Best Val roc_auc: {:.3f}'.format(
                            val_auc_max))
                    break

            if val_auc > val_auc_max:
                logger.info(
                    'val_auc_max ({:.6f} --> {:.6f}). Saving model ...'.format(
                        val_auc_max, val_auc))
                logger.info("save_path={}".format(model_path_fold_1))
                val_auc_max = val_auc
                patience = es_patience  # Resetting patience since we have new best validation accuracy
                torch.save(model.state_dict(),
                           model_path_fold_1)  # Saving current best model
            if val_auc_20 > val_auc_20_max:
                logger.info(
                    'val_auc_20_max ({:.6f} --> {:.6f}). Saving model ...'.
                    format(val_auc_20_max, val_auc_20))
                logger.info("save_path={}".format(model_path_fold_1))
                torch.save(model.state_dict(), model_path_fold_2)
                val_auc_20_max = val_auc_20

        scores.append(val_auc_max)
        scores_20.append(val_auc_20_max)
    logger.info("train_finish")