Example #1
0
    def _train_fold(self, train_df, target_df, fold):
        """Fit one model on a single fold and record its validation results.

        Rows where ``fold == 0`` are used for fitting and rows where
        ``fold > 0`` for validation. The method appends to ``self.weights``,
        ``self.models`` and ``self.scores``, writes the validation
        predictions into ``self.oof`` and, for the gradient-boosting model
        names checked at the end, extends ``self.feature_importance_df``.

        Args:
            train_df: Feature table, indexed with the boolean fold masks.
            target_df: Targets, indexed with the same masks as ``train_df``.
            fold: Per-row fold marker (0 = fit row, > 0 = validation row);
                its maximum is stored as this fold's weight.
        """
        fit_mask = fold == 0
        valid_mask = fold > 0
        tr_x, tr_y = train_df[fit_mask], target_df[fit_mask]
        va_x, va_y = train_df[valid_mask], target_df[valid_mask]
        self.weights.append(fold.max())

        model = factory.get_model(self.cfg.model)
        model.fit(tr_x, tr_y, va_x, va_y, self.cat_features)
        va_pred = model.predict(va_x, self.cat_features)

        reconvert_name = self.cfg.data.target.reconvert_type
        if reconvert_name:
            # Apply the configured numpy function to both targets and
            # predictions, then clip negative predictions to zero.
            reconvert = getattr(np, reconvert_name)
            va_y = reconvert(va_y)
            restored_pred = reconvert(va_pred)
            va_pred = np.where(restored_pred >= 0, restored_pred, 0)

        self.models.append(model)
        self.oof[va_x.index] = va_pred.copy()

        metric = factory.get_metrics(self.cfg.common.metrics.name)
        self.scores.append(metric(va_y, va_pred))

        if self.cfg.model.name in ('lightgbm', 'catboost', 'xgboost'):
            # One row per feature for this fold, stacked under earlier folds.
            fold_importance_df = pd.DataFrame()
            fold_importance_df['feature'] = train_df.columns
            fold_importance_df['importance'] = model.extract_importances()
            self.feature_importance_df = pd.concat(
                [self.feature_importance_df, fold_importance_df], axis=0)
Example #2
0
def main():
    """Compute a Grad-CAM array for every training image and save it.

    The run directory, the training CSV and the output directory are
    hard-coded relative paths. For each value in the ``image_id`` column of
    the training CSV, ``gradcam(model, id_)`` is evaluated and the result is
    written to ``../data/input/grad_cam/<image_id>.npy``.
    """
    log_dir = Path('../logs/clf_resnet18_20200703133354_0.827/')
    cfg = dh.load(log_dir / 'config.yml')

    train_df = pd.read_csv('../data/input/train.csv')
    # Disable the multi-GPU option before the model is built.
    cfg.model.multi_gpu = False

    model = factory.get_model(cfg).to(device)
    # map_location lets a checkpoint saved on a different device (e.g. a GPU
    # that is not present here) be loaded onto `device`.
    model.load_state_dict(
        torch.load(log_dir / 'weight_best.pt', map_location=device))

    # np.save does not create missing directories.
    Path('../data/input/grad_cam').mkdir(parents=True, exist_ok=True)

    for id_ in tqdm(train_df['image_id']):
        grad_cam_array = gradcam(model, id_)
        np.save(f'../data/input/grad_cam/{id_}.npy', grad_cam_array)
Example #3
0
    def __init__(self, cfg):
        """Set up the segmentation module from a config object.

        Args:
            cfg: Config exposing ``Model.model_arch``, ``Model.output``,
                ``Model.out_channel``, ``General.labels`` and
                ``Augmentation.tta``; it is also passed to ``get_model``
                and ``get_loss``.

        Raises:
            AssertionError: If ``cfg.Model.model_arch`` is not one of the
                architectures this module expects.
        """
        super(LightningModuleSeg, self).__init__()
        model_cfg = cfg.Model
        supported_archs = {'unet', 'clsunet', 'msunet', 'msclsunet'}
        assert model_cfg.model_arch in supported_archs
        self.cfg = cfg

        self.model_arch = model_cfg.model_arch
        self.softmax = model_cfg.output == 'softmax'
        self.num_class = len(cfg.General.labels)
        self.num_class_seg = model_cfg.out_channel
        # True when the network emits more channels than there are labels.
        self.skip_first_class = self.num_class < model_cfg.out_channel

        self.net = get_model(cfg)
        self.loss = get_loss(cfg)
        self.metrics_keys = ['dice0', 'dice', 'dice_p', 'dice_n']

        # Only the requested TTA modes that are also supported are counted.
        self.tta_enable = {'hflip', 'vflip', 'rotate90', 'rotate180'}
        self.tta = set(cfg.Augmentation.tta)  # ex. {'hflip', 'vflip'}
        applied_tta = self.tta & self.tta_enable
        self.applied_tta_num = len(applied_tta)
        print(f'Applied TTA: {applied_tta}')
        self.pred_cls_thres = 0.6
Example #4
0
def predict_fold(run_name, df, cfg, fold_num):
    """Predict with one fold's best checkpoint, averaging over TTA passes.

    The test loader is iterated ``cfg.data.test.tta.iter_num`` times. The raw
    model outputs of all passes are averaged per sample and class, and a
    sigmoid is applied to the averaged values.

    Args:
        run_name: Name of the run directory under ``../logs`` that holds
            ``weight_best_<fold_num>.pt``.
        df: Data passed to ``factory.get_dataloader`` to build the loader.
        cfg: Config providing ``model.n_classes`` and ``data.test``.
        fold_num: Fold whose checkpoint is loaded.

    Returns:
        ``numpy.ndarray`` of shape ``(len(dataset), n_classes)`` with the
        sigmoid of the TTA-averaged outputs.
    """
    n_classes = cfg.model.n_classes
    tta_iters = cfg.data.test.tta.iter_num

    test_loader = factory.get_dataloader(df, cfg=cfg.data.test)
    n_samples = len(test_loader.dataset)
    test_batch_size = test_loader.batch_size

    # One (n_samples, n_classes) slab of raw outputs per TTA pass.
    test_preds = np.zeros((tta_iters, n_samples, n_classes))

    model = factory.get_model(cfg, is_train=False).to(device)
    # map_location lets a checkpoint saved on another device load onto
    # `device` (e.g. GPU-trained weights on a CPU-only machine).
    model.load_state_dict(
        torch.load(f'../logs/{run_name}/weight_best_{fold_num}.pt',
                   map_location=device))
    model.eval()

    with torch.no_grad():
        for t in range(tta_iters):
            # NOTE(review): rows are placed by batch position, which assumes
            # the loader yields batches in dataset order (no shuffling).
            for i, images in enumerate(test_loader):
                images = images.to(device)
                preds = model(images.float())
                start = i * test_batch_size
                test_preds[t, start:start + test_batch_size] = (
                    preds.cpu().numpy())

    # Average the raw outputs over the TTA axis, then squash with a sigmoid.
    mean_outputs = test_preds.mean(axis=0)
    return 1 / (1 + np.exp(-mean_outputs))
Example #5
0
 def get_net(self):
     """Return the result of ``get_model`` for this object's ``cfg.Model``."""
     model_cfg = self.cfg.Model
     return get_model(model_cfg)
Example #6
0
def train_model(run_name, df, fold_df, cfg):
    """Train one model per fold, keep each fold's best epoch, and report CV.

    For every column of ``fold_df`` a fresh model, loss, optimizer and
    scheduler are built. After each epoch the validation metric is computed,
    and the epoch with the highest metric supplies that fold's out-of-fold
    predictions and saved weights.

    Files written under ``../logs/<run_name>/``: ``weight_best_<fold>.pt``
    per fold, the plots produced by ``save_png``, and ``oof.npy``.

    Args:
        run_name: Name of the run directory under ``../logs``.
        df: Full training table; rows are selected with the fold masks.
        fold_df: One column per fold. ``0`` marks training rows, values
            ``> 0`` mark validation rows, and the column maximum is used as
            that fold's weight when accumulating the CV score.
        cfg: Experiment config (data, model, scheduler, metric, target).

    Returns:
        dict: ``{'cv': <weighted sum of the per-fold best scores>}``.

    Raises:
        RuntimeError: If no epoch of a fold improves on ``-inf`` (zero
            epochs, or a metric that is always NaN), so there is nothing
            valid to save for that fold.
    """
    import copy  # local import: only needed to snapshot the best weights

    oof = np.zeros(len(df))
    cv = 0

    for fold_, col in enumerate(fold_df.columns):
        banner = f'\n========================== FOLD {fold_} ... ==========================\n'
        print(banner)
        logging.debug(banner)

        trn_x, val_x = df[fold_df[col] == 0], df[fold_df[col] > 0]
        # Only validation rows with index label <= 33126 are scored; rows
        # beyond that are still predicted and written to `oof`.
        # NOTE(review): 33126 is presumably the last row of the original
        # training set - confirm against the data preparation code.
        val_y = val_x.loc[:33126][cfg.common.target]
        val_org_idx = np.where(val_x.index <= 33126)[0]

        train_loader = factory.get_dataloader(trn_x, cfg.data.train)
        valid_loader = factory.get_dataloader(val_x, cfg.data.valid)

        model = factory.get_model(cfg).to(device)

        criterion = factory.get_loss(cfg)
        optimizer = factory.get_optim(cfg, model.parameters())
        scheduler = factory.get_scheduler(cfg, optimizer)

        # Reset per fold so a fold without any improvement cannot silently
        # reuse the previous fold's predictions or weights.
        best_epoch = -1
        best_val_score = -np.inf
        best_valid_preds = None
        best_model = None
        mb = master_bar(range(cfg.data.train.epochs))

        train_loss_list = []
        val_loss_list = []
        val_score_list = []

        for epoch in mb:
            start_time = time.time()

            model, avg_loss = train_epoch(model, train_loader, criterion, optimizer, mb, cfg)

            valid_preds, avg_val_loss = val_epoch(model, valid_loader, criterion, cfg)

            val_score = factory.get_metrics(cfg.common.metrics.name)(val_y, valid_preds[val_org_idx])

            train_loss_list.append(avg_loss)
            val_loss_list.append(avg_val_loss)
            val_score_list.append(val_score)

            # ReduceLROnPlateau needs the monitored value; every other
            # scheduler is stepped without arguments.
            if cfg.scheduler.name == 'ReduceLROnPlateau':
                scheduler.step(avg_val_loss)
            else:
                scheduler.step()

            elapsed = time.time() - start_time
            epoch_msg = f'Epoch {epoch+1} - avg_train_loss: {avg_loss:.4f}  avg_val_loss: {avg_val_loss:.4f} val_score: {val_score:.4f} time: {elapsed:.0f}s'
            mb.write(epoch_msg)
            logging.debug(epoch_msg)

            if val_score > best_val_score:
                best_epoch = epoch + 1
                best_val_score = val_score
                best_valid_preds = valid_preds
                net = model.module if cfg.model.multi_gpu else model
                # state_dict() hands back references to the live parameter
                # tensors; without a copy, later epochs would overwrite the
                # "best" weights in place.
                best_model = copy.deepcopy(net.state_dict())

        if best_model is None:
            raise RuntimeError(
                f'No epoch improved the validation score for fold {fold_}; '
                'nothing to save.')

        oof[val_x.index] = best_valid_preds.reshape(-1)
        cv += best_val_score * fold_df[col].max()

        torch.save(best_model, f'../logs/{run_name}/weight_best_{fold_}.pt')
        save_png(run_name, cfg, train_loss_list, val_loss_list, val_score_list, fold_)

        fold_msg = f'\nEpoch {best_epoch} - val_score: {best_val_score:.4f}'
        print(fold_msg)
        logging.debug(fold_msg)

    print('\n\n===================================\n')
    print(f'CV: {cv:.6f}')
    logging.debug(f'\n\nCV: {cv:.6f}')
    print('\n===================================\n\n')

    result = {
        'cv': cv,
    }

    np.save(f'../logs/{run_name}/oof.npy', oof)

    return result
Example #7
0
        os.mkdir(train_output_dir)
    test_output_dir = os.path.join(output_dir, "test/")
    if not os.path.exists(test_output_dir):
        os.mkdir(test_output_dir)

    # Load data
    train_loader, test_loader = factory.get_data(args.data, args.batch_size,
                                                 args.workers)

    # Load models
    disc = factory.get_discriminator(args.data, args.disc).to(args.device)
    disc2 = factory.get_discriminator(args.data, args.disc).to(args.device)
    gen = factory.get_generator(args.data, args.gen,
                                args.deconv).to(args.device)

    model = factory.get_model(args, disc, disc2, gen)

    if args.test:
        disc.load_state_dict(torch.load(model_output_dir + 'disc.pth'))
        gen.load_state_dict(torch.load(model_output_dir + 'gen.pth'))
        print("Loaded model")
        test_loss = test(args, args.epochs, disc, gen, test_loader,
                         test_output_dir)
        print("Outputted test results")
    else:
        with open(output_dir + '/config.txt', 'w') as f:
            json.dump(args.__dict__, f)

        model.train(train_loader, test_loader, output_dir, train_output_dir,
                    test_output_dir, model_output_dir)
Example #8
0
def train_cnn(run_name, trn_x, val_x, trn_y, val_y, cfg):
    """Train a CNN on one train/validation split and keep the best epoch.

    After every epoch the quadratic weighted kappa (QWK) on the validation
    set is computed. With ``cfg.model.n_classes > 1`` the predicted class is
    the argmax of the outputs; otherwise a ``QWKOptimizedRounder`` is fitted
    on the validation predictions, starting from ``initial_coef``, to turn
    them into classes. The epoch with the highest QWK supplies the saved
    predictions, coefficients, weights and the reported confusion matrix.

    Files written under ``../logs/<run_name>/``: ``oof.npy``,
    ``best_coef.npy``, ``weight_best.pt`` and the plots from ``save_png``.

    Args:
        run_name: Name of the run directory under ``../logs``.
        trn_x: Training inputs passed to ``factory.get_dataloader``.
        val_x: Validation inputs passed to ``factory.get_dataloader``.
        trn_y: Training targets.
        val_y: Validation targets, also used to compute the metric.
        cfg: Experiment config (data, model, scheduler).

    Returns:
        dict: ``{'cv': <best validation QWK>}``.

    Raises:
        RuntimeError: If no epoch improves on ``-inf`` (zero epochs, or a
            score that is always NaN), so there is nothing valid to save.
    """
    import copy  # local import: only needed to snapshot the best weights

    train_loader = factory.get_dataloader(trn_x, trn_y, cfg.data.train)
    valid_loader = factory.get_dataloader(val_x, val_y, cfg.data.valid)

    model = factory.get_model(cfg).to(device)

    criterion = factory.get_loss(cfg)
    optimizer = factory.get_optim(cfg, model.parameters())
    scheduler = factory.get_scheduler(cfg, optimizer)

    best_epoch = -1
    best_val_score = -np.inf
    best_coef = []
    best_valid_preds = None
    best_model = None
    best_cm = None
    mb = master_bar(range(cfg.data.train.epochs))

    train_loss_list = []
    val_loss_list = []
    val_score_list = []
    initial_coef = [0.5, 1.5, 2.5, 3.5, 4.5]

    for epoch in mb:
        start_time = time.time()

        model, avg_loss = train_epoch(model, train_loader, criterion, optimizer, mb, cfg)

        valid_preds, avg_val_loss = val_epoch(model, valid_loader, criterion, cfg)

        if cfg.model.n_classes > 1:
            valid_preds_class = valid_preds.argmax(1)
        else:
            # Single-output model: fit rounding thresholds on this epoch's
            # validation predictions to convert them into classes.
            optR = QWKOptimizedRounder()
            optR.fit(valid_preds.copy(), val_y, initial_coef)
            coef = optR.coefficients()
            valid_preds_class = optR.predict(valid_preds.copy(), coef)
        val_score = quadratic_weighted_kappa(val_y, valid_preds_class)
        cm = confusion_matrix(val_y, valid_preds_class)

        train_loss_list.append(avg_loss)
        val_loss_list.append(avg_val_loss)
        val_score_list.append(val_score)

        # ReduceLROnPlateau needs the monitored value; every other scheduler
        # is stepped without arguments.
        if cfg.scheduler.name == 'ReduceLROnPlateau':
            scheduler.step(avg_val_loss)
        else:
            scheduler.step()

        elapsed = time.time() - start_time
        epoch_msg = f'Epoch {epoch+1} - avg_train_loss: {avg_loss:.4f}  avg_val_loss: {avg_val_loss:.4f} val_score: {val_score:.4f} time: {elapsed:.0f}s'
        mb.write(epoch_msg)
        logging.debug(epoch_msg)

        if val_score > best_val_score:
            best_epoch = epoch + 1
            best_val_score = val_score
            best_valid_preds = valid_preds
            net = model.module if cfg.model.multi_gpu else model
            # state_dict() hands back references to the live parameter
            # tensors; without a copy, later epochs would overwrite the
            # "best" weights in place.
            best_model = copy.deepcopy(net.state_dict())
            if cfg.model.n_classes == 1:
                best_coef = coef
            best_cm = cm

    if best_model is None:
        raise RuntimeError(
            'No epoch improved the validation score; nothing to save.')

    # Report the confusion matrix of the best epoch so it matches the CV
    # score and the saved weights, not whatever the last epoch produced.
    print('\n\nCONFUSION MATRIX')
    logging.debug('\n\nCONFUSION MATRIX')
    print(best_cm)
    logging.debug(best_cm)

    print('\n\n===================================\n')
    print(f'CV: {best_val_score:.6f}')
    print(f'BEST EPOCH: {best_epoch}')
    logging.debug(f'\n\nCV: {best_val_score:.6f}')
    logging.debug(f'BEST EPOCH: {best_epoch}\n\n')
    print('\n===================================\n\n')

    result = {
        'cv': best_val_score,
    }

    np.save(f'../logs/{run_name}/oof.npy', best_valid_preds)
    np.save(f'../logs/{run_name}/best_coef.npy', best_coef)
    torch.save(best_model, f'../logs/{run_name}/weight_best.pt')
    save_png(run_name, cfg, train_loss_list, val_loss_list, val_score_list)

    return result
Example #9
0
def train_ordinal_reg(run_name, trn_x, val_x, trn_y, val_y, cfg):
    """Train one binary model per ordinal target column and score with QWK.

    For every column of ``trn_y`` except the first, a fresh model is trained
    and the epoch with the lowest validation loss is kept. The sigmoid of
    each kept prediction is stored per column and the columns are summed
    into one score per sample. A ``QWKOptimizedRounder`` then turns those
    scores into classes, which are compared with ``sum(val_y, axis=1) - 1``.

    Files written under ``../logs/<run_name>/``: ``oof_<col>.npy`` and
    ``weight_best_<col>.pt`` per trained column, plus ``best_coef.npy``.

    Args:
        run_name: Name of the run directory under ``../logs``.
        trn_x: Training inputs passed to ``factory.get_dataloader``.
        val_x: Validation inputs passed to ``factory.get_dataloader``.
        trn_y: Training targets, a table with one column per ordinal level.
        val_y: Validation targets with the same columns as ``trn_y``.
        cfg: Experiment config (data, model, scheduler).

    Returns:
        dict: ``{'cv': <validation quadratic weighted kappa>}``.

    Raises:
        RuntimeError: If no epoch of a column improves on ``inf`` (zero
            epochs, or a loss that is always NaN), so there is nothing
            valid to save for that column.
    """
    import copy  # local import: only needed to snapshot the best weights

    # Explicit float buffer: inheriting an integer dtype from the label
    # table would truncate the stored probabilities to 0.
    ordinal_val_preds = np.zeros(val_y.shape, dtype=float)
    # Starting thresholds for the rounder; defined up front because they are
    # used after the per-column loop.
    initial_coef = [0.5, 1.5, 2.5, 3.5, 4.5]

    for i, col in enumerate(trn_y.columns[1:]):
        header = f'\n\n====================  {col}  ===================='
        print(header)
        logging.debug(header)

        train_loader = factory.get_dataloader(trn_x, trn_y[col], cfg.data.train)
        valid_loader = factory.get_dataloader(val_x, val_y[col], cfg.data.valid)

        model = factory.get_model(cfg).to(device)

        criterion = factory.get_loss(cfg)
        optimizer = factory.get_optim(cfg, model.parameters())
        scheduler = factory.get_scheduler(cfg, optimizer)

        # Reset per column so a column without any improvement cannot
        # silently reuse the previous column's predictions or weights.
        best_epoch = -1
        best_val_loss = np.inf
        best_valid_preds = None
        best_model = None
        mb = master_bar(range(cfg.data.train.epochs))

        train_loss_list = []
        val_loss_list = []

        for epoch in mb:
            start_time = time.time()

            model, avg_loss = train_epoch(model, train_loader, criterion, optimizer, mb, cfg)

            valid_preds, avg_val_loss = val_epoch(model, valid_loader, criterion, cfg)

            train_loss_list.append(avg_loss)
            val_loss_list.append(avg_val_loss)

            # ReduceLROnPlateau needs the monitored value; every other
            # scheduler is stepped without arguments.
            if cfg.scheduler.name == 'ReduceLROnPlateau':
                scheduler.step(avg_val_loss)
            else:
                scheduler.step()

            elapsed = time.time() - start_time
            epoch_msg = f'Epoch {epoch+1} - avg_train_loss: {avg_loss:.4f}  avg_val_loss: {avg_val_loss:.4f} time: {elapsed:.0f}s'
            mb.write(epoch_msg)
            logging.debug(epoch_msg)

            if avg_val_loss < best_val_loss:
                best_epoch = epoch + 1
                best_val_loss = avg_val_loss
                best_valid_preds = valid_preds
                net = model.module if cfg.model.multi_gpu else model
                # state_dict() hands back references to the live parameter
                # tensors; without a copy, later epochs would overwrite the
                # "best" weights in place.
                best_model = copy.deepcopy(net.state_dict())

        if best_model is None:
            raise RuntimeError(
                f'No epoch improved the validation loss for {col}; '
                'nothing to save.')

        print(f'epoch: {best_epoch}   loss: {best_val_loss}')

        # Store the sigmoid of the kept raw outputs for this column.
        ordinal_val_preds[:, i] = 1 / (1 + np.exp(-1 * best_valid_preds))

        np.save(f'../logs/{run_name}/oof_{col}.npy', best_valid_preds)
        torch.save(best_model, f'../logs/{run_name}/weight_best_{col}.pt')

    # Sum the per-column probabilities into one score per sample, and the
    # label columns (minus one) into the integer class to compare against.
    valid_preds = np.sum(ordinal_val_preds, axis=1)
    val_y = (np.sum(val_y.values, axis=1) - 1).astype(int)

    optR = QWKOptimizedRounder()
    optR.fit(valid_preds.copy(), val_y, initial_coef)
    best_coef = optR.coefficients()
    valid_preds_class = optR.predict(valid_preds.copy(), best_coef)
    best_val_score = quadratic_weighted_kappa(val_y, valid_preds_class)
    cm = confusion_matrix(val_y, valid_preds_class)

    print('\n\nCONFUSION MATRIX')
    logging.debug('\n\nCONFUSION MATRIX')
    print(cm)
    logging.debug(cm)

    print('\n\n===================================\n')
    print(f'CV: {best_val_score:.6f}')
    logging.debug(f'\n\nCV: {best_val_score:.6f}')
    print('\n===================================\n\n')

    result = {
        'cv': best_val_score,
    }

    np.save(f'../logs/{run_name}/best_coef.npy', best_coef)

    return result