Ejemplo n.º 1
0
    def train(self, train_df, target_df):
        """Train one network per fold column and collect out-of-fold predictions.

        For every column of ``self.fold_df``, rows where the column equals 0
        form the training split and rows where it is greater than 0 form the
        validation split. A fresh model, loss, optimizer and scheduler are
        built through ``factory`` for each fold. After every epoch the
        validation metric is computed; the epoch with the highest metric
        supplies the out-of-fold predictions and the weights written to
        ``../logs/{self.run_name}/weight_best_{fold}.pt``.

        Args:
            train_df: Frame that is row-filtered by the fold masks and handed
                to ``factory.get_dataloader``.
            target_df: Targets, filtered with the same validation mask;
                ``.values`` of the selection is passed to the metric.

        Returns:
            The sum over folds of ``best_val_score * self.fold_df[col].max()``.

        Raises:
            RuntimeError: If no epoch of a fold was selected as best (zero
                epochs configured, or the metric never compared greater than
                ``-inf``, e.g. because it was NaN).
        """
        # Function-scope import: needed only to snapshot the best weights.
        import copy

        oof = np.zeros((len(train_df), self.cfg.model.n_classes))
        cv = 0

        for fold_, col in enumerate(self.fold_df.columns):
            fold_banner = f'\n========================== FOLD {fold_} ... ==========================\n'
            print(fold_banner)
            logging.debug(fold_banner)

            val_mask = self.fold_df[col] > 0
            trn_x, val_x = train_df[self.fold_df[col] == 0], train_df[val_mask]
            val_y = target_df[val_mask].values

            train_loader = factory.get_dataloader(trn_x, self.cfg.data.train)
            valid_loader = factory.get_dataloader(val_x, self.cfg.data.valid)

            model = factory.get_nn_model(self.cfg).to(device)

            criterion = factory.get_loss(self.cfg)
            optimizer = factory.get_optim(self.cfg, model.parameters())
            scheduler = factory.get_scheduler(self.cfg, optimizer)

            best_epoch = -1
            best_val_score = -np.inf
            best_valid_preds = None
            best_model = None
            mb = master_bar(range(self.cfg.model.epochs))

            train_loss_list = []
            val_loss_list = []
            val_score_list = []

            for epoch in mb:
                start_time = time.time()

                model, avg_loss = self._train_epoch(model, train_loader,
                                                    criterion, optimizer, mb)

                valid_preds, avg_val_loss = self._val_epoch(
                    model, valid_loader, criterion)

                val_score = factory.get_metrics(self.cfg.common.metrics.name)(
                    val_y, valid_preds)

                train_loss_list.append(avg_loss)
                val_loss_list.append(avg_val_loss)
                val_score_list.append(val_score)

                # ReduceLROnPlateau is the only scheduler stepped with the
                # monitored quantity; every other scheduler steps bare.
                if self.cfg.scheduler.name != 'ReduceLROnPlateau':
                    scheduler.step()
                else:
                    scheduler.step(avg_val_loss)

                elapsed = time.time() - start_time
                epoch_msg = f'Epoch {epoch+1} - avg_train_loss: {avg_loss:.4f}  avg_val_loss: {avg_val_loss:.4f} val_score: {val_score:.4f} time: {elapsed:.0f}s'
                mb.write(epoch_msg)
                logging.debug(epoch_msg)

                if val_score > best_val_score:
                    best_epoch = epoch + 1
                    best_val_score = val_score
                    best_valid_preds = valid_preds
                    # With multi_gpu the wrapped network is read from ``.module``.
                    if self.cfg.model.multi_gpu:
                        live_state = model.module.state_dict()
                    else:
                        live_state = model.state_dict()
                    # state_dict() hands back references to the live
                    # parameters; without a copy, later epochs would overwrite
                    # the "best" weights in place before they are saved.
                    best_model = copy.deepcopy(live_state)

            if best_model is None:
                raise RuntimeError(
                    f'fold {fold_}: no epoch was selected as best '
                    f'(epochs={self.cfg.model.epochs})')

            # NOTE(review): index labels are used as row positions here, which
            # assumes train_df carries a default 0..n-1 index - confirm.
            oof[val_x.index, :] = best_valid_preds
            # NOTE(review): the fold column's max acts as the fold's weight in
            # the CV score - presumably 1/n_folds; confirm against fold_df.
            cv += best_val_score * self.fold_df[col].max()

            torch.save(best_model,
                       f'../logs/{self.run_name}/weight_best_{fold_}.pt')
            self._save_loss_png(train_loss_list, val_loss_list, val_score_list,
                                fold_)

            best_msg = f'\nEpoch {best_epoch} - val_score: {best_val_score:.4f}'
            print(best_msg)
            logging.debug(best_msg)

        print('\n\n===================================\n')
        print(f'CV: {cv:.6f}')
        logging.debug(f'\n\nCV: {cv:.6f}')
        print('\n===================================\n\n')

        # NOTE(review): the hard-coded 5 is a no-op only when n_classes == 5;
        # kept as-is because callers may rely on this shape - confirm.
        self.oof = oof.reshape(-1, 5)

        return cv
Ejemplo n.º 2
0
def train_model(run_name, df, fold_df, cfg):
    """Run fold-wise training and save out-of-fold predictions and weights.

    For every column of ``fold_df``, rows where the column equals 0 are the
    training split and rows where it is greater than 0 are the validation
    split. Only validation rows whose index label is <= 33126 are scored;
    predictions for all validation rows are still written to the OOF array.
    The epoch with the highest validation metric supplies the OOF predictions
    and the weights saved to ``../logs/{run_name}/weight_best_{fold}.pt``.

    Args:
        run_name: Name of the directory under ``../logs`` that receives outputs.
        df: Frame holding features and the ``cfg.common.target`` column.
        fold_df: Frame with one column per fold (0 = train, > 0 = validation).
        cfg: Configuration object read by ``factory`` and by this function.

    Returns:
        ``{'cv': cv}`` where ``cv`` is the sum over folds of
        ``best_val_score * fold_df[col].max()``.

    Raises:
        RuntimeError: If no epoch of a fold was selected as best (zero epochs
            configured, or the metric never compared greater than ``-inf``).
    """
    # Function-scope import: needed only to snapshot the best weights.
    import copy

    # NOTE(review): hard-coded index bound - presumably the last label of the
    # original (non-augmented) rows; confirm against the dataset.
    last_scored_label = 33126

    oof = np.zeros(len(df))
    cv = 0

    for fold_, col in enumerate(fold_df.columns):
        fold_banner = f'\n========================== FOLD {fold_} ... ==========================\n'
        print(fold_banner)
        logging.debug(fold_banner)

        trn_x, val_x = df[fold_df[col] == 0], df[fold_df[col] > 0]
        val_y = val_x.loc[:last_scored_label][cfg.common.target]

        # Positions inside the validation predictions that take part in scoring.
        val_org_idx = np.where(val_x.index <= last_scored_label)[0]

        train_loader = factory.get_dataloader(trn_x, cfg.data.train)
        valid_loader = factory.get_dataloader(val_x, cfg.data.valid)

        model = factory.get_model(cfg).to(device)

        criterion = factory.get_loss(cfg)
        optimizer = factory.get_optim(cfg, model.parameters())
        scheduler = factory.get_scheduler(cfg, optimizer)

        best_epoch = -1
        best_val_score = -np.inf
        best_valid_preds = None
        best_model = None
        mb = master_bar(range(cfg.data.train.epochs))

        train_loss_list = []
        val_loss_list = []
        val_score_list = []

        for epoch in mb:
            start_time = time.time()

            model, avg_loss = train_epoch(model, train_loader, criterion, optimizer, mb, cfg)

            valid_preds, avg_val_loss = val_epoch(model, valid_loader, criterion, cfg)

            val_score = factory.get_metrics(cfg.common.metrics.name)(val_y, valid_preds[val_org_idx])

            train_loss_list.append(avg_loss)
            val_loss_list.append(avg_val_loss)
            val_score_list.append(val_score)

            # ReduceLROnPlateau is the only scheduler stepped with the
            # monitored quantity; every other scheduler steps bare.
            if cfg.scheduler.name != 'ReduceLROnPlateau':
                scheduler.step()
            else:
                scheduler.step(avg_val_loss)

            elapsed = time.time() - start_time
            epoch_msg = f'Epoch {epoch+1} - avg_train_loss: {avg_loss:.4f}  avg_val_loss: {avg_val_loss:.4f} val_score: {val_score:.4f} time: {elapsed:.0f}s'
            mb.write(epoch_msg)
            logging.debug(epoch_msg)

            if val_score > best_val_score:
                best_epoch = epoch + 1
                best_val_score = val_score
                best_valid_preds = valid_preds
                # With multi_gpu the wrapped network is read from ``.module``.
                if cfg.model.multi_gpu:
                    live_state = model.module.state_dict()
                else:
                    live_state = model.state_dict()
                # state_dict() hands back references to the live parameters;
                # without a copy, later epochs would overwrite the "best"
                # weights in place before they are saved.
                best_model = copy.deepcopy(live_state)

        if best_model is None:
            raise RuntimeError(
                f'fold {fold_}: no epoch was selected as best '
                f'(epochs={cfg.data.train.epochs})')

        # NOTE(review): index labels are used as row positions here, which
        # assumes df carries a default 0..n-1 index - confirm.
        oof[val_x.index] = best_valid_preds.reshape(-1)
        # NOTE(review): the fold column's max acts as the fold's weight in the
        # CV score - presumably 1/n_folds; confirm against fold_df.
        cv += best_val_score * fold_df[col].max()

        torch.save(best_model, f'../logs/{run_name}/weight_best_{fold_}.pt')
        save_png(run_name, cfg, train_loss_list, val_loss_list, val_score_list, fold_)

        best_msg = f'\nEpoch {best_epoch} - val_score: {best_val_score:.4f}'
        print(best_msg)
        logging.debug(best_msg)

    print('\n\n===================================\n')
    print(f'CV: {cv:.6f}')
    logging.debug(f'\n\nCV: {cv:.6f}')
    print('\n===================================\n\n')

    result = {
        'cv': cv,
    }

    np.save(f'../logs/{run_name}/oof.npy', oof)

    return result
Ejemplo n.º 3
0
def train_cnn(run_name, trn_x, val_x, trn_y, val_y, cfg):
    """Train a single model on one train/validation split, scored by QWK.

    After every epoch the validation predictions are turned into classes,
    either by ``argmax`` when ``cfg.model.n_classes > 1`` or by fitting a
    ``QWKOptimizedRounder`` otherwise, and scored with
    ``quadratic_weighted_kappa``. The epoch with the highest score supplies
    the saved predictions, rounding coefficients, confusion matrix and weights.

    Files written under ``../logs/{run_name}/``: ``oof.npy``,
    ``best_coef.npy``, ``weight_best.pt``, plus whatever ``save_png`` emits.

    Args:
        run_name: Name of the directory under ``../logs`` that receives outputs.
        trn_x: Training inputs handed to ``factory.get_dataloader``.
        val_x: Validation inputs handed to ``factory.get_dataloader``.
        trn_y: Training targets handed to ``factory.get_dataloader``.
        val_y: Validation targets; also used as ground truth for the metric.
        cfg: Configuration object read by ``factory`` and by this function.

    Returns:
        ``{'cv': best_val_score}``.

    Raises:
        RuntimeError: If no epoch was selected as best (zero epochs
            configured, or the score never compared greater than ``-inf``).
    """
    # Function-scope import: needed only to snapshot the best weights.
    import copy

    train_loader = factory.get_dataloader(trn_x, trn_y, cfg.data.train)
    valid_loader = factory.get_dataloader(val_x, val_y, cfg.data.valid)

    model = factory.get_model(cfg).to(device)

    criterion = factory.get_loss(cfg)
    optimizer = factory.get_optim(cfg, model.parameters())
    scheduler = factory.get_scheduler(cfg, optimizer)

    best_epoch = -1
    best_val_score = -np.inf
    best_coef = []
    best_valid_preds = None
    best_model = None
    best_cm = None
    mb = master_bar(range(cfg.data.train.epochs))

    train_loss_list = []
    val_loss_list = []
    val_score_list = []
    # Starting thresholds handed to the rounder's fit on every epoch.
    initial_coef = [0.5, 1.5, 2.5, 3.5, 4.5]

    for epoch in mb:
        start_time = time.time()

        model, avg_loss = train_epoch(model, train_loader, criterion, optimizer, mb, cfg)

        valid_preds, avg_val_loss = val_epoch(model, valid_loader, criterion, cfg)

        if cfg.model.n_classes > 1:
            valid_preds_class = valid_preds.argmax(1)
        else:
            # Single-output head: fit thresholds on this epoch's predictions.
            optR = QWKOptimizedRounder()
            optR.fit(valid_preds.copy(), val_y, initial_coef)
            coef = optR.coefficients()
            valid_preds_class = optR.predict(valid_preds.copy(), coef)
        val_score = quadratic_weighted_kappa(val_y, valid_preds_class)
        cm = confusion_matrix(val_y, valid_preds_class)

        train_loss_list.append(avg_loss)
        val_loss_list.append(avg_val_loss)
        val_score_list.append(val_score)

        # ReduceLROnPlateau is the only scheduler stepped with the monitored
        # quantity; every other scheduler steps bare.
        if cfg.scheduler.name != 'ReduceLROnPlateau':
            scheduler.step()
        else:
            scheduler.step(avg_val_loss)

        elapsed = time.time() - start_time
        epoch_msg = f'Epoch {epoch+1} - avg_train_loss: {avg_loss:.4f}  avg_val_loss: {avg_val_loss:.4f} val_score: {val_score:.4f} time: {elapsed:.0f}s'
        mb.write(epoch_msg)
        logging.debug(epoch_msg)

        if val_score > best_val_score:
            best_epoch = epoch + 1
            best_val_score = val_score
            best_valid_preds = valid_preds
            # With multi_gpu the wrapped network is read from ``.module``.
            if cfg.model.multi_gpu:
                live_state = model.module.state_dict()
            else:
                live_state = model.state_dict()
            # state_dict() hands back references to the live parameters;
            # without a copy, later epochs would overwrite the "best" weights
            # in place before they are saved.
            best_model = copy.deepcopy(live_state)
            if cfg.model.n_classes == 1:
                best_coef = coef
            best_cm = cm

    if best_model is None:
        raise RuntimeError(
            f'no epoch was selected as best (epochs={cfg.data.train.epochs})')

    # Report the confusion matrix of the best epoch so that it matches the
    # score printed below (previously the last epoch's matrix was shown).
    print('\n\nCONFUSION MATRIX')
    logging.debug('\n\nCONFUSION MATRIX')
    print(best_cm)
    logging.debug(best_cm)

    print('\n\n===================================\n')
    print(f'CV: {best_val_score:.6f}')
    print(f'BEST EPOCH: {best_epoch}')
    logging.debug(f'\n\nCV: {best_val_score:.6f}')
    logging.debug(f'BEST EPOCH: {best_epoch}\n\n')
    print('\n===================================\n\n')

    result = {
        'cv': best_val_score,
    }

    np.save(f'../logs/{run_name}/oof.npy', best_valid_preds)
    np.save(f'../logs/{run_name}/best_coef.npy', best_coef)
    torch.save(best_model, f'../logs/{run_name}/weight_best.pt')
    save_png(run_name, cfg, train_loss_list, val_loss_list, val_score_list)

    return result
Ejemplo n.º 4
0
    def train(self, train_df, target_df):
        """Train one network per fold column and collect out-of-fold predictions.

        For every column of ``self.fold_df``, rows where the column equals 0
        form the training split and rows where it is greater than 0 form the
        validation split. When ``'transformer'`` occurs in
        ``self.cfg.model.backbone`` the training rows are grouped per
        ``user_id`` into tuples of value arrays, and both loaders are built
        with ``factory.get_transformer_dataloader``; otherwise
        ``factory.get_dataloader`` is used. The epoch with the highest
        validation metric supplies the out-of-fold predictions and the
        weights written to ``../logs/{self.run_name}/weight_best_{fold}.pt``.

        Args:
            train_df: Frame that is row-filtered by the fold masks.
            target_df: Targets, filtered with the same validation mask;
                ``.values`` of the selection is passed to the metric.

        Returns:
            The sum over folds of ``best_val_score * self.fold_df[col].max()``.

        Raises:
            RuntimeError: If no epoch of a fold was selected as best (zero
                epochs configured, or the metric never compared greater than
                ``-inf``, e.g. because it was NaN).
        """
        # Function-scope import: needed only to snapshot the best weights.
        import copy

        oof = np.zeros((len(train_df), self.cfg.model.n_classes))
        cv = 0

        for fold_, col in enumerate(self.fold_df.columns):
            fold_banner = f'\n========================== FOLD {fold_ + 1} / {self.n_splits} ... ==========================\n'
            print(fold_banner)
            logging.debug(fold_banner)

            val_mask = self.fold_df[col] > 0
            trn_x, val_x = train_df[self.fold_df[col] == 0], train_df[val_mask]
            val_y = target_df[val_mask].values

            if 'transformer' in self.cfg.model.backbone:
                usecols = ['user_id', 'content_id', 'task_container_id', 'timestamp', 'prior_question_elapsed_time',
                           'prior_question_had_explanation', 'part', 'answered_correctly', 'te_content_id_by_answered_correctly',
                           'answered_correctly_avg_u']
                # One tuple of per-column value arrays for each user, built
                # from the training rows only.
                group = (trn_x[usecols]
                         .groupby('user_id')
                         .apply(lambda r: (r['content_id'].values,
                                           r['answered_correctly'].values,
                                           r['timestamp'].values,
                                           r['prior_question_elapsed_time'].values,
                                           r['part'].values,
                                           r['te_content_id_by_answered_correctly'].values,
                                           r['task_container_id'].values)))

                # The validation loader receives the same training-derived
                # samples and differs only in ``df`` and ``cfg``.
                train_loader = factory.get_transformer_dataloader(samples=group, df=trn_x, cfg=self.cfg.data.train)
                valid_loader = factory.get_transformer_dataloader(samples=group, df=val_x, cfg=self.cfg.data.valid)
            else:
                train_loader = factory.get_dataloader(trn_x, self.cfg.data.train)
                valid_loader = factory.get_dataloader(val_x, self.cfg.data.valid)

            model = factory.get_nn_model(self.cfg).to(device)

            criterion = factory.get_loss(self.cfg)
            optimizer = factory.get_optim(self.cfg, model.parameters())
            scheduler = factory.get_scheduler(self.cfg, optimizer)

            best_epoch = -1
            best_val_score = -np.inf
            best_valid_preds = None
            best_model = None
            mb = master_bar(range(self.cfg.model.epochs))

            train_loss_list = []
            val_loss_list = []
            val_score_list = []

            for epoch in mb:
                start_time = time.time()

                # The training pass runs inside the detect_anomaly() context.
                with detect_anomaly():
                    model, avg_loss = self._train_epoch(model, train_loader, criterion, optimizer, mb)

                valid_preds, avg_val_loss = self._val_epoch(model, valid_loader, criterion)

                val_score = factory.get_metrics(self.cfg.common.metrics.name)(val_y, valid_preds)

                train_loss_list.append(avg_loss)
                val_loss_list.append(avg_val_loss)
                val_score_list.append(val_score)

                # ReduceLROnPlateau is the only scheduler stepped with the
                # monitored quantity; every other scheduler steps bare.
                if self.cfg.scheduler.name != 'ReduceLROnPlateau':
                    scheduler.step()
                else:
                    scheduler.step(avg_val_loss)

                elapsed = time.time() - start_time
                epoch_msg = f'Epoch {epoch+1} - avg_train_loss: {avg_loss:.6f}  avg_val_loss: {avg_val_loss:.6f} val_score: {val_score:.6f} time: {elapsed:.0f}s'
                mb.write(epoch_msg)
                logging.debug(epoch_msg)

                if val_score > best_val_score:
                    best_epoch = epoch + 1
                    best_val_score = val_score
                    best_valid_preds = valid_preds
                    # With multi_gpu the wrapped network is read from ``.module``.
                    if self.cfg.model.multi_gpu:
                        live_state = model.module.state_dict()
                    else:
                        live_state = model.state_dict()
                    # state_dict() hands back references to the live
                    # parameters; without a copy, later epochs would overwrite
                    # the "best" weights in place before they are saved.
                    best_model = copy.deepcopy(live_state)

            if best_model is None:
                raise RuntimeError(
                    f'fold {fold_}: no epoch was selected as best '
                    f'(epochs={self.cfg.model.epochs})')

            # NOTE(review): index labels are used as row positions here, which
            # assumes train_df carries a default 0..n-1 index - confirm.
            oof[val_x.index, :] = best_valid_preds
            # NOTE(review): the fold column's max acts as the fold's weight in
            # the CV score - presumably 1/n_folds; confirm against fold_df.
            cv += best_val_score * self.fold_df[col].max()

            torch.save(best_model, f'../logs/{self.run_name}/weight_best_{fold_}.pt')
            # Loss-curve plotting is disabled in this variant:
            # self._save_loss_png(train_loss_list, val_loss_list, val_score_list, fold_)

            best_msg = f'\nEpoch {best_epoch} - val_score: {best_val_score:.6f}'
            print(best_msg)
            logging.debug(best_msg)

        print('\n\n===================================\n')
        print(f'CV: {cv:.6f}')
        logging.debug(f'\n\nCV: {cv:.6f}')
        print('\n===================================\n\n')

        self.oof = oof

        return cv
Ejemplo n.º 5
0
def train_ordinal_reg(run_name, trn_x, val_x, trn_y, val_y, cfg):
    """Train one model per ordinal target column and score their summed output.

    A separate model is trained for every column of ``trn_y`` except the
    first. For each column the epoch with the lowest validation loss supplies
    the raw predictions (saved as ``oof_{col}.npy``) and the weights (saved as
    ``weight_best_{col}.pt``). The sigmoid of those predictions is stored per
    column and the columns are summed into one score per row. That score is
    thresholded by a fitted ``QWKOptimizedRounder`` and evaluated with
    ``quadratic_weighted_kappa`` against ``sum(val_y, axis=1) - 1``.

    Args:
        run_name: Name of the directory under ``../logs`` that receives outputs.
        trn_x: Training inputs handed to ``factory.get_dataloader``.
        val_x: Validation inputs handed to ``factory.get_dataloader``.
        trn_y: Frame of training targets; columns after the first are used.
        val_y: Frame of validation targets with the same columns as ``trn_y``.
        cfg: Configuration object read by ``factory`` and by this function.

    Returns:
        ``{'cv': best_val_score}`` with the QWK of the thresholded sum.

    Raises:
        RuntimeError: If no epoch of a column was selected as best (zero
            epochs configured, or the loss never compared less than ``inf``).
    """
    # Function-scope import: needed only to snapshot the best weights.
    import copy

    # Explicit float buffer: np.zeros_like(val_y) would inherit the targets'
    # dtype, and with integer targets the sigmoid outputs written below would
    # be truncated to 0.
    ordinal_val_preds = np.zeros(val_y.shape, dtype=np.float64)

    # Starting thresholds for the rounder; defined before the loop because
    # they are only consumed after it.
    initial_coef = [0.5, 1.5, 2.5, 3.5, 4.5]

    for i, col in enumerate(trn_y.columns[1:]):
        col_banner = f'\n\n====================  {col}  ===================='
        print(col_banner)
        logging.debug(col_banner)

        train_loader = factory.get_dataloader(trn_x, trn_y[col], cfg.data.train)
        valid_loader = factory.get_dataloader(val_x, val_y[col], cfg.data.valid)

        model = factory.get_model(cfg).to(device)

        criterion = factory.get_loss(cfg)
        optimizer = factory.get_optim(cfg, model.parameters())
        scheduler = factory.get_scheduler(cfg, optimizer)

        best_epoch = -1
        best_val_loss = np.inf
        best_valid_preds = None
        best_model = None
        mb = master_bar(range(cfg.data.train.epochs))

        train_loss_list = []
        val_loss_list = []

        for epoch in mb:
            start_time = time.time()

            model, avg_loss = train_epoch(model, train_loader, criterion, optimizer, mb, cfg)

            valid_preds, avg_val_loss = val_epoch(model, valid_loader, criterion, cfg)

            train_loss_list.append(avg_loss)
            val_loss_list.append(avg_val_loss)

            # ReduceLROnPlateau is the only scheduler stepped with the
            # monitored quantity; every other scheduler steps bare.
            if cfg.scheduler.name != 'ReduceLROnPlateau':
                scheduler.step()
            else:
                scheduler.step(avg_val_loss)

            elapsed = time.time() - start_time
            epoch_msg = f'Epoch {epoch+1} - avg_train_loss: {avg_loss:.4f}  avg_val_loss: {avg_val_loss:.4f} time: {elapsed:.0f}s'
            mb.write(epoch_msg)
            logging.debug(epoch_msg)

            # Model selection here is by validation loss, not by a metric.
            if avg_val_loss < best_val_loss:
                best_epoch = epoch + 1
                best_val_loss = avg_val_loss
                best_valid_preds = valid_preds
                # With multi_gpu the wrapped network is read from ``.module``.
                if cfg.model.multi_gpu:
                    live_state = model.module.state_dict()
                else:
                    live_state = model.state_dict()
                # state_dict() hands back references to the live parameters;
                # without a copy, later epochs would overwrite the "best"
                # weights in place before they are saved.
                best_model = copy.deepcopy(live_state)

        if best_model is None:
            raise RuntimeError(
                f'{col}: no epoch was selected as best '
                f'(epochs={cfg.data.train.epochs})')

        print(f'epoch: {best_epoch}   loss: {best_val_loss}')

        # Sigmoid of the raw outputs.
        # NOTE(review): assumes best_valid_preds is 1-D with one value per
        # validation row - confirm against val_epoch.
        ordinal_val_preds[:, i] = 1 / (1 + np.exp(-1 * best_valid_preds))

        np.save(f'../logs/{run_name}/oof_{col}.npy', best_valid_preds)
        torch.save(best_model, f'../logs/{run_name}/weight_best_{col}.pt')

    # Only the first len(columns) - 1 buffer columns were filled; the last one
    # stays zero and does not affect the sum.
    valid_preds = np.sum(ordinal_val_preds, axis=1)
    val_y = (np.sum(val_y.values, axis=1) - 1).astype(int)

    optR = QWKOptimizedRounder()
    optR.fit(valid_preds.copy(), val_y, initial_coef)
    best_coef = optR.coefficients()
    valid_preds_class = optR.predict(valid_preds.copy(), best_coef)
    best_val_score = quadratic_weighted_kappa(val_y, valid_preds_class)
    cm = confusion_matrix(val_y, valid_preds_class)

    print('\n\nCONFUSION MATRIX')
    logging.debug('\n\nCONFUSION MATRIX')
    print(cm)
    logging.debug(cm)

    print('\n\n===================================\n')
    print(f'CV: {best_val_score:.6f}')
    logging.debug(f'\n\nCV: {best_val_score:.6f}')
    print('\n===================================\n\n')

    result = {
        'cv': best_val_score,
    }

    np.save(f'../logs/{run_name}/best_coef.npy', best_coef)

    return result