def run_model(cfg: omegaconf.DictConfig) -> None:
    """Train a ``LitWheatModel``, optionally warm-starting from a checkpoint.

    When ``cfg.training.pretrain_dir`` is non-empty, the model is restored from
    the single checkpoint found there and its classification head is re-created
    if the checkpoint's class count differs from ``cfg.data_mode.num_classes``.
    Otherwise a fresh model is built from the config.

    Args:
        cfg: Hydra/omegaconf config with ``general``, ``callbacks``,
            ``training``, ``model`` and ``data_mode`` sections.
    """
    logger.info(f"Config: {omegaconf.OmegaConf.to_yaml(cfg)}")
    utils.setup_environment(seed=cfg.general.seed, gpu_list=cfg.general.gpu_list)

    earlystopping_callback = hydra.utils.instantiate(cfg.callbacks.early_stopping)
    checkpoint_callback = hydra.utils.instantiate(cfg.callbacks.model_checkpoint)
    tb_logger = hydra.utils.instantiate(cfg.callbacks.tensorboard)
    lr_logger = hydra.utils.instantiate(cfg.callbacks.lr_logger)

    if cfg.training.pretrain_dir != "":
        logger.info(f"Loading the pre-trained model from: {cfg.training.pretrain_dir}")
        pretrain_path = utils.get_single_model_path(cfg.training.pretrain_dir)
        model = LitWheatModel.load_from_checkpoint(pretrain_path, hydra_cfg=cfg)

        # Number of classes in bad labels does not equal to the number of
        # classes in good labels, so the final layer may need to be replaced.
        # EfficientNet models expose the head as `_fc`, others as `_classifier`.
        fc_layer_name = (
            "_fc"
            if cfg.model.architecture_name.startswith("efficientnet")
            else "_classifier"
        )
        head = getattr(model.model, fc_layer_name)
        if head.out_features != cfg.data_mode.num_classes:
            fc = torch.nn.Linear(head.in_features, cfg.data_mode.num_classes)
            setattr(model.model, fc_layer_name, fc)
    else:
        logger.info("Training the model from scratch")
        model = LitWheatModel(hydra_cfg=cfg)

    # NOTE(review): min_epochs is deliberately set to max_epochs so early
    # stopping cannot shorten the schedule — confirm this is intended
    # (the sibling train_model() uses cfg.training.min_epochs instead).
    trainer = pl.Trainer(
        max_epochs=cfg.training.max_epochs,
        min_epochs=cfg.training.max_epochs,
        logger=[tb_logger],
        early_stop_callback=earlystopping_callback,
        checkpoint_callback=checkpoint_callback,
        callbacks=[lr_logger],
        gradient_clip_val=0.5,
        gpus=cfg.general.gpu_list,
        fast_dev_run=False,
        distributed_backend="dp",
        precision=32,
        weights_summary=None,
        progress_bar_refresh_rate=50,
        deterministic=True,
    )

    logger.info("Start fitting the model...")
    trainer.fit(model)
def run_inference(cfg: omegaconf.DictConfig) -> None:
    """Score the test CSV with every trained checkpoint and write submissions.

    Checkpoints are discovered under
    ``<logs_dir>/checkpoints/<architecture><num_classes>/*.ckpt``; for each one
    the model is restored, predictions are produced batch by batch, and a
    submission file is created via ``utils.create_submission``.

    Args:
        cfg: Hydra/omegaconf config with ``general``, ``testing``, ``model``
            and ``classifiermode`` sections.
    """
    logger.info(" .. Testing Will Be Starting in few seconds .. ")
    test_df = pd.read_csv(cfg.testing.test_csv)
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    checkpoints = glob.glob(
        os.path.join(
            cfg.general.logs_dir,
            "checkpoints",
            f"{cfg.model.architecture_name}{cfg.classifiermode.num_classes}",
            "*.ckpt",
        )
    )
    # Nothing to score: bail out early.
    if len(checkpoints) == 0:
        sys.exit()

    for checkpoint_path in checkpoints:
        # Path layout assumption: checkpoints/<output_name>/seed_<seed>.ckpt —
        # the seed is recovered from the filename. TODO confirm against the
        # training script's checkpoint naming.
        output_name = checkpoint_path.split("/")[2]
        seed = int(checkpoint_path.split("/")[3].split(".")[0].split("_")[1])
        utils.setup_environment(random_seed=seed, gpu_list=cfg.general.gpu_list)

        model = TuniziDialectClassifier.load_from_checkpoint(
            checkpoint_path, hydra_config=cfg
        )
        model.eval().to(device)

        test_predictions = []
        with torch.no_grad():
            for batch in Bar(model.test_dataloader()):
                input_ids = batch["input_ids"].to(device, dtype=torch.long)
                attention_mask = batch["attention_mask"].to(device, dtype=torch.long)
                outputs = model.forward(input_ids, attention_mask=attention_mask)

                # Binary head -> sigmoid probability; otherwise softmax over classes.
                if cfg.classifiermode.num_classes == 1:
                    outputs = torch.sigmoid(outputs).detach().cpu().numpy()
                else:
                    outputs = torch.softmax(outputs, 1).detach().cpu().numpy()
                test_predictions.append(outputs)

        test_predictions = np.concatenate(test_predictions, axis=0)
        # Flatten (n, 1) -> (n,) for the single-class case.
        if cfg.classifiermode.num_classes == 1:
            test_predictions = test_predictions.reshape(test_predictions.shape[0])

        gc.collect()
        torch.cuda.empty_cache()

        utils.create_submission(
            test_df,
            output_name + str(seed),
            test_predictions,
            cfg.classifiermode.num_classes,
        )
def train_model(cfg: omegaconf.DictConfig) -> None:
    """Set up the environment and fit a ``TuniziDialectClassifier``.

    Args:
        cfg: Hydra/omegaconf config with ``general``, ``callbacks`` and
            ``training`` sections.
    """
    logger.info(f"Config: {omegaconf.OmegaConf.to_yaml(cfg)}")
    utils.setup_environment(
        random_seed=cfg.general.seed, gpu_list=cfg.general.gpu_list
    )

    tb_logger = hydra.utils.instantiate(cfg.callbacks.tensorboard)
    model = TuniziDialectClassifier(hydra_config=cfg)

    trainer_kwargs = dict(
        max_epochs=cfg.training.max_epochs,
        min_epochs=cfg.training.min_epochs,
        logger=[tb_logger],
        gpus=cfg.general.gpu_list,
        fast_dev_run=False,
        precision=32,
        progress_bar_refresh_rate=1,
        deterministic=True,
    )
    trainer = pl.Trainer(**trainer_kwargs)

    logger.info(".. Shake your Hands Training Will Begin .. ")
    trainer.fit(model)
def run_model(cfg: omegaconf.DictConfig) -> None:
    """Run fold-wise inference for a wheat growth-stage model.

    Behaviour depends on ``cfg.testing.mode``:
      * ``"valid"``  — score out-of-fold rows of the train CSV, report RMSE
        and save validation predictions.
      * ``"pseudo"`` — write per-fold pseudolabel CSVs and return early.
      * otherwise    — average predictions over folds and save test predictions.

    Args:
        cfg: Hydra/omegaconf config with ``general``, ``testing``, ``model``,
            ``training`` and ``data_mode`` sections.
    """
    utils.setup_environment(seed=cfg.general.seed, gpu_list=cfg.general.gpu_list)

    if cfg.testing.mode == "valid":
        test = pd.read_csv(cfg.data_mode.train_csv)
        # Only high-quality labels (label_quality == 2) are used for validation.
        test = test[test.label_quality == 2].reset_index(drop=True)
    else:
        test = pd.read_csv(cfg.testing.test_csv)

    test = utils.preprocess_df(test, data_dir=cfg.data_mode.data_dir)
    logger.info(f"Length of the test data: {len(test)}")

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    df_list = []
    pred_list = []

    for fold in cfg.testing.folds:
        if cfg.testing.mode == "valid":
            df_test = test[test.fold == fold].reset_index(drop=True)
        else:
            df_test = test

        checkpoints = glob.glob(
            os.path.join(
                cfg.general.logs_dir, f"model_{cfg.model.model_id}/fold_{fold}/*.ckpt"
            )
        )
        # (rows, classes, checkpoints) — filled per checkpoint, averaged below.
        fold_predictions = np.zeros(
            (len(df_test), cfg.data_mode.num_classes, len(checkpoints))
        )

        for checkpoint_id, checkpoint_path in enumerate(checkpoints):
            model = lightning_models.LitWheatModel.load_from_checkpoint(
                checkpoint_path, hydra_cfg=cfg
            )
            model.eval().to(device)

            test_dataset = dataset.ZindiWheatDataset(
                images=df_test.path.values,
                labels=None,
                preprocess_function=model.preprocess,
                augmentations=None,
                input_shape=(cfg.model.input_size[0], cfg.model.input_size[1], 3),
                crop_method=cfg.model.crop_method,
            )
            test_loader = torch_data.DataLoader(
                test_dataset,
                batch_size=cfg.training.batch_size,
                num_workers=cfg.general.num_workers,
                shuffle=False,
                pin_memory=True,
            )

            if cfg.testing.tta:
                model = tta.get_tta_model(
                    model,
                    crop_method=cfg.model.crop_method,
                    input_size=cfg.model.input_size,
                )
            if torch.cuda.is_available() and torch.cuda.device_count() > 1:
                model = torch.nn.DataParallel(model)

            with torch.no_grad():
                tq = tqdm.tqdm(test_loader, total=len(test_loader))
                for idx, data in enumerate(tq):
                    images = data["image"].to(device)
                    preds = model(images)
                    if not cfg.model.regression:
                        preds = torch.softmax(preds, dim=1)
                    preds = preds.cpu().detach().numpy()
                    # shuffle=False, so batch idx maps to a contiguous row slice.
                    fold_predictions[
                        idx * cfg.training.batch_size : (idx + 1)
                        * cfg.training.batch_size,
                        :,
                        checkpoint_id,
                    ] = preds

            gc.collect()
            torch.cuda.empty_cache()

        # Average over this fold's checkpoints.
        fold_predictions = np.mean(fold_predictions, axis=-1)

        # OOF predictions for validation and pseudolabels
        if cfg.testing.mode == "valid" or cfg.testing.mode == "pseudo":
            df_list.append(df_test)
            pred_list.append(fold_predictions)

    multipliers = np.array(cfg.data_mode.rmse_multipliers)

    if cfg.testing.mode == "valid":
        test = pd.concat(df_list)
        probs = np.vstack(pred_list)
        filename = "validation_probs.pkl"
    elif cfg.testing.mode == "pseudo":
        for fold, df_test, probs in zip(cfg.testing.folds, df_list, pred_list):
            predictions = np.argmax(probs, axis=1)
            predictions = [multipliers[x] for x in predictions]
            df_test["growth_stage"] = predictions
            save_path = os.path.join(
                cfg.general.logs_dir,
                f"model_{cfg.model.model_id}/pseudo_fold_{fold}.csv",
            )
            logger.info(f"Saving pseudolabels to {save_path}")
            df_test[["UID", "growth_stage"]].to_csv(save_path, index=False)
        return
    else:
        probs = np.stack(pred_list)
        probs = np.mean(probs, axis=0)
        filename = "test_probs.pkl"

    # BUG FIX: the probabilities pickle was previously saved under the literal
    # path segment "(unknown)", leaving `filename` unused — use it instead.
    ensemble_probs = dict(zip(test.UID.values, probs))
    utils.save_in_file_fast(
        ensemble_probs,
        file_name=os.path.join(
            cfg.general.logs_dir, f"model_{cfg.model.model_id}/{filename}"
        ),
    )

    # Classification heads output class probabilities; collapse them into the
    # expected growth stage. Regression models already predict the value.
    if not cfg.model.regression:
        probs = np.sum(probs * multipliers, axis=-1)
    predictions = np.clip(probs, min(multipliers), max(multipliers))

    if cfg.testing.mode == "valid":
        rmse = np.sqrt(
            metrics.mean_squared_error(predictions, test.growth_stage.values)
        )
        logger.info(f"OOF VALIDATION SCORE: {rmse:.5f}")
        test["pred"] = predictions
        save_path = os.path.join(
            cfg.general.logs_dir, f"model_{cfg.model.model_id}/valid_preds.csv"
        )
        logger.info(f"Saving validation predictions to {save_path}")
        test[["UID", "pred"]].to_csv(save_path, index=False)
    else:
        test["growth_stage"] = predictions
        save_path = os.path.join(
            cfg.general.logs_dir, f"model_{cfg.model.model_id}/test_preds.csv"
        )
        logger.info(f"Saving test predictions to {save_path}")
        test[["UID", "growth_stage"]].to_csv(save_path, index=False)
def _snap_to_multipliers(df, column: str, multipliers, tol: float = 0.03) -> None:
    """Snap values of ``df[column]`` within ``tol`` of a multiplier onto it, in place."""
    for mult in multipliers:
        df.loc[
            (df[column] < mult + tol) & (df[column] > mult - tol), column
        ] = mult


def make_ensemble(cfg: omegaconf.DictConfig) -> None:
    """Average per-model predictions into an ensemble and save the result.

    Behaviour depends on ``cfg.testing.mode``:
      * ``"valid"``  — merge OOF predictions with the train CSV and log RMSE.
      * ``"pseudo"`` — combine per-fold pseudolabels (mode aggregation) and save.
      * otherwise    — combine test predictions and save a submission CSV.

    Refactor note: the near-duplicate snap-to-multiplier postprocessing loops
    are now shared via ``_snap_to_multipliers``.

    Args:
        cfg: Hydra/omegaconf config with ``general``, ``testing``, ``ensemble``
            and ``data_mode`` sections.
    """
    utils.setup_environment(seed=cfg.general.seed, gpu_list=cfg.general.gpu_list)

    if cfg.testing.mode == "valid":
        train = pd.read_csv(cfg.data_mode.train_csv)
        predictions = utils.combine_dataframes(
            models_list=cfg.ensemble.model_ids,
            logs_dir=cfg.general.logs_dir,
            filename="valid_preds.csv",
            output_colname="pred",
        )
        predictions = predictions.merge(train)
        if cfg.ensemble.postprocessing:
            _snap_to_multipliers(predictions, "pred", cfg.data_mode.rmse_multipliers)
        rmse = np.sqrt(
            metrics.mean_squared_error(predictions.growth_stage, predictions.pred)
        )
        logger.info(f"OOF ENSEMBLE VALIDATION SCORE: {rmse:.5f}")
    elif cfg.testing.mode == "pseudo":
        for fold in cfg.testing.folds:
            test_predictions = utils.combine_dataframes(
                models_list=cfg.ensemble.model_ids,
                logs_dir=cfg.general.logs_dir,
                filename=f"pseudo_fold_{fold}.csv",
                agg_func="mode",
            )
            save_path = os.path.join(
                cfg.general.logs_dir,
                f"{'_'.join([str(x) for x in cfg.ensemble.model_ids])}_pseudo_fold_{fold}.csv",
            )
            logger.info(f"Saving pseudo predictions to {save_path}")
            test_predictions[["UID", "growth_stage"]].to_csv(save_path, index=False)
    else:
        test_predictions = utils.combine_dataframes(
            models_list=cfg.ensemble.model_ids,
            logs_dir=cfg.general.logs_dir,
            filename="test_preds.csv",
        )
        if cfg.ensemble.postprocessing:
            _snap_to_multipliers(
                test_predictions, "growth_stage", cfg.data_mode.rmse_multipliers
            )
        save_path = os.path.join(
            cfg.general.logs_dir,
            f"{'_'.join([str(x) for x in cfg.ensemble.model_ids])}_ens.csv",
        )
        logger.info(f"Saving test predictions to {save_path}")
        test_predictions[["UID", "growth_stage"]].to_csv(save_path, index=False)
def make_ensemble(cfg: omegaconf.DictConfig) -> None:
    """Stack per-model predictions with per-fold LightGBM regressors.

    In ``"valid"`` mode, trains one LightGBM model per fold on the other
    folds' OOF predictions, saves each booster to
    ``<logs_dir>/<model_ids>_stacking_fold_<fold>.txt`` and logs the stacked
    OOF RMSE. Otherwise, loads those saved boosters, averages their test-set
    predictions and writes the submission CSV.

    Args:
        cfg: Hydra/omegaconf config with ``general``, ``testing``, ``ensemble``
            and ``data_mode`` sections.
    """
    utils.setup_environment(seed=cfg.general.seed, gpu_list=cfg.general.gpu_list)
    if cfg.testing.mode == "valid":
        train = pd.read_csv(cfg.data_mode.train_csv, index_col="UID")
        train = train[["growth_stage", "fold"]]
        # One stacking feature per base model, named 0..n-1.
        feature_columns = list(range(len(cfg.ensemble.model_ids)))
        predictions = utils.combine_dataframes(
            models_list=cfg.ensemble.model_ids,
            logs_dir=cfg.general.logs_dir,
            filename="valid_preds.csv",
            agg_func=None,
        )
        predictions.columns = feature_columns
        # Inner join on UID: keep only rows that have predictions from all models.
        train = train.join(predictions, how="inner")
        # Tiny trees (num_leaves=2) — the stacker only blends a few features.
        lightgbm_params = {
            "boosting_type": "gbdt",
            "objective": "regression",
            "metric": "rmse",
            "num_leaves": 2,
            "learning_rate": 0.05,
            "feature_fraction": 0.6,
            "bagging_fraction": 0.9,
            "bagging_freq": 5,
            "verbose": 1,
        }
        # Placeholder for OOF stacked predictions, filled fold by fold.
        train["pred"] = -1
        multipliers = np.array(cfg.data_mode.rmse_multipliers)
        model_name = "_".join([str(x) for x in cfg.ensemble.model_ids])
        for fold in cfg.testing.folds:
            # Train on all other folds, validate/predict on the held-out fold.
            train_folds = [f for f in cfg.testing.folds if f != fold]
            x_train = train.loc[train.fold.isin(train_folds), feature_columns].values
            y_train = train.loc[train.fold.isin(train_folds), "growth_stage"].values
            x_test = train.loc[train.fold == fold, feature_columns].values
            y_test = train.loc[train.fold == fold, "growth_stage"].values
            train_data = lightgbm.Dataset(x_train, label=y_train)
            test_data = lightgbm.Dataset(x_test, label=y_test)
            gbm = lightgbm.train(
                lightgbm_params,
                train_data,
                valid_sets=test_data,
                num_boost_round=5000,
                early_stopping_rounds=100,
            )
            preds = gbm.predict(x_test)
            # Clamp to the valid growth-stage range.
            preds = np.clip(preds, min(multipliers), max(multipliers))
            train.loc[train.fold == fold, "pred"] = preds
            # Persist the booster (at its best iteration) for later test inference.
            gbm.save_model(
                os.path.join(
                    cfg.general.logs_dir, f"{model_name}_stacking_fold_{fold}.txt"
                ),
                num_iteration=gbm.best_iteration,
            )
        rmse = np.sqrt(metrics.mean_squared_error(train.growth_stage, train.pred))
        logger.info(f"STACKING VALIDATION SCORE: {rmse:.5f}")
    else:
        test_predictions = utils.combine_dataframes(
            models_list=cfg.ensemble.model_ids,
            logs_dir=cfg.general.logs_dir,
            filename="test_preds.csv",
            agg_func=None,
        )
        feature_columns = list(range(len(cfg.ensemble.model_ids)))
        test_predictions.columns = feature_columns
        model_name = "_".join([str(x) for x in cfg.ensemble.model_ids])
        # Accumulator for the average of the per-fold boosters' predictions.
        test_predictions["growth_stage"] = 0
        multipliers = np.array(cfg.data_mode.rmse_multipliers)
        for fold in cfg.testing.folds:
            # Load the booster trained for this fold in "valid" mode.
            gbm = lightgbm.Booster(
                model_file=os.path.join(
                    cfg.general.logs_dir, f"{model_name}_stacking_fold_{fold}.txt"
                )
            )
            preds = gbm.predict(test_predictions[feature_columns].values)
            preds = np.clip(preds, min(multipliers), max(multipliers))
            test_predictions["growth_stage"] += preds / len(cfg.testing.folds)
        save_path = os.path.join(
            cfg.general.logs_dir,
            f"{'_'.join([str(x) for x in cfg.ensemble.model_ids])}_ens.csv",
        )
        logger.info(f"Saving test predictions to {save_path}")
        # UID lives in the index (combine_dataframes output); restore it as a column.
        test_predictions.reset_index()[["UID", "growth_stage"]].to_csv(
            save_path, index=False
        )