def load_config(yaml_file):
    """Load a calibration-board YAML config and instantiate every board it describes.

    Args:
        yaml_file: path to a YAML file with optional ``aruco_params`` and
            ``common`` sections and a ``boards`` mapping of name -> board config.

    Returns:
        dict mapping board name to an instantiated board object
        (``CharucoBoard`` or ``AprilGrid``).

    Raises:
        ValueError: if a board entry has an unknown ``_type_``.
    """
    config = OmegaConf.load(yaml_file)
    aruco_params = config.get('aruco_params', {})

    # If a 'common' section exists, its settings are merged into every board
    # entry (board-specific keys win via OmegaConf.merge ordering).
    boards = {k: OmegaConf.merge(config.common, board)
              for k, board in config.boards.items()} \
        if 'common' in config else config.boards

    def instantiate_board(config):
        # Dispatch on the declared board type; validate against the
        # corresponding structured schema before construction.
        if config._type_ == "charuco":
            schema = OmegaConf.structured(CharucoConfig)
            return CharucoBoard(aruco_params=aruco_params,
                                **merge_schema(config, schema))
        elif config._type_ == "aprilgrid":
            schema = OmegaConf.structured(AprilConfig)
            return AprilGrid(**merge_schema(config, schema))
        else:
            # Was `assert False, ...` — asserts are stripped under `python -O`,
            # silently letting bad configs through; raise explicitly instead.
            # NOTE(review): message advertises 'checkerboard' but no branch
            # handles it — confirm whether that type was ever implemented.
            raise ValueError(
                f"unknown board type: {config._type_}, options are (charuco | aprilgrid | checkerboard)")

    return {k: instantiate_board(board) for k, board in boards.items()}
# NOTE(review): the line below is a whitespace-mangled paste. It contains
# (a) the tail of a prediction routine whose `def` is not visible in this
# chunk — it writes per-example span predictions to
# "spans-pred-<test_file_stem>.txt" under save_dir, one "<index>\t<spans>"
# row per entry of new_final_offset_predictions — and (b) a script entry
# point that seeds random/numpy/transformers RNGs, parses a --config
# argument, loads it with OmegaConf, and calls predict(**dict(config)).
# Because the owning function header is outside this view, the original
# indentation cannot be reconstructed safely; left byte-identical.
# print("Text: ", text) # print("Preds: ", get_text_spans(text, old_offsets)) # print("Clean Preds: ", get_text_spans(text, new_offsets)) with open( os.path.join( save_dir, f"spans-pred-{test_file.split('/')[-1].split('.')[0]}.txt" ), "w", ) as f: for i, spans in enumerate(new_final_offset_predictions): f.write(f"{i}\t{str(spans)}\n") if __name__ == "__main__": random.seed(SEED) np.random.seed(SEED) set_seed(SEED) parser = argparse.ArgumentParser(prog="run_baseline_model.py", description="Train Baseline RNNSL Model.") parser.add_argument( "--config", type=str, action="store", help="The configuration for model training/evaluation", ) args = parser.parse_args() config = OmegaConf.load(args.config) predict(**dict(config))
def _sweep(
    task: str,
    model: str,
    fold: int,
):
    """
    Determine best postprocessing parameters for a trained model

    Args:
        task: current task
        model: full name of the model run to determine empirical parameters
            for, e.g. RetinaUNetV001_D3V001_3d
        fold: current fold
    """
    # Resolve the training directory for this task/model/fold from the
    # det_models environment variable.
    nndet_data_dir = Path(os.getenv("det_models"))
    task = get_task(task, name=True, models=True)
    train_dir = nndet_data_dir / task / model / f"fold{fold}"
    cfg = OmegaConf.load(str(train_dir / "config.yaml"))
    # Side effect: subsequent relative paths (and the log file) resolve
    # inside the training directory.
    os.chdir(str(train_dir))
    # Reset logger sinks, then log both to stdout and to sweep.log.
    logger.remove()
    logger.add(sys.stdout, format="{level} {message}", level="INFO")
    log_file = Path(os.getcwd()) / "sweep.log"
    logger.add(log_file, level="INFO")
    logger.info(f"Log file at {log_file}")
    plan = load_pickle(train_dir / "plan.pkl")
    # Preprocessed validation images for this plan's data identifier.
    data_dir = Path(cfg.host["preprocessed_output_dir"]
                    ) / plan["data_identifier"] / "imagesTr"
    # Instantiate the training/inference module registered under cfg["module"].
    module = MODULE_REGISTRY[cfg["module"]](
        model_cfg=OmegaConf.to_container(cfg["model_cfg"], resolve=True),
        trainer_cfg=OmegaConf.to_container(cfg["trainer_cfg"], resolve=True),
        plan=plan,
    )
    # Sweep runs only on this fold's validation cases.
    splits = load_pickle(train_dir / "splits.pkl")
    case_ids = splits[cfg["exp"]["fold"]]["val"]
    inference_plan = module.sweep(
        cfg=OmegaConf.to_container(cfg, resolve=True),
        save_dir=train_dir,
        train_data_dir=data_dir,
        case_ids=case_ids,
        run_prediction=True,  # TODO: add command line arg
    )
    # Persist the chosen postprocessing parameters next to the training plan.
    plan["inference_plan"] = inference_plan
    save_pickle(plan, train_dir / "plan_inference.pkl")
    ensembler_cls = module.get_ensembler_cls(
        key="boxes", dim=plan["network_dim"])  # TODO: make this configurable
    # Extract results twice: restored to original geometry and in
    # preprocessed space.
    for restore in [True, False]:
        target_dir = train_dir / "val_predictions" if restore else \
            train_dir / "val_predictions_preprocessed"
        extract_results(
            source_dir=train_dir / "sweep_predictions",
            target_dir=target_dir,
            ensembler_cls=ensembler_cls,
            restore=restore,
            **inference_plan,
        )
    # Evaluate on the validation split (test=False) with the swept parameters.
    _evaluate(
        task=cfg["task"],
        model=cfg["exp"]["id"],
        fold=cfg["exp"]["fold"],
        test=False,
        do_boxes_eval=True,  # TODO: make this configurable
        do_analyze_boxes=True,  # TODO: make this configurable
    )
def run():
    """Train/predict the MoA multilabel model over seeds and folds, write submission.csv.

    Loads the experiment config from src/test.yaml, runs seed-averaged
    k-fold training/inference via ``run_k_fold_nn``, scores out-of-fold
    predictions with column-wise log-loss (when not using a pretrained
    model), zeroes predictions for control-vehicle rows, and writes
    ``submission.csv``.

    Returns:
        float: mean CV log-loss when training from scratch, else 0.
    """
    on_kaggle = False  # change me True if you use kaggle
    pretrain_model = False
    cfg = OmegaConf.load(
        f"{'../input/src-code0' if on_kaggle else './'}/src/test.yaml")
    # os.chdir(utils.get_original_cwd())
    # log.info(OmegaConf.to_yaml(cfg))
    cfg['device'] = 'cuda'
    print(cfg['device'])
    cfg['list_seed'] = list(range(cfg.model.nseed))
    verbose = 1
    path = '../input/lish-moa'
    path_model = f"{'/kaggle/input/models0' if pretrain_model else '/kaggle/working' if on_kaggle else '../models'}"
    cfg['path_model'] = path_model

    ######################################
    # data_load and preprocess
    ######################################
    data_dict = load_and_preprocess_data(
        cfg, path, pca_append_test=True,
        variancethreshold_append_test=True, verbose=1)

    ######################################
    # cv
    ######################################
    # BUG FIX: random_state without shuffle=True raises
    # "Setting a random_state has no effect since shuffle is False".
    CV = MultilabelStratifiedKFold(
        n_splits=cfg.model.nfolds, shuffle=True, random_state=42)

    ##################################################
    # Train
    ##################################################
    oof = np.zeros((len(data_dict['train']), len(data_dict['target_cols'])))
    predictions = np.zeros(
        (len(data_dict['test']), len(data_dict['target_cols'])))
    # Average OOF / test predictions over all seeds.
    for seed in tqdm(cfg['list_seed'], leave=verbose):
        return_run_k_fold = run_k_fold_nn(
            data_dict, cfg, cv=CV, seed=seed, file_prefix='m1',
            pretrain_model=pretrain_model, verbose=verbose)
        if not pretrain_model:
            oof_, predictions_ = return_run_k_fold
            oof += oof_ / cfg.model.nseed
        else:
            predictions_ = return_run_k_fold
        predictions += predictions_ / cfg.model.nseed
        gc.collect()

    train = data_dict['train'].copy()
    test = data_dict['test'].copy()
    target = data_dict['target'].copy()
    feature_cols = data_dict['feature_cols']
    target_cols = data_dict['target_cols']
    train_targets_scored = data_dict['train_targets_scored']
    test_features = data_dict['test_features']
    if not pretrain_model:
        train[target_cols] = oof
    test[target_cols] = predictions

    ##################################################
    # validation and save
    ##################################################
    if not pretrain_model:
        y_true = train_targets_scored[target_cols].values
        valid_results = train_targets_scored.drop(columns=target_cols).merge(
            train[['sig_id'] + target_cols], on='sig_id', how='left').fillna(0)
        y_pred = valid_results[target_cols].values
        # Mean of per-target binary log-losses (competition metric).
        score = 0
        for i in range(len(target_cols)):
            score_ = log_loss(y_true[:, i], y_pred[:, i])
            score += score_ / len(target_cols)
        print(f"CV log_loss: {score}")
        log.info(f"CV log_loss: {score}")
        log.info(f"y_true.shape: {y_true.shape}")
        log.info(f"y_pred.shape: {y_pred.shape}")
        # sub = sample_submission.drop(columns=target_cols).merge(test[['sig_id'] + target_cols], on='sig_id',
        #                              how='left').fillna(0)
        # sub.to_csv('submission.csv', index=False)
        # log.info(f"sub.shape: {sub.shape}")

    res = test[['sig_id'] + target_cols]
    # Control-vehicle rows have no mechanism of action: force their
    # predictions to zero. `.copy()` avoids writing into a filtered view
    # (pandas chained-assignment warning / silent no-op).
    corner_case = test_features[
        test_features['cp_type'] == 'ctl_vehicle'].copy()
    zeros = np.zeros((corner_case.shape[0], len(target_cols)))
    corner_case[target_cols] = zeros
    corner_case = corner_case[['sig_id'] + target_cols]
    res = pd.concat([res, corner_case], axis=0)
    res.to_csv('submission.csv', index=False)
    if not pretrain_model:
        return score
    else:
        return 0