Example #1
def load_config(yaml_file):
    config = OmegaConf.load(yaml_file)
    aruco_params = config.get('aruco_params', {})

    if 'common' in config:
        boards = {k: OmegaConf.merge(config.common, board)
                  for k, board in config.boards.items()}
    else:
        boards = config.boards

    def instantiate_board(config):
        if config._type_ == "charuco":
            schema = OmegaConf.structured(CharucoConfig)
            return CharucoBoard(aruco_params=aruco_params,
                                **merge_schema(config, schema))
        elif config._type_ == "aprilgrid":
            schema = OmegaConf.structured(AprilConfig)
            return AprilGrid(**merge_schema(config, schema))
        else:
            assert False, f"unknown board type: {config._type_}, options are (charuco | aprilgrid | checkerboard)"

    return {k: instantiate_board(board) for k, board in boards.items()}
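The core of this example is the merge step: shared defaults under a `common` key are merged into each board entry before instantiation. A minimal sketch of that behavior, assuming a hypothetical YAML layout whose field names (`_type_`, `square_size`, `size`) are illustrative only:

from omegaconf import OmegaConf

# Hypothetical config layout; field names are made up for illustration.
cfg = OmegaConf.create("""
common:
  _type_: charuco
  square_size: 0.025
boards:
  board_a:
    size: [8, 6]
""")

# Each board entry inherits the shared defaults from `common`.
boards = {k: OmegaConf.merge(cfg.common, b) for k, b in cfg.boards.items()}
print(boards["board_a"].square_size)  # 0.025, inherited from common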
Example #2
        #     print("Text: ", text)
        #     print("Preds: ", get_text_spans(text, old_offsets))
        #     print("Clean Preds: ", get_text_spans(text, new_offsets))

        with open(
                os.path.join(
                    save_dir,
                    f"spans-pred-{test_file.split('/')[-1].split('.')[0]}.txt"
                ),
                "w",
        ) as f:
            for i, spans in enumerate(new_final_offset_predictions):
                f.write(f"{i}\t{str(spans)}\n")


if __name__ == "__main__":
    random.seed(SEED)
    np.random.seed(SEED)
    set_seed(SEED)
    parser = argparse.ArgumentParser(prog="run_baseline_model.py",
                                     description="Train Baseline RNNSL Model.")
    parser.add_argument(
        "--config",
        type=str,
        action="store",
        help="The configuration for model training/evaluation",
    )
    args = parser.parse_args()
    config = OmegaConf.load(args.config)
    predict(**dict(config))
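The closing call, `predict(**dict(config))`, works because a loaded `DictConfig` is a mapping, so its top-level keys can be unpacked straight into keyword arguments. A minimal sketch of the pattern, with a hypothetical `predict` signature and made-up keys:

from omegaconf import OmegaConf

def predict(model_path, batch_size=8):
    print(model_path, batch_size)

config = OmegaConf.create({"model_path": "ckpt.pt", "batch_size": 16})
# dict() gives a shallow mapping of the top-level keys, which become kwargs.
predict(**dict(config))  # ckpt.pt 16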
Example #3
def _sweep(
    task: str,
    model: str,
    fold: int,
):
    """
    Determine best postprocessing parameters for a trained model

    Args:
        task: current task
        model: full name of the model run to determine empirical parameters for,
            e.g. RetinaUNetV001_D3V001_3d
        fold: current fold
    """
    nndet_data_dir = Path(os.getenv("det_models"))
    task = get_task(task, name=True, models=True)
    train_dir = nndet_data_dir / task / model / f"fold{fold}"

    cfg = OmegaConf.load(str(train_dir / "config.yaml"))
    os.chdir(str(train_dir))

    logger.remove()
    logger.add(sys.stdout, format="{level} {message}", level="INFO")
    log_file = Path(os.getcwd()) / "sweep.log"
    logger.add(log_file, level="INFO")
    logger.info(f"Log file at {log_file}")

    plan = load_pickle(train_dir / "plan.pkl")
    data_dir = Path(cfg.host["preprocessed_output_dir"]
                    ) / plan["data_identifier"] / "imagesTr"

    module = MODULE_REGISTRY[cfg["module"]](
        model_cfg=OmegaConf.to_container(cfg["model_cfg"], resolve=True),
        trainer_cfg=OmegaConf.to_container(cfg["trainer_cfg"], resolve=True),
        plan=plan,
    )

    splits = load_pickle(train_dir / "splits.pkl")
    case_ids = splits[cfg["exp"]["fold"]]["val"]
    inference_plan = module.sweep(
        cfg=OmegaConf.to_container(cfg, resolve=True),
        save_dir=train_dir,
        train_data_dir=data_dir,
        case_ids=case_ids,
        run_prediction=True,  # TODO: add command line arg
    )

    plan["inference_plan"] = inference_plan
    save_pickle(plan, train_dir / "plan_inference.pkl")

    ensembler_cls = module.get_ensembler_cls(
        key="boxes", dim=plan["network_dim"])  # TODO: make this configurable
    for restore in [True, False]:
        target_dir = (train_dir / "val_predictions" if restore
                      else train_dir / "val_predictions_preprocessed")
        extract_results(
            source_dir=train_dir / "sweep_predictions",
            target_dir=target_dir,
            ensembler_cls=ensembler_cls,
            restore=restore,
            **inference_plan,
        )

    _evaluate(
        task=cfg["task"],
        model=cfg["exp"]["id"],
        fold=cfg["exp"]["fold"],
        test=False,
        do_boxes_eval=True,  # TODO: make this configurable
        do_analyze_boxes=True,  # TODO: make this configurable
    )
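Several calls above hand sub-configs to other components via `OmegaConf.to_container(..., resolve=True)`, which resolves interpolations and returns plain Python containers. A minimal sketch of that behavior, with made-up keys:

from omegaconf import OmegaConf

cfg = OmegaConf.create({
    "exp": {"fold": 0},
    "trainer_cfg": {"gpus": 1, "fold": "${exp.fold}"},  # interpolation
})

# resolve=True substitutes "${exp.fold}" before conversion; the result is a plain dict.
trainer_cfg = OmegaConf.to_container(cfg["trainer_cfg"], resolve=True)
print(trainer_cfg)  # {'gpus': 1, 'fold': 0}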
Example #4
def run():
    on_kaggle = False  # change to True if you run on Kaggle
    pretrain_model = False
    cfg = OmegaConf.load(
        f"{'../input/src-code0' if on_kaggle else './'}/src/test.yaml")
    # os.chdir(utils.get_original_cwd())
    #     log.info(OmegaConf.to_yaml(cfg))
    cfg['device'] = 'cuda'
    print(cfg['device'])
    cfg['list_seed'] = [i for i in range(cfg.model.nseed)]
    verbose = 1
    local_path = '../'
    path = f'../input/lish-moa'
    path_model = f"{'/kaggle/input/models0' if pretrain_model else '/kaggle/working' if on_kaggle else '../models'}"
    cfg['path_model'] = path_model

    ######################################
    # data_load and preprocess
    ######################################
    data_dict = load_and_preprocess_data(cfg,
                                         path,
                                         pca_append_test=True,
                                         variancethreshold_append_test=True,
                                         verbose=1)

    ######################################
    # cv
    ######################################
    CV = MultilabelStratifiedKFold(n_splits=cfg.model.nfolds, random_state=42)

    ##################################################
    # Train
    ##################################################
    oof = np.zeros((len(data_dict['train']), len(data_dict['target_cols'])))
    predictions = np.zeros(
        (len(data_dict['test']), len(data_dict['target_cols'])))
    for seed in tqdm(cfg['list_seed'], leave=verbose):
        return_run_k_fold = run_k_fold_nn(data_dict,
                                          cfg,
                                          cv=CV,
                                          seed=seed,
                                          file_prefix='m1',
                                          pretrain_model=pretrain_model,
                                          verbose=verbose)
        if not pretrain_model:
            oof_, predictions_ = return_run_k_fold
            oof += oof_ / cfg.model.nseed
        else:
            predictions_ = return_run_k_fold
        predictions += predictions_ / cfg.model.nseed
        gc.collect()

    train = data_dict['train'].copy()
    test = data_dict['test'].copy()
    target = data_dict['target'].copy()
    feature_cols = data_dict['feature_cols']
    target_cols = data_dict['target_cols']
    train_targets_scored = data_dict['train_targets_scored']
    test_features = data_dict['test_features']

    if not pretrain_model:
        train[target_cols] = oof
    test[target_cols] = predictions

    ##################################################
    # validation and save
    ##################################################

    if not pretrain_model:
        y_true = train_targets_scored[target_cols].values
        valid_results = train_targets_scored.drop(columns=target_cols).merge(
            train[['sig_id'] + target_cols], on='sig_id', how='left').fillna(0)
        y_pred = valid_results[target_cols].values

        score = 0
        for i in range(len(target_cols)):
            score_ = log_loss(y_true[:, i], y_pred[:, i])
            score += score_ / len(target_cols)

        print(f"CV log_loss: {score}")
        log.info(f"CV log_loss: {score}")
        log.info(f"y_true.shape: {y_true.shape}")
        log.info(f"y_pred.shape: {y_pred.shape}")

    # sub = sample_submission.drop(columns=target_cols).merge(test[['sig_id'] + target_cols], on='sig_id',
    #                                                         how='left').fillna(0)
    # sub.to_csv('submission.csv', index=False)
    # log.info(f"sub.shape: {sub.shape}")

    res = test[['sig_id'] + target_cols]
    # .copy() so the column assignment below does not trigger a chained-assignment warning
    corner_case = test_features[test_features['cp_type'] == 'ctl_vehicle'].copy()
    zeros = np.zeros((corner_case.shape[0], len(target_cols)))
    corner_case[target_cols] = zeros
    corner_case = corner_case[['sig_id'] + target_cols]
    res = pd.concat([res, corner_case], axis=0)

    res.to_csv('submission.csv', index=False)

    if not pretrain_model:
        return score
    else:
        return 0
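A pattern this example leans on is mutating the loaded config in place (`cfg['device']`, `cfg['list_seed']`, `cfg['path_model']`). A minimal sketch of such runtime overrides on a `DictConfig`, using illustrative keys and values:

from omegaconf import OmegaConf

cfg = OmegaConf.create({"model": {"nseed": 3}, "device": "cpu"})
cfg["device"] = "cuda"                        # item-style assignment
cfg.list_seed = list(range(cfg.model.nseed))  # new keys can be added too
print(cfg.device, cfg.list_seed)              # cuda [0, 1, 2]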