Example #1
0
def predict_tags(
    text: Optional[
        str] = "Transfer learning with BERT for self-supervised learning",
    model_dir: Path = config.MODEL_DIR,
) -> Dict:
    """Predict tags for a given input text using a trained model.

    Warning:
        Artifacts are loaded from `model_dir`, so a trained model has to
        be available there before this is called.

    Args:
        text (str, optional): Input text to predict tags for.
                              Defaults to "Transfer learning with BERT for self-supervised learning".
        model_dir (Path): Location of the model artifacts. Defaults to config.MODEL_DIR.

    Returns:
        Whatever `predict.predict` returns for the one-element batch `[text]`.
    """
    loaded = main.load_artifacts(model_dir=model_dir)

    # predict.predict works on a batch, so the single text is wrapped in a list.
    batch = [text]
    result = predict.predict(texts=batch, artifacts=loaded)

    # Emit a pretty-printed copy of the prediction to the log.
    pretty = json.dumps(result, indent=2)
    logger.info(pretty)

    return result
Example #2
0
def behavioral_reevaluation(
    model_dir: Path = config.MODEL_DIR,
):  # pragma: no cover, requires changing existing runs
    """Re-run the current behavioral tests against already-trained artifacts.

    Behavioral tests feed inputs to a black-box model and compare the
    outputs with expectations, so they do not depend on data or model
    versions and can be re-applied to existing artifacts at any time.

    The refreshed report is stored under the "behavioral" key of the
    artifacts' performance dict, its score is logged to MLflow as
    `behavioral_score`, and the performance dict is written to
    `performance.json` inside `model_dir`.

    Args:
        model_dir (Path): Location of the model artifacts. Defaults to config.MODEL_DIR.
    """
    artifacts = main.load_artifacts(model_dir=model_dir)

    # Build the report first, then attach it to the stored performance.
    report = eval.get_behavioral_report(artifacts=artifacts)
    performance = artifacts["performance"]
    performance["behavioral"] = report
    mlflow.log_metric("behavioral_score", report["score"])

    # Persist the updated performance next to the other artifacts.
    destination = Path(model_dir, "performance.json")
    utils.save_dict(performance, destination)
Example #3
0
def predict_tags(
        text: Optional[
            str] = "Transfer learning with BERT for self-supervised learning",
        run_id: str = Path(config.MODEL_DIR, "run_id.txt").read_text(),
) -> Dict:
    """Predict tags for a given input text using a trained model.

    Warning:
        Make sure that you have a trained model first! The default for
        `run_id` is read from `run_id.txt` in config.MODEL_DIR once, when
        this function is defined, so that file must exist at import time.

    Args:
        text (str, optional): Input text to predict tags for.
                              Defaults to "Transfer learning with BERT for self-supervised learning".
        run_id (str): ID of the model run to load artifacts. Defaults to run ID in config.MODEL_DIR.

    Returns:
        Predicted tags for input text, as returned by `predict.predict`.

    Note:
        Any error for an unknown run id would come from
        `main.load_artifacts`; this function performs no validation itself.
    """
    # NOTE: the default above uses Path.read_text() rather than
    # open(...).read() so the file handle is closed deterministically
    # instead of being left for the garbage collector.

    # Predict (predict.predict expects a batch, hence the one-element list)
    artifacts = main.load_artifacts(run_id=run_id)
    prediction = predict.predict(texts=[text], artifacts=artifacts)
    logger.info(json.dumps(prediction, indent=2))

    return prediction
Example #4
0
def load_best_artifacts():
    """Populate the module-level run list and the best run's artifacts.

    Runs of the "best" experiment are fetched ordered by descending F1;
    the first one is treated as the best run and its artifacts are loaded
    onto the CPU. Results are stored in the globals `runs`, `run_ids`,
    `best_run_id` and `best_artifacts`.
    """
    global runs, run_ids, best_artifacts, best_run_id

    ordering = ["metrics.f1 DESC"]
    runs = utils.get_sorted_runs(experiment_name="best", order_by=ordering)
    run_ids = [entry["run_id"] for entry in runs]

    # Runs are sorted best-first, so the head of the list is the winner.
    best_run_id = run_ids[0]
    cpu = torch.device("cpu")
    best_artifacts = main.load_artifacts(run_id=best_run_id, device=cpu)

    logger.info(
        "Loaded trained model and other required artifacts for inference!")
Example #5
0
def _run(request: Request, run_id: str) -> Dict:
    """Return the stored performance of run `run_id` in a response envelope."""
    performance = main.load_artifacts(run_id=run_id)["performance"]
    ok = HTTPStatus.OK
    return {
        "message": ok.phrase,
        "status-code": ok,
        "data": {
            "run_id": run_id,
            "performance": performance,
        },
    }
Example #6
0
    def update_behavioral_report(run_id):
        """Regenerate the behavioral report of `run_id` and log it to MLflow.

        Inside the re-opened MLflow run, the report's score is logged as
        the `behavioral_score` metric and the full report is uploaded as
        `behavioral_report.json`.
        """
        with mlflow.start_run(run_id=run_id):
            loaded = main.load_artifacts(run_id=run_id)
            report = eval.get_behavioral_report(artifacts=loaded)
            mlflow.log_metric("behavioral_score", report["score"])

            # Stage the report in a throwaway directory so MLflow can
            # upload it as an artifact.
            with tempfile.TemporaryDirectory() as staging_dir:
                report_path = Path(staging_dir, "behavioral_report.json")
                utils.save_dict(report, report_path)
                mlflow.log_artifacts(staging_dir)
        logger.info(f"Updated behavioral report for run_id {run_id}")
Example #7
0
def _predict(request: Request, run_id: str, payload: PredictPayload) -> Dict:
    """Predict tags for the payload's texts with the artifacts of `run_id`."""
    loaded = main.load_artifacts(run_id=run_id)

    # Pull the raw strings out of the payload items before predicting.
    raw_texts = [entry.text for entry in payload.texts]
    predictions = predict.predict(texts=raw_texts, artifacts=loaded)

    ok = HTTPStatus.OK
    return {
        "message": ok.phrase,
        "status-code": ok,
        "data": {
            "run_id": run_id,
            "predictions": predictions,
        },
    }
Example #8
0
def predict_tags(
    text: Optional[
        str] = "Transfer learning with BERT for self-supervised learning",
    experiment_name: Optional[str] = "best",
    run_id: Optional[str] = "",
) -> Dict:
    """Predict tags for a given input text using a trained model.

    Warning:
        Make sure that you have a trained model first!

    Args:
        text (str, optional): Input text to predict tags for.
                              Defaults to "Transfer learning with BERT for self-supervised learning".
        experiment_name (str, optional): Experiment whose runs are searched. Defaults to "best".
        run_id (str, optional): Run to load model artifacts from. When empty,
                                the run with the highest F1 score is used.

    Raises:
        ValueError: `run_id` is not among the runs of `experiment_name`.

    Returns:
        Predicted tags for input text.
    """
    # Runs of the experiment, best F1 first.
    ranked_runs = utils.get_sorted_runs(
        experiment_name=experiment_name,
        order_by=["metrics.f1 DESC"],
    )
    known_ids = [entry["run_id"] for entry in ranked_runs]

    # Fall back to the top-ranked run when no id was supplied.
    run_id = run_id or known_ids[0]

    # Reject ids that do not belong to this experiment.
    if run_id not in known_ids:  # pragma: no cover, simple value check
        raise ValueError(
            f"Run_id {run_id} does not exist in experiment {experiment_name}")

    # predict.predict works on a batch, so wrap the single text in a list.
    loaded = main.load_artifacts(run_id=run_id)
    result = predict.predict(texts=[text], artifacts=loaded)
    pretty = json.dumps(result, indent=2)
    logger.info(pretty)

    return result
Example #9
0
def get_artifacts(run_id):
    """Return the artifacts that `main.load_artifacts` yields for `run_id`."""
    return main.load_artifacts(run_id=run_id)
Example #10
0
def load_artifacts():
    """Load the artifacts of the configured run into the global `artifacts`.

    The run id is read from `run_id.txt` in config.CONFIG_DIR and passed
    to `main.load_artifacts`; the result is stored in the module-level
    `artifacts` variable for later use.
    """
    global artifacts
    # Path.read_text() opens, reads and closes the file in one step, so no
    # file handle is left open the way a bare open(...).read() leaves one.
    run_id = Path(config.CONFIG_DIR, "run_id.txt").read_text()
    artifacts = main.load_artifacts(run_id=run_id)
    logger.info("Ready for inference!")
Example #11
0
                                        max_filter_size=args.max_filter_size)
    val_dataset = data.CNNTextDataset(X=X_val,
                                      y=y_val,
                                      max_filter_size=args.max_filter_size)
    test_dataset = data.CNNTextDataset(X=X_test,
                                       y=y_test,
                                       max_filter_size=args.max_filter_size)
    train_dataloader = train_dataset.create_dataloader(
        batch_size=args.batch_size)
    val_dataloader = val_dataset.create_dataloader(batch_size=args.batch_size)
    test_dataloader = test_dataset.create_dataloader(
        batch_size=args.batch_size)

    # Load artifacts
    runs = utils.get_sorted_runs(experiment_name="best",
                                 order_by=["metrics.f1 DESC"])
    run_ids = [run["run_id"] for run in runs]
    artifacts = main.load_artifacts(run_id=run_ids[0],
                                    device=torch.device("cpu"))

    # Evaluation
    device = torch.device("cpu")
    performance, behavioral_report = evaluate(
        artifacts=artifacts,
        dataloader=test_dataloader,
        df=test_df,
        device=device,
    )
    logger.info(json.dumps(performance, indent=2))
    logger.info(json.dumps(behavioral_report, indent=2))
Example #12
0
def get_artifacts(model_dir):
    """Return the artifacts that `main.load_artifacts` yields for `model_dir`."""
    return main.load_artifacts(model_dir=model_dir)
Example #13
0
def load_artifacts():
    """Load the artifacts in config.MODEL_DIR into the global `artifacts`."""
    global artifacts
    source_dir = config.MODEL_DIR
    artifacts = main.load_artifacts(model_dir=source_dir)
    logger.info("Ready for inference!")