def compare_tags(texts: List, tags: List, artifacts: Dict, test_type: str) -> Dict:
    """Compare ground truth tags with predicted tags.

    Args:
        texts (List): List of input texts to predict on.
        tags (List): Ground truth tags for each input; ``tags[i]`` is an
            iterable of tags belonging to ``texts[i]``.
        artifacts (Dict): Artifacts needed for inference (passed through to
            ``predict.predict`` unchanged).
        test_type (str): Type of test (INV, DIR, MFT, etc.); copied verbatim
            into every result entry.

    Returns:
        Dict: ``{"passed": [...], "failed": [...]}``. Each entry holds the
        input text and tags, the prediction and the test type.
    """
    # Predict
    predictions = predict.predict(texts=texts, artifacts=artifacts)

    # Evaluate
    results = {"passed": [], "failed": []}
    for i, prediction in enumerate(predictions):
        result = {
            "input": {"text": texts[i], "tags": tags[i]},
            "prediction": prediction,
            "type": test_type,
        }
        # An input passes only when every ground truth tag was predicted;
        # extra predicted tags do not cause a failure.
        if all(tag in prediction["predicted_tags"] for tag in tags[i]):
            results["passed"].append(result)
        else:
            results["failed"].append(result)
    return results
def predict_tags(
    text: Optional[str] = "Transfer learning with BERT for self-supervised learning",
    model_dir: Path = config.MODEL_DIR,
) -> Dict:
    """Predict tags for a given input text using a trained model.

    Warning:
        Make sure that you have a trained model first!

    Args:
        text (str, optional): Input text to predict tags for.
            Defaults to "Transfer learning with BERT for self-supervised learning".
        model_dir (Path): Location of model artifacts. Defaults to
            ``config.MODEL_DIR``.

    Returns:
        Whatever ``predict.predict`` returns for the single-item list
        ``[text]``; the same object is also logged as indented JSON.

    Note:
        This function raises nothing itself; errors from
        ``main.load_artifacts`` or ``predict.predict`` propagate unchanged.
    """
    # Load the artifacts stored under `model_dir`, then run inference on a
    # one-element batch.
    loaded_artifacts = main.load_artifacts(model_dir=model_dir)
    result = predict.predict(texts=[text], artifacts=loaded_artifacts)

    # Log the prediction in a human-readable form before handing it back.
    rendered = json.dumps(result, indent=2)
    logger.info(rendered)
    return result
def predict_tags(
    text: str = "Transfer learning with BERT for self-supervised learning",
    run_id: str = "",
) -> Dict:
    """Predict tags for a given input text using a trained model.

    Warning:
        Make sure that you have a trained model first!

    Args:
        text (str, optional): Input text to predict tags for.
            Defaults to "Transfer learning with BERT for self-supervised learning".
        run_id (str, optional): ID of the run to load model artifacts from.
            Defaults to the run with the lowest `best_val_loss` from the
            `best` experiment.

    Raises:
        ValueError: No `run_id` was given and the `best` experiment does not
            exist or contains no runs.

    Returns:
        Predicted tags for input text.
    """
    # Get best run
    if not run_id:
        experiment = mlflow.get_experiment_by_name("best")
        if experiment is None:
            # get_experiment_by_name signals "not found" with None rather
            # than raising, so fail here with an actionable message.
            raise ValueError(
                'Experiment "best" does not exist; train a model first or pass a run_id.'
            )
        all_runs = mlflow.search_runs(
            experiment_ids=[experiment.experiment_id],
            order_by=["metrics.best_val_loss ASC"],
        )
        if len(all_runs) == 0:
            raise ValueError(
                'Experiment "best" has no runs; train a model first or pass a run_id.'
            )
        # Runs are sorted ascending by validation loss, so the first row is
        # the best one.
        run_id = all_runs.iloc[0].run_id

    # Predict
    prediction = predict.predict(texts=[text], run_id=run_id)
    logger.info(json.dumps(prediction, indent=2))
    return prediction
def predict_tags(
    text: Optional[str] = "Transfer learning with BERT for self-supervised learning",
    # Path.read_text() opens, reads and closes the file; the previous bare
    # open(...).read() left the file handle unclosed.
    # NOTE: this default is still evaluated once, when the function is defined.
    run_id: str = Path(config.MODEL_DIR, "run_id.txt").read_text(),
) -> Dict:
    """Predict tags for a given input text using a trained model.

    Warning:
        Make sure that you have a trained model first!

    Args:
        text (str, optional): Input text to predict tags for.
            Defaults to "Transfer learning with BERT for self-supervised learning".
        run_id (str): ID of the model run to load artifacts. Defaults to the
            contents of ``run_id.txt`` inside ``config.MODEL_DIR``, read when
            this function is defined.

    Returns:
        Predicted tags for input text.

    Note:
        This function raises nothing itself; errors from
        ``main.load_artifacts`` or ``predict.predict`` propagate unchanged.
    """
    # Predict
    artifacts = main.load_artifacts(run_id=run_id)
    prediction = predict.predict(texts=[text], artifacts=artifacts)
    logger.info(json.dumps(prediction, indent=2))
    return prediction
def _predict(request: Request, payload: PredictPayload) -> Dict:
    """Predict tags for every text contained in the payload.

    Args:
        request (Request): Incoming request. Not used in the body —
            presumably consumed by a decorator or the framework; confirm
            before removing.
        payload (PredictPayload): Object whose ``texts`` items each expose a
            ``text`` attribute.

    Returns:
        Dict: Response with the OK status phrase under ``"message"``, the OK
        status under ``"status-code"`` and the predictions under
        ``"data" -> "predictions"``.

    Note:
        ``artifacts`` is not a parameter; it is resolved from the enclosing
        (module) scope. The original summary says these belong to the best
        run — verify where ``artifacts`` is loaded.
    """
    # Run inference on the raw strings pulled out of the payload items.
    predictions = predict.predict(
        texts=[entry.text for entry in payload.texts],
        artifacts=artifacts,
    )

    # Build the response envelope around the predictions.
    ok = HTTPStatus.OK
    return {
        "message": ok.phrase,
        "status-code": ok,
        "data": {"predictions": predictions},
    }
def _predict(request: Request, run_id: str, payload: PredictPayload) -> Dict:
    """Predict tags for a list of texts using artifacts from run `run_id`.

    Args:
        request (Request): Incoming request. Not used in the body —
            presumably consumed by a decorator or the framework; confirm
            before removing.
        run_id (str): ID of the run whose artifacts are loaded through
            ``main.load_artifacts``; echoed back in the response.
        payload (PredictPayload): Object whose ``texts`` items each expose a
            ``text`` attribute.

    Returns:
        Dict: Response with the OK status phrase under ``"message"``, the OK
        status under ``"status-code"`` and, under ``"data"``, the ``run_id``
        and the predictions.
    """
    # Artifacts are loaded on every call for the requested run.
    artifacts = main.load_artifacts(run_id=run_id)
    predictions = predict.predict(
        texts=[entry.text for entry in payload.texts],
        artifacts=artifacts,
    )

    # Build the response envelope around the predictions.
    ok = HTTPStatus.OK
    return {
        "message": ok.phrase,
        "status-code": ok,
        "data": {"run_id": run_id, "predictions": predictions},
    }
def predict_tags(
    text: Optional[str] = "Transfer learning with BERT for self-supervised learning",
    experiment_name: Optional[str] = "best",
    run_id: Optional[str] = "",
) -> Dict:
    """Predict tags for a given input text using a trained model.

    Warning:
        Make sure that you have a trained model first!

    Args:
        text (str, optional): Input text to predict tags for.
            Defaults to "Transfer learning with BERT for self-supervised learning".
        experiment_name (str, optional): Name of the experiment to fetch the
            run from. Defaults to "best".
        run_id (str, optional): ID of the run to load model artifacts from.
            Defaults to the run with the highest F1 score.

    Raises:
        ValueError: The experiment has no runs to choose from, or the given
            run id doesn't exist in the experiment.

    Returns:
        Predicted tags for input text.
    """
    # Get sorted runs (best F1 first)
    runs = utils.get_sorted_runs(
        experiment_name=experiment_name,
        order_by=["metrics.f1 DESC"],
    )
    run_ids = [run["run_id"] for run in runs]

    if not run_id:
        # Get best run; fail clearly instead of with a bare IndexError when
        # the experiment has no runs.
        if not run_ids:  # pragma: no cover, simple value check
            raise ValueError(f"No runs found in experiment {experiment_name}")
        run_id = run_ids[0]
    elif run_id not in run_ids:  # pragma: no cover, simple value check
        # Validate an explicitly supplied run id
        raise ValueError(
            f"Run_id {run_id} does not exist in experiment {experiment_name}")

    # Predict
    artifacts = main.load_artifacts(run_id=run_id)
    prediction = predict.predict(texts=[text], artifacts=artifacts)
    logger.info(json.dumps(prediction, indent=2))
    return prediction
def predict_tags(text: str, run_id: str) -> Dict:
    """Predict tags for a given input text using a trained model.

    Warning:
        Make sure that you have a trained model first!

    Args:
        text (str): Input text to predict tags for.
        run_id (str): ID of the model run to load artifacts.

    Returns:
        Whatever ``predict.predict`` returns for the single-item list
        ``[text]``; the same object is also logged as indented JSON.

    Note:
        This function raises nothing itself; errors from ``load_artifacts``
        or ``predict.predict`` propagate unchanged.
    """
    # Fetch the artifacts for the requested run, then run inference on a
    # one-element batch.
    loaded_artifacts = load_artifacts(run_id=run_id)
    result = predict.predict(texts=[text], artifacts=loaded_artifacts)

    # Log the prediction in a human-readable form before handing it back.
    rendered = json.dumps(result, indent=2)
    logger.info(rendered)
    return result