def predict_tags(
    text: Optional[str] = "Transfer learning with BERT for self-supervised learning",
    model_dir: Path = config.MODEL_DIR,
) -> Dict:
    """Predict tags for a given input text using a trained model.

    Warning:
        Make sure that you have a trained model first!

    Args:
        text (str, optional): Input text to predict tags for.
            Defaults to "Transfer learning with BERT for self-supervised learning".
        model_dir (Path): Location of model artifacts. Defaults to config.MODEL_DIR.

    Returns:
        Predicted tags for input text.
    """
    # Predict
    artifacts = main.load_artifacts(model_dir=model_dir)
    prediction = predict.predict(texts=[text], artifacts=artifacts)
    logger.info(json.dumps(prediction, indent=2))
    return prediction
def behavioral_reevaluation(
    model_dir: Path = config.MODEL_DIR,
):  # pragma: no cover, requires changing existing runs
    """Reevaluate existing runs on the current behavioral tests in eval.py.

    This is possible since behavioral tests are inputs applied to black-box models
    and compared with expected outputs, so there is no dependency on data or model
    versions.

    Args:
        model_dir (Path): Location of model artifacts. Defaults to config.MODEL_DIR.
    """
    # Generate behavioral report
    artifacts = main.load_artifacts(model_dir=model_dir)
    artifacts["performance"]["behavioral"] = eval.get_behavioral_report(artifacts=artifacts)
    mlflow.log_metric("behavioral_score", artifacts["performance"]["behavioral"]["score"])

    # Log updated performance
    utils.save_dict(artifacts["performance"], Path(model_dir, "performance.json"))
def predict_tags(
    text: Optional[str] = "Transfer learning with BERT for self-supervised learning",
    run_id: str = Path(config.MODEL_DIR, "run_id.txt").read_text(),
) -> Dict:
    """Predict tags for a given input text using a trained model.

    Warning:
        Make sure that you have a trained model first!

    Args:
        text (str, optional): Input text to predict tags for.
            Defaults to "Transfer learning with BERT for self-supervised learning".
        run_id (str): ID of the model run to load artifacts from.
            Defaults to the run ID stored in config.MODEL_DIR.

    Raises:
        ValueError: Run id doesn't exist in experiment.

    Returns:
        Predicted tags for input text.
    """
    # Predict
    artifacts = main.load_artifacts(run_id=run_id)
    prediction = predict.predict(texts=[text], artifacts=artifacts)
    logger.info(json.dumps(prediction, indent=2))
    return prediction
def load_best_artifacts():
    global runs, run_ids, best_artifacts, best_run_id
    runs = utils.get_sorted_runs(experiment_name="best", order_by=["metrics.f1 DESC"])
    run_ids = [run["run_id"] for run in runs]
    best_run_id = run_ids[0]
    best_artifacts = main.load_artifacts(run_id=best_run_id, device=torch.device("cpu"))
    logger.info("Loaded trained model and other required artifacts for inference!")
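# A minimal usage sketch (hypothetical, not part of the original module): once
# load_best_artifacts() has populated the module-level globals, the best run's
# artifacts can be reused for ad-hoc predictions; the example text is illustrative.
load_best_artifacts()
prediction = predict.predict(
    texts=["Transfer learning with BERT for self-supervised learning"],
    artifacts=best_artifacts,
)
logger.info(json.dumps(prediction, indent=2))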
def _run(request: Request, run_id: str) -> Dict:
    """Get details about a specific run."""
    artifacts = main.load_artifacts(run_id=run_id)
    response = {
        "message": HTTPStatus.OK.phrase,
        "status-code": HTTPStatus.OK,
        "data": {"run_id": run_id, "performance": artifacts["performance"]},
    }
    return response
def update_behavioral_report(run_id):
    with mlflow.start_run(run_id=run_id):
        # Generate behavioral report
        artifacts = main.load_artifacts(run_id=run_id)
        behavioral_report = eval.get_behavioral_report(artifacts=artifacts)
        mlflow.log_metric("behavioral_score", behavioral_report["score"])

        # Log artifacts
        with tempfile.TemporaryDirectory() as dp:
            utils.save_dict(behavioral_report, Path(dp, "behavioral_report.json"))
            mlflow.log_artifacts(dp)

    logger.info(f"Updated behavioral report for run_id {run_id}")
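# A sketch of how update_behavioral_report could be applied to every run in an
# experiment, assuming the same utils.get_sorted_runs helper used elsewhere in
# these snippets.
runs = utils.get_sorted_runs(experiment_name="best", order_by=["metrics.f1 DESC"])
for run in runs:
    update_behavioral_report(run_id=run["run_id"])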
def _predict(request: Request, run_id: str, payload: PredictPayload) -> Dict:
    """Predict tags for a list of texts using artifacts from run `run_id`."""
    artifacts = main.load_artifacts(run_id=run_id)
    texts = [item.text for item in payload.texts]
    predictions = predict.predict(texts=texts, artifacts=artifacts)
    response = {
        "message": HTTPStatus.OK.phrase,
        "status-code": HTTPStatus.OK,
        "data": {"run_id": run_id, "predictions": predictions},
    }
    return response
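# The Request/PredictPayload signatures suggest _run and _predict sit behind a web
# framework. A minimal sketch, assuming FastAPI and a hypothetical PredictPayload
# schema (the real payload model and route wiring are not shown in these snippets):
from http import HTTPStatus
from typing import Dict, List

from fastapi import FastAPI, Request
from pydantic import BaseModel

app = FastAPI()


class Text(BaseModel):
    text: str


class PredictPayload(BaseModel):
    texts: List[Text]


@app.post("/predict/{run_id}")
def predict_endpoint(request: Request, run_id: str, payload: PredictPayload) -> Dict:
    # Delegate to the handler defined above
    return _predict(request=request, run_id=run_id, payload=payload)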
def predict_tags(
    text: Optional[str] = "Transfer learning with BERT for self-supervised learning",
    experiment_name: Optional[str] = "best",
    run_id: Optional[str] = "",
) -> Dict:
    """Predict tags for a given input text using a trained model.

    Warning:
        Make sure that you have a trained model first!

    Args:
        text (str, optional): Input text to predict tags for.
            Defaults to "Transfer learning with BERT for self-supervised learning".
        experiment_name (str, optional): Name of the experiment to fetch the run from.
        run_id (str, optional): ID of the run to load model artifacts from.
            Defaults to the run with the highest F1 score.

    Raises:
        ValueError: Run id doesn't exist in experiment.

    Returns:
        Predicted tags for input text.
    """
    # Get sorted runs
    runs = utils.get_sorted_runs(
        experiment_name=experiment_name,
        order_by=["metrics.f1 DESC"],
    )
    run_ids = [run["run_id"] for run in runs]

    # Get best run
    if not run_id:
        run_id = run_ids[0]

    # Validate run id
    if run_id not in run_ids:  # pragma: no cover, simple value check
        raise ValueError(f"Run_id {run_id} does not exist in experiment {experiment_name}")

    # Predict
    artifacts = main.load_artifacts(run_id=run_id)
    prediction = predict.predict(texts=[text], artifacts=artifacts)
    logger.info(json.dumps(prediction, indent=2))
    return prediction
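# The Optional arguments with string defaults are consistent with a CLI entry point.
# A sketch, assuming Typer is used to expose predict_tags on the command line (the
# actual CLI wiring is not shown in this snippet):
import typer

cli = typer.Typer()
cli.command()(predict_tags)

if __name__ == "__main__":
    cli()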
def get_artifacts(run_id):
    artifacts = main.load_artifacts(run_id=run_id)
    return artifacts
def load_artifacts():
    global artifacts
    run_id = Path(config.CONFIG_DIR, "run_id.txt").read_text()
    artifacts = main.load_artifacts(run_id=run_id)
    logger.info("Ready for inference!")
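# A sketch of where load_artifacts() might be invoked, assuming the global-state
# pattern above backs a service that loads artifacts once at startup (an
# assumption; `app` is a FastAPI instance like the one sketched after _predict).
@app.on_event("startup")
def startup():
    load_artifacts()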
train_dataset = data.CNNTextDataset(X=X_train, y=y_train, max_filter_size=args.max_filter_size)
val_dataset = data.CNNTextDataset(X=X_val, y=y_val, max_filter_size=args.max_filter_size)
test_dataset = data.CNNTextDataset(X=X_test, y=y_test, max_filter_size=args.max_filter_size)
train_dataloader = train_dataset.create_dataloader(batch_size=args.batch_size)
val_dataloader = val_dataset.create_dataloader(batch_size=args.batch_size)
test_dataloader = test_dataset.create_dataloader(batch_size=args.batch_size)

# Load artifacts
runs = utils.get_sorted_runs(experiment_name="best", order_by=["metrics.f1 DESC"])
run_ids = [run["run_id"] for run in runs]
artifacts = main.load_artifacts(run_id=run_ids[0], device=torch.device("cpu"))

# Evaluation
device = torch.device("cpu")
performance, behavioral_report = evaluate(
    artifacts=artifacts,
    dataloader=test_dataloader,
    df=test_df,
    device=device,
)
logger.info(json.dumps(performance, indent=2))
logger.info(json.dumps(behavioral_report, indent=2))
def get_artifacts(model_dir):
    artifacts = main.load_artifacts(model_dir=model_dir)
    return artifacts
def load_artifacts():
    global artifacts
    artifacts = main.load_artifacts(model_dir=config.MODEL_DIR)
    logger.info("Ready for inference!")