def go(self, model_name: str) -> None:
    """Load the model named *model_name* and run its ``check()`` method.

    When the check reports failure, a warning is logged and the process
    exits with status code 1. On success the method simply returns.
    """
    loaded_model = Model.load(download_model(model_name))

    # Nothing more to do when the model's own check passes.
    if loaded_model.check():
        return

    logger.warning(
        f"Check of model {loaded_model.__class__!r} failed, check the output for reasons why"
    )
    sys.exit(1)
MODELS_NAMES = [ "defectenhancementtask", "component", "regression", "stepstoreproduce", "spambug", "testlabelselect", "testgroupselect", ] DEFAULT_EXPIRATION_TTL = 7 * 24 * 3600 # A week redis = Redis.from_url(os.environ.get("REDIS_URL", "redis://localhost/0")) MODEL_CACHE: ReadthroughTTLCache[str, Model] = ReadthroughTTLCache( timedelta(hours=1), lambda m: Model.load(f"{m}model") ) MODEL_CACHE.start_ttl_thread() cctx = zstandard.ZstdCompressor(level=10) def setkey(key: str, value: bytes, compress: bool = False) -> None: LOGGER.debug(f"Storing data at {key}: {value!r}") if compress: value = cctx.compress(value) redis.set(key, value) redis.expire(key, DEFAULT_EXPIRATION_TTL) def classify_bug(model_name: str, bug_ids: Sequence[int], bugzilla_token: str) -> str:
def __init__(
    self,
    model_name: str,
    repo_dir: str,
    git_repo_dir: str,
    method_defect_predictor_dir: str,
    use_single_process: bool,
    skip_feature_importance: bool,
):
    """Load the requested model and fetch the supporting data it needs.

    Args:
        model_name: Name of the model to download and load. The values
            "regressor" and "testlabelselect" trigger extra data loading.
        repo_dir: Stored on the instance as ``self.repo_dir``.
        git_repo_dir: When truthy, mozilla-central is cloned into it.
        method_defect_predictor_dir: When truthy, MethodDefectPredictor is
            cloned into it at a pinned revision.
        use_single_process: Stored on the instance.
        skip_feature_importance: Stored on the instance.

    Raises:
        AssertionError: If the model could not be loaded, an expected data
            file is missing after download, or the test scheduling support
            file could not be downloaded.
    """
    self.model_name = model_name
    self.repo_dir = repo_dir

    self.model = Model.load(download_model(model_name))
    assert self.model is not None

    self.git_repo_dir = git_repo_dir
    if git_repo_dir:
        self.clone_git_repo(
            "hg::https://hg.mozilla.org/mozilla-central", git_repo_dir
        )

    self.method_defect_predictor_dir = method_defect_predictor_dir
    if method_defect_predictor_dir:
        self.clone_git_repo(
            "https://github.com/lucapascarella/MethodDefectPredictor",
            method_defect_predictor_dir,
            "8cc47f47ffb686a29324435a0151b5fabd37f865",
        )

    self.use_single_process = use_single_process
    self.skip_feature_importance = skip_feature_importance

    if model_name == "regressor":
        self.use_test_history = False

        # Fetch the X dataset; only decompress when the download reports
        # that the remote file changed.
        model_data_X_path = f"{model_name}model_data_X"
        updated = download_check_etag(
            URL.format(model_name=model_name, file_name=f"{model_data_X_path}.zst")
        )
        if updated:
            zstd_decompress(model_data_X_path)
        assert os.path.exists(model_data_X_path), "Decompressed X dataset exists"

        # Same procedure for the y dataset.
        model_data_y_path = f"{model_name}model_data_y"
        updated = download_check_etag(
            URL.format(model_name=model_name, file_name=f"{model_data_y_path}.zst")
        )
        if updated:
            zstd_decompress(model_data_y_path)
        assert os.path.exists(model_data_y_path), "Decompressed y dataset exists"

        # NOTE(security): pickle.load executes arbitrary code embedded in
        # the file. These files are downloaded from URL, so that source
        # must be trusted.
        with open(model_data_X_path, "rb") as fb:
            self.X = to_array(pickle.load(fb))

        with open(model_data_y_path, "rb") as fb:
            self.y = to_array(pickle.load(fb))

        # Unlike the datasets above, this file is decompressed on every
        # run, regardless of whether the download was updated.
        past_bugs_by_function_path = "data/past_fixed_bugs_by_function.json"
        download_check_etag(
            PAST_BUGS_BY_FUNCTION_URL, path=f"{past_bugs_by_function_path}.zst"
        )
        zstd_decompress(past_bugs_by_function_path)
        assert os.path.exists(past_bugs_by_function_path)
        with open(past_bugs_by_function_path, "r") as f:
            self.past_bugs_by_function = json.load(f)

    if model_name == "testlabelselect":
        self.use_test_history = True

        # Perform the download outside the assert: assert statements are
        # removed under `python -O`, which would silently skip the download.
        support_file_downloaded = db.download_support_file(
            test_scheduling.TEST_LABEL_SCHEDULING_DB,
            test_scheduling.PAST_FAILURES_LABEL_DB,
        )
        assert support_file_downloaded
        self.past_failures_data = test_scheduling.get_past_failures("label", True)

        self.testfailure_model = cast(
            TestFailureModel, TestFailureModel.load(download_model("testfailure"))
        )
        assert self.testfailure_model is not None