# Assumed mock.patch decorators, restored from the mock parameter names
# (mock.patch decorators apply bottom-up, so the lowest decorator maps to
# the first parameter):
@mock.patch("{}.os.path.exists".format(r.__name__), return_value=False)
@mock.patch("{}.os.makedirs".format(r.__name__))
def test_modelstore_create(makedirs, path_exists, modelstore_config):
    # Store location does not exist yet: it must be created.
    _ = r.ModelStore(modelstore_config)
    makedirs.assert_called_once_with(
        modelstore_config["model_store"]["location"]
    )

    # Store location already exists: no directory must be created.
    path_exists.reset_mock()
    makedirs.reset_mock()
    path_exists.return_value = True
    _ = r.ModelStore(modelstore_config)
    assert not makedirs.called

# Assumed mock.patch decorators, restored from the mock parameter names; the
# _load_metadata side_effect (returning a distinct dict per file) is an
# assumption so each listed model gets distinguishable metadata:
@mock.patch("{}.os.path.exists".format(r.__name__), return_value=True)
@mock.patch(
    "{}.ModelStore._load_metadata".format(r.__name__),
    side_effect=lambda filename: {"loaded_from": filename},
)
def test_list_models_ignore_obsolete_backups(
    _load_metadata, path_exists, modelstore_config, caplog
):
    model_jsons = [
        "mymodel_1.0.0.json",
        "mymodel_1.1.0.json",
        "anothermodel_0.0.1.json",
    ]
    backup_jsons = [
        "mymodel_1.0.0_2021-08-01_18-00-00.json",
        "OBSOLETE-IGNORED_1.0.0_2021-07-31_12-00-00",
    ]

    def my_glob(pattern):
        # Backups live in the "previous" subdirectory; everything else is
        # the model store's top level.
        if "previous" in pattern.lower():
            return backup_jsons
        return model_jsons

    with mock.patch(
        "{}.glob.glob".format(r.__name__),
        side_effect=my_glob,
    ):
        with caplog.at_level(logging.DEBUG):
            ms = r.ModelStore(modelstore_config)
            models = ms.list_models()

    print(models)
    # The second backup file belongs to no listed model, so list_models
    # must log that it is ignoring it.
    assert "ignoring" in caplog.text.lower()

# Assumed mock.patch decorators, restored from the mock parameter names
# (same assumptions as in the test above):
@mock.patch("{}.os.path.exists".format(r.__name__), return_value=True)
@mock.patch(
    "{}.ModelStore._load_metadata".format(r.__name__),
    side_effect=lambda filename: {"loaded_from": filename},
)
def test_list_models(_load_metadata, path_exists, modelstore_config, caplog):
    model_jsons = [
        "mymodel_1.0.0.json",
        "mymodel_1.1.0.json",
        "anothermodel_0.0.1.json",
    ]
    backup_jsons = [
        "mymodel_1.0.0_2021-08-01_18-00-00.json",
        "mymodel_1.0.0_2021-07-31_12-00-00",
    ]

    def my_glob(pattern):
        if "previous" in pattern.lower():
            return backup_jsons
        return model_jsons

    with mock.patch(
        "{}.glob.glob".format(r.__name__),
        side_effect=my_glob,
    ):
        with caplog.at_level(logging.DEBUG):
            ms = r.ModelStore(modelstore_config)
            models = ms.list_models()

    print(models)
    # One model ID named "mymodel" and one named "anothermodel":
    assert len(models) == 2
    assert models["mymodel"]["latest"] == models["mymodel"]["1.1.0"]
    assert len(models["mymodel"]["backups"]) == 2
    assert models["anothermodel"]["backups"] == []
    assert "ignoring" not in caplog.text.lower()

def _get_model_store(complete_conf, cache=True):
    """Return the ModelStore for the configured model, creating it on
    first use and optionally caching it for subsequent calls."""
    global _cached_model_stores
    key = _model_key(complete_conf["model"])
    model_store = _cached_model_stores.get(key) or resource.ModelStore(
        complete_conf
    )
    if cache:
        _cached_model_stores[key] = model_store
    return model_store

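# NOTE: `_model_key` is used above but is not part of this excerpt. A minimal
# sketch of such a key derivation, assuming a model is uniquely identified by
# its name and version (hypothetical illustration, not the actual helper):
def _model_key_sketch(model_conf):
    # e.g. {"name": "mymodel", "version": "1.0.0"} -> "mymodel_1.0.0"
    return "{}_{}".format(model_conf["name"], model_conf["version"])
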
# Assumed mock.patch decorators, restored from the mock parameter names. The
# patched metadata loader must return existing metrics plus a history that
# already contains one entry keyed "0123" (see the assertions below); the
# exact return_value shape and the `_dump_metadata` target are assumptions:
@mock.patch(
    "{}.ModelStore._load_metadata".format(r.__name__),
    return_value={
        "metrics": {"b": 2},
        "metrics_history": {"0123": {"b": 2}},
    },
)
@mock.patch("{}.ModelStore._dump_metadata".format(r.__name__))
def test_modelstore_update_model_metrics(dump, load, modelstore_config):
    new_metrics = {"a": 1}
    ms = r.ModelStore(modelstore_config)
    ms.update_model_metrics(modelstore_config["model"], new_metrics)
    load.assert_called_once()
    dump.assert_called_once()
    name, contents = dump.call_args[0]
    assert contents["metrics"] == new_metrics
    # The history must contain the pre-existing entry plus the new one.
    hist = contents["metrics_history"]
    assert len(hist) == 2
    del hist["0123"]
    assert hist.popitem()[1] == new_metrics

# Assumed mock.patch decorators, restored from the mock parameter names:
@mock.patch("{}.pickle".format(r.__name__))
@mock.patch("{}.json".format(r.__name__))
def test_modelstore_load(json, pkl, modelstore_config):
    with mock.patch(
        "{}.open".format(r.__name__), mock.mock_open(), create=True
    ) as mo:
        ms = r.ModelStore(modelstore_config)
        ms.load_trained_model(modelstore_config["model"])

    model_conf = modelstore_config["model"]
    base_name = os.path.join(
        modelstore_config["model_store"]["location"],
        "{}_{}".format(model_conf["name"], model_conf["version"]),
    )
    # Both the pickled model and its JSON metadata must be opened.
    calls = [
        mock.call("{}.pkl".format(base_name), "rb"),
        mock.call("{}.json".format(base_name), "r", encoding="utf-8"),
    ]
    mo.assert_has_calls(calls, any_order=True)

# Assumed mock.patch decorators, restored from the mock parameter names
# (targets and return values are inferences, not verified originals):
@mock.patch("{}.os.path.exists".format(r.__name__), return_value=False)
@mock.patch("{}.os.makedirs".format(r.__name__))
@mock.patch("{}.shutil.copy".format(r.__name__))
@mock.patch("{}.glob.glob".format(r.__name__), return_value=[])
def test_modelstore_dump(
    glob, copy, makedirs, path_exists, modelstore_config
):
    with mock.patch(
        "{}.open".format(r.__name__), mock.mock_open(), create=True
    ) as mo:
        ms = r.ModelStore(modelstore_config)
        ms.dump_trained_model(modelstore_config, {"hi": 1}, {"there": 2})

    model_conf = modelstore_config["model"]
    base_name = os.path.join(
        modelstore_config["model_store"]["location"],
        "{}_{}".format(model_conf["name"], model_conf["version"]),
    )
    # Both the pickled model and its JSON metadata must be written.
    calls = [
        mock.call("{}.pkl".format(base_name), "wb"),
        mock.call("{}.json".format(base_name), "w"),
    ]
    mo.assert_has_calls(calls, any_order=True)

# Assumed mock.patch decorators, restored from the mock parameter names
# (`jsond`/`pickled` presumably stand for json.dump/pickle.dump):
@mock.patch("{}.os.path.exists".format(r.__name__), return_value=True)
@mock.patch("{}.pickle.dump".format(r.__name__))
@mock.patch("{}.json.dump".format(r.__name__))
def test_modelstore_dump_extra_model_keys(
    jsond, pickled, path_exists, modelstore_config
):
    # Extra keys in the model config must end up in the dumped metadata,
    # but must not overwrite generated keys such as "created".
    modelstore_config["model"]["extraparam"] = 42
    modelstore_config["model"]["anotherparam"] = 23
    modelstore_config["model"]["created"] = "colliding keys should not occur"
    with mock.patch(
        "{}.open".format(r.__name__), mock.mock_open(), create=True
    ) as _:
        ms = r.ModelStore(modelstore_config)
        ms.dump_trained_model(
            modelstore_config, {"pseudo_model": 1}, {"pseudo_metrics": 2}
        )
    dumped = jsond.call_args[0][0]
    print(modelstore_config)
    print(dumped)
    assert "extraparam" in dumped
    assert dumped["extraparam"] == 42
    assert "anotherparam" in dumped
    assert dumped["anotherparam"] == 23
    assert dumped["created"] != "colliding keys should not occur"

# Assumed mock.patch decorators, restored from the mock parameter names:
@mock.patch("{}.os.path.exists".format(r.__name__), return_value=True)
@mock.patch("{}.pickle.dump".format(r.__name__))
@mock.patch("{}.json.dump".format(r.__name__))
def test_modelstore_train_report(
    jsond, pickled, path_exists, modelstore_config
):
    with mock.patch(
        "{}.open".format(r.__name__), mock.mock_open(), create=True
    ) as _:
        ms = r.ModelStore(modelstore_config)
        ms.add_to_train_report("report_key", "report_val")
        ms.dump_trained_model(
            modelstore_config, {"pseudo_model": 1}, {"pseudo_metrics": 2}
        )
    dumped = jsond.call_args[0][0]
    print(modelstore_config)
    print(dumped)
    # The train report and system information must be part of the metadata.
    assert "train_report" in dumped
    assert "report_key" in dumped["train_report"]
    assert dumped["train_report"]["report_key"] == "report_val"
    assert "system" in dumped
    assert "mllaunchpad_version" in dumped["system"]
    assert "platform" in dumped["system"]
    assert "packages" in dumped["system"]

def list_models(model_store_location_or_config_dict: Union[Dict, str]):
    """Get information on all available versions of trained models.

    :param model_store_location_or_config_dict: Location of the model store.
        If you have a config dict available, use that instead.
    :type model_store_location_or_config_dict: Union[Dict, str]

    Side note: The return value includes backups of models that have been
    re-trained without changing the version number (they reside in the
    subdirectory ``previous``). Please note that these backed-up models are
    listed for information only and are not available for loading (one would
    have to restore them by moving them up one directory level, out of
    ``previous``).

    Example::

        import mllaunchpad as mllp
        my_cfg = mllp.get_validated_config("./my_config_file.yml")
        all_models = mllp.list_models(my_cfg)  # also accepts a model store location string
        # An example of what ``list_models()``'s result would look like:
        # {
        #     iris: {
        #         1.0.0: { ... complete metadata of this version number ... },
        #         1.1.0: { ... },
        #         latest: { ... duplicate of metadata of highest available version number, here 1.1.0 ... },
        #         backups: [ {...}, {...}, ... ]
        #     },
        #     my_other_model: {
        #         1.0.1: { ... },
        #         2.0.0: { ... },
        #         latest: { ... },
        #         backups: []
        #     }
        # }

    :returns: Dict with information on all available trained models.
    """
    ms = resource.ModelStore(model_store_location_or_config_dict)
    return ms.list_models()

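# Usage sketch for traversing the documented result structure (the helper
# name is illustrative; it assumes only the result shape shown in the
# docstring example above):
def _print_model_overview(all_models):
    for model_id, versions in all_models.items():
        # "latest" and "backups" are special keys next to the version numbers.
        n_versions = len(versions) - 2
        n_backups = len(versions["backups"])
        print(
            "{}: {} version(s), {} backup(s)".format(
                model_id, n_versions, n_backups
            )
        )
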
def __init__(self, config, application, debug=False):
    """When initializing ModelApi, your model will be automatically
    retrieved from the model store based on the currently active
    configuration.

    Params:
        config:      configuration dictionary to use
        application: flask application to use
        debug:       use current prediction code instead of that of the
                     persisted model
    """
    self.model_config = config["model"]
    model_store = resource.ModelStore(config)
    self.model_wrapper = self._load_model(model_store, self.model_config)
    if debug:
        # TODO: Hacky; should use model_actions functionality for a lot of
        # the API functionality instead of duplicating it.
        # Create a fresh model object from current code and transplant the
        # existing contents.
        m_cls = model_actions._get_model_class(config, cache=True)
        curr_model_wrapper = m_cls(contents=self.model_wrapper.contents)
        self.model_wrapper = curr_model_wrapper

    # Workaround (tensorflow has a problem with spontaneously created
    # threads such as with Flask):
    # https://kobkrit.com/tensor-something-is-not-an-element-of-this-graph-error-in-keras-on-flask-web-server-4173a8fe15e1
    try:
        import tensorflow as tf

        graph = tf.get_default_graph()
        self.model_wrapper.__graph = graph
    except Exception as e:
        logger.debug(
            'Optional tensorflow/flask workaround for "<tensor> is not '
            'an element of this graph" problem resulted in: %s',
            e,
        )
    else:
        logger.info(
            "Stored tensorflow model's graph - tensorflow/flask "
            'workaround for "<tensor> is not an element of this graph" '
            "problem"
        )

    self.datasources, self.datasinks = self._init_datasources(config)

    logger.debug("Initializing RESTful API")
    api = Api(application)
    api_url = get_api_base_url(config)
    raml = _load_raml(config)
    res_normal, res_with_id, res_file = _get_resources(raml)

    if res_file or res_normal:
        resource_urls = {"query": None, "file": None}
        parsers = {"query": None, "file": None}
        if res_normal:
            logger.debug(
                "Adding query resource %s to api %s",
                res_normal.path,
                api_url,
            )
            resource_urls["query"] = api_url + res_normal.path
            parsers["query"] = _create_request_parser(res_normal)
        if res_file:
            logger.debug(
                "Adding file-based resource %s to api %s",
                res_file.path,
                api_url,
            )
            resource_urls["file"] = api_url + res_file.path
            parsers["file"] = _create_request_parser(res_file)
        if (
            len(resource_urls) == 2
            and resource_urls["query"] == resource_urls["file"]
        ):
            # Query and file upload share one URL: register a single
            # resource that handles both.
            api.add_resource(
                QueryOrFileUploadResource,  # QueryResource,
                resource_urls["query"],
                resource_class_kwargs={
                    "model_api_obj": self,
                    "query_parser": parsers["query"],
                    "file_parser": parsers["file"],
                },
            )
        else:
            for k, res_url in resource_urls.items():
                if not res_url:
                    continue
                api.add_resource(
                    QueryOrFileUploadResource,  # QueryResource,
                    res_url,
                    resource_class_kwargs={
                        "model_api_obj": self,
                        "query_parser": parsers["query"]
                        if k == "query"
                        else None,
                        "file_parser": parsers["file"]
                        if k == "file"
                        else None,
                    },
                )
    if res_with_id:
        logger.debug(
            "Adding url-id resource %s to api %s",
            res_with_id.path,
            api_url,
        )
        uri_param_name = res_with_id.uri_params[-1].name
        resource_url = (
            api_url
            + res_with_id.parent.path
            + "/<string:some_resource_id>"
        )
        parser = _create_request_parser(res_with_id)
        api.add_resource(
            GetByIdResource,
            resource_url,
            resource_class_kwargs={
                "model_api_obj": self,
                "parser": parser,
                "id_name": uri_param_name,
            },
        )
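
# Minimal usage sketch, assuming the __init__ above belongs to mllaunchpad's
# ModelApi class and that a validated config plus the RAML file it references
# are available (illustrative only, not the canonical deployment setup):
if __name__ == "__main__":
    from flask import Flask

    import mllaunchpad
    from mllaunchpad.api import ModelApi

    app = Flask(__name__)
    my_cfg = mllaunchpad.get_validated_config("./my_config_file.yml")
    ModelApi(my_cfg, app)  # registers the RESTful resources on `app`
    app.run()  # development server; use a real WSGI server in production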