def test_log_metadata_successfully_with_minimum_information(self):
    """Log a data set, metrics and a model built from only name and uri.

    Every artifact is logged twice on the same execution. The test
    checks that the artifact has a non-None id after the first call and
    that the id is unchanged after the second call.
    """

    def check_id_survives_relogging(log, artifact):
        # `log` is the execution's bound log_input / log_output method.
        logged = log(artifact)
        self.assertIsNotNone(logged.id)
        first_id = logged.id
        # ID should not change after logging twice.
        log(logged)
        self.assertEqual(first_id, logged.id)

    store = metadata.Store(grpc_host=GRPC_HOST, grpc_port=GRPC_PORT)
    workspace = metadata.Workspace(store=store, name="ws_1")
    run = metadata.Run(workspace=workspace, name="first run")
    execution = metadata.Execution(
        name="test execution",
        workspace=workspace,
        run=run,
    )
    self.assertIsNotNone(execution.id)

    # The data set is the only artifact logged as an input.
    check_id_survives_relogging(
        execution.log_input,
        metadata.DataSet(name="mytable-dump", uri="file://path/to/dataset"),
    )
    check_id_survives_relogging(
        execution.log_output,
        metadata.Metrics(name="MNIST-evaluation",
                         uri="gcs://my-bucket/mnist-eval.csv"),
    )
    check_id_survives_relogging(
        execution.log_output,
        metadata.Model(name="MNIST", uri="gcs://my-bucket/mnist"),
    )
def save_checkpoint_metadata(self):
    """Log the checkpoint root as a model output of a 'train' execution.

    Creates a ``metadata.Execution`` named ``'train'`` in
    ``self.metadata_workspace`` and logs a ``metadata.Model`` built from
    ``self.model_name`` and ``self.checkpoint_root`` (version ``'0'``)
    as an output of that execution. Nothing is returned.
    """
    # Named `execution` rather than `exec` so the builtin is not shadowed.
    execution = metadata.Execution(
        'train',
        workspace=self.metadata_workspace,
    )
    checkpoint_model = metadata.Model(
        name=self.model_name,
        uri=self.checkpoint_root,
        version='0',
    )
    execution.log_output(checkpoint_model)
def log_model_info(ws, ws_run, model_uri):
    """Log the t2t model as an input of a newly created execution.

    :param ws: workspace passed to ``metadata.Execution``.
    :param ws_run: run passed to ``metadata.Execution``.
    :param model_uri: uri stored on the logged ``metadata.Model``.
    :return: None; the value returned by ``log_input`` is discarded.
    """
    # The execution name embeds the current UTC timestamp.
    execution_name = "execution" + datetime.utcnow().isoformat("T")
    train_execution = metadata.Execution(
        name=execution_name,
        workspace=ws,
        run=ws_run,
        description="train action",
    )
    t2t_model = metadata.Model(
        description="t2t model",
        name="t2t-model",
        owner="*****@*****.**",
        uri=model_uri,
        version="v1.0.0",
    )
    # Note: the model is recorded as an *input* of the execution.
    train_execution.log_input(t2t_model)
def test_artifact_deduplication(self):
    """Check that two identical models logged separately share one id.

    Two ``metadata.Model`` objects with the same name, uri, model type
    and version are logged as outputs of two different executions in
    the same workspace; the test expects both to end up with equal ids.
    """
    store = metadata.Store(grpc_host=GRPC_HOST, grpc_port=GRPC_PORT)
    workspace_one = metadata.Workspace(store=store, name="workspace_one")
    workspace_two = metadata.Workspace(store=store, name="workspace_two")
    run = metadata.Run(workspace=workspace_one, name="first run")

    first_execution = metadata.Execution(
        name="test execution", workspace=workspace_one, run=run)
    second_execution = metadata.Execution(
        name="execution 2", workspace=workspace_one)
    # Not referenced again below; still constructed as in the original
    # flow because the constructor may have side effects -- TODO confirm.
    metadata.Execution(name="execution 3", workspace=workspace_two)

    self.assertIsNotNone(first_execution.id)
    self.assertIsNotNone(second_execution.id)

    # Both models are built from exactly the same field values.
    mnist_fields = dict(
        name="MNIST",
        uri="gcs://my-bucket/mnist",
        model_type="neural network",
        version="v0.0.1",
    )
    first_model = metadata.Model(**mnist_fields)
    second_model = metadata.Model(**mnist_fields)

    first_execution.log_output(first_model)
    self.assertIsNotNone(first_model.id)

    second_execution.log_output(second_model)
    self.assertIsNotNone(second_model.id)

    self.assertEqual(first_model.id, second_model.id)
def log_model(execution, model_name, model_uri, model_version="1.0.0"):
    """
    Log the model to Kubeflow artifacts as an output of an execution.

    :param execution: object whose ``log_output`` method receives the
        model (presumably a ``metadata.Execution`` -- confirm with callers).
    :param model_name: name stored on the model artifact.
    :param model_uri: uri stored on the model artifact.
    :param model_version: version stored on the model artifact;
        defaults to ``"1.0.0"``.
    :return: the ``metadata.Model`` instance that was handed to
        ``execution.log_output`` (not the value ``log_output`` returns).
    """
    model = metadata.Model(
        name=model_name,
        uri=model_uri,
        version=model_version,
    )
    execution.log_output(model)
    return model
def test_log_metadata_successfully_with_minimum_information(self):
    """Log a data set, metrics and a model built from only name and uri.

    The workspace is addressed through ``backend_url_prefix``. After
    each logging call the returned artifact must have a non-None id.
    """
    workspace = metadata.Workspace(
        backend_url_prefix="127.0.0.1:8080",
        name="ws_1",
    )
    run = metadata.Run(workspace=workspace, name="first run")
    execution = metadata.Execution(
        name="test execution",
        workspace=workspace,
        run=run,
    )
    self.assertIsNotNone(execution.id)

    # Input artifact.
    dataset_artifact = metadata.DataSet(
        name="mytable-dump", uri="file://path/to/dataset")
    logged_dataset = execution.log_input(dataset_artifact)
    self.assertIsNotNone(logged_dataset.id)

    # Output artifacts.
    metrics_artifact = metadata.Metrics(
        name="MNIST-evaluation", uri="gcs://my-bucket/mnist-eval.csv")
    logged_metrics = execution.log_output(metrics_artifact)
    self.assertIsNotNone(logged_metrics.id)

    model_artifact = metadata.Model(
        name="MNIST", uri="gcs://my-bucket/mnist")
    logged_model = execution.log_output(model_artifact)
    self.assertIsNotNone(logged_model.id)
def log_model(model_name, uri, model_version, execution, labels):
    """Log a model as an output of ``execution`` and print the result.

    :param model_name: name stored on the model artifact.
    :param uri: uri stored on the model artifact.
    :param model_version: version stored on the model artifact.
    :param execution: object whose ``log_output`` method receives the model.
    :param labels: labels stored on the model artifact.
    :return: None. The logged model, its id and its version are printed.

    Description, owner, model type, training framework and
    hyperparameters are fixed values defined in this function.
    """
    framework = {
        "name": "tensorflow",
        "version": "v2.0"
    }
    hyperparameters = {
        "learning_rate": 0.5,
        "layers": [10, 3, 1],
        "early_stop": True
    }
    candidate = metadata.Model(
        name=model_name,
        description="Model to identify tacos or burritos",
        owner="KaizenTeam",
        uri=uri,
        model_type="Sequential Model",
        training_framework=framework,
        hyperparameters=hyperparameters,
        version=model_version,
        labels=labels,
    )
    # Everything printed below comes from the object log_output returns.
    model = execution.log_output(candidate)
    print(model)
    summary = "\nModel id is {0.id} and version is {0.version}".format(model)
    print(summary)
def log_model_info(ws, ws_run, description, name, owner, model_uri, version,
                   hyperparameters, learning_rate, layers, early_stop,
                   labels):
    """Log a model as an input of a newly created execution.

    :param ws: workspace passed to ``metadata.Execution``.
    :param ws_run: run passed to ``metadata.Execution``.
    :param description: description stored on the model.
    :param name: name stored on the model.
    :param owner: owner stored on the model.
    :param model_uri: uri stored on the model.
    :param version: version stored on the model.
    :param hyperparameters: hyperparameters stored on the model.
    :param learning_rate: forwarded to ``metadata.Model`` as a keyword.
    :param layers: forwarded to ``metadata.Model`` as a keyword.
    :param early_stop: forwarded to ``metadata.Model`` as a keyword.
    :param labels: labels stored on the model.
    :return: None; the value returned by ``log_input`` is discarded.
    """
    # Named `execution` rather than `exec` so the builtin is not shadowed.
    # The execution name embeds the current UTC timestamp.
    execution = metadata.Execution(
        name="Execution" + datetime.utcnow().isoformat("T"),
        workspace=ws,
        run=ws_run,
        description="Model log exec.",
    )
    # NOTE(review): learning_rate / layers / early_stop are passed as
    # top-level keywords in addition to `hyperparameters`; presumably
    # metadata.Model accepts them as extra properties -- confirm against
    # the SDK version in use.
    execution.log_input(
        metadata.Model(
            description=description,
            name=name,
            owner=owner,
            uri=model_uri,
            version=version,
            hyperparameters=hyperparameters,
            learning_rate=learning_rate,
            layers=layers,
            early_stop=early_stop,
            labels=labels,
        ))
def save_model_metadata(exec, batch_size, epochs, export_path):
    """Write a small CSV of training settings and log the MNIST model.

    :param exec: object whose ``log_output`` method receives the model.
        The name shadows the builtin but is part of the caller-visible
        signature, so it is left as is.
    :param batch_size: written to the CSV and stored (as a string) in the
        model's hyperparameters.
    :param epochs: written to the CSV and stored (as a string) in the
        model's hyperparameters.
    :param export_path: uri stored on the model artifact.
    :return: the value returned by ``exec.log_output``.
    """
    rows = [
        ['model_framework', 'tensorflow', 'v2.0'],
        ['learning_rate', 0.5],
        ['epoch', epochs],
        ['batch_size', batch_size],
        ['layers', "28, 28, 1"],
    ]
    # The destination path is fixed; it is not derived from export_path.
    training_file = 'gs://dlaas-model/metadata/model.csv'
    with file_io.FileIO(training_file, 'w') as csv_file:
        csv.writer(csv_file).writerows(rows)

    # Save model; each call gets a fresh uuid4-based version string.
    model_version = "model_version_" + str(uuid4())
    framework = {
        "name": "tensorflow",
        "version": "v2.0"
    }
    hyperparameters = {
        "learning_rate": 0.5,
        "layers": [28, 28, 1],
        "epochs": str(epochs),
        "batch-size": str(batch_size),
        "early_stop": True
    }
    mnist_model = metadata.Model(
        name="MNIST",
        description="model to recognize images",
        owner="*****@*****.**",
        uri=export_path,
        model_type="CNN",
        training_framework=framework,
        hyperparameters=hyperparameters,
        version=model_version,
        labels={"tag": "train"},
    )
    model = exec.log_output(mnist_model)
    print(model)
    print("\nModel id is {0.id} and version is {0.version}".format(model))
    return model
def test_log_metadata_successfully(self):
    """Log fully populated artifacts, then check listing and lineage.

    A "trainer" execution logs a data set as input and metrics plus a
    model as outputs. A second "serving" execution logs a model with the
    same name, uri and version as its input. The test then checks that
    the workspace lists artifacts of every type, that the model is tied
    to exactly the two executions, and that the trainer's events cover
    exactly the three artifacts it logged.
    """
    store = metadata.Store(grpc_host=GRPC_HOST, grpc_port=GRPC_PORT)
    ws1 = metadata.Workspace(store=store,
                             name="test_log_metadata_successfully_ws",
                             description="a workspace for testing",
                             labels={"n1": "v1"})

    r = metadata.Run(
        workspace=ws1,
        name="first run",
        description="first run in ws_1",
    )

    trainer = metadata.Execution(
        name="test execution",
        workspace=ws1,
        run=r,
        description="an execution",
    )
    self.assertIsNotNone(trainer.id)

    data_set = trainer.log_input(
        metadata.DataSet(description="an example data",
                         name="mytable-dump",
                         owner="*****@*****.**",
                         uri="file://path/to/dataset",
                         version=str(uuid.uuid4()),
                         query="SELECT * FROM mytable"))
    self.assertIsNotNone(data_set.id)
    self.assertIsNotNone(repr(data_set))

    metrics = trainer.log_output(
        metadata.Metrics(
            name="MNIST-evaluation",
            description=
            "validating the MNIST model to recognize handwritten digits",
            owner="*****@*****.**",
            uri="gcs://my-bucket/mnist-eval.csv",
            data_set_id="123",
            model_id="12345",
            metrics_type=metadata.Metrics.VALIDATION,
            values={"accuracy": 0.95},
            labels={"mylabel": "l1"}))
    self.assertIsNotNone(metrics.id)
    self.assertIsNotNone(repr(metrics))

    # A random version keeps this model distinct from earlier test runs
    # while letting the serving step below refer to the same model.
    model_version = str(uuid.uuid4())
    model = trainer.log_output(
        metadata.Model(name="MNIST",
                       description="model to recognize handwritten digits",
                       owner="*****@*****.**",
                       uri="gcs://my-bucket/mnist",
                       model_type="neural network",
                       training_framework={
                           "name": "tensorflow",
                           "version": "v1.0"
                       },
                       hyperparameters={
                           "learning_rate": 0.5,
                           "layers": [10, 3, 1],
                           "early_stop": True
                       },
                       version=model_version,
                       labels={"mylabel": "l1"}))
    self.assertIsNotNone(model.id)
    self.assertIsNotNone(repr(model))

    serving_application = metadata.Execution(
        name="serving model",
        workspace=ws1,
        description="an execution to represent model serving component",
    )
    self.assertIsNotNone(serving_application.id)
    # Use model name, version, uri to uniquely identify existing model.
    served_model = metadata.Model(
        name="MNIST",
        uri="gcs://my-bucket/mnist",
        version=model_version,
    )
    serving_application.log_input(served_model)

    # Test listing artifacts in a workspace
    self.assertGreater(len(ws1.list()), 0)
    self.assertGreater(len(ws1.list(metadata.Model.ARTIFACT_TYPE_NAME)), 0)
    self.assertGreater(len(ws1.list(metadata.Metrics.ARTIFACT_TYPE_NAME)), 0)
    self.assertGreater(len(ws1.list(metadata.DataSet.ARTIFACT_TYPE_NAME)), 0)

    # Test lineage tracking.
    # unittest assertions are used instead of bare `assert` so the checks
    # still run under `python -O` and report both operands on failure.
    model_events = ws1.store.get_events_by_artifact_ids([model.id])
    self.assertEqual(len(model_events), 2)
    execution_ids = {event.execution_id for event in model_events}
    self.assertEqual(execution_ids, {serving_application.id, trainer.id})

    trainer_events = ws1.store.get_events_by_execution_ids([trainer.id])
    artifact_ids = {event.artifact_id for event in trainer_events}
    self.assertEqual(artifact_ids, {model.id, metrics.id, data_set.id})
def test_log_metadata_successfully(self):
    """Log fully populated artifacts, then check listing and lineage.

    The workspace is addressed through ``backend_url_prefix``. One
    execution logs a data set as input and metrics plus a model as
    outputs. The test then checks that the workspace lists artifacts of
    every type, that the model has exactly one event pointing at the
    execution, and that the execution has exactly three events.
    """
    ws1 = metadata.Workspace(
        backend_url_prefix="127.0.0.1:8080",
        name="ws_1",
        description="a workspace for testing",
        labels={"n1": "v1"})

    r = metadata.Run(
        workspace=ws1,
        name="first run",
        description="first run in ws_1",
    )

    e = metadata.Execution(
        name="test execution",
        workspace=ws1,
        run=r,
        description="an execution",
    )
    self.assertIsNotNone(e.id)

    data_set = e.log_input(
        metadata.DataSet(
            description="an example data",
            name="mytable-dump",
            owner="*****@*****.**",
            uri="file://path/to/dataset",
            version="v1.0.0",
            query="SELECT * FROM mytable"))
    self.assertIsNotNone(data_set.id)

    metrics = e.log_output(
        metadata.Metrics(
            name="MNIST-evaluation",
            description="validating the MNIST model to recognize handwritten digits",
            owner="*****@*****.**",
            uri="gcs://my-bucket/mnist-eval.csv",
            data_set_id="123",
            model_id="12345",
            metrics_type=metadata.Metrics.VALIDATION,
            values={"accuracy": 0.95},
            labels={"mylabel": "l1"}))
    self.assertIsNotNone(metrics.id)

    model = e.log_output(
        metadata.Model(
            name="MNIST",
            description="model to recognize handwritten digits",
            owner="*****@*****.**",
            uri="gcs://my-bucket/mnist",
            model_type="neural network",
            training_framework={
                "name": "tensorflow",
                "version": "v1.0"
            },
            hyperparameters={
                "learning_rate": 0.5,
                "layers": [10, 3, 1],
                "early_stop": True
            },
            version="v0.0.1",
            labels={"mylabel": "l1"}))
    self.assertIsNotNone(model.id)

    # Test listing artifacts in a workspace
    self.assertGreater(len(ws1.list()), 0)
    self.assertGreater(len(ws1.list(metadata.Model.ARTIFACT_TYPE_NAME)), 0)
    self.assertGreater(len(ws1.list(metadata.Metrics.ARTIFACT_TYPE_NAME)), 0)
    self.assertGreater(len(ws1.list(metadata.DataSet.ARTIFACT_TYPE_NAME)), 0)

    # Test lineage tracking.
    # unittest assertions are used instead of bare `assert` so the checks
    # still run under `python -O` and report both operands on failure.
    output_events = ws1.client.list_events2(model.id).events
    self.assertEqual(len(output_events), 1)
    execution_id = output_events[0].execution_id
    self.assertEqual(execution_id, e.id)

    all_events = ws1.client.list_events(execution_id).events
    self.assertEqual(len(all_events), 3)