def test_log_metadata_successfully_with_minimum_information(self):
    store = metadata.Store(grpc_host=GRPC_HOST, grpc_port=GRPC_PORT)
    ws1 = metadata.Workspace(store=store, name="ws_1")
    r = metadata.Run(workspace=ws1, name="first run")
    e = metadata.Execution(name="test execution", workspace=ws1, run=r)
    self.assertIsNotNone(e.id)

    data_set = e.log_input(
        metadata.DataSet(name="mytable-dump", uri="file://path/to/dataset"))
    self.assertIsNotNone(data_set.id)
    data_set_id = data_set.id
    # ID should not change after logging twice.
    e.log_input(data_set)
    self.assertEqual(data_set_id, data_set.id)

    metrics = e.log_output(
        metadata.Metrics(name="MNIST-evaluation",
                         uri="gcs://my-bucket/mnist-eval.csv"))
    self.assertIsNotNone(metrics.id)
    metrics_id = metrics.id
    # ID should not change after logging twice.
    e.log_output(metrics)
    self.assertEqual(metrics_id, metrics.id)

    model = e.log_output(
        metadata.Model(name="MNIST", uri="gcs://my-bucket/mnist"))
    self.assertIsNotNone(model.id)
    model_id = model.id
    # ID should not change after logging twice.
    e.log_output(model)
    self.assertEqual(model_id, model.id)
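The test above relies on GRPC_HOST and GRPC_PORT module constants and on running inside a unittest.TestCase subclass; neither is shown in the snippet. A minimal, hypothetical harness is sketched below; the class name and constant values are assumptions (the 8080 port mirrors the backend_url_prefix used in the other tests here).

import unittest

from kubeflow.metadata import metadata

# Hypothetical values: point these at a reachable Metadata gRPC service.
GRPC_HOST = "127.0.0.1"
GRPC_PORT = 8080


class MetadataSdkTest(unittest.TestCase):
    # The test methods in this listing would live on a class like this one.
    pass


if __name__ == "__main__":
    unittest.main()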
def log_param(self, run_id, param):
    workspace = self.mldm_workspace
    execution = self.mldm_exec
    metric_type = param.key
    metric_value = param.value
    # MLflow params are recorded as input Metrics artifacts on the execution.
    execution.log_input(
        metadata.Metrics(name="mlflow-metric",
                         metrics_type=metric_type,
                         uri='file://',
                         values=metric_value))
    print("Input Param logged")
def log_metric(self, run_id, metric):
    workspace = self.mldm_workspace
    execution = self.mldm_exec
    metric_type = metric.key
    metric_timestamp = metric.timestamp  # not stored on the Metrics artifact
    metric_value = metric.value
    # MLflow metrics are recorded as output Metrics artifacts on the execution.
    execution.log_output(
        metadata.Metrics(name="mlflow-metric",
                         metrics_type=metric_type,
                         uri='file://',
                         values=metric_value))
    print("Output Metric logged")
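The two plugin methods above read self.mldm_workspace and self.mldm_exec but do not show where those attributes come from. A minimal sketch of a constructor that could create them with the Kubeflow Metadata SDK follows; the class name, argument names, and workspace/run/execution names are assumptions, not part of the original plugin.

from kubeflow.metadata import metadata


class MldmTrackingStore(object):
    # Illustrative setup only: class and attribute wiring are assumptions.

    def __init__(self, grpc_host, grpc_port):
        store = metadata.Store(grpc_host=grpc_host, grpc_port=grpc_port)
        # Workspace and Execution reused by log_param / log_metric above.
        self.mldm_workspace = metadata.Workspace(store=store, name="mlflow-ws")
        run = metadata.Run(workspace=self.mldm_workspace, name="mlflow-run")
        self.mldm_exec = metadata.Execution(name="mlflow-exec",
                                            workspace=self.mldm_workspace,
                                            run=run)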
def log_metric_info(ws, ws_run, description, name, owner, metric_uri,
                    data_set_id, model_id, metrics_type, values, labels):
    execution = metadata.Execution(
        name="Execution" + datetime.utcnow().isoformat("T"),
        workspace=ws,
        run=ws_run,
        description="Metric log exec.",
    )
    metric_log = execution.log_input(
        metadata.Metrics(description=description,
                         name=name,
                         owner=owner,
                         uri=metric_uri,
                         data_set_id=data_set_id,
                         model_id=model_id,
                         metrics_type=metrics_type,
                         values=values,
                         labels=labels))
    return metric_log
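A possible call site for the helper above, assuming a workspace and run already exist. Every value below is a placeholder chosen to match the API used elsewhere in these snippets; GRPC_HOST and GRPC_PORT are assumed constants.

from kubeflow.metadata import metadata

# Hypothetical wiring; all names and values are placeholders.
store = metadata.Store(grpc_host=GRPC_HOST, grpc_port=GRPC_PORT)
ws = metadata.Workspace(store=store, name="metric-ws")
ws_run = metadata.Run(workspace=ws, name="metric-run")

log_metric_info(
    ws=ws,
    ws_run=ws_run,
    description="validation metrics for the MNIST model",
    name="MNIST-evaluation",
    owner="owner@example.com",
    metric_uri="gcs://my-bucket/mnist-eval.csv",
    data_set_id="123",
    model_id="12345",
    metrics_type=metadata.Metrics.VALIDATION,
    values={"accuracy": "0.95"},
    labels={"stage": "validation"})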
def test_log_metadata_successfully_with_minimum_information(self):
    ws1 = metadata.Workspace(backend_url_prefix="127.0.0.1:8080", name="ws_1")
    r = metadata.Run(workspace=ws1, name="first run")
    e = metadata.Execution(name="test execution", workspace=ws1, run=r)
    self.assertIsNotNone(e.id)

    data_set = e.log_input(
        metadata.DataSet(name="mytable-dump", uri="file://path/to/dataset"))
    self.assertIsNotNone(data_set.id)

    metrics = e.log_output(
        metadata.Metrics(name="MNIST-evaluation",
                         uri="gcs://my-bucket/mnist-eval.csv"))
    self.assertIsNotNone(metrics.id)

    model = e.log_output(
        metadata.Model(name="MNIST", uri="gcs://my-bucket/mnist"))
    self.assertIsNotNone(model.id)
def save_metric_metadata(execution, model, test_acc, test_loss, bucket_name):
    metric_file = bucket_name + '/metadata/metrics.csv'
    with file_io.FileIO(metric_file, 'w') as f:
        metric_writer = csv.writer(f)
        metric_writer.writerow(['accuracy', test_acc])
        metric_writer.writerow(['loss', test_loss])

    # Save the evaluation results as an output Metrics artifact of the execution.
    metrics = execution.log_output(
        metadata.Metrics(
            name="MNIST-evaluation",
            description="validating the MNIST model to recognize images",
            owner="*****@*****.**",
            uri=metric_file,
            model_id=str(model.id),
            metrics_type=metadata.Metrics.VALIDATION,
            values={
                "accuracy": str(test_acc),
                "test_loss": str(test_loss)
            },
            labels={"mylabel": "l1"}))
    print("Metrics id is %s" % metrics.id)
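A sketch of how the function above might be driven from a training script. The store, workspace, run, execution, and model objects are assumptions based on the API used elsewhere in these snippets; GRPC_HOST, GRPC_PORT, the bucket name, and the accuracy/loss values are placeholders.

from kubeflow.metadata import metadata

# Hypothetical setup; names and values are placeholders.
store = metadata.Store(grpc_host=GRPC_HOST, grpc_port=GRPC_PORT)
ws = metadata.Workspace(store=store, name="mnist-ws")
run = metadata.Run(workspace=ws, name="training-run")
execution = metadata.Execution(name="train-and-evaluate", workspace=ws, run=run)

# Log the model first so that model.id is available for model_id=str(model.id).
model = execution.log_output(
    metadata.Model(name="MNIST", uri="gcs://my-bucket/mnist"))

save_metric_metadata(execution, model, test_acc=0.95, test_loss=0.05,
                     bucket_name="gs://my-bucket")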
def test_log_metadata_successfully(self):
    store = metadata.Store(grpc_host=GRPC_HOST, grpc_port=GRPC_PORT)

    ws1 = metadata.Workspace(store=store,
                             name="test_log_metadata_successfully_ws",
                             description="a workspace for testing",
                             labels={"n1": "v1"})
    r = metadata.Run(
        workspace=ws1,
        name="first run",
        description="first run in ws_1",
    )
    trainer = metadata.Execution(
        name="test execution",
        workspace=ws1,
        run=r,
        description="an execution",
    )
    self.assertIsNotNone(trainer.id)

    data_set = trainer.log_input(
        metadata.DataSet(description="an example data",
                         name="mytable-dump",
                         owner="*****@*****.**",
                         uri="file://path/to/dataset",
                         version=str(uuid.uuid4()),
                         query="SELECT * FROM mytable"))
    self.assertIsNotNone(data_set.id)
    self.assertIsNotNone(repr(data_set))

    metrics = trainer.log_output(
        metadata.Metrics(
            name="MNIST-evaluation",
            description="validating the MNIST model to recognize handwritten digits",
            owner="*****@*****.**",
            uri="gcs://my-bucket/mnist-eval.csv",
            data_set_id="123",
            model_id="12345",
            metrics_type=metadata.Metrics.VALIDATION,
            values={"accuracy": 0.95},
            labels={"mylabel": "l1"}))
    self.assertIsNotNone(metrics.id)
    self.assertIsNotNone(repr(metrics))

    model_version = str(uuid.uuid4())
    model = trainer.log_output(
        metadata.Model(name="MNIST",
                       description="model to recognize handwritten digits",
                       owner="*****@*****.**",
                       uri="gcs://my-bucket/mnist",
                       model_type="neural network",
                       training_framework={
                           "name": "tensorflow",
                           "version": "v1.0"
                       },
                       hyperparameters={
                           "learning_rate": 0.5,
                           "layers": [10, 3, 1],
                           "early_stop": True
                       },
                       version=model_version,
                       labels={"mylabel": "l1"}))
    self.assertIsNotNone(model.id)
    self.assertIsNotNone(repr(model))

    serving_application = metadata.Execution(
        name="serving model",
        workspace=ws1,
        description="an execution to represent model serving component",
    )
    self.assertIsNotNone(serving_application.id)

    # Use model name, version, uri to uniquely identify the existing model.
    served_model = metadata.Model(
        name="MNIST",
        uri="gcs://my-bucket/mnist",
        version=model_version,
    )
    serving_application.log_input(served_model)

    # Test listing artifacts in a workspace.
    self.assertTrue(len(ws1.list()) > 0)
    self.assertTrue(len(ws1.list(metadata.Model.ARTIFACT_TYPE_NAME)) > 0)
    self.assertTrue(len(ws1.list(metadata.Metrics.ARTIFACT_TYPE_NAME)) > 0)
    self.assertTrue(len(ws1.list(metadata.DataSet.ARTIFACT_TYPE_NAME)) > 0)

    # Test lineage tracking.
    model_events = ws1.store.get_events_by_artifact_ids([model.id])
    self.assertEqual(len(model_events), 2)
    execution_ids = set(e.execution_id for e in model_events)
    assert execution_ids == set([serving_application.id, trainer.id])

    trainer_events = ws1.store.get_events_by_execution_ids([trainer.id])
    artifact_ids = set(e.artifact_id for e in trainer_events)
    assert artifact_ids == set([model.id, metrics.id, data_set.id])
def test_log_metadata_successfully(self):
    ws1 = metadata.Workspace(
        backend_url_prefix="127.0.0.1:8080",
        name="ws_1",
        description="a workspace for testing",
        labels={"n1": "v1"})
    r = metadata.Run(
        workspace=ws1,
        name="first run",
        description="first run in ws_1",
    )
    e = metadata.Execution(
        name="test execution",
        workspace=ws1,
        run=r,
        description="an execution",
    )
    self.assertIsNotNone(e.id)

    data_set = e.log_input(
        metadata.DataSet(
            description="an example data",
            name="mytable-dump",
            owner="*****@*****.**",
            uri="file://path/to/dataset",
            version="v1.0.0",
            query="SELECT * FROM mytable"))
    self.assertIsNotNone(data_set.id)

    metrics = e.log_output(
        metadata.Metrics(
            name="MNIST-evaluation",
            description="validating the MNIST model to recognize handwritten digits",
            owner="*****@*****.**",
            uri="gcs://my-bucket/mnist-eval.csv",
            data_set_id="123",
            model_id="12345",
            metrics_type=metadata.Metrics.VALIDATION,
            values={"accuracy": 0.95},
            labels={"mylabel": "l1"}))
    self.assertIsNotNone(metrics.id)

    model = e.log_output(
        metadata.Model(
            name="MNIST",
            description="model to recognize handwritten digits",
            owner="*****@*****.**",
            uri="gcs://my-bucket/mnist",
            model_type="neural network",
            training_framework={
                "name": "tensorflow",
                "version": "v1.0"
            },
            hyperparameters={
                "learning_rate": 0.5,
                "layers": [10, 3, 1],
                "early_stop": True
            },
            version="v0.0.1",
            labels={"mylabel": "l1"}))
    self.assertIsNotNone(model.id)

    # Test listing artifacts in a workspace.
    self.assertTrue(len(ws1.list()) > 0)
    self.assertTrue(len(ws1.list(metadata.Model.ARTIFACT_TYPE_NAME)) > 0)
    self.assertTrue(len(ws1.list(metadata.Metrics.ARTIFACT_TYPE_NAME)) > 0)
    self.assertTrue(len(ws1.list(metadata.DataSet.ARTIFACT_TYPE_NAME)) > 0)

    # Test lineage tracking.
    output_events = ws1.client.list_events2(model.id).events
    assert len(output_events) == 1
    execution_id = output_events[0].execution_id
    assert execution_id == e.id
    all_events = ws1.client.list_events(execution_id).events
    assert len(all_events) == 3