def test_invalid_workspace_should_fail(self):
    """Workspace construction must reject a missing or non-string backend_url_prefix."""
    # Case 1: backend_url_prefix omitted entirely.
    with self.assertRaises(ValueError):
        metadata.Workspace(name="ws_1",
                           description="a workspace for testing",
                           labels={"n1": "v1"})
    # Case 2: backend_url_prefix is an int instead of a string.
    with self.assertRaises(ValueError):
        metadata.Workspace(backend_url_prefix=127,
                           name="ws_1",
                           description="a workspace for testing",
                           labels={"n1": "v1"})
def get_or_create_workspace(ws_name):
    """Return a metadata Workspace named *ws_name* for the GitHub summarization task."""
    # Connect to metadata-service in namespace kubeflow in the k8s cluster.
    return metadata.Workspace(
        backend_url_prefix=METADATA_SERVICE,
        name=ws_name,
        description="a workspace for the GitHub summarization task",
        labels={"n1": "v1"})
def __init__(self, workspace_name="vessel-xgboost-example", desc=""):
    """Bind this object to the metadata Workspace named *workspace_name*.

    NOTE(review): *desc* is currently unused — confirm whether it was meant
    to be forwarded as the workspace description.
    """
    self._ws_name = workspace_name
    # Connect to metadata-service in namespace kubeflow in the k8s cluster.
    self._ws = metadata.Workspace(
        backend_url_prefix=KUBEFLOW_METADATA_URL_PREFIX,
        name=self._ws_name)
def test_log_metadata_successfully(self):
    """Log a data set, metrics and a model via Run.log and list them back.

    Fix: the original used bare ``assert`` statements, which are stripped
    under ``python -O`` and are inconsistent with the sibling tests that use
    unittest assertions; replaced with ``self.assertIsNotNone``.
    """
    ws1 = metadata.Workspace(backend_url_prefix="127.0.0.1:8080",
                             name="ws_1",
                             description="a workspace for testing",
                             labels={"n1": "v1"})
    r = metadata.Run(
        workspace=ws1,
        name="first run",
        description="first run in ws_1",
    )
    data_set = r.log(
        metadata.DataSet(description="an example data",
                         name="mytable-dump",
                         owner="*****@*****.**",
                         uri="file://path/to/dataset",
                         version="v1.0.0",
                         query="SELECT * FROM mytable"))
    self.assertIsNotNone(data_set.id)
    metrics = r.log(
        metadata.Metrics(
            name="MNIST-evaluation",
            description=
            "validating the MNIST model to recognize handwritten digits",
            owner="*****@*****.**",
            uri="gcs://my-bucket/mnist-eval.csv",
            data_set_id="123",
            model_id="12345",
            metrics_type=metadata.Metrics.VALIDATION,
            values={"accuracy": 0.95},
            labels={"mylabel": "l1"}))
    self.assertIsNotNone(metrics.id)
    model = r.log(
        metadata.Model(name="MNIST",
                       description="model to recognize handwritten digits",
                       owner="*****@*****.**",
                       uri="gcs://my-bucket/mnist",
                       model_type="neural network",
                       training_framework={
                           "name": "tensorflow",
                           "version": "v1.0"
                       },
                       hyperparameters={
                           "learning_rate": 0.5,
                           "layers": [10, 3, 1],
                           "early_stop": True
                       },
                       version="v0.0.1",
                       labels={"mylabel": "l1"}))
    self.assertIsNotNone(model.id)
    # Every artifact type logged above should be listable from the workspace.
    self.assertTrue(len(ws1.list()) > 0)
    self.assertTrue(len(ws1.list(metadata.Model.ARTIFACT_TYPE_NAME)) > 0)
    self.assertTrue(len(ws1.list(metadata.Metrics.ARTIFACT_TYPE_NAME)) > 0)
    self.assertTrue(len(ws1.list(metadata.DataSet.ARTIFACT_TYPE_NAME)) > 0)
def test_log_metadata_successfully_with_minimum_information(self):
    """Artifacts can be logged with only the required name/uri fields."""
    workspace = metadata.Workspace(backend_url_prefix="127.0.0.1:8080",
                                   name="ws_1")
    run = metadata.Run(workspace=workspace, name="first run")
    execution = metadata.Execution(name="test execution",
                                   workspace=workspace,
                                   run=run)
    self.assertIsNotNone(execution.id)
    # Each minimal artifact must come back with a server-assigned id.
    for logged in (
            execution.log_input(
                metadata.DataSet(name="mytable-dump",
                                 uri="file://path/to/dataset")),
            execution.log_output(
                metadata.Metrics(name="MNIST-evaluation",
                                 uri="gcs://my-bucket/mnist-eval.csv")),
            execution.log_output(
                metadata.Model(name="MNIST", uri="gcs://my-bucket/mnist")),
    ):
        self.assertIsNotNone(logged.id)
def test_log_invalid_artifacts_should_fail(self):
    """Artifacts that set reserved workspace/run custom properties are rejected."""
    ws = metadata.Workspace(backend_url_prefix="127.0.0.1:8080",
                            name="ws_1",
                            description="a workspace for testing",
                            labels={"n1": "v1"})
    e = metadata.Execution(name="test execution", workspace=ws)

    def _invalid_artifact(reserved_key, value):
        # Build an artifact that illegally sets a reserved custom property.
        return ArtifactFixture(
            openapi_client.MlMetadataArtifact(
                uri="gs://uri",
                custom_properties={
                    reserved_key:
                        openapi_client.MlMetadataValue(string_value=value),
                }))

    self.assertRaises(
        ValueError, e.log_input,
        _invalid_artifact(metadata.WORKSPACE_PROPERTY_NAME, "ws1"))
    self.assertRaises(
        ValueError, e.log_output,
        _invalid_artifact(metadata.RUN_PROPERTY_NAME, "run1"))
def create_execution(self, workspace_name, owner, execution_name_prefix,
                     run_name_prefix, desc):
    """Create the workspace, run and execution used for logging metadata.

    NOTE(review): *desc* is currently unused — confirm whether it was meant
    to populate the run/execution descriptions.
    """
    self._ws_name = workspace_name
    self._owner = owner
    # Connect to metadata-service in namespace kubeflow in the k8s cluster.
    self._ws = metadata.Workspace(
        backend_url_prefix=KUBEFLOW_METADATA_URL_PREFIX,
        name=self._ws_name)
    # Timestamps are taken separately for the run and the execution, as in
    # the original implementation, so the two names may differ slightly.
    run_stamp = datetime.utcnow().isoformat("T")
    self._r = metadata.Run(
        workspace=self._ws,
        name="-".join(["run", run_name_prefix, run_stamp]),
        description="")
    exec_stamp = datetime.utcnow().isoformat("T")
    self._exec = metadata.Execution(
        name="-".join(["execution", execution_name_prefix, exec_stamp]),
        workspace=self._ws,
        run=self._r,
        description="")
    self._model = None
# Notebook setup: imports plus creation of the metadata workspace, run and
# execution that the recommender training cells below log into.
from keras.losses import *
import tensorflow as tf
import os
from kfmd import metadata
from datetime import datetime
from keras import backend as K
from kubernetes import client as k8s_client, config as k8s_config

# Create a workspace, run and execution

# In[3]:

# Single timestamp shared by the run and execution names of this session.
execTime = datetime.utcnow().isoformat("T")
ws = metadata.Workspace(
    # Connect to metadata-service in namespace kubeflow in k8s cluster.
    backend_url_prefix="metadata-service.kubeflow.svc.cluster.local:8080",
    name="recommender",
    description="a workspace for saving recommender experiments")
r = metadata.Run(
    workspace=ws,
    name="run-" + execTime,
    description="recommender run",
)
# NOTE(review): "exec" shadows the builtin exec(); later cells may reference
# this name, so it is left unchanged — consider renaming to "execution".
# NOTE(review): the name lacks the "-" separator used by the run name above
# ("execution" + execTime vs "run-" + execTime) — confirm if intentional.
exec = metadata.Execution(
    name="execution" + execTime,
    workspace=ws,
    run=r,
    description="recommender ML execution",
)

# # 2. Read data
def test_log_metadata_successfully(self):
    """End-to-end happy path: log a data set, metrics and a model through an
    Execution, list them from the workspace, then verify lineage events.
    """
    ws1 = metadata.Workspace(
        backend_url_prefix="127.0.0.1:8080",
        name="ws_1",
        description="a workspace for testing",
        labels={"n1": "v1"})
    r = metadata.Run(
        workspace=ws1,
        name="first run",
        description="first run in ws_1",
    )
    e = metadata.Execution(
        name="test execution",
        workspace=ws1,
        run=r,
        description="an execution",
    )
    self.assertIsNotNone(e.id)
    # Log one artifact of each type; each must get a server-assigned id.
    data_set = e.log_input(
        metadata.DataSet(
            description="an example data",
            name="mytable-dump",
            owner="*****@*****.**",
            uri="file://path/to/dataset",
            version="v1.0.0",
            query="SELECT * FROM mytable"))
    self.assertIsNotNone(data_set.id)
    metrics = e.log_output(
        metadata.Metrics(
            name="MNIST-evaluation",
            description="validating the MNIST model to recognize handwritten digits",
            owner="*****@*****.**",
            uri="gcs://my-bucket/mnist-eval.csv",
            data_set_id="123",
            model_id="12345",
            metrics_type=metadata.Metrics.VALIDATION,
            values={"accuracy": 0.95},
            labels={"mylabel": "l1"}))
    self.assertIsNotNone(metrics.id)
    model = e.log_output(
        metadata.Model(
            name="MNIST",
            description="model to recognize handwritten digits",
            owner="*****@*****.**",
            uri="gcs://my-bucket/mnist",
            model_type="neural network",
            training_framework={
                "name": "tensorflow",
                "version": "v1.0"
            },
            hyperparameters={
                "learning_rate": 0.5,
                "layers": [10, 3, 1],
                "early_stop": True
            },
            version="v0.0.1",
            labels={"mylabel": "l1"}))
    self.assertIsNotNone(model.id)
    # Test listing artifacts in a workspace
    self.assertTrue(len(ws1.list()) > 0)
    self.assertTrue(len(ws1.list(metadata.Model.ARTIFACT_TYPE_NAME)) > 0)
    self.assertTrue(len(ws1.list(metadata.Metrics.ARTIFACT_TYPE_NAME)) > 0)
    self.assertTrue(len(ws1.list(metadata.DataSet.ARTIFACT_TYPE_NAME)) > 0)
    # Test lineage tracking.
    # The model was produced by exactly one event, whose execution is `e`.
    output_events = ws1.client.list_events2(model.id).events
    assert len(output_events) == 1
    execution_id = output_events[0].execution_id
    assert execution_id == e.id
    # The execution touched three artifacts: data_set, metrics and model.
    all_events = ws1.client.list_events(execution_id).events
    assert len(all_events) == 3
# In[1]: get_ipython().system('pip install kfmd --upgrade --user') get_ipython().system('pip install pandas --upgrade --user') from kfmd import metadata import pandas from datetime import datetime # Create a workspace, run and execution # In[2]: ws1 = metadata.Workspace( # Connect to metadata-service in namesapce kubeflow in k8s cluster. backend_url_prefix="metadata-service.kubeflow.svc.cluster.local:8080", name="ws1", description="a workspace for testing", labels={"n1": "v1"}) r = metadata.Run( workspace=ws1, name="run-" + datetime.utcnow().isoformat("T"), description="a run in ws_1", ) exec = metadata.Execution( name="execution" + datetime.utcnow().isoformat("T"), workspace=ws1, run=r, description="execution example", ) # Log data set, model and its evaluation