Example 1
    def test_invalid_workspace_should_fail(self):
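        # Creating a Workspace without a backend_url_prefix should raise ValueError.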
        with self.assertRaises(ValueError):
            ws1 = metadata.Workspace(name="ws_1",
                                     description="a workspace for testing",
                                     labels={"n1": "v1"})

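        # A non-string backend_url_prefix (an int here) should also raise ValueError.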
        with self.assertRaises(ValueError):
            ws1 = metadata.Workspace(backend_url_prefix=127,
                                     name="ws_1",
                                     description="a workspace for testing",
                                     labels={"n1": "v1"})
Example 2
def get_or_create_workspace(ws_name):
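    # METADATA_SERVICE is defined elsewhere in this module and holds the metadata-service address (host:port).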
    return metadata.Workspace(
        # Connect to metadata-service in namespace kubeflow in the k8s cluster.
        backend_url_prefix=METADATA_SERVICE,
        name=ws_name,
        description="a workspace for the GitHub summarization task",
        labels={"n1": "v1"})
Example 3
    def __init__(self, workspace_name="vessel-xgboost-example", desc=""):
        self._ws_name = workspace_name

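        # KUBEFLOW_METADATA_URL_PREFIX is defined elsewhere and points at the in-cluster metadata service.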
        self._ws = metadata.Workspace(
            # Connect to metadata-service in namespace kubeflow in the k8s cluster.
            backend_url_prefix=KUBEFLOW_METADATA_URL_PREFIX,
            name=self._ws_name)
Example 4
    def test_log_metadata_successfully(self):
        ws1 = metadata.Workspace(backend_url_prefix="127.0.0.1:8080",
                                 name="ws_1",
                                 description="a workspace for testing",
                                 labels={"n1": "v1"})

        r = metadata.Run(
            workspace=ws1,
            name="first run",
            description="first run in ws_1",
        )

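        # Log a data set under this run; the backend assigns it an id.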
        data_set = r.log(
            metadata.DataSet(description="an example data",
                             name="mytable-dump",
                             owner="*****@*****.**",
                             uri="file://path/to/dataset",
                             version="v1.0.0",
                             query="SELECT * FROM mytable"))
        assert data_set.id

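        # Log evaluation metrics that reference the data set and model by id.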
        metrics = r.log(
            metadata.Metrics(
                name="MNIST-evaluation",
                description=
                "validating the MNIST model to recognize handwritten digits",
                owner="*****@*****.**",
                uri="gcs://my-bucket/mnist-eval.csv",
                data_set_id="123",
                model_id="12345",
                metrics_type=metadata.Metrics.VALIDATION,
                values={"accuracy": 0.95},
                labels={"mylabel": "l1"}))
        assert metrics.id

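        # Log the trained model along with its framework, hyperparameters and version.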
        model = r.log(
            metadata.Model(name="MNIST",
                           description="model to recognize handwritten digits",
                           owner="*****@*****.**",
                           uri="gcs://my-bucket/mnist",
                           model_type="neural network",
                           training_framework={
                               "name": "tensorflow",
                               "version": "v1.0"
                           },
                           hyperparameters={
                               "learning_rate": 0.5,
                               "layers": [10, 3, 1],
                               "early_stop": True
                           },
                           version="v0.0.1",
                           labels={"mylabel": "l1"}))
        assert model.id

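        # List artifacts in the workspace, optionally filtered by artifact type name.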
        self.assertTrue(len(ws1.list()) > 0)
        self.assertTrue(len(ws1.list(metadata.Model.ARTIFACT_TYPE_NAME)) > 0)
        self.assertTrue(len(ws1.list(metadata.Metrics.ARTIFACT_TYPE_NAME)) > 0)
        self.assertTrue(len(ws1.list(metadata.DataSet.ARTIFACT_TYPE_NAME)) > 0)
Example 5
  def test_log_metadata_successfully_with_minimum_information(self):
    ws1 = metadata.Workspace(backend_url_prefix="127.0.0.1:8080", name="ws_1")

    r = metadata.Run(workspace=ws1, name="first run")

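    # Group the logged inputs and outputs under one execution of the run.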
    e = metadata.Execution(name="test execution", workspace=ws1, run=r)
    self.assertIsNotNone(e.id)

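    # With minimum information, only name and uri are set on each artifact.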
    data_set = e.log_input(
        metadata.DataSet(name="mytable-dump", uri="file://path/to/dataset"))
    self.assertIsNotNone(data_set.id)

    metrics = e.log_output(
        metadata.Metrics(name="MNIST-evaluation",
            uri="gcs://my-bucket/mnist-eval.csv"))
    self.assertIsNotNone(metrics.id)

    model = e.log_output(
        metadata.Model(name="MNIST", uri="gcs://my-bucket/mnist"))
    self.assertIsNotNone(model.id)
Example 6
  def test_log_invalid_artifacts_should_fail(self):
    ws = metadata.Workspace(backend_url_prefix="127.0.0.1:8080",
                            name="ws_1",
                            description="a workspace for testing",
                            labels={"n1": "v1"})
    e = metadata.Execution(name="test execution", workspace=ws)
    # Artifacts that already carry the reserved workspace property must be rejected.
    artifact1 = ArtifactFixture(
        openapi_client.MlMetadataArtifact(
            uri="gs://uri",
            custom_properties={
                metadata.WORKSPACE_PROPERTY_NAME:
                openapi_client.MlMetadataValue(string_value="ws1"),
            }))
    self.assertRaises(ValueError, e.log_input, artifact1)
    # Artifacts that already carry the reserved run property must be rejected too.
    artifact2 = ArtifactFixture(
        openapi_client.MlMetadataArtifact(
            uri="gs://uri",
            custom_properties={
                metadata.RUN_PROPERTY_NAME:
                openapi_client.MlMetadataValue(string_value="run1"),
            }))
    self.assertRaises(ValueError, e.log_output, artifact2)
Example 7
    def create_execution(self, workspace_name, owner, execution_name_prefix,
                         run_name_prefix, desc):

        self._ws_name = workspace_name
        self._owner = owner
        self._ws = metadata.Workspace(
            # Connect to metadata-service in namespace kubeflow in the k8s cluster.
            backend_url_prefix=KUBEFLOW_METADATA_URL_PREFIX,
            name=self._ws_name)

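        # Timestamp the run and execution names so repeated invocations stay unique.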
        self._r = metadata.Run(workspace=self._ws,
                               name="run" + "-" + run_name_prefix + "-" +
                               datetime.utcnow().isoformat("T"),
                               description="")

        self._exec = metadata.Execution(name="execution" + "-" +
                                        execution_name_prefix + "-" +
                                        datetime.utcnow().isoformat("T"),
                                        workspace=self._ws,
                                        run=self._r,
                                        description="")

        self._model = None
Example 8
from keras.losses import *
import tensorflow as tf
import os
from kfmd import metadata
from datetime import datetime
from keras import backend as K
from kubernetes import client as k8s_client, config as k8s_config

# Create a workspace, run and execution

# In[3]:

execTime = datetime.utcnow().isoformat("T")
ws = metadata.Workspace(
    # Connect to metadata-service in namespace kubeflow in the k8s cluster.
    backend_url_prefix="metadata-service.kubeflow.svc.cluster.local:8080",
    name="recommender",
    description="a workspace for saving recommender experiments")
r = metadata.Run(
    workspace=ws,
    name="run-" + execTime,
    description="recommender run",
)
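# Create the execution in the same workspace and attach it to the run above.
# (Note: the variable name "exec" shadows Python's built-in exec function.)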
exec = metadata.Execution(
    name="execution" + execTime,
    workspace=ws,
    run=r,
    description="recommender ML execution",
)

# # 2. Read data
Example 9
  def test_log_metadata_successfully(self):
    ws1 = metadata.Workspace(
        backend_url_prefix="127.0.0.1:8080",
        name="ws_1",
        description="a workspace for testing",
        labels={"n1": "v1"})

    r = metadata.Run(
        workspace=ws1,
        name="first run",
        description="first run in ws_1",
    )

    e = metadata.Execution(
        name="test execution",
        workspace=ws1,
        run=r,
        description="an execution",
    )
    self.assertIsNotNone(e.id)

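    # The data set is logged as an input of the execution.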
    data_set = e.log_input(
        metadata.DataSet(
            description="an example data",
            name="mytable-dump",
            owner="*****@*****.**",
            uri="file://path/to/dataset",
            version="v1.0.0",
            query="SELECT * FROM mytable"))
    self.assertIsNotNone(data_set.id)

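    # The evaluation metrics and the model are logged as outputs of the same execution.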
    metrics = e.log_output(
        metadata.Metrics(
            name="MNIST-evaluation",
            description="validating the MNIST model to recognize handwritten digits",
            owner="*****@*****.**",
            uri="gcs://my-bucket/mnist-eval.csv",
            data_set_id="123",
            model_id="12345",
            metrics_type=metadata.Metrics.VALIDATION,
            values={"accuracy": 0.95},
            labels={"mylabel": "l1"}))
    self.assertIsNotNone(metrics.id)

    model = e.log_output(
        metadata.Model(
            name="MNIST",
            description="model to recognize handwritten digits",
            owner="*****@*****.**",
            uri="gcs://my-bucket/mnist",
            model_type="neural network",
            training_framework={
                "name": "tensorflow",
                "version": "v1.0"
            },
            hyperparameters={
                "learning_rate": 0.5,
                "layers": [10, 3, 1],
                "early_stop": True
            },
            version="v0.0.1",
            labels={"mylabel": "l1"}))
    self.assertIsNotNone(model.id)

    # Test listing artifacts in a workspace
    self.assertTrue(len(ws1.list()) > 0)
    self.assertTrue(len(ws1.list(metadata.Model.ARTIFACT_TYPE_NAME)) > 0)
    self.assertTrue(len(ws1.list(metadata.Metrics.ARTIFACT_TYPE_NAME)) > 0)
    self.assertTrue(len(ws1.list(metadata.DataSet.ARTIFACT_TYPE_NAME)) > 0)

    # Test lineage tracking.
    output_events = ws1.client.list_events2(model.id).events
    assert len(output_events) == 1
    execution_id = output_events[0].execution_id
    assert execution_id == e.id
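    # The execution has one event per logged artifact: the data set, the metrics and the model.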
    all_events = ws1.client.list_events(execution_id).events
    assert len(all_events) == 3
Example 10
# In[1]:

get_ipython().system('pip install kfmd --upgrade --user')
get_ipython().system('pip install pandas --upgrade --user')

from kfmd import metadata
import pandas
from datetime import datetime

# Create a workspace, run and execution

# In[2]:

ws1 = metadata.Workspace(
    # Connect to metadata-service in namespace kubeflow in the k8s cluster.
    backend_url_prefix="metadata-service.kubeflow.svc.cluster.local:8080",
    name="ws1",
    description="a workspace for testing",
    labels={"n1": "v1"})
r = metadata.Run(
    workspace=ws1,
    name="run-" + datetime.utcnow().isoformat("T"),
    description="a run in ws_1",
)
exec = metadata.Execution(
    name="execution" + datetime.utcnow().isoformat("T"),
    workspace=ws1,
    run=r,
    description="execution example",
)

# Log data set, model and its evaluation