Example 1
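  # These snippets assume: from kubeflow.metadata import metadata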
  def test_log_metadata_successfully_with_minimum_information(self):
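    # Store wraps the gRPC connection to the metadata service; Workspace, Run
    # and Execution group the artifacts logged below.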
    store = metadata.Store(grpc_host=GRPC_HOST, grpc_port=GRPC_PORT)
    ws1 = metadata.Workspace(store=store, name="ws_1")
    r = metadata.Run(workspace=ws1, name="first run")
    e = metadata.Execution(name="test execution", workspace=ws1, run=r)
    self.assertIsNotNone(e.id)

    data_set = e.log_input(
        metadata.DataSet(name="mytable-dump", uri="file://path/to/dataset"))
    self.assertIsNotNone(data_set.id)
    data_set_id = data_set.id
    # ID should not change after logging twice.
    e.log_input(data_set)
    self.assertEqual(data_set_id, data_set.id)

    metrics = e.log_output(
        metadata.Metrics(name="MNIST-evaluation",
                         uri="gcs://my-bucket/mnist-eval.csv"))
    self.assertIsNotNone(metrics.id)
    metrics_id = metrics.id
    # ID should not change after logging twice.
    e.log_output(metrics)
    self.assertEqual(metrics_id, metrics.id)

    model = e.log_output(
        metadata.Model(name="MNIST", uri="gcs://my-bucket/mnist"))
    self.assertIsNotNone(model.id)
    model_id = model.id
    # ID should not change after logging twice.
    e.log_output(model)
    self.assertEqual(model_id, model.id)
Example 2
  def save_checkpoint_metadata(self):
      # Log the checkpoint under checkpoint_root as a Model output artifact
      # of a 'train' execution.
      execution = metadata.Execution(
          'train', workspace=self.metadata_workspace
      )
      model_md = metadata.Model(
          name=self.model_name,
          uri=self.checkpoint_root,
          version='0'
      )
      execution.log_output(model_md)
Example 3
def log_model_info(ws, ws_run, model_uri):
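    # Create an execution in the given workspace/run and record the trained
    # model as its input artifact.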
    exec2 = metadata.Execution(
        name="execution" + datetime.utcnow().isoformat("T"),
        workspace=ws,
        run=ws_run,
        description="train action",
    )
    _ = exec2.log_input(
        metadata.Model(description="t2t model",
                       name="t2t-model",
                       owner="*****@*****.**",
                       uri=model_uri,
                       version="v1.0.0"))
Example 4
  def test_artifact_deduplication(self):
    store = metadata.Store(grpc_host=GRPC_HOST, grpc_port=GRPC_PORT)
    ws1 = metadata.Workspace(store=store, name="workspace_one")
    ws2 = metadata.Workspace(store=store, name="workspace_two")
    r = metadata.Run(workspace=ws1, name="first run")
    e = metadata.Execution(name="test execution", workspace=ws1, run=r)
    e2 = metadata.Execution(name="execution 2", workspace=ws1)
    e3 = metadata.Execution(name="execution 3", workspace=ws2)
    self.assertIsNotNone(e.id)
    self.assertIsNotNone(e2.id)

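    # Two Model objects with identical name, uri and version should resolve to
    # the same backend artifact id, even when logged from different executions.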
    model = metadata.Model(name="MNIST",
                           uri="gcs://my-bucket/mnist",
                           model_type="neural network",
                           version="v0.0.1")
    model2 = metadata.Model(name="MNIST",
                            uri="gcs://my-bucket/mnist",
                            model_type="neural network",
                            version="v0.0.1")
    e.log_output(model)
    self.assertIsNotNone(model.id)
    e2.log_output(model2)
    self.assertIsNotNone(model2.id)
    self.assertEqual(model.id, model2.id)
Example 5
    def log_model(execution, model_name, model_uri):
        """
        Log to Kubeflow artifacts the model
        :param execution:
        :param project_name:
        :param project_version:
        :param model_name:
        :param model_version:
        :param storage_bucket:
        :param file_name:
        :return:
        """

        model = metadata.Model(name=model_name, uri=model_uri, version="1.0.0")

        execution.log_output(model)
        return model
Example 6
  def test_log_metadata_successfully_with_minimum_information(self):
    ws1 = metadata.Workspace(backend_url_prefix="127.0.0.1:8080", name="ws_1")
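    # Here the Workspace is constructed from the metadata backend address
    # directly rather than from a metadata.Store object (compare Example 1).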

    r = metadata.Run(workspace=ws1, name="first run")

    e = metadata.Execution(name="test execution", workspace=ws1, run=r)
    self.assertIsNotNone(e.id)

    data_set = e.log_input(
        metadata.DataSet(name="mytable-dump", uri="file://path/to/dataset"))
    self.assertIsNotNone(data_set.id)

    metrics = e.log_output(
        metadata.Metrics(name="MNIST-evaluation",
            uri="gcs://my-bucket/mnist-eval.csv"))
    self.assertIsNotNone(metrics.id)

    model = e.log_output(
        metadata.Model(name="MNIST", uri="gcs://my-bucket/mnist"))
    self.assertIsNotNone(model.id)
Example 7
def log_model(model_name, uri, model_version, execution, labels):
    model = execution.log_output(
        metadata.Model(name=model_name,
                       description="Model to identify tacos or burritos",
                       owner="KaizenTeam",
                       uri=uri,
                       model_type="Sequential Model",
                       training_framework={
                           "name": "tensorflow",
                           "version": "v2.0"
                       },
                       hyperparameters={
                           "learning_rate": 0.5,
                           "layers": [10, 3, 1],
                           "early_stop": True
                       },
                       version=model_version,
                       labels=labels))

    print(model)
    print("\nModel id is {0.id} and version is {0.version}".format(model))
Example 8
def log_model_info(ws, ws_run, description, name, owner, model_uri, version,
                   hyperparameters, learning_rate, layers, early_stop, labels):
    execution = metadata.Execution(
        name="Execution" + datetime.utcnow().isoformat("T"),
        workspace=ws,
        run=ws_run,
        description="Model log exec.",
    )
    model_log = execution.log_input(
        metadata.Model(
            description=description,
            name=name,
            owner=owner,
            uri=model_uri,
            version=version,
            hyperparameters=hyperparameters,
            learning_rate=learning_rate,
            layers=layers,
            early_stop=early_stop,
            labels=labels,
        ))
Example 9
def save_model_metadata(execution, batch_size, epochs, export_path):
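    # Assumed imports for this snippet: csv, uuid4 from uuid, and file_io from
    # tensorflow.python.lib.io (used to write the metadata CSV to GCS).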

    training_file = 'gs://dlaas-model/metadata/model.csv'

    with file_io.FileIO(training_file, 'w') as f:
        metric_writer = csv.writer(f)
        metric_writer.writerow(['model_framework', 'tensorflow', 'v2.0'])
        metric_writer.writerow(['learning_rate', 0.5])
        metric_writer.writerow(['epoch', epochs])
        metric_writer.writerow(['batch_size', batch_size])
        metric_writer.writerow(['layers', "28, 28, 1"])

    # Save model;
    model_version = "model_version_" + str(uuid4())
    model = execution.log_output(
        metadata.Model(name="MNIST",
                       description="model to recognize images",
                       owner="*****@*****.**",
                       uri=export_path,
                       model_type="CNN",
                       training_framework={
                           "name": "tensorflow",
                           "version": "v2.0"
                       },
                       hyperparameters={
                           "learning_rate": 0.5,
                           "layers": [28, 28, 1],
                           "epochs": str(epochs),
                           "batch-size": str(batch_size),
                           "early_stop": True
                       },
                       version=model_version,
                       labels={"tag": "train"}))
    print(model)
    print("\nModel id is {0.id} and version is {0.version}".format(model))
    return model
Example 10
  def test_log_metadata_successfully(self):
    store = metadata.Store(grpc_host=GRPC_HOST, grpc_port=GRPC_PORT)
    ws1 = metadata.Workspace(store=store,
                             name="test_log_metadata_successfully_ws",
                             description="a workspace for testing",
                             labels={"n1": "v1"})

    r = metadata.Run(
        workspace=ws1,
        name="first run",
        description="first run in ws_1",
    )

    trainer = metadata.Execution(
        name="test execution",
        workspace=ws1,
        run=r,
        description="an execution",
    )
    self.assertIsNotNone(trainer.id)

    data_set = trainer.log_input(
        metadata.DataSet(description="an example data",
                         name="mytable-dump",
                         owner="*****@*****.**",
                         uri="file://path/to/dataset",
                         version=str(uuid.uuid4()),
                         query="SELECT * FROM mytable"))
    self.assertIsNotNone(data_set.id)
    self.assertIsNotNone(repr(data_set))

    metrics = trainer.log_output(
        metadata.Metrics(
            name="MNIST-evaluation",
            description=
            "validating the MNIST model to recognize handwritten digits",
            owner="*****@*****.**",
            uri="gcs://my-bucket/mnist-eval.csv",
            data_set_id="123",
            model_id="12345",
            metrics_type=metadata.Metrics.VALIDATION,
            values={"accuracy": 0.95},
            labels={"mylabel": "l1"}))
    self.assertIsNotNone(metrics.id)
    self.assertIsNotNone(repr(metrics))

    model_version = str(uuid.uuid4())
    model = trainer.log_output(
        metadata.Model(name="MNIST",
                       description="model to recognize handwritten digits",
                       owner="*****@*****.**",
                       uri="gcs://my-bucket/mnist",
                       model_type="neural network",
                       training_framework={
                           "name": "tensorflow",
                           "version": "v1.0"
                       },
                       hyperparameters={
                           "learning_rate": 0.5,
                           "layers": [10, 3, 1],
                           "early_stop": True
                       },
                       version=model_version,
                       labels={"mylabel": "l1"}))
    self.assertIsNotNone(model.id)
    self.assertIsNotNone(repr(model))

    serving_application = metadata.Execution(
        name="serving model",
        workspace=ws1,
        description="an execution to represent model serving component",
    )
    self.assertIsNotNone(serving_application.id)
    # Use model name, version, uri to uniquely identify existing model.
    served_model = metadata.Model(
        name="MNIST",
        uri="gcs://my-bucket/mnist",
        version=model_version,
    )
    serving_application.log_input(served_model)

    # Test listing artifacts in a workspace
    self.assertTrue(len(ws1.list()) > 0)
    self.assertTrue(len(ws1.list(metadata.Model.ARTIFACT_TYPE_NAME)) > 0)
    self.assertTrue(len(ws1.list(metadata.Metrics.ARTIFACT_TYPE_NAME)) > 0)
    self.assertTrue(len(ws1.list(metadata.DataSet.ARTIFACT_TYPE_NAME)) > 0)

    # Test lineage tracking.
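    # The model should have two events: an OUTPUT event from the trainer and an
    # INPUT event from the serving execution.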
    model_events = ws1.store.get_events_by_artifact_ids([model.id])
    self.assertEqual(len(model_events), 2)
    execution_ids = set(e.execution_id for e in model_events)
    assert execution_ids == set([serving_application.id, trainer.id])
    trainer_events = ws1.store.get_events_by_execution_ids([trainer.id])
    artifact_ids = set(e.artifact_id for e in trainer_events)
    assert artifact_ids == set([model.id, metrics.id, data_set.id])
Example 11
  def test_log_metadata_successfully(self):
    ws1 = metadata.Workspace(
        backend_url_prefix="127.0.0.1:8080",
        name="ws_1",
        description="a workspace for testing",
        labels={"n1": "v1"})

    r = metadata.Run(
        workspace=ws1,
        name="first run",
        description="first run in ws_1",
    )

    e = metadata.Execution(
        name="test execution",
        workspace=ws1,
        run=r,
        description="an execution",
    )
    self.assertIsNotNone(e.id)

    data_set = e.log_input(
        metadata.DataSet(
            description="an example data",
            name="mytable-dump",
            owner="*****@*****.**",
            uri="file://path/to/dataset",
            version="v1.0.0",
            query="SELECT * FROM mytable"))
    self.assertIsNotNone(data_set.id)

    metrics = e.log_output(
        metadata.Metrics(
            name="MNIST-evaluation",
            description="validating the MNIST model to recognize handwritten digits",
            owner="*****@*****.**",
            uri="gcs://my-bucket/mnist-eval.csv",
            data_set_id="123",
            model_id="12345",
            metrics_type=metadata.Metrics.VALIDATION,
            values={"accuracy": 0.95},
            labels={"mylabel": "l1"}))
    self.assertIsNotNone(metrics.id)

    model = e.log_output(
        metadata.Model(
            name="MNIST",
            description="model to recognize handwritten digits",
            owner="*****@*****.**",
            uri="gcs://my-bucket/mnist",
            model_type="neural network",
            training_framework={
                "name": "tensorflow",
                "version": "v1.0"
            },
            hyperparameters={
                "learning_rate": 0.5,
                "layers": [10, 3, 1],
                "early_stop": True
            },
            version="v0.0.1",
            labels={"mylabel": "l1"}))
    self.assertIsNotNone(model.id)

    # Test listing artifacts in a workspace
    self.assertTrue(len(ws1.list()) > 0)
    self.assertTrue(len(ws1.list(metadata.Model.ARTIFACT_TYPE_NAME)) > 0)
    self.assertTrue(len(ws1.list(metadata.Metrics.ARTIFACT_TYPE_NAME)) > 0)
    self.assertTrue(len(ws1.list(metadata.DataSet.ARTIFACT_TYPE_NAME)) > 0)

    # Test lineage tracking.
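    # Events are fetched per artifact (list_events2) and per execution
    # (list_events): the model has one producing execution, while the execution
    # touched three artifacts (dataset input, metrics and model outputs).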
    output_events = ws1.client.list_events2(model.id).events
    assert len(output_events) == 1
    execution_id = output_events[0].execution_id
    assert execution_id == e.id
    all_events = ws1.client.list_events(execution_id).events
    assert len(all_events) == 3