Python Store 예제들, kubeflow.metadata.metadata.Store Python 예제들

예제 #1

0

파일 보기

  def test_log_metadata_successfully_with_minimum_information(self):
    """Log a data set, metrics and a model using only name and uri.

    Each artifact is logged twice through the same execution. The test
    asserts that an id is present after the first call and that the id is
    the same after the second call.
    """
    workspace = metadata.Workspace(
        store=metadata.Store(grpc_host=GRPC_HOST, grpc_port=GRPC_PORT),
        name="ws_1")
    first_run = metadata.Run(workspace=workspace, name="first run")
    execution = metadata.Execution(
        name="test execution", workspace=workspace, run=first_run)
    self.assertIsNotNone(execution.id)

    # Input data set: log once, remember the id, log again, compare.
    logged_data_set = execution.log_input(
        metadata.DataSet(name="mytable-dump", uri="file://path/to/dataset"))
    self.assertIsNotNone(logged_data_set.id)
    first_data_set_id = logged_data_set.id
    execution.log_input(logged_data_set)
    self.assertEqual(first_data_set_id, logged_data_set.id)

    # Output metrics: same pattern as above.
    logged_metrics = execution.log_output(
        metadata.Metrics(name="MNIST-evaluation",
                         uri="gcs://my-bucket/mnist-eval.csv"))
    self.assertIsNotNone(logged_metrics.id)
    first_metrics_id = logged_metrics.id
    execution.log_output(logged_metrics)
    self.assertEqual(first_metrics_id, logged_metrics.id)

    # Output model: same pattern as above.
    logged_model = execution.log_output(
        metadata.Model(name="MNIST", uri="gcs://my-bucket/mnist"))
    self.assertIsNotNone(logged_model.id)
    first_model_id = logged_model.id
    execution.log_output(logged_model)
    self.assertEqual(first_model_id, logged_model.id)

예제 #2

0

파일 보기

파일: ModelUtil.py 프로젝트: sbakiu/ml-kf-pipeline

    def create_execution():
        """
        Build a workspace, a run and an execution for artifacts storage.

        The store is opened with ModelUtil.METADATA_STORE_HOST and
        ModelUtil.METADATA_STORE_PORT. The run and execution names each get
        their own UTC timestamp suffix, and the id of the new execution is
        printed before it is returned.

        :return: the newly created metadata.Execution
        """
        # Connect to metadata service in namespace kubeflow in k8s cluster.
        store = metadata.Store(grpc_host=ModelUtil.METADATA_STORE_HOST,
                               grpc_port=ModelUtil.METADATA_STORE_PORT)

        ws = metadata.Workspace(store=store,
                                name="workspace_1",
                                description="a workspace for testing",
                                labels={"n1": "v1"})

        run_name = "run-" + datetime.utcnow().isoformat("T")
        current_run = metadata.Run(workspace=ws,
                                   name=run_name,
                                   description="a run in ws_1")

        # The timestamp is taken again here, so it can differ from the run's.
        execution_name = "execution" + datetime.utcnow().isoformat("T")
        new_execution = metadata.Execution(name=execution_name,
                                           workspace=ws,
                                           run=current_run,
                                           description="execution example")
        print("An execution was created with id %s" % new_execution.id)
        return new_execution

예제 #3

0

파일 보기

 def _get_or_create_workspace(self, ws_name, metadata_store_host,
                              metadata_store_port):
     """Return a metadata.Workspace named ``ws_name``.

     A metadata.Store is constructed from the given gRPC host and port on
     every call and passed to the workspace. The description is
     "Workspace <ws_name>" and the labels are fixed to {"n1": "v1"}.
     """
     store = metadata.Store(grpc_host=metadata_store_host,
                            grpc_port=metadata_store_port)
     workspace_description = "Workspace %s" % ws_name
     return metadata.Workspace(store=store,
                               name=ws_name,
                               description=workspace_description,
                               labels={"n1": "v1"})

예제 #4

0

파일 보기

파일: 00_metadata_logger.py 프로젝트: velascoluis/cnn-sentence-classifier-dev

def get_or_create_workspace(ws_name):
    """Return a metadata.Workspace named ``ws_name``.

    The workspace is backed by a metadata.Store built from the module-level
    METADATA_STORE_HOST and METADATA_STORE_PORT, with a fixed description
    and the labels {"n1": "v1"}.
    """
    store = metadata.Store(grpc_host=METADATA_STORE_HOST,
                           grpc_port=METADATA_STORE_PORT)
    workspace = metadata.Workspace(
        store=store,
        name=ws_name,
        description="CNN Text classifier metadata workspace",
        labels={"n1": "v1"},
    )
    return workspace

예제 #5

0

파일 보기

 def test_init_store_with_ssl_config(self):
   """Construct metadata.Store with SSL credentials, unpatched then patched.

   The first construction is expected to raise TypeError (see the TODO).
   The second runs with MetadataStore replaced by CheckMetadataStore and is
   made outside assertRaises, so any exception there fails the test.
   """
   # TODO: There is a type error in the underlying ml_metadata library:
   #   TypeError: expected certificate to be bytes, got <class 'str'>
   # Fix this unit test once this bug is fixed.
   with self.assertRaises(TypeError):
     metadata.Store(grpc_host=GRPC_HOST,
                    grpc_port=GRPC_PORT,
                    root_certificates=b"cert",
                    private_key=b"private_key",
                    certificate_chain=b"chain")
   # The patched object is never referenced inside the block, so it is not
   # bound to a name.
   with patch('ml_metadata.metadata_store.metadata_store.MetadataStore',
              new=CheckMetadataStore):
     metadata.Store(grpc_host=GRPC_HOST,
                    grpc_port=GRPC_PORT,
                    root_certificates=b"cert",
                    private_key=b"private_key",
                    certificate_chain=b"chain")

예제 #6

0

파일 보기

파일: registerartifacts.py 프로젝트: yilun-msft/kubemlops

def get_ws(workspace, description):  # noqa: E501
    """Return a metadata.Workspace with the given name and description.

    The store address is hard-coded in this function and the workspace is
    labelled {"Version": "v1"}.
    """
    # Default DNS of the Kubeflow Metadata gRPC service.
    grpc_host = "metadata-grpc-service.kubeflow"
    grpc_port = 8080
    store = metadata.Store(grpc_host=grpc_host, grpc_port=grpc_port)
    return metadata.Workspace(store=store,
                              name=workspace,
                              description=description,
                              labels={"Version": "v1"})

예제 #7

0

파일 보기

파일: test_metadata.py 프로젝트: swiftdiaries/metadata

    def test_creating_workspace_with_existing_name(self):
        """Check what happens when a workspace name is used more than once.

        A second Workspace built with the same name must report the same
        context_id as the first. With reuse_workspace_if_exists=False the
        constructor must raise ValueError whose message matches
        "exists with id".
        """
        store = metadata.Store(grpc_host=GRPC_HOST, grpc_port=GRPC_PORT)
        # Random suffix gives each test run its own workspace name.
        shared_name = "non_unique_ws_" + str(uuid.uuid4())

        original_ws = metadata.Workspace(
            store=store,
            name=shared_name,
            description="a workspace for testing",
            labels={"n1": "v1"},
        )
        reused_ws = metadata.Workspace(store, shared_name)
        assert original_ws.context_id == reused_ws.context_id

        with pytest.raises(ValueError, match=r".*exists with id.*"):
            metadata.Workspace(store,
                               shared_name,
                               reuse_workspace_if_exists=False)

예제 #8

0

파일 보기

def create_metadata_execution():
    """Create a workspace, a run and an execution for logging MNIST training.

    The store is opened with the module-level METADATA_STORE_HOST and
    METADATA_STORE_PORT. The run and execution names each get their own UTC
    timestamp suffix. The id of the new execution is printed.

    Returns:
        The newly created metadata.Execution.
    """
    # Create a Metadata Workspace and an Execution to log details.
    # `metadata` is only read here, so no `global` declaration is needed.
    mnist_train_workspace = metadata.Workspace(
        # Connect to metadata service in namespace kubeflow in k8s cluster.
        store=metadata.Store(grpc_host=METADATA_STORE_HOST,
                             grpc_port=METADATA_STORE_PORT),
        name="mnist train workspace",
        description="a workspace for training mnist",
        labels={"n1": "v1"})
    run1 = metadata.Run(workspace=mnist_train_workspace,
                        name="run-" + datetime.utcnow().isoformat("T"),
                        description="a run in ws_1")
    # Named `execution` rather than `exec` so the builtin is not shadowed.
    execution = metadata.Execution(name="execution" +
                                   datetime.utcnow().isoformat("T"),
                                   workspace=mnist_train_workspace,
                                   run=run1,
                                   description="execution example")
    print("An execution was created with id %s" % execution.id)
    return execution

예제 #9

0

파일 보기

 def test_log_invalid_artifacts_should_fail(self):
   """Expect ValueError when logging artifacts that carry reserved properties.

   One artifact sets metadata._WORKSPACE_PROPERTY_NAME as a custom property
   and is passed to log_input. The other sets metadata._RUN_PROPERTY_NAME
   and is passed to log_output. Both calls must raise ValueError.
   """
   workspace = metadata.Workspace(
       store=metadata.Store(grpc_host=GRPC_HOST, grpc_port=GRPC_PORT),
       name="ws_1",
       description="a workspace for testing",
       labels={"n1": "v1"})
   execution = metadata.Execution(name="test execution", workspace=workspace)

   workspace_props = {
       metadata._WORKSPACE_PROPERTY_NAME: mlpb.Value(string_value="ws1"),
   }
   input_artifact = ArtifactFixture(
       mlpb.Artifact(uri="gs://uri", custom_properties=workspace_props))
   with self.assertRaises(ValueError):
     execution.log_input(input_artifact)

   run_props = {
       metadata._RUN_PROPERTY_NAME: mlpb.Value(string_value="run1"),
   }
   output_artifact = ArtifactFixture(
       mlpb.Artifact(uri="gs://uri", custom_properties=run_props))
   with self.assertRaises(ValueError):
     execution.log_output(output_artifact)

예제 #10

0

파일 보기

  def test_artifact_deduplication(self):
    """Two equal Model objects logged by two executions must share one id.

    Both executions that log the model belong to the first workspace. A
    third execution is created in a second workspace but logs nothing.
    """
    store = metadata.Store(grpc_host=GRPC_HOST, grpc_port=GRPC_PORT)
    workspace_one = metadata.Workspace(store=store, name="workspace_one")
    workspace_two = metadata.Workspace(store=store, name="workspace_two")
    first_run = metadata.Run(workspace=workspace_one, name="first run")

    run_execution = metadata.Execution(name="test execution",
                                       workspace=workspace_one,
                                       run=first_run)
    second_execution = metadata.Execution(name="execution 2",
                                          workspace=workspace_one)
    # Created in the second workspace; not referenced again in this test.
    other_ws_execution = metadata.Execution(name="execution 3",
                                            workspace=workspace_two)
    self.assertIsNotNone(run_execution.id)
    self.assertIsNotNone(second_execution.id)

    # Same constructor arguments for both models.
    model_fields = {
        "name": "MNIST",
        "uri": "gcs://my-bucket/mnist",
        "model_type": "neural network",
        "version": "v0.0.1",
    }
    first_model = metadata.Model(**model_fields)
    duplicate_model = metadata.Model(**model_fields)

    run_execution.log_output(first_model)
    self.assertIsNotNone(first_model.id)
    second_execution.log_output(duplicate_model)
    self.assertIsNotNone(duplicate_model.id)
    self.assertEqual(first_model.id, duplicate_model.id)

예제 #11

0

파일 보기

 def create_metadata_store(self):
     """Create a metadata.Store, keep it on ``self.metadata_store``, return it.

     Host and port come from the METADATA_STORE_HOST and METADATA_STORE_PORT
     environment variables. They fall back to
     'metadata-grpc-service.kubeflow' and '8080', and the port is converted
     with int().
     """
     host = os.getenv('METADATA_STORE_HOST', 'metadata-grpc-service.kubeflow')
     port = int(os.getenv('METADATA_STORE_PORT', '8080'))
     self.metadata_store = metadata.Store(grpc_host=host, grpc_port=port)
     return self.metadata_store

예제 #12

0

파일 보기

  def test_log_metadata_successfully(self):
    """Log fully populated artifacts, then check listing and lineage.

    A "trainer" execution logs a data set as input and metrics plus a model
    as outputs. A second "serving" execution logs a model with the same
    name, uri and version as input. The test then checks that the workspace
    lists artifacts of every type and that the events recorded for the
    model and for the trainer reference the expected ids.
    """
    workspace = metadata.Workspace(
        store=metadata.Store(grpc_host=GRPC_HOST, grpc_port=GRPC_PORT),
        name="test_log_metadata_successfully_ws",
        description="a workspace for testing",
        labels={"n1": "v1"})
    first_run = metadata.Run(workspace=workspace,
                             name="first run",
                             description="first run in ws_1")
    trainer = metadata.Execution(name="test execution",
                                 workspace=workspace,
                                 run=first_run,
                                 description="an execution")
    self.assertIsNotNone(trainer.id)

    # Input data set.
    data_set = trainer.log_input(
        metadata.DataSet(
            description="an example data",
            name="mytable-dump",
            owner="*****@*****.**",
            uri="file://path/to/dataset",
            version=str(uuid.uuid4()),
            query="SELECT * FROM mytable",
        ))
    self.assertIsNotNone(data_set.id)
    self.assertIsNotNone(repr(data_set))

    # Output metrics.
    metrics = trainer.log_output(
        metadata.Metrics(
            name="MNIST-evaluation",
            description=
            "validating the MNIST model to recognize handwritten digits",
            owner="*****@*****.**",
            uri="gcs://my-bucket/mnist-eval.csv",
            data_set_id="123",
            model_id="12345",
            metrics_type=metadata.Metrics.VALIDATION,
            values={"accuracy": 0.95},
            labels={"mylabel": "l1"},
        ))
    self.assertIsNotNone(metrics.id)
    self.assertIsNotNone(repr(metrics))

    # Output model; its version is reused below to refer to the same model.
    model_version = str(uuid.uuid4())
    model = trainer.log_output(
        metadata.Model(
            name="MNIST",
            description="model to recognize handwritten digits",
            owner="*****@*****.**",
            uri="gcs://my-bucket/mnist",
            model_type="neural network",
            training_framework={"name": "tensorflow", "version": "v1.0"},
            hyperparameters={
                "learning_rate": 0.5,
                "layers": [10, 3, 1],
                "early_stop": True,
            },
            version=model_version,
            labels={"mylabel": "l1"},
        ))
    self.assertIsNotNone(model.id)
    self.assertIsNotNone(repr(model))

    serving_application = metadata.Execution(
        name="serving model",
        workspace=workspace,
        description="an execution to represent model serving component")
    self.assertIsNotNone(serving_application.id)
    # Use model name, version, uri to uniquely identify existing model.
    served_model = metadata.Model(name="MNIST",
                                  uri="gcs://my-bucket/mnist",
                                  version=model_version)
    serving_application.log_input(served_model)

    # Test listing artifacts in a workspace: first unfiltered, then by type.
    self.assertTrue(len(workspace.list()) > 0)
    for artifact_type_name in (metadata.Model.ARTIFACT_TYPE_NAME,
                               metadata.Metrics.ARTIFACT_TYPE_NAME,
                               metadata.DataSet.ARTIFACT_TYPE_NAME):
      self.assertTrue(len(workspace.list(artifact_type_name)) > 0)

    # Test lineage tracking.
    model_events = workspace.store.get_events_by_artifact_ids([model.id])
    self.assertEqual(len(model_events), 2)
    execution_ids = {event.execution_id for event in model_events}
    assert execution_ids == {serving_application.id, trainer.id}

    trainer_events = workspace.store.get_events_by_execution_ids([trainer.id])
    artifact_ids = {event.artifact_id for event in trainer_events}
    assert artifact_ids == {model.id, metrics.id, data_set.id}