def create_execution():
    """Build a workspace, a run and an execution for artifact storage.

    A ``metadata.Store`` is created from the host and port configured on
    ``ModelUtil``; a workspace named ``workspace_1``, a run and an execution
    (both named with the current UTC timestamp) are then created on top of
    it. The id of the execution is printed.

    :return: the ``metadata.Execution`` that was created
    """
    # Connect to metadata service in namespace kubeflow in k8s cluster.
    store = metadata.Store(
        grpc_host=ModelUtil.METADATA_STORE_HOST,
        grpc_port=ModelUtil.METADATA_STORE_PORT,
    )
    ws = metadata.Workspace(
        store=store,
        name="workspace_1",
        description="a workspace for testing",
        labels={"n1": "v1"},
    )

    # Run and execution names each take their own timestamp, so the two
    # suffixes may differ slightly.
    run_name = "run-" + datetime.utcnow().isoformat("T")
    current_run = metadata.Run(
        workspace=ws,
        name=run_name,
        description="a run in ws_1",
    )

    execution_name = "execution" + datetime.utcnow().isoformat("T")
    created = metadata.Execution(
        name=execution_name,
        workspace=ws,
        run=current_run,
        description="execution example",
    )

    print("An execution was created with id %s" % created.id)
    return created
def test_log_metadata_successfully_with_minimum_information(self):
    """Log a data set, metrics and a model with only name and uri set.

    Every artifact must have an id after it is first logged, and that id
    must be unchanged after the same artifact is logged a second time.
    """
    grpc_store = metadata.Store(grpc_host=GRPC_HOST, grpc_port=GRPC_PORT)
    workspace = metadata.Workspace(store=grpc_store, name="ws_1")
    run = metadata.Run(workspace=workspace, name="first run")
    execution = metadata.Execution(
        name="test execution",
        workspace=workspace,
        run=run,
    )
    self.assertIsNotNone(execution.id)

    # --- input data set ---
    data_set_spec = metadata.DataSet(
        name="mytable-dump",
        uri="file://path/to/dataset",
    )
    data_set = execution.log_input(data_set_spec)
    self.assertIsNotNone(data_set.id)
    first_data_set_id = data_set.id
    # ID should not change after logging twice.
    execution.log_input(data_set)
    self.assertEqual(first_data_set_id, data_set.id)

    # --- output metrics ---
    metrics_spec = metadata.Metrics(
        name="MNIST-evaluation",
        uri="gcs://my-bucket/mnist-eval.csv",
    )
    metrics = execution.log_output(metrics_spec)
    self.assertIsNotNone(metrics.id)
    first_metrics_id = metrics.id
    # ID should not change after logging twice.
    execution.log_output(metrics)
    self.assertEqual(first_metrics_id, metrics.id)

    # --- output model ---
    model_spec = metadata.Model(name="MNIST", uri="gcs://my-bucket/mnist")
    model = execution.log_output(model_spec)
    self.assertIsNotNone(model.id)
    first_model_id = model.id
    # ID should not change after logging twice.
    execution.log_output(model)
    self.assertEqual(first_model_id, model.id)
def create_metadata_execution():
    """Create the MNIST training workspace, a run and an execution.

    A ``metadata.Store`` is built from ``METADATA_STORE_HOST`` and
    ``METADATA_STORE_PORT``; the workspace ``"mnist train workspace"``, a run
    and an execution (both named with the current UTC timestamp) are created
    on top of it, and the execution id is printed.

    Returns:
        The ``metadata.Execution`` that was created, to log details against.
    """
    # NOTE: no ``global metadata`` declaration is needed here; the module is
    # only read, never rebound, inside this function.

    # Create Metadata Workspace and a Exec to log details
    mnist_train_workspace = metadata.Workspace(
        # Connect to metadata service in namespace kubeflow in k8s cluster.
        store=metadata.Store(
            grpc_host=METADATA_STORE_HOST,
            grpc_port=METADATA_STORE_PORT,
        ),
        name="mnist train workspace",
        description="a workspace for training mnist",
        labels={"n1": "v1"},
    )

    run1 = metadata.Run(
        workspace=mnist_train_workspace,
        name="run-" + datetime.utcnow().isoformat("T"),
        description="a run in ws_1",
    )

    # Named ``execution`` rather than ``exec`` so the builtin is not shadowed.
    execution = metadata.Execution(
        name="execution" + datetime.utcnow().isoformat("T"),
        workspace=mnist_train_workspace,
        run=run1,
        description="execution example",
    )

    print("An execution was created with id %s" % execution.id)
    return execution
def test_log_metadata_successfully_with_minimum_information(self):
    """Log a data set, metrics and a model with only name and uri set.

    The workspace is created from a ``backend_url_prefix``; the test only
    checks that the execution and each logged artifact end up with an id.
    """
    workspace = metadata.Workspace(
        backend_url_prefix="127.0.0.1:8080",
        name="ws_1",
    )
    run = metadata.Run(workspace=workspace, name="first run")
    execution = metadata.Execution(
        name="test execution",
        workspace=workspace,
        run=run,
    )
    self.assertIsNotNone(execution.id)

    # Input artifact.
    data_set_spec = metadata.DataSet(
        name="mytable-dump",
        uri="file://path/to/dataset",
    )
    data_set = execution.log_input(data_set_spec)
    self.assertIsNotNone(data_set.id)

    # Output artifacts.
    metrics_spec = metadata.Metrics(
        name="MNIST-evaluation",
        uri="gcs://my-bucket/mnist-eval.csv",
    )
    metrics = execution.log_output(metrics_spec)
    self.assertIsNotNone(metrics.id)

    model_spec = metadata.Model(name="MNIST", uri="gcs://my-bucket/mnist")
    model = execution.log_output(model_spec)
    self.assertIsNotNone(model.id)
def test_artifact_deduplication(self):
    """Two identical models logged by different executions share one id.

    Two ``metadata.Model`` objects with the same name, uri, model type and
    version are logged as outputs of two executions in the same workspace;
    the test asserts that both end up with the same artifact id.
    """
    grpc_store = metadata.Store(grpc_host=GRPC_HOST, grpc_port=GRPC_PORT)
    workspace_one = metadata.Workspace(store=grpc_store, name="workspace_one")
    workspace_two = metadata.Workspace(store=grpc_store, name="workspace_two")
    run = metadata.Run(workspace=workspace_one, name="first run")

    first_execution = metadata.Execution(
        name="test execution",
        workspace=workspace_one,
        run=run,
    )
    second_execution = metadata.Execution(
        name="execution 2",
        workspace=workspace_one,
    )
    # Created but not otherwise used by this test; kept because constructing
    # it is part of the original scenario.
    third_execution = metadata.Execution(  # noqa: F841
        name="execution 3",
        workspace=workspace_two,
    )
    self.assertIsNotNone(first_execution.id)
    self.assertIsNotNone(second_execution.id)

    first_model = metadata.Model(
        name="MNIST",
        uri="gcs://my-bucket/mnist",
        model_type="neural network",
        version="v0.0.1",
    )
    second_model = metadata.Model(
        name="MNIST",
        uri="gcs://my-bucket/mnist",
        model_type="neural network",
        version="v0.0.1",
    )

    first_execution.log_output(first_model)
    self.assertIsNotNone(first_model.id)
    second_execution.log_output(second_model)
    self.assertIsNotNone(second_model.id)

    self.assertEqual(first_model.id, second_model.id)
default='train') # noqa: E501 parser.add_argument('-f', '--dataset', help='cleaned data listing') parser.add_argument('-ri', '--run_id', help='pieline run id') args = parser.parse_args() args.model = 'model/' + args.model model_path = str( Path(args.base_path).resolve(strict=False).joinpath( args.model).resolve(strict=False)) data_path = Path(args.base_path).joinpath(args.data).resolve(strict=False) dataset = Path(args.base_path).joinpath(args.dataset) labels = {'run_id': args.run_id} ws = get_ws("Kubemlops", "MLops for Kubeflow") run = metadata.Run(workspace=ws, name="run-" + datetime.utcnow().isoformat("T"), description="Run for training TacosBurritos model") execution = metadata.Execution( name="execution" + datetime.utcnow().isoformat("T"), workspace=ws, run=run, description="tacos-burritos", ) print("An execution was created with id %s" % execution.id) info('Log model artifact') model_version = "model_version_" + str(uuid4()) log_model(args.model_name, model_path, model_version, execution, json.dumps(labels))
def get_or_create_workspace_run(md_workspace, run_name):
    """Return a ``metadata.Run`` called ``run_name`` inside ``md_workspace``.

    This function only constructs the ``metadata.Run``; any reuse of an
    existing run with the same name is up to ``metadata.Run`` itself.

    :param md_workspace: workspace the run belongs to
    :param run_name: name of the run, also embedded in its description
    :return: the constructed ``metadata.Run``
    """
    run_description = "Metadata run for workflow %s" % run_name
    workspace_run = metadata.Run(
        workspace=md_workspace,
        name=run_name,
        description=run_description,
    )
    return workspace_run
def _get_or_create_workspace_run(self, md_workspace, run_name):
    """Return a ``metadata.Run`` called ``run_name`` inside ``md_workspace``.

    The run description is ``"Experiment <run_name>"``. This method only
    constructs the ``metadata.Run``; it reads no instance state.
    """
    experiment_description = "Experiment %s" % run_name
    workspace_run = metadata.Run(
        workspace=md_workspace,
        name=run_name,
        description=experiment_description,
    )
    return workspace_run
def test_log_metadata_successfully(self):
    """Log fully populated artifacts, then verify listing and lineage.

    A "trainer" execution logs a data set as input and metrics plus a model
    as outputs. A second "serving" execution then logs a model with the same
    name, uri and version as its input. The test checks that every artifact
    receives an id and a repr, that the workspace lists each artifact type,
    and that the stored events link the model to both executions and the
    trainer to all three artifacts.
    """
    store = metadata.Store(grpc_host=GRPC_HOST, grpc_port=GRPC_PORT)
    ws1 = metadata.Workspace(
        store=store,
        name="test_log_metadata_successfully_ws",
        description="a workspace for testing",
        labels={"n1": "v1"},
    )

    r = metadata.Run(
        workspace=ws1,
        name="first run",
        description="first run in ws_1",
    )

    trainer = metadata.Execution(
        name="test execution",
        workspace=ws1,
        run=r,
        description="an execution",
    )
    self.assertIsNotNone(trainer.id)

    data_set = trainer.log_input(
        metadata.DataSet(
            description="an example data",
            name="mytable-dump",
            owner="*****@*****.**",
            uri="file://path/to/dataset",
            # A fresh version per test run.
            version=str(uuid.uuid4()),
            query="SELECT * FROM mytable",
        )
    )
    self.assertIsNotNone(data_set.id)
    self.assertIsNotNone(repr(data_set))

    metrics = trainer.log_output(
        metadata.Metrics(
            name="MNIST-evaluation",
            description=(
                "validating the MNIST model to recognize handwritten digits"
            ),
            owner="*****@*****.**",
            uri="gcs://my-bucket/mnist-eval.csv",
            data_set_id="123",
            model_id="12345",
            metrics_type=metadata.Metrics.VALIDATION,
            values={"accuracy": 0.95},
            labels={"mylabel": "l1"},
        )
    )
    self.assertIsNotNone(metrics.id)
    self.assertIsNotNone(repr(metrics))

    model_version = str(uuid.uuid4())
    model = trainer.log_output(
        metadata.Model(
            name="MNIST",
            description="model to recognize handwritten digits",
            owner="*****@*****.**",
            uri="gcs://my-bucket/mnist",
            model_type="neural network",
            training_framework={
                "name": "tensorflow",
                "version": "v1.0",
            },
            hyperparameters={
                "learning_rate": 0.5,
                "layers": [10, 3, 1],
                "early_stop": True,
            },
            version=model_version,
            labels={"mylabel": "l1"},
        )
    )
    self.assertIsNotNone(model.id)
    self.assertIsNotNone(repr(model))

    serving_application = metadata.Execution(
        name="serving model",
        workspace=ws1,
        description="an execution to represent model serving component",
    )
    self.assertIsNotNone(serving_application.id)

    # Use model name, version, uri to uniquely identify existing model.
    served_model = metadata.Model(
        name="MNIST",
        uri="gcs://my-bucket/mnist",
        version=model_version,
    )
    serving_application.log_input(served_model)

    # Test listing artifacts in a workspace
    self.assertGreater(len(ws1.list()), 0)
    self.assertGreater(len(ws1.list(metadata.Model.ARTIFACT_TYPE_NAME)), 0)
    self.assertGreater(len(ws1.list(metadata.Metrics.ARTIFACT_TYPE_NAME)), 0)
    self.assertGreater(len(ws1.list(metadata.DataSet.ARTIFACT_TYPE_NAME)), 0)

    # Test lineage tracking.
    # unittest assertions are used instead of bare ``assert`` so the checks
    # still run under ``python -O`` and report both sides on failure.
    model_events = ws1.store.get_events_by_artifact_ids([model.id])
    self.assertEqual(len(model_events), 2)
    execution_ids = {event.execution_id for event in model_events}
    self.assertEqual(execution_ids, {serving_application.id, trainer.id})

    trainer_events = ws1.store.get_events_by_execution_ids([trainer.id])
    artifact_ids = {event.artifact_id for event in trainer_events}
    self.assertEqual(artifact_ids, {model.id, metrics.id, data_set.id})
def test_log_metadata_successfully(self):
    """Log fully populated artifacts, then verify listing and lineage.

    One execution logs a data set as input and metrics plus a model as
    outputs. The test checks that every artifact receives an id, that the
    workspace lists each artifact type, and that the events returned by the
    workspace client tie the model to that execution and the execution to
    three events in total.
    """
    ws1 = metadata.Workspace(
        backend_url_prefix="127.0.0.1:8080",
        name="ws_1",
        description="a workspace for testing",
        labels={"n1": "v1"},
    )

    r = metadata.Run(
        workspace=ws1,
        name="first run",
        description="first run in ws_1",
    )

    e = metadata.Execution(
        name="test execution",
        workspace=ws1,
        run=r,
        description="an execution",
    )
    self.assertIsNotNone(e.id)

    data_set = e.log_input(
        metadata.DataSet(
            description="an example data",
            name="mytable-dump",
            owner="*****@*****.**",
            uri="file://path/to/dataset",
            version="v1.0.0",
            query="SELECT * FROM mytable",
        )
    )
    self.assertIsNotNone(data_set.id)

    metrics = e.log_output(
        metadata.Metrics(
            name="MNIST-evaluation",
            description=(
                "validating the MNIST model to recognize handwritten digits"
            ),
            owner="*****@*****.**",
            uri="gcs://my-bucket/mnist-eval.csv",
            data_set_id="123",
            model_id="12345",
            metrics_type=metadata.Metrics.VALIDATION,
            values={"accuracy": 0.95},
            labels={"mylabel": "l1"},
        )
    )
    self.assertIsNotNone(metrics.id)

    model = e.log_output(
        metadata.Model(
            name="MNIST",
            description="model to recognize handwritten digits",
            owner="*****@*****.**",
            uri="gcs://my-bucket/mnist",
            model_type="neural network",
            training_framework={
                "name": "tensorflow",
                "version": "v1.0",
            },
            hyperparameters={
                "learning_rate": 0.5,
                "layers": [10, 3, 1],
                "early_stop": True,
            },
            version="v0.0.1",
            labels={"mylabel": "l1"},
        )
    )
    self.assertIsNotNone(model.id)

    # Test listing artifacts in a workspace
    self.assertGreater(len(ws1.list()), 0)
    self.assertGreater(len(ws1.list(metadata.Model.ARTIFACT_TYPE_NAME)), 0)
    self.assertGreater(len(ws1.list(metadata.Metrics.ARTIFACT_TYPE_NAME)), 0)
    self.assertGreater(len(ws1.list(metadata.DataSet.ARTIFACT_TYPE_NAME)), 0)

    # Test lineage tracking.
    # unittest assertions are used instead of bare ``assert`` so the checks
    # still run under ``python -O`` and report both sides on failure.
    output_events = ws1.client.list_events2(model.id).events
    self.assertEqual(len(output_events), 1)
    execution_id = output_events[0].execution_id
    self.assertEqual(execution_id, e.id)

    all_events = ws1.client.list_events(execution_id).events
    self.assertEqual(len(all_events), 3)