def register_execution(self,
                       exec_properties: Dict[Text, Any],
                       pipeline_info: data_types.PipelineInfo,
                       component_info: data_types.ComponentInfo,
                       run_context_id: Optional[int] = None) -> int:
  """Create a new execution in metadata.

  Args:
    exec_properties: the execution properties of the execution.
    pipeline_info: pipeline info of the execution.
    component_info: component info of the execution.
    run_context_id: optional context id for the current run; when provided,
      the new execution is associated with that context.

  Returns:
    execution id of the new execution.
  """
  execution = self._prepare_execution(EXECUTION_STATE_NEW, exec_properties,
                                      pipeline_info, component_info)
  [execution_id] = self._store.put_executions([execution])
  # Explicit None check: a context id of 0 is falsy but still a valid id,
  # and a bare truthiness test would silently skip the association.
  if run_context_id is not None:
    association = metadata_store_pb2.Association(
        execution_id=execution_id, context_id=run_context_id)
    self._store.put_attributions_and_associations(
        attributions=[], associations=[association])
  return execution_id
def __init__(self,
             name: Optional[str] = None,
             workspace: Optional[Workspace] = None,
             run: Optional[Run] = None,
             description: Optional[str] = None):
    """Creates a new execution in a workspace and run.

    The execution.log_XXX() methods will attach corresponding artifacts as
    the input or output of this execution.

    Args:
        name: Required name of this run.
        workspace: Required workspace object where this execution belongs to.
        run: Optional run object.
        description: Optional description.

    Raises:
        ValueError: if workspace is not set, or name is missing or not a
            string.
    """
    if workspace is None:
        raise ValueError("'workspace' must be set.")
    # isinstance is the idiomatic type check (also covers the None case,
    # since None is not a str); message kept identical to the original.
    if not isinstance(name, str):
        raise ValueError("'name' must be set and in string type.")
    self.id = None
    self.name = name
    self.workspace = workspace
    self.run = run
    self.description = description
    self.create_time = _get_rfc3339_time()
    # Resolve the execution type id once; _retry guards against transient
    # metadata-store failures.
    self._type_id = _retry(lambda: self.workspace.store.get_execution_type(
        Execution.EXECUTION_TYPE_NAME).id)
    self.id = _retry(lambda: self.workspace.store.put_executions(
        [self.serialized()])[0])
    # Associate this execution with the workspace's context.
    _retry(lambda: self.workspace.store.put_attributions_and_associations(
        [],
        [
            mlpb.Association(context_id=self.workspace.context_id,
                             execution_id=self.id)
        ]))
def test_put_and_use_attributions_and_associations(self):
    """Puts an attribution and an association, then queries both directions."""
    store = _get_metadata_store()

    # Register a context.
    context_type_id = store.put_context_type(
        _create_example_context_type(self._get_test_type_name()))
    context = metadata_store_pb2.Context(
        type_id=context_type_id, name=self._get_test_type_name())
    context.id = store.put_contexts([context])[0]

    # Register an execution with one integer property.
    execution_type_id = store.put_execution_type(
        _create_example_execution_type(self._get_test_type_name()))
    execution = metadata_store_pb2.Execution(type_id=execution_type_id)
    execution.properties["foo"].int_value = 3
    execution.id = store.put_executions([execution])[0]

    # Register an artifact.
    artifact_type_id = store.put_artifact_type(
        _create_example_artifact_type(self._get_test_type_name()))
    artifact = metadata_store_pb2.Artifact(
        type_id=artifact_type_id, uri="testuri")
    artifact.id = store.put_artifacts([artifact])[0]

    # Link artifact->context (attribution) and execution->context
    # (association) in a single call.
    attribution = metadata_store_pb2.Attribution(
        artifact_id=artifact.id, context_id=context.id)
    association = metadata_store_pb2.Association(
        execution_id=execution.id, context_id=context.id)
    store.put_attributions_and_associations([attribution], [association])

    # Both relationships should now be queryable from either side.
    got_contexts = store.get_contexts_by_artifact(artifact.id)
    self.assertLen(got_contexts, 1)
    self.assertEqual(got_contexts[0].id, context.id)
    self.assertEqual(got_contexts[0].name, context.name)
    got_artifacts = store.get_artifacts_by_context(context.id)
    self.assertLen(got_artifacts, 1)
    self.assertEqual(got_artifacts[0].uri, artifact.uri)
    got_executions = store.get_executions_by_context(context.id)
    self.assertLen(got_executions, 1)
    self.assertEqual(got_executions[0].properties["foo"],
                     execution.properties["foo"])
    got_contexts = store.get_contexts_by_execution(execution.id)
    self.assertLen(got_contexts, 1)
    self.assertEqual(got_contexts[0].id, context.id)
    self.assertEqual(got_contexts[0].name, context.name)
def _create_execution_in_run_context(self):
    """Registers a RUNNING execution for this component and associates it
    with the current run context.

    Returns:
        The execution created by _create_execution_with_type.
    """
    state = metadata_store_pb2.Execution.RUNNING
    # Shared by both the MLMD and the Kale state properties, mirroring the
    # original single Value object.
    running_state_value = metadata_store_pb2.Value(
        string_value=KALE_EXECUTION_STATE_RUNNING)
    # Shared by both the pod-name and the cache-pod-name custom properties.
    pod_name_value = metadata_store_pb2.Value(string_value=self.pod_name)

    properties = {
        "run_id": metadata_store_pb2.Value(
            string_value=kfputils.format_kfp_run_id_uri(self.run_uuid)),
        "pipeline_name": metadata_store_pb2.Value(
            string_value=self.pipeline_name),
        "component_id": metadata_store_pb2.Value(
            string_value=self.component_id),
        MLMD_EXECUTION_STATE_KEY: running_state_value,
    }
    # Every declared property is a string.
    property_types = {key: metadata_store_pb2.STRING for key in properties}

    custom_props = {
        MLMD_EXECUTION_HASH_PROPERTY_KEY: metadata_store_pb2.Value(
            string_value=self.execution_hash),
        MLMD_EXECUTION_POD_NAME_PROPERTY_KEY: pod_name_value,
        MLMD_EXECUTION_CACHE_POD_NAME_PROPERTY_KEY: pod_name_value,
        MLMD_EXECUTION_POD_NAMESPACE_PROPERTY_KEY: metadata_store_pb2.Value(
            string_value=self.pod_namespace),
        KALE_EXECUTION_STATE_KEY: running_state_value,
    }

    execution = self._create_execution_with_type(
        type_name=self.component_id,
        property_types=property_types,
        properties=properties,
        custom_properties=custom_props,
        state=state)

    # Link the new execution to the run context.
    self.store.put_attributions_and_associations(
        [],
        [metadata_store_pb2.Association(execution_id=execution.id,
                                        context_id=self.run_context.id)])
    return execution
def create_new_execution_in_existing_context(
    store,
    execution_type_name: str,
    context_id: int,
    properties: Optional[dict] = None,
    execution_type_properties: Optional[dict] = None,
) -> metadata_store_pb2.Execution:
    """Creates a new execution and associates it with an existing context.

    Args:
        store: metadata store used to register the execution and the
            association.
        execution_type_name: name of the execution type for the new
            execution.
        context_id: id of an existing context to associate the execution
            with.
        properties: optional execution properties.
        execution_type_properties: optional property schema for the
            execution type.

    Returns:
        The execution returned by create_execution_with_type.
    """
    execution = create_execution_with_type(
        store=store,
        properties=properties,
        type_name=execution_type_name,
        type_properties=execution_type_properties,
    )
    association = metadata_store_pb2.Association(
        execution_id=execution.id,
        context_id=context_id,
    )
    store.put_attributions_and_associations([], [association])
    return execution
print('Experiment Context ID: ', expt_context_id)

# ## Generate attribution and association relationships
#
# With the `Context` defined, you can now create its relationship with the
# artifact and executions you previously used. You will create the
# relationship between the schema artifact unit and the experiment context
# unit to form an `Attribution`. Similarly, you will create the relationship
# between the data validation execution unit and the experiment context unit
# to form an `Association`. These are registered with the
# `put_attributions_and_associations()` method.

# In[16]:

# Attribution: schema artifact -> experiment context.
expt_attribution = metadata_store_pb2.Attribution(
    artifact_id=schema_artifact_id,
    context_id=expt_context_id)

# Association: data validation execution -> experiment context.
expt_association = metadata_store_pb2.Association(
    execution_id=dv_execution_id,
    context_id=expt_context_id)

# Submit both relationships to the Metadata Store in one call.
store.put_attributions_and_associations([expt_attribution],
                                        [expt_association])

print('Experiment Attribution:\n', expt_attribution)
print('Experiment Association:\n', expt_association)

# ## Retrieving Information from the Metadata Store
#
# You've now recorded the needed information to the metadata store. If we did
# this in a persistent database, you can track which artifacts and events are
# related to each other even without seeing the code used to generate it. See
# a sample run below where you investigate what dataset is used to generate
# the schema. (**It would be obvious which dataset is used in our simple demo
# because we only have two artifacts registered. Thus, assume that you have
# thousands of entries in the metadata store.**)

# In[17]: