Exemple #1
0
    def register_execution(self,
                           exec_properties: Dict[Text, Any],
                           pipeline_info: data_types.PipelineInfo,
                           component_info: data_types.ComponentInfo,
                           run_context_id: Optional[int] = None) -> int:
        """Register a brand-new execution in the metadata store.

        Args:
          exec_properties: the execution properties of the execution.
          pipeline_info: pipeline info of the execution.
          component_info: component info of the execution.
          run_context_id: context id of the current run; when given (truthy),
            the new execution is associated with that context.

        Returns:
          The execution id returned by the store for the new execution.
        """
        new_execution = self._prepare_execution(
            EXECUTION_STATE_NEW, exec_properties, pipeline_info, component_info)
        # Exactly one execution is written, so exactly one id must come back.
        (execution_id,) = self._store.put_executions([new_execution])

        if not run_context_id:
            return execution_id

        # Link the freshly stored execution to the run context.
        run_link = metadata_store_pb2.Association(
            execution_id=execution_id, context_id=run_context_id)
        self._store.put_attributions_and_associations(
            attributions=[], associations=[run_link])
        return execution_id
Exemple #2
0
    def __init__(self,
                 name: str = None,
                 workspace: Workspace = None,
                 run: Optional[Run] = None,
                 description: Optional[str] = None):
        """Creates a new execution in a workspace and, optionally, a run.

        The execution.log_XXX() methods will attach corresponding artifacts as
        the input or output of this execution.

        Constructing the object writes the execution through `workspace.store`
        and associates it with `workspace.context_id`.

        Args:
          name: Required name of this execution; must be a string.
          workspace: Required workspace object where this execution belongs to.
          run: Optional run object.
          description: Optional description.

        Raises:
          ValueError: If 'workspace' is None, or if 'name' is missing or is not
            a string.
        """
        if workspace is None:
            raise ValueError("'workspace' must be set.")
        # isinstance() already rejects None and, unlike an exact type()
        # comparison, accepts str subclasses.
        if not isinstance(name, str):
            raise ValueError("'name' must be set and in string type.")
        # The id is initialised to None before serialized() is called below and
        # is replaced by the id the store returns.
        self.id = None
        self.name = name
        self.workspace = workspace
        self.run = run
        self.description = description
        self.create_time = _get_rfc3339_time()
        # Every store call goes through _retry.
        self._type_id = _retry(lambda: self.workspace.store.get_execution_type(
            Execution.EXECUTION_TYPE_NAME).id)
        self.id = _retry(
            lambda: self.workspace.store.put_executions([self.serialized()])[0])
        # No attributions; a single association ties the execution to the
        # workspace context.
        _retry(lambda: self.workspace.store.put_attributions_and_associations(
            [],
            [mlpb.Association(context_id=self.workspace.context_id,
                              execution_id=self.id)]))
Exemple #3
0
    def test_put_and_use_attributions_and_associations(self):
        """Stores one attribution and one association, then queries both ways.

        A context, an execution and an artifact are written to the store, the
        artifact is attributed to the context and the execution is associated
        with it, and the four context/artifact/execution lookups are checked.
        """
        mlmd = _get_metadata_store()

        # Context under test.
        ctx_type_id = mlmd.put_context_type(
            _create_example_context_type(self._get_test_type_name()))
        expected_context = metadata_store_pb2.Context()
        expected_context.type_id = ctx_type_id
        expected_context.name = self._get_test_type_name()
        (stored_context_id,) = mlmd.put_contexts([expected_context])
        expected_context.id = stored_context_id

        # Execution under test.
        exec_type_id = mlmd.put_execution_type(
            _create_example_execution_type(self._get_test_type_name()))
        expected_execution = metadata_store_pb2.Execution()
        expected_execution.type_id = exec_type_id
        expected_execution.properties["foo"].int_value = 3
        (stored_execution_id,) = mlmd.put_executions([expected_execution])
        expected_execution.id = stored_execution_id

        # Artifact under test.
        art_type_id = mlmd.put_artifact_type(
            _create_example_artifact_type(self._get_test_type_name()))
        expected_artifact = metadata_store_pb2.Artifact()
        expected_artifact.type_id = art_type_id
        expected_artifact.uri = "testuri"
        (stored_artifact_id,) = mlmd.put_artifacts([expected_artifact])
        expected_artifact.id = stored_artifact_id

        # Attribute the artifact to the context and associate the execution
        # with the same context.
        artifact_link = metadata_store_pb2.Attribution()
        artifact_link.artifact_id = expected_artifact.id
        artifact_link.context_id = expected_context.id
        execution_link = metadata_store_pb2.Association()
        execution_link.execution_id = expected_execution.id
        execution_link.context_id = expected_context.id
        mlmd.put_attributions_and_associations([artifact_link],
                                               [execution_link])

        # artifact -> contexts
        contexts_of_artifact = mlmd.get_contexts_by_artifact(
            expected_artifact.id)
        self.assertLen(contexts_of_artifact, 1)
        self.assertEqual(contexts_of_artifact[0].id, expected_context.id)
        self.assertEqual(contexts_of_artifact[0].name, expected_context.name)

        # context -> artifacts
        artifacts_in_context = mlmd.get_artifacts_by_context(
            expected_context.id)
        self.assertLen(artifacts_in_context, 1)
        self.assertEqual(artifacts_in_context[0].uri, expected_artifact.uri)

        # context -> executions
        executions_in_context = mlmd.get_executions_by_context(
            expected_context.id)
        self.assertLen(executions_in_context, 1)
        self.assertEqual(executions_in_context[0].properties["foo"],
                         expected_execution.properties["foo"])

        # execution -> contexts
        contexts_of_execution = mlmd.get_contexts_by_execution(
            expected_execution.id)
        self.assertLen(contexts_of_execution, 1)
        self.assertEqual(contexts_of_execution[0].id, expected_context.id)
        self.assertEqual(contexts_of_execution[0].name, expected_context.name)
Exemple #4
0
    def _create_execution_in_run_context(self):
        """Create a RUNNING execution for this component and tie it to the run.

        The execution is created via `_create_execution_with_type`, using the
        component id as the type name, and is then associated with
        `self.run_context` in the store.

        Returns:
            The execution returned by `_create_execution_with_type`.
        """
        def as_string_value(text):
            # Wrap a Python string in an MLMD string Value.
            return metadata_store_pb2.Value(string_value=text)

        # These two Value objects are each used under more than one key below.
        running_value = as_string_value(KALE_EXECUTION_STATE_RUNNING)
        pod_name_value = as_string_value(self.pod_name)

        # Every typed property of the execution type is a string.
        typed_keys = ("run_id", "pipeline_name", "component_id",
                      MLMD_EXECUTION_STATE_KEY)
        property_types = {key: metadata_store_pb2.STRING for key in typed_keys}
        properties = {
            "run_id": as_string_value(
                kfputils.format_kfp_run_id_uri(self.run_uuid)),
            "pipeline_name": as_string_value(self.pipeline_name),
            "component_id": as_string_value(self.component_id),
            MLMD_EXECUTION_STATE_KEY: running_value,
        }

        custom_props = {
            MLMD_EXECUTION_HASH_PROPERTY_KEY: as_string_value(
                self.execution_hash),
            MLMD_EXECUTION_POD_NAME_PROPERTY_KEY: pod_name_value,
            MLMD_EXECUTION_CACHE_POD_NAME_PROPERTY_KEY: pod_name_value,
            MLMD_EXECUTION_POD_NAMESPACE_PROPERTY_KEY: as_string_value(
                self.pod_namespace),
            KALE_EXECUTION_STATE_KEY: running_value,
        }

        new_execution = self._create_execution_with_type(
            type_name=self.component_id,
            property_types=property_types,
            properties=properties,
            custom_properties=custom_props,
            state=metadata_store_pb2.Execution.RUNNING)

        # No attributions; one association links the execution to the run
        # context.
        run_link = metadata_store_pb2.Association(
            execution_id=new_execution.id, context_id=self.run_context.id)
        self.store.put_attributions_and_associations([], [run_link])
        return new_execution
Exemple #5
0
def create_new_execution_in_existing_context(
    store,
    execution_type_name: str,
    context_id: int,
    properties: dict = None,
    execution_type_properties: dict = None,
) -> metadata_store_pb2.Execution:
    """Create an execution of the named type and associate it with a context.

    Args:
        store: metadata store used to create the execution and to record the
            association.
        execution_type_name: type name passed to `create_execution_with_type`.
        context_id: id of the context the new execution is associated with.
        properties: properties passed through to `create_execution_with_type`.
        execution_type_properties: type properties passed through to
            `create_execution_with_type`.

    Returns:
        The execution returned by `create_execution_with_type`.
    """
    new_execution = create_execution_with_type(
        store=store,
        type_name=execution_type_name,
        type_properties=execution_type_properties,
        properties=properties,
    )

    # No attributions; a single association links execution and context.
    context_link = metadata_store_pb2.Association(
        context_id=context_id,
        execution_id=new_execution.id,
    )
    store.put_attributions_and_associations([], [context_link])
    return new_execution
Exemple #6
0
print('Experiment Context ID: ', expt_context_id)

# ## Generate attribution and association relationships
#
# With the `Context` defined, you can now create its relationships with the artifact and execution you previously registered. The relationship between the schema artifact unit and the experiment context unit forms an `Attribution`.
# Similarly, the relationship between the data validation execution unit and the experiment context unit forms an `Association`. Both are registered with the `put_attributions_and_associations()` method.

# In[16]:

# Generate the attribution: schema artifact <-> experiment context
expt_attribution = metadata_store_pb2.Attribution()
expt_attribution.artifact_id = schema_artifact_id
expt_attribution.context_id = expt_context_id

# Generate the association: data validation execution <-> experiment context
expt_association = metadata_store_pb2.Association()
expt_association.execution_id = dv_execution_id
expt_association.context_id = expt_context_id

# Submit the attribution and the association to the Metadata Store in one call
store.put_attributions_and_associations([expt_attribution], [expt_association])

# Print both messages so the recorded ids can be inspected
print('Experiment Attribution:\n', expt_attribution)
print('Experiment Association:\n', expt_association)

# ## Retrieving Information from the Metadata Store
#
# You've now recorded the needed information to the metadata store. If you did this in a persistent database, you could track which artifacts and events are related to each other even without seeing the code used to generate them. See a sample run below where you investigate which dataset was used to generate the schema. (*It would be obvious which dataset is used in our simple demo because we only have two artifacts registered. Thus, assume that you have thousands of entries in the metadata store.*)

# In[17]: