def _create_example_context_type():
    """Build an example ``ContextType`` message named "test_type_1".

    The returned type declares three properties: "foo" (INT), "bar" (STRING)
    and "baz" (DOUBLE). Nothing is written to a store here.
    """
    property_types = (
        ("foo", metadata_store_pb2.INT),
        ("bar", metadata_store_pb2.STRING),
        ("baz", metadata_store_pb2.DOUBLE),
    )
    example_type = metadata_store_pb2.ContextType(name="test_type_1")
    for prop_name, prop_type in property_types:
        example_type.properties[prop_name] = prop_type
    return example_type
def _register_run_context(self,
                          pipeline_info: data_types.PipelineInfo) -> int:
    """Register a context for the current pipeline run and return its id.

    The run context type is looked up by `_CONTEXT_TYPE_RUN`; when the store
    reports it as not found, the type is created with two STRING properties
    ('pipeline_name' and 'run_id') before the context itself is stored.

    Args:
      pipeline_info: pipeline information for current run; supplies the
        context name, the pipeline name and the run id.

    Returns:
      context id of the new context.
    """
    try:
        run_type = self._store.get_context_type(_CONTEXT_TYPE_RUN)
    except tf.errors.NotFoundError:
        # First use of this store: declare the run context type.
        # TODO(b/139485894): add DAG as properties.
        run_type = metadata_store_pb2.ContextType(name=_CONTEXT_TYPE_RUN)
        for property_name in ('pipeline_name', 'run_id'):
            run_type.properties[property_name] = metadata_store_pb2.STRING
        run_type_id = self._store.put_context_type(run_type)
    else:
        assert run_type, 'Context type is None for %s.' % _CONTEXT_TYPE_RUN
        run_type_id = run_type.id

    run_context = metadata_store_pb2.Context(
        type_id=run_type_id, name=pipeline_info.run_context_name)
    run_properties = run_context.properties
    run_properties['pipeline_name'].string_value = pipeline_info.pipeline_name
    run_properties['run_id'].string_value = pipeline_info.run_id

    # Exactly one id is expected back; the unpacking fails otherwise.
    [context_id] = self._store.put_contexts([run_context])
    return context_id
def _get_context_id(self, reuse_workspace_if_exists):
    """Return the id of this workspace's context, creating it if needed.

    Args:
      reuse_workspace_if_exists: when a context for this workspace already
        exists, return its id if this is truthy; otherwise raise.

    Returns:
      The id of the existing or newly stored context.

    Raises:
      ValueError: a context already exists and reuse was not requested.
    """
    existing = self._get_existing_context()
    if existing is not None and reuse_workspace_if_exists:
        return existing.id
    if existing is not None:
        raise ValueError(
            'Workspace name {} already exists with id {}. '
            'You can initialize workspace with '
            'reuse_workspace_if_exists=True if want to reuse it'.format(
                self.name, existing.id))

    # Create new context type or get the existing type id.
    workspace_type = mlpb.ContextType(
        name=self.CONTEXT_TYPE_NAME,
        properties={
            "description": mlpb.STRING,
            "labels": mlpb.STRING,
        })
    workspace_type_id = _retry(
        lambda: self.store.put_context_type(workspace_type))

    # Only the optional fields that were actually provided become properties.
    workspace_props = {}
    if self.description is not None:
        workspace_props["description"] = mlpb.Value(
            string_value=self.description)
    if self.labels is not None:
        workspace_props["labels"] = mlpb.Value(
            string_value=json.dumps(self.labels))

    # Add new context for workspace.
    workspace_ctx = mlpb.Context(
        type_id=workspace_type_id,
        name=self.name,
        properties=workspace_props,
    )
    return _retry(lambda: self.store.put_contexts([workspace_ctx])[0])
def _register_context_type_if_not_exist(
    self, context_type_name: Text,
    properties: Dict[Text, 'metadata_store_pb2.PropertyType']) -> int:
    """Upserts a context type and returns its id.

    Args:
      context_type_name: the name of the context.
      properties: properties of the context, keyed by property name.

    Returns:
      id of the desired context type.
    """
    desired_type = metadata_store_pb2.ContextType(name=context_type_name)
    for property_name, property_type in properties.items():
        desired_type.properties[property_name] = property_type

    # Types can be evolved by adding new fields in newer releases, so the
    # upsert is made tolerant in both directions:
    #  - `can_add_fields` keeps type updates made in the current release
    #    backward compatible with older releases;
    #  - `can_omit_fields` keeps the current release forward compatible with
    #    type updates made by future releases.
    return self.store.put_context_type(
        desired_type, can_add_fields=True, can_omit_fields=True)
def test_put_execution_with_context(self):
    """put_execution stores artifacts, events and links them to the context."""
    store = _get_metadata_store()

    # Execution and its type.
    execution_type = metadata_store_pb2.ExecutionType(
        name=self._get_test_type_name())
    execution_type_id = store.put_execution_type(execution_type)
    execution = metadata_store_pb2.Execution(type_id=execution_type_id)

    # Two artifacts of one type; only the second one is paired with an event.
    artifact_type = metadata_store_pb2.ArtifactType(
        name=self._get_test_type_name())
    artifact_type_id = store.put_artifact_type(artifact_type)
    input_artifact = metadata_store_pb2.Artifact(type_id=artifact_type_id)
    output_artifact = metadata_store_pb2.Artifact(type_id=artifact_type_id)
    # NOTE(review): the event paired with the output artifact is typed
    # DECLARED_INPUT - confirm this is intended.
    output_event = metadata_store_pb2.Event(
        type=metadata_store_pb2.Event.DECLARED_INPUT)

    # Context and its type.
    context_type = metadata_store_pb2.ContextType(
        name=self._get_test_type_name())
    context_type_id = store.put_context_type(context_type)
    context_name = self._get_test_type_name()
    context = metadata_store_pb2.Context(
        type_id=context_type_id, name=context_name)

    artifact_and_events = [[input_artifact], [output_artifact, output_event]]
    execution_id, artifact_ids, context_ids = store.put_execution(
        execution, artifact_and_events, [context])

    # Test artifacts & events are correctly inserted.
    self.assertLen(artifact_ids, 2)
    stored_events = store.get_events_by_execution_ids([execution_id])
    self.assertLen(stored_events, 1)

    # Test the context is correctly inserted.
    stored_contexts = store.get_contexts_by_id(context_ids)
    self.assertLen(context_ids, 1)
    self.assertLen(stored_contexts, 1)
    context_id = context_ids[0]

    # Test the association link between execution and the context is correct.
    execution_contexts = store.get_contexts_by_execution(execution_id)
    self.assertLen(execution_contexts, 1)
    self.assertEqual(execution_contexts[0].name, context_name)
    self.assertEqual(execution_contexts[0].type_id, context_type_id)
    context_executions = store.get_executions_by_context(context_id)
    self.assertLen(context_executions, 1)

    # Test the attribution links between artifacts and the context are correct.
    artifact_contexts = store.get_contexts_by_artifact(artifact_ids[0])
    self.assertLen(artifact_contexts, 1)
    self.assertEqual(artifact_contexts[0].name, context_name)
    self.assertEqual(artifact_contexts[0].type_id, context_type_id)
    context_artifacts = store.get_artifacts_by_context(context_id)
    self.assertLen(context_artifacts, 2)
def test_put_context_type_with_update_get_context_type(self):
    """A second put with can_add_fields=True extends the type, keeping its id."""
    store = _get_metadata_store()

    # Register the initial version of the type with a single INT property.
    initial_type = metadata_store_pb2.ContextType(name="test_type")
    initial_type.properties["foo"] = metadata_store_pb2.INT
    type_id = store.put_context_type(initial_type)

    # Put the same type name again with one additional STRING property.
    extended_type = metadata_store_pb2.ContextType(name="test_type")
    extended_type.properties["foo"] = metadata_store_pb2.INT
    extended_type.properties["new_property"] = metadata_store_pb2.STRING
    store.put_context_type(extended_type, can_add_fields=True)

    stored_type = store.get_context_type("test_type")
    self.assertEqual(stored_type.id, type_id)
    self.assertEqual(stored_type.name, "test_type")
    self.assertEqual(stored_type.properties["foo"], metadata_store_pb2.INT)
    self.assertEqual(stored_type.properties["new_property"],
                     metadata_store_pb2.STRING)
def get_or_create_context_type(store, type_name, properties: dict = None) -> metadata_store_pb2.ContextType:
    """Return the context type called `type_name`, registering it if needed.

    Args:
      store: metadata store exposing `get_context_type` and
        `put_context_type`.
      type_name: name of the context type to look up or create.
      properties: property-name to property-type mapping used only when the
        type has to be created.

    Returns:
      The `ContextType` returned by the store's lookup, or a newly built
      one whose `id` is set to the value returned by `put_context_type`.
    """
    try:
        return store.get_context_type(type_name=type_name)
    except Exception:  # pylint: disable=broad-except
        # The lookup failed, so register the type. `Exception` rather than a
        # bare `except:` is deliberate: KeyboardInterrupt, SystemExit and
        # GeneratorExit must propagate instead of triggering a registration.
        # NOTE(review): the store's specific "not found" error type is not
        # visible from here; narrow this further once it is confirmed.
        context_type = metadata_store_pb2.ContextType(
            name=type_name,
            properties=properties,
        )
        # put_context_type returns the id of the stored type.
        context_type.id = store.put_context_type(context_type)
        return context_type
def _replace_pipeline_run_id_in_channel(channel: p_pb2.InputSpec.Channel,
                                        pipeline_run_id: str):
    """Point the channel's pipeline-run context query at `pipeline_run_id`.

    The channel is updated in place: the first context query whose type name
    is the pipeline-run context type gets its name value overwritten; if no
    such query exists, a new one is appended.
    """
    run_type_name = dsl_constants.PIPELINE_RUN_CONTEXT_TYPE_NAME
    run_id_value = mlmd_pb2.Value(string_value=pipeline_run_id)

    run_query = next(
        (query for query in channel.context_queries
         if query.type.name == run_type_name),
        None)
    if run_query is not None:
        run_query.name.field_value.CopyFrom(run_id_value)
        return

    new_query = p_pb2.InputSpec.Channel.ContextQuery(
        type=mlmd_pb2.ContextType(name=run_type_name),
        name=p_pb2.Value(field_value=run_id_value))
    channel.context_queries.append(new_query)
def _register_context_type_if_not_exist(
    self, context_type_name: Text,
    properties: Dict[Text, 'metadata_store_pb2.PropertyType']) -> int:
    """Upserts a context type and returns its id.

    The type is put with `can_add_fields=True`.

    Args:
      context_type_name: the name of the context.
      properties: properties of the context, keyed by property name.

    Returns:
      id of the desired context type.
    """
    type_to_register = metadata_store_pb2.ContextType(name=context_type_name)
    declared_properties = type_to_register.properties
    for key, value_type in properties.items():
        declared_properties[key] = value_type
    return self.store.put_context_type(type_to_register, can_add_fields=True)
def register_context_if_not_exists(
    metadata_handler: metadata.Metadata,
    context_type_name: Text,
    context_name: Text,
) -> metadata_store_pb2.Context:
    """Registers a context if not exist, otherwise returns the existing one.

    Convenience wrapper around `_register_context_if_not_exist`: it builds the
    `ContextSpec` from just a context type name and a context name.

    Args:
      metadata_handler: A handler to access MLMD store.
      context_type_name: The name of the context type.
      context_name: The name of the context.

    Returns:
      An MLMD context.
    """
    spec_type = metadata_store_pb2.ContextType(name=context_type_name)
    spec_name = pipeline_pb2.Value(
        field_value=metadata_store_pb2.Value(string_value=context_name))
    spec = pipeline_pb2.ContextSpec(name=spec_name, type=spec_type)
    return _register_context_if_not_exist(
        metadata_handler=metadata_handler, context_spec=spec)
# Attach the stored ID to the execution and mark its state as COMPLETED
dv_execution.id = dv_execution_id
dv_execution.properties['state'].string_value = 'COMPLETED'

# Update execution unit in the Metadata Store
store.put_executions([dv_execution])

print('Data validation execution:\n', dv_execution)


# ## Setting up Context Types and Generating a Context Unit
#
# You can group the artifacts and execution units into a `Context`. First, you
# need to define a `ContextType`, which describes the kind of context you want
# to record. It follows a format similar to that of the artifact and event
# types. You can register it with the `put_context_type()` function.

# In[14]:


# Create a ContextType named 'Experiment' with a single string property 'note'
expt_context_type = metadata_store_pb2.ContextType()
expt_context_type.name = 'Experiment'
expt_context_type.properties['note'] = metadata_store_pb2.STRING

# Register context type to the Metadata Store and keep the returned ID
expt_context_type_id = store.put_context_type(expt_context_type)


# Similarly, you can create an instance of this context type and use the
# `put_contexts()` method to register it to the store.

# In[15]:


# Generate the context as an instance of the context type registered above
expt_context = metadata_store_pb2.Context()
expt_context.type_id = expt_context_type_id

# Give the experiment a name
expt_context.name = 'Demo'
def _create_example_context_type_2(type_name):
    """Build an example ``ContextType`` message with the given name.

    The returned type declares two properties: "foo" (INT) and "bar"
    (STRING). Nothing is written to a store here.
    """
    example_type = metadata_store_pb2.ContextType(name=type_name)
    declared = example_type.properties
    declared["foo"] = metadata_store_pb2.INT
    declared["bar"] = metadata_store_pb2.STRING
    return example_type