def test_put_duplicated_attributions_and_empty_associations(self):
  # Submits the same attribution twice (with no associations) and checks
  # that the artifact/context lookups each return exactly one entry and
  # that the context has no executions.
  store = _get_metadata_store()

  # One context of the example context type.
  context_type_id = store.put_context_type(_create_example_context_type())
  want_context = metadata_store_pb2.Context(
      type_id=context_type_id, name="context")
  (context_id,) = store.put_contexts([want_context])
  want_context.id = context_id

  # One artifact of the example artifact type.
  artifact_type_id = store.put_artifact_type(_create_example_artifact_type())
  want_artifact = metadata_store_pb2.Artifact(
      type_id=artifact_type_id, uri="testuri")
  (artifact_id,) = store.put_artifacts([want_artifact])
  want_artifact.id = artifact_id

  # The very same attribution is passed twice; the association list is empty.
  attribution = metadata_store_pb2.Attribution(
      artifact_id=want_artifact.id, context_id=want_context.id)
  store.put_attributions_and_associations([attribution] * 2, [])

  contexts_of_artifact = store.get_contexts_by_artifact(want_artifact.id)
  self.assertLen(contexts_of_artifact, 1)
  only_context = contexts_of_artifact[0]
  self.assertEqual(only_context.id, want_context.id)
  self.assertEqual(only_context.name, want_context.name)

  artifacts_of_context = store.get_artifacts_by_context(want_context.id)
  self.assertLen(artifacts_of_context, 1)
  self.assertEqual(artifacts_of_context[0].uri, want_artifact.uri)

  self.assertEmpty(store.get_executions_by_context(want_context.id))
def create_new_artifact_event_and_attribution(
    store,
    execution_id: int,
    context_id: int,
    uri: str,
    type_name: str,
    event_type: metadata_store_pb2.Event.Type,
    properties: dict = None,
    artifact_type_properties: dict = None,
    artifact_name_path: metadata_store_pb2.Event.Path = None,
    milliseconds_since_epoch: int = None,
) -> metadata_store_pb2.Artifact:
    """Create an artifact, then record an event and an attribution for it.

    The artifact is produced by ``create_artifact_with_type``. An event
    linking it to ``execution_id`` is stored first, followed by an
    attribution linking it to ``context_id``.

    Args:
        store: Metadata store used for all writes.
        execution_id: Id of the execution the event refers to.
        context_id: Id of the context the artifact is attributed to.
        uri: Forwarded to ``create_artifact_with_type``.
        type_name: Forwarded to ``create_artifact_with_type``.
        event_type: Value for the event's ``type`` field.
        properties: Forwarded to ``create_artifact_with_type``.
        artifact_type_properties: Forwarded to ``create_artifact_with_type``
            as ``type_properties``.
        artifact_name_path: Value for the event's ``path`` field.
        milliseconds_since_epoch: Value for the event's
            ``milliseconds_since_epoch`` field.

    Returns:
        The artifact returned by ``create_artifact_with_type``.
    """
    new_artifact = create_artifact_with_type(
        store=store,
        uri=uri,
        type_name=type_name,
        type_properties=artifact_type_properties,
        properties=properties,
    )

    # Record the event before the attribution, as two separate store calls.
    store.put_events([
        metadata_store_pb2.Event(
            execution_id=execution_id,
            artifact_id=new_artifact.id,
            type=event_type,
            path=artifact_name_path,
            milliseconds_since_epoch=milliseconds_since_epoch,
        ),
    ])

    store.put_attributions_and_associations(
        [
            metadata_store_pb2.Attribution(
                context_id=context_id,
                artifact_id=new_artifact.id,
            ),
        ],
        [],
    )
    return new_artifact
def test_put_and_use_attributions_and_associations(self):
  # Stores one context, one execution and one artifact, links them through
  # an attribution and an association, then checks every lookup direction.
  store = _get_metadata_store()

  # Context.
  context_type_id = store.put_context_type(
      _create_example_context_type(self._get_test_type_name()))
  want_context = metadata_store_pb2.Context(
      type_id=context_type_id, name=self._get_test_type_name())
  (context_id,) = store.put_contexts([want_context])
  want_context.id = context_id

  # Execution carrying an int property "foo".
  execution_type_id = store.put_execution_type(
      _create_example_execution_type(self._get_test_type_name()))
  want_execution = metadata_store_pb2.Execution(type_id=execution_type_id)
  want_execution.properties["foo"].int_value = 3
  (execution_id,) = store.put_executions([want_execution])
  want_execution.id = execution_id

  # Artifact.
  artifact_type_id = store.put_artifact_type(
      _create_example_artifact_type(self._get_test_type_name()))
  want_artifact = metadata_store_pb2.Artifact(
      type_id=artifact_type_id, uri="testuri")
  (artifact_id,) = store.put_artifacts([want_artifact])
  want_artifact.id = artifact_id

  # Link artifact -> context and execution -> context in a single call.
  attribution = metadata_store_pb2.Attribution(
      artifact_id=want_artifact.id, context_id=want_context.id)
  association = metadata_store_pb2.Association(
      execution_id=want_execution.id, context_id=want_context.id)
  store.put_attributions_and_associations([attribution], [association])

  # Artifact -> contexts.
  contexts_of_artifact = store.get_contexts_by_artifact(want_artifact.id)
  self.assertLen(contexts_of_artifact, 1)
  self.assertEqual(contexts_of_artifact[0].id, want_context.id)
  self.assertEqual(contexts_of_artifact[0].name, want_context.name)

  # Context -> artifacts.
  artifacts_of_context = store.get_artifacts_by_context(want_context.id)
  self.assertLen(artifacts_of_context, 1)
  self.assertEqual(artifacts_of_context[0].uri, want_artifact.uri)

  # Context -> executions.
  executions_of_context = store.get_executions_by_context(want_context.id)
  self.assertLen(executions_of_context, 1)
  self.assertEqual(executions_of_context[0].properties["foo"],
                   want_execution.properties["foo"])

  # Execution -> contexts.
  contexts_of_execution = store.get_contexts_by_execution(want_execution.id)
  self.assertLen(contexts_of_execution, 1)
  self.assertEqual(contexts_of_execution[0].id, want_context.id)
  self.assertEqual(contexts_of_execution[0].name, want_context.name)
def _log(self, artifact):
    """Log artifact into metadata store.

    The artifact type is looked up first. An artifact that already has an
    id is only checked via ``self._check_artifact_id`` and returned. A new
    artifact is serialized, tagged with the workspace/run names in its
    custom properties, and compared against stored artifacts with the same
    uri: when ``artifact.is_duplicated`` matches one, that id is reused and
    nothing is written. Otherwise the serialization is stored and attributed
    to the workspace context.

    Args:
        artifact: Object exposing ``ARTIFACT_TYPE_NAME``, ``id``, ``uri``,
            ``serialization()`` and ``is_duplicated(ser, pb)``.

    Returns:
        The same ``artifact`` object, with ``id`` set.

    Raises:
        ValueError: If ``artifact`` is None, if looking up its type fails,
            or if its custom properties already contain one of the reserved
            workspace/run keys.
    """
    # Sanity checks for artifact.
    if artifact is None:
        raise ValueError("'artifact' must be set.")
    try:
        type_id = _retry(lambda: self.workspace.store.get_artifact_type(
            artifact.ARTIFACT_TYPE_NAME).id)
    except Exception as e:
        # Interpolate the message explicitly: passing the values as extra
        # ValueError arguments would leave the "%s" placeholders unformatted.
        raise ValueError("invalid artifact type %s: exception %s" %
                         (artifact.ARTIFACT_TYPE_NAME, e)) from e

    # If id is set, then this artifact is already saved in database.
    if artifact.id is not None:
        self._check_artifact_id(artifact.id)
        return artifact

    # Construct a new artifact serialization.
    ser = artifact.serialization()
    ser.type_id = type_id

    # The workspace/run keys are written by this method, so callers must
    # not supply them.
    if _WORKSPACE_PROPERTY_NAME in ser.custom_properties:
        raise ValueError("custom_properties contains reserved key %s" %
                         _WORKSPACE_PROPERTY_NAME)
    if _RUN_PROPERTY_NAME in ser.custom_properties:
        raise ValueError("custom_properties contains reserved key %s" %
                         _RUN_PROPERTY_NAME)
    if self.workspace is not None:
        ser.custom_properties[
            _WORKSPACE_PROPERTY_NAME].string_value = self.workspace.name
    if self.run is not None:
        ser.custom_properties[
            _RUN_PROPERTY_NAME].string_value = self.run.name

    # Deduplicate artifact for existing one in the database.
    pbs = _retry(
        lambda: self.workspace.store.get_artifacts_by_uri(artifact.uri))
    for pb in pbs:
        if artifact.is_duplicated(ser, pb):
            artifact.id = pb.id
            return artifact

    # No duplicate found: store the artifact, then attribute it to the
    # workspace context.
    artifact.id = _retry(
        lambda: self.workspace.store.put_artifacts([ser])[0])
    _retry(lambda: self.workspace.store.put_attributions_and_associations([
        mlpb.Attribution(context_id=self.workspace.context_id,
                         artifact_id=artifact.id)
    ], []))
    return artifact
def _link_artifact_as_output(self, artifact):
  """Link `artifact` with an OUTPUT event and attribute it to the run context.

  Args:
    artifact: Artifact whose `id` is used for the attribution; it is also
      passed to `self._link_artifact`.
  """
  self._link_artifact(artifact, metadata_store_pb2.Event.OUTPUT)

  # Only an attribution is written here; the association list stays empty.
  run_attributions = [
      metadata_store_pb2.Attribution(
          context_id=self.run_context.id,
          artifact_id=artifact.id,
      )
  ]
  self.store.put_attributions_and_associations(run_attributions, [])
print('Experiment Context type:\n', expt_context_type)
print('Experiment Context type ID: ', expt_context_type_id)

print('Experiment Context:\n', expt_context)
print('Experiment Context ID: ', expt_context_id)


# ## Generate attribution and association relationships
#
# With the `Context` defined, you can now relate it to the artifact and the
# execution you used earlier. Linking the schema artifact to the experiment
# context forms an `Attribution`; linking the data validation execution to
# the experiment context forms an `Association`. Both are registered with
# the `put_attributions_and_associations()` method.

# In[16]:


# Attribution: schema artifact -> experiment context
expt_attribution = metadata_store_pb2.Attribution(
    artifact_id=schema_artifact_id,
    context_id=expt_context_id,
)

# Association: data validation execution -> experiment context
expt_association = metadata_store_pb2.Association(
    execution_id=dv_execution_id,
    context_id=expt_context_id,
)

# Submit both relationships to the Metadata Store in one call
store.put_attributions_and_associations(
    [expt_attribution],
    [expt_association],
)

print('Experiment Attribution:\n', expt_attribution)
print('Experiment Association:\n', expt_association)


# ## Retrieving Information from the Metadata Store