def test_connection_config_with_retry_options(self):
  """Verifies that retry_options.max_num_retries overrides the default."""
  # Both client and grpc modes ship with a non-zero retry count by default.
  default_store = _get_metadata_store()
  self.assertGreater(default_store._max_num_retries, 0)
  # An explicit retry_options setting must take precedence over that default.
  config = metadata_store_pb2.ConnectionConfig()
  config.sqlite.SetInParent()
  want_num_retries = 100
  config.retry_options.max_num_retries = want_num_retries
  configured_store = metadata_store.MetadataStore(config)
  self.assertEqual(configured_store._max_num_retries, want_num_retries)
def test_enable_metadata_store_upgrade_migration(self):
  """Schema upgrade is rejected by default and allowed when opted in."""
  # Create a metadata store, then downgrade its schema to version 0 so any
  # new connection to the same db requires a migration.
  db_file = os.path.join(absltest.get_default_test_tmpdir(), "test.db")
  # Register cleanup up-front so the temp db is removed even when an
  # assertion below fails (the original trailing os.remove was skipped on
  # failure, leaking state into later test runs).
  self.addCleanup(
      lambda: os.remove(db_file) if os.path.exists(db_file) else None)
  connection_config = metadata_store_pb2.ConnectionConfig()
  connection_config.sqlite.filename_uri = db_file
  metadata_store.MetadataStore(connection_config)
  metadata_store.downgrade_schema(connection_config, 0)
  upgrade_conn_config = metadata_store_pb2.ConnectionConfig()
  upgrade_conn_config.sqlite.filename_uri = db_file
  with self.assertRaisesRegex(RuntimeError, "Schema migration is disabled."):
    # If migration is disabled (the default) the store cannot be used.
    metadata_store.MetadataStore(upgrade_conn_config)
  # If migration is enabled, then the store can be created.
  metadata_store.MetadataStore(
      upgrade_conn_config, enable_upgrade_migration=True)
def from_sqlite_db(filename_uri):
  """Returns a `TFXReadonlyMetadataStore` based off a SQLITE db uri.

  Args:
    filename_uri: A `str` indicating the path to the SQLITE db.

  Returns:
    A `TFXReadonlyMetadataStore` based off a SQLITE db uri.
  """
  config = metadata_store_pb2.ConnectionConfig()
  config.sqlite.filename_uri = filename_uri
  backing_store = metadata_store.MetadataStore(config)
  return TFXReadonlyMetadataStore(backing_store)
def test_unset_connection_config(self):
  """An unset ConnectionConfig must always fail to construct a store."""
  connection_config = metadata_store_pb2.ConnectionConfig()
  for _ in range(100):
    # Depending on the Python version the binding surfaces the failure as
    # either RuntimeError (Python 2.7) or SystemError (Python 3.6).
    # assertRaises with a tuple accepts both and, unlike the previous
    # try/except/raise ValueError pattern, also fails cleanly when no
    # exception is raised at all.
    with self.assertRaises((RuntimeError, SystemError)):
      metadata_store.MetadataStore(connection_config)
def __enter__(self) -> 'Metadata':
  """Connects to the metadata store, retrying transient init failures."""
  # TODO(ruoyu): Establishing a connection pool instead of newing
  # a connection every time. Until then, check self._store before usage
  # in every method.
  attempts_remaining = _MAX_INIT_RETRY
  while attempts_remaining > 0:
    attempts_remaining -= 1
    try:
      self._store = metadata_store.MetadataStore(self._connection_config)
      return self
    except RuntimeError:
      # MetadataStore could raise Aborted error if multiple concurrent
      # connections try to execute initialization DDL in database.
      # This is safe to retry.
      time.sleep(random.random())
  raise RuntimeError('Failed to establish connection to Metadata storage.')
def _connect():
  """Builds an MLMD gRPC store client and blocks until it is reachable."""

  def _store_is_reachable(store):
    """Ensure connection to MLMD store by making a request."""
    try:
      _ = store.get_context_types()
      return True
    except Exception as e:
      log.warning(
          "Failed to access the Metadata store. Exception:"
          " '%s'", str(e))
      return False

  # Connection parameters come from the environment, with library defaults
  # as fallbacks.
  host = os.environ.get(METADATA_GRPC_SERVICE_SERVICE_HOST_ENV,
                        DEFAULT_METADATA_GRPC_SERVICE_SERVICE_HOST)
  port = int(
      os.environ.get(METADATA_GRPC_SERVICE_SERVICE_PORT_ENV,
                     DEFAULT_METADATA_GRPC_SERVICE_SERVICE_PORT))
  max_msg_length = int(
      os.environ.get(METADATA_GRPC_MAX_RECEIVE_MESSAGE_LENGTH_ENV,
                     DEFAULT_METADATA_GRPC_MAX_RECEIVE_MESSAGE_LENGTH))
  channel_args = GrpcChannelArguments(
      max_receive_message_length=max_msg_length)
  client_config = MetadataStoreClientConfig(
      host=host, port=port, channel_arguments=channel_args)
  mlmd_store = metadata_store.MetadataStore(client_config)
  # We ensure that the connection to MLMD is established by retrying a
  # number of times and sleeping for 1 second between the tries.
  # These numbers are taken from the MetadataWriter implementation.
  for _ in range(100):
    if _store_is_reachable(mlmd_store):
      return mlmd_store
    time.sleep(1)
  raise RuntimeError("Could not connect to the Metadata store.")
def __enter__(self) -> 'Metadata':
  """Opens a fresh MLMD connection and returns this Metadata handle."""
  # TODO(ruoyu): Establishing a connection pool instead of newing
  # a connection every time. Until then, check self._store before usage
  # in every method.
  store = metadata_store.MetadataStore(self._connection_config)
  self._store = store
  return self
# For each of these steps, you may want to have the [MetadataStore API documentation](https://www.tensorflow.org/tfx/ml_metadata/api_docs/python/mlmd/MetadataStore) open so you can lookup any of the methods you will be using to interact with the metadata store. You can also look at the `metadata_store` protocol buffer [here](https://github.com/google/ml-metadata/blob/r0.24.0/ml_metadata/proto/metadata_store.proto) to see descriptions of each data type covered in this tutorial. # ## Define ML Metadata's Storage Database # # The first step would be to instantiate your storage backend. As mentioned in class, there are several types supported such as fake (temporary) database, SQLite, MySQL, and even cloud-based storage. For this demo, you will just be using a fake database for quick experimentation. # In[3]: # Instantiate a connection config connection_config = metadata_store_pb2.ConnectionConfig() # Set an empty fake database proto connection_config.fake_database.SetInParent() # Setup the metadata store store = metadata_store.MetadataStore(connection_config) # ## Register ArtifactTypes # # Next, you will create the artifact types needed and register them to the store. Since our simple exercise will just involve generating a schema using TFDV, you will only create two artifact types: one for the **input dataset** and another for the **output schema**. The main steps will be to: # # * Declare an `ArtifactType()` # * Define the name of the artifact type # * Define the necessary properties within these artifact types. For example, it is important to know the data split name so you may want to have a `split` property for the artifact type that holds datasets. # * Use `put_artifact_type()` to register them to the metadata store. This generates an `id` that you can use later to refer to a particular artifact type. # # *Bonus: For practice, you can also extend the code below to create an artifact type for the statistics.* # In[4]: # Create ArtifactType for the input dataset
def _get_metadata_store():
  """Returns a MetadataStore backed by an in-memory SQLITE database."""
  config = metadata_store_pb2.ConnectionConfig()
  config.sqlite.SetInParent()
  return metadata_store.MetadataStore(config)
def store(self):
  """Returns a MetadataStore built from this object's TFX metadata config."""
  config = self.get_tfx_metadata_config()
  return metadata_store.MetadataStore(config)
def test_unset_connection_config(self):
  """Constructing a store from an unset ConnectionConfig always raises."""
  empty_config = metadata_store_pb2.ConnectionConfig()
  for _ in range(3):
    with self.assertRaises(RuntimeError):
      metadata_store.MetadataStore(empty_config)
def store(self) -> metadata_store.MetadataStore:
  """General property that hooks into TFX metadata store."""
  # TODO [ENG-133]: this always gets recreated, is this intended?
  tfx_config = self.get_tfx_metadata_config()
  return metadata_store.MetadataStore(tfx_config)
def _get_empty_metadata_store(self):
  """Returns an empty in memory mlmd store."""
  config = metadata_store_pb2.ConnectionConfig()
  # A fake database proto selects the transient in-memory backend.
  config.fake_database.SetInParent()
  return metadata_store.MetadataStore(config)