def testBadConstruction(self):
    """Check that incomplete connection configs are rejected at construction.

    Building a PrestoExampleGen with an empty connection config, or with one
    that only sets the port (both with an empty query), must raise
    RuntimeError.
    """
    # Connection config with no fields populated.
    blank_config = presto_config_pb2.PrestoConnConfig()
    with self.assertRaises(RuntimeError):
        component.PrestoExampleGen(conn_config=blank_config, query='')

    # Connection config carrying only a port.
    config_with_port = presto_config_pb2.PrestoConnConfig(port=8080)
    with self.assertRaises(RuntimeError):
        component.PrestoExampleGen(conn_config=config_with_port, query='')
def _extract_conn_config(self, custom_config):
    """Recover the Presto connection config from a serialized CustomConfig.

    Args:
        custom_config: JSON form of an example_gen_pb2.CustomConfig, decoded
            with proto_utils.json_to_proto.

    Returns:
        The presto_config_pb2.PrestoConnConfig unpacked from the decoded
        message's `custom_config` field.
    """
    wrapper = example_gen_pb2.CustomConfig()
    proto_utils.json_to_proto(custom_config, wrapper)

    presto_conn = presto_config_pb2.PrestoConnConfig()
    # NOTE: the value returned by Unpack is discarded here, as in the
    # surrounding code; no type-mismatch check is performed.
    wrapper.custom_config.Unpack(presto_conn)
    return presto_conn
def _extract_conn_config(self, custom_config):
    """Recover the Presto connection config from a serialized CustomConfig.

    Args:
        custom_config: JSON text of an example_gen_pb2.CustomConfig.

    Returns:
        The presto_config_pb2.PrestoConnConfig unpacked from the parsed
        message's `custom_config` field.
    """
    # json_format.Parse fills the message it is given and hands it back.
    parsed_wrapper = json_format.Parse(
        custom_config, example_gen_pb2.CustomConfig())

    presto_conn = presto_config_pb2.PrestoConnConfig()
    parsed_wrapper.custom_config.Unpack(presto_conn)
    return presto_conn
def testDeserializeConnConfig(self):
    """Compare a deserialized connection with one built directly via prestodb.

    The proto only sets `host` and `max_attempts`; `port` and `auth` are left
    unset so that comparing them exercises the default values.
    """
    proto_config = presto_config_pb2.PrestoConnConfig(
        host='presto.localhost',
        max_attempts=10,
    )
    actual_conn = executor._deserialize_conn_config(proto_config)
    expected_conn = prestodb.dbapi.connect('presto.localhost', max_attempts=10)

    compared_attributes = (
        'host',
        'port',  # test for default port value
        'auth',  # test for default auth value
        'max_attempts',
    )
    for attribute in compared_attributes:
        self.assertEqual(
            getattr(expected_conn, attribute),
            getattr(actual_conn, attribute),
        )
def _PrestoToExample(  # pylint: disable=invalid-name
    pipeline: beam.Pipeline,
    exec_properties: Dict[Text, Any],
    split_pattern: Text) -> beam.pvalue.PCollection:
    """Build the Beam stages that query Presto and emit TF examples.

    The connection settings are read from `exec_properties['custom_config']`,
    which holds the JSON text of an example_gen_pb2.CustomConfig whose
    `custom_config` field packs a presto_config_pb2.PrestoConnConfig.

    Args:
        pipeline: beam pipeline.
        exec_properties: A dict of execution properties.
        split_pattern: Split.pattern in Input config, a Presto sql string.

    Returns:
        PCollection of TF examples.
    """
    # Decode the JSON wrapper, then unpack the Presto-specific payload.
    custom_config_proto = example_gen_pb2.CustomConfig()
    json_format.Parse(exec_properties['custom_config'], custom_config_proto)
    presto_conn_config = presto_config_pb2.PrestoConnConfig()
    custom_config_proto.custom_config.Unpack(presto_conn_config)

    presto_client = _deserialize_conn_config(presto_conn_config)

    # The single SQL string is the only input element; each later stage
    # consumes the output of the one before it.
    queries = pipeline | 'Query' >> beam.Create([split_pattern])
    rows = queries | 'QueryTable' >> beam.ParDo(_ReadPrestoDoFn(presto_client))
    return rows | 'ToTFExample' >> beam.Map(_row_to_example)
from tfx.orchestration import metadata
from tfx.orchestration import pipeline
from tfx.orchestration.beam.beam_dag_runner import BeamDagRunner
from tfx.proto import evaluator_pb2
from tfx.proto import pusher_pb2
from tfx.proto import trainer_pb2

_pipeline_name = 'chicago_taxi_presto'

# Root of the taxi example. The taxi data is expected under ~/taxi/data and
# the taxi utility module directly under ~/taxi; adjust to your own layout.
_taxi_root = os.path.join(os.environ['HOME'], 'taxi')

# Presto connection settings matching the tutorial in README.md.
_presto_config = presto_config_pb2.PrestoConnConfig(
    host='localhost',
    port=8080,
    user='******',
    catalog='hive',
    schema='default',
)

# SQL that pulls the Chicago taxi examples out of Presto, per the setup
# described in README.md.
_query = 'SELECT * FROM chicago_taxi_trips_parquet'

# Python module whose user-defined functions are injected into the TFX
# components. Transform and Trainer both need them to run successfully.
_module_file = os.path.join(_taxi_root, 'taxi_utils.py')

# Location the model server can listen to; Pusher writes the trained model
# here.
_serving_model_dir = os.path.join(_taxi_root, 'serving_model', _pipeline_name)

# Directory and data locations. This example assumes all of the chicago taxi
# example code and metadata library is relative to $HOME, but you can store
# these files anywhere on your local filesystem.
from tfx.examples.custom_components.presto_example_gen.proto import presto_config_pb2
from tfx.examples.custom_components.presto_example_gen.presto_component.component import PrestoExampleGen

# Presto addresses tables as <catalog>.<schema>.<table> and does not accept
# backtick-quoted identifiers (quote with double quotes if needed), so the
# placeholder is written in that form rather than the BigQuery-style one.
query = """ SELECT * FROM <catalog>.<schema>.<table_name> """

# Connection settings for the Presto server at localhost:8080.
presto_config = presto_config_pb2.PrestoConnConfig(
    host='localhost', port=8080)

# The connection config is passed positionally, the SQL string as `query`.
example_gen = PrestoExampleGen(presto_config, query=query)
def setUp(self):
    """Create the localhost:8080 Presto connection config used by the tests."""
    super().setUp()
    local_presto = presto_config_pb2.PrestoConnConfig(
        host='localhost',
        port=8080,
    )
    self.conn_config = local_presto