Ejemplo n.º 1
0
    def test_bad_construction(self):
        """Checks that PrestoExampleGen rejects the configs built below.

        A `RuntimeError` is expected both for a default-constructed
        connection config and for a config that only sets the port.
        """
        # Case 1: no connection fields set at all.
        blank = presto_config_pb2.PrestoConnConfig()
        with self.assertRaises(RuntimeError):
            component.PrestoExampleGen(conn_config=blank, query='')

        # Case 2: only the port is provided.
        port_only = presto_config_pb2.PrestoConnConfig(port=8080)
        with self.assertRaises(RuntimeError):
            component.PrestoExampleGen(conn_config=port_only, query='')
Ejemplo n.º 2
0
  def _extract_conn_config(self, custom_config):
    """Decodes a JSON-serialized CustomConfig into a PrestoConnConfig.

    Args:
      custom_config: JSON input accepted by `json_format.Parse`, parsed into
        an `example_gen_pb2.CustomConfig` message.

    Returns:
      The `PrestoConnConfig` unpacked from the parsed message's
      `custom_config` field.
    """
    wrapper = example_gen_pb2.CustomConfig()
    json_format.Parse(custom_config, wrapper)

    presto_conn = presto_config_pb2.PrestoConnConfig()
    packed_any = wrapper.custom_config
    packed_any.Unpack(presto_conn)
    return presto_conn
Ejemplo n.º 3
0
def _PrestoToExample(  # pylint: disable=invalid-name
        pipeline: beam.Pipeline,
        input_dict: Dict[Text, List[types.Artifact]],  # pylint: disable=unused-argument
        exec_properties: Dict[Text, Any],
        split_pattern: Text) -> beam.pvalue.PCollection:
    """Build the Beam steps that query Presto and emit TF examples.

    Args:
        pipeline: beam pipeline.
        input_dict: Input dict from input key to a list of Artifacts. Not
            used by this function.
        exec_properties: A dict of execution properties. Its 'custom_config'
            entry is parsed as a JSON-serialized CustomConfig whose
            `custom_config` field is unpacked into a PrestoConnConfig.
        split_pattern: Split.pattern in Input config, a Presto sql string.

    Returns:
        PCollection of TF examples.
    """
    # Decode the wrapper message first, then unpack the Presto settings
    # carried inside it.
    wrapper = example_gen_pb2.CustomConfig()
    json_format.Parse(exec_properties['custom_config'], wrapper)
    presto_config = presto_config_pb2.PrestoConnConfig()
    wrapper.custom_config.Unpack(presto_config)

    client = _deserialize_conn_config(presto_config)

    # Same three labelled stages as a single chained expression, applied in
    # the same order.
    query_input = pipeline | 'Query' >> beam.Create([split_pattern])
    query_output = (
        query_input | 'QueryTable' >> beam.ParDo(_ReadPrestoDoFn(client)))
    return query_output | 'ToTFExample' >> beam.Map(_row_to_example)
Ejemplo n.º 4
0
  def testDeserializeConnConfig(self):
    """Compares a deserialized connection with one built directly."""
    proto = presto_config_pb2.PrestoConnConfig(
        host='presto.localhost', max_attempts=10)

    actual = executor._deserialize_conn_config(proto)
    expected = prestodb.dbapi.connect('presto.localhost', max_attempts=10)

    # port and auth are not set on the proto, so those two comparisons
    # exercise the default port and default auth values.
    for attr in ('host', 'port', 'auth', 'max_attempts'):
      self.assertEqual(getattr(expected, attr), getattr(actual, attr))
Ejemplo n.º 5
0
from tfx.orchestration import metadata
from tfx.orchestration import pipeline
from tfx.orchestration.beam.beam_dag_runner import BeamDagRunner
from tfx.proto import evaluator_pb2
from tfx.proto import pusher_pb2
from tfx.proto import trainer_pb2

_pipeline_name = 'chicago_taxi_presto'

# Root directory for this example; the taxi utility module is looked up under
# ~/taxi. Adjust these locations to match your own setup.
_taxi_root = os.path.join(os.environ['HOME'], 'taxi')
# Connection settings for Presto, matching the tutorial in README.md.
_presto_config = presto_config_pb2.PrestoConnConfig(
    host='localhost',
    port=8080,
    user='******',
    catalog='hive',
    schema='default',
)
# SQL used to pull the Chicago taxi examples out of Presto, following the
# setup described in README.md.
_query = 'SELECT * FROM chicago_taxi_trips_parquet'
# User-supplied Python module injected into the TFX components; Transform and
# Trainer both need user-defined functions from it to run successfully.
_module_file = os.path.join(_taxi_root, 'taxi_utils.py')
# Export location for the trained model: Pusher writes here, and a model
# server can listen on this path.
_serving_model_dir = os.path.join(_taxi_root, 'serving_model', _pipeline_name)

# Directory and data locations.  This example assumes all of the chicago taxi
# example code and metadata library is relative to $HOME, but you can store
# these files anywhere on your local filesystem.
Ejemplo n.º 6
0
 def setUp(self):
   """Stores a localhost:8080 Presto connection config on the instance."""
   super(ComponentTest, self).setUp()
   conn = presto_config_pb2.PrestoConnConfig(host='localhost', port=8080)
   self.conn_config = conn