def display_data(self):
  """Build display metadata for the wrapped callable and its arguments.

  Returns:
    dict of display items; ``args``/``kwargs`` entries are dropped when
    they hold their empty defaults (``'()'`` / ``'{}'``).
  """
  fn = self._fn
  return {
      'fn': fn.__name__ if hasattr(fn, '__name__') else fn.__class__,
      'args': DisplayDataItem(str(self._args)).drop_if_default('()'),
      'kwargs': DisplayDataItem(str(self._kwargs)).drop_if_default('{}'),
  }
def display_data(self):
  """Return fixed items exercising drop_if_none / drop_if_default."""
  items = {}
  items['some_val'] = DisplayDataItem('something').drop_if_none()
  items['non_val'] = DisplayDataItem(None).drop_if_none()
  items['def_val'] = DisplayDataItem(True).drop_if_default(True)
  items['nodef_val'] = DisplayDataItem(True).drop_if_default(False)
  return items
def display_data(self):
  # type: () -> dict
  """Extend the parent transform's display data with extra typed items."""
  dd = super(MyPTransform, self).display_data()
  dd['dd_string'] = DisplayDataItem(
      'dd_string_value', label='dd_string_label')
  dd['dd_bool'] = DisplayDataItem(False, label='dd_bool_label')
  # NOTE(review): the key/label say "int" but the value is the float 1.1;
  # preserved as-is — confirm whether an integer was intended.
  dd['dd_int'] = DisplayDataItem(1.1, label='dd_int_label')
  return dd
def display_data(self):
  """Describe this text source's configuration for monitoring UIs."""
  dd = super(_TextSource, self).display_data()
  dd['strip_newline'] = DisplayDataItem(
      self._strip_trailing_newlines, label='Strip Trailing New Lines')
  dd['buffer_size'] = DisplayDataItem(self._buffer_size, label='Buffer Size')
  dd['coder'] = DisplayDataItem(self._coder.__class__, label='Coder')
  return dd
def display_data(self):
  """Expose the file pattern and compression type as display data."""
  pattern_item = DisplayDataItem(str(self._pattern), label="File Pattern")
  compression_item = DisplayDataItem(
      str(self._compression_type), label='Compression Type')
  return {'file_pattern': pattern_item, 'compression': compression_item}
def display_data(self):
  """Display metadata for a Pub/Sub transform (topic, labels, attributes)."""
  items = {}
  items['topic'] = DisplayDataItem(self.full_topic, label='Pubsub Topic')
  items['id_label'] = DisplayDataItem(
      self.id_label, label='ID Label Attribute')
  # Omitted entirely when with_attributes is None.
  items['with_attributes'] = DisplayDataItem(
      self.with_attributes, label='With Attributes').drop_if_none()
  items['timestamp_attribute'] = DisplayDataItem(
      self.timestamp_attribute, label='Timestamp Attribute')
  return items
def _display_data(num_quantiles, key, reverse):
  """Build display data for a quantiles computation.

  Args:
    num_quantiles: number of quantiles requested.
    key: comparer key callable or object; its name is what gets displayed.
    reverse: whether ordering is reversed (rendered via ``str``).
  """
  if hasattr(key, '__name__'):
    key_name = key.__name__
  else:
    key_name = key.__class__.__name__
  return {
      'num_quantiles': DisplayDataItem(num_quantiles, label="Quantile Count"),
      'key': DisplayDataItem(key_name, label='Record Comparer Key'),
      'reverse': DisplayDataItem(str(reverse), label='Is reversed'),
  }
def display_data(self):
  """Surface the Bigtable project/instance/table ids as display data."""
  opts = self.beam_options
  return {
      'projectId': DisplayDataItem(
          opts['project_id'], label='Bigtable Project Id'),
      'instanceId': DisplayDataItem(
          opts['instance_id'], label='Bigtable Instance Id'),
      'tableId': DisplayDataItem(opts['table_id'], label='Bigtable Table Id'),
  }
def display_data(self):
  """Display data for a Pub/Sub endpoint; None-valued entries are dropped."""
  items = {}
  items['id_label'] = DisplayDataItem(
      self.id_label, label='ID Label Attribute').drop_if_none()
  items['topic'] = DisplayDataItem(
      self.full_topic, label='Pubsub Topic').drop_if_none()
  items['subscription'] = DisplayDataItem(
      self.full_subscription, label='Pubsub Subscription').drop_if_none()
  return items
def display_data(self):
  """Display data for a sharded file sink."""
  full_pattern = '{}{}{}'.format(
      self.file_path_prefix, self.shard_name_format, self.file_name_suffix)
  items = {}
  # Hidden when left at the default of 0 (shard count decided by the runner).
  items['shards'] = DisplayDataItem(
      self.num_shards, label='Number of Shards').drop_if_default(0)
  # NOTE(review): unlike the other entries, this one carries no label.
  items['compression'] = DisplayDataItem(str(self.compression_type))
  items['file_pattern'] = DisplayDataItem(full_pattern, label='File Pattern')
  return items
def _display_data(num_quantiles, key, reverse, weighted, input_batched):
  """Build display data for a (possibly weighted / batched) quantiles run.

  Args:
    num_quantiles: number of quantiles requested.
    key: comparer key callable or object; its name is what gets displayed.
    reverse: whether ordering is reversed.
    weighted: whether inputs carry weights.
    input_batched: whether the input arrives pre-batched.
  """
  if hasattr(key, '__name__'):
    key_name = key.__name__
  else:
    key_name = key.__class__.__name__
  return {
      'num_quantiles': DisplayDataItem(num_quantiles, label='Quantile Count'),
      'key': DisplayDataItem(key_name, label='Record Comparer Key'),
      'reverse': DisplayDataItem(str(reverse), label='Is Reversed'),
      'weighted': DisplayDataItem(str(weighted), label='Is Weighted'),
      'input_batched': DisplayDataItem(
          str(input_batched), label='Is Input Batched'),
  }
def display_data(self):
  """Display data for a BigQuery sink: table spec (if set) plus validation."""
  items = {}
  if self.table_reference is not None:
    table_spec = '{}.{}'.format(
        self.table_reference.datasetId, self.table_reference.tableId)
    # Prefix the project only when one is supplied.
    if self.table_reference.projectId is not None:
      table_spec = '{}:{}'.format(self.table_reference.projectId, table_spec)
    items['table'] = DisplayDataItem(table_spec, label='Table')
  items['validation'] = DisplayDataItem(
      self.validate, label="Validation Enabled")
  return items
def display_data(self):
  """Display data for a top-n combine: n and the comparer's name."""
  comparer = self._compare
  if hasattr(comparer, '__name__'):
    comparer_name = comparer.__name__
  else:
    comparer_name = comparer.__class__.__name__
  return {
      'n': self._n,
      'compare': DisplayDataItem(comparer_name).drop_if_none(),
  }
def test_remote_runner_display_data(self):
  # Build a pipeline on the Dataflow runner so that display data from
  # SpecialParDo / SpecialDoFn ends up serialized in the job description.
  remote_runner = DataflowRunner()
  p = Pipeline(remote_runner, options=PipelineOptions(self.default_properties))
  now = datetime.now()
  # pylint: disable=expression-not-assigned
  (p | ptransform.Create([1, 2, 3, 4, 5])
   | 'Do' >> SpecialParDo(SpecialDoFn(), now))
  # TODO(BEAM-366) Enable runner API on this test.
  p.run(test_runner_api=False)
  job_dict = json.loads(str(remote_runner.job))
  # Keep only the steps that actually carry display data.
  steps = [
      step for step in job_dict['steps']
      if len(step['properties'].get('display_data', [])) > 0
  ]
  # NOTE(review): indexes the second such step — presumably the ParDo step
  # (Create comes first); confirm against the generated job layout.
  step = steps[1]
  disp_data = step['properties']['display_data']
  nspace = SpecialParDo.__module__ + '.'
  expected_data = [{
      'type': 'TIMESTAMP',
      'namespace': nspace + 'SpecialParDo',
      'value': DisplayDataItem._format_value(now, 'TIMESTAMP'),
      'key': 'a_time'
  }, {
      'type': 'STRING',
      'namespace': nspace + 'SpecialParDo',
      'value': nspace + 'SpecialParDo',
      'key': 'a_class',
      'shortValue': 'SpecialParDo'
  }, {
      'type': 'INTEGER',
      'namespace': nspace + 'SpecialDoFn',
      'value': 42,
      'key': 'dofn_value'
  }]
  # Order-insensitive comparison (the dict elements are unhashable).
  self.assertUnhashableCountEqual(disp_data, expected_data)
def display_data(self):
  """Display data for a BigQuery source: the query, or else the table spec."""
  if self.query is not None:
    items = {'query': DisplayDataItem(self.query, label='Query')}
  else:
    ref = self.table_reference
    if ref.projectId is not None:
      table_spec = '{}:{}.{}'.format(
          ref.projectId, ref.datasetId, ref.tableId)
    else:
      table_spec = '{}.{}'.format(ref.datasetId, ref.tableId)
    items = {'table': DisplayDataItem(table_spec, label='Table')}
  items['validation'] = DisplayDataItem(
      self.validate, label='Validation Enabled')
  return items
def display_data(self):
  """Return a mix of raw values, a rich item, a class, and a nested item."""
  items = {}
  items['static_integer'] = 120
  items['static_string'] = 'static me!'
  items['complex_url'] = DisplayDataItem(
      'github.com', url='http://github.com', label='The URL')
  items['python_class'] = HasDisplayData
  items['my_dd'] = self.my_display_data
  return items
def display_data(self):
  """Display data for a BigQuery write: dispositions, schema, and a marker
  that this transform launches BigQuery jobs.

  Returns:
    dict mapping display keys to string values / DisplayDataItem.
  """
  # Built as one literal for consistency with the sibling display_data
  # implementations in this file (previously a literal plus two
  # post-construction mutations, with no behavioral difference).
  return {
      'create_disposition': str(self.create_disposition),
      'write_disposition': str(self.write_disposition),
      'schema': str(self.schema),
      'launchesBigQueryJobs': DisplayDataItem(
          True, label="This Dataflow job launches bigquery jobs."),
  }
def display_data(self):
  """Display data for a BigQuery write configured with extra parameters."""
  items = {}
  items['create_disposition'] = str(self.create_disposition)
  items['write_disposition'] = str(self.write_disposition)
  items['additional_bq_params'] = str(self.additional_bq_parameters)
  items['schema'] = str(self.schema)
  items['launchesBigQueryJobs'] = DisplayDataItem(
      True, label="This Dataflow job launches bigquery jobs.")
  return items
def display_data(self):
  """Display data for a Spanner read: configured SQL/table operations
  and the transaction, each included only when non-empty.

  Returns:
    dict with optional 'sql', 'table', and 'transaction' DisplayDataItems.
  """
  res = {}
  sql = []
  table = []
  if self._read_operations is not None:
    for op in self._read_operations:
      # Fixed: use truthiness rather than the non-idiomatic `is True`
      # identity check (PEP 8); assumes is_sql/is_table are plain bools.
      if op.is_sql:
        sql.append(op.kwargs)
      elif op.is_table:
        table.append(op.kwargs)
  if sql:
    res['sql'] = DisplayDataItem(str(sql), label='Sql')
  if table:
    res['table'] = DisplayDataItem(str(table), label='Table')
  if self._transaction:
    res['transaction'] = DisplayDataItem(
        str(self._transaction), label='transaction')
  return res
def test_remote_runner_display_data(self):
  # End-to-end check that display data declared by user transforms is
  # translated into the Dataflow job description.
  remote_runner = DataflowRunner()
  p = Pipeline(remote_runner, options=PipelineOptions(self.default_properties))

  # TODO: Should not subclass ParDo. Switch to PTransform as soon as
  # composite transforms support display data.
  class SpecialParDo(beam.ParDo):
    def __init__(self, fn, now):
      super(SpecialParDo, self).__init__(fn)
      self.fn = fn
      self.now = now

    # Make this a list to be accessible within closure
    def display_data(self):
      return {
          'asubcomponent': self.fn,
          'a_class': SpecialParDo,
          'a_time': self.now
      }

  class SpecialDoFn(beam.DoFn):
    def display_data(self):
      return {'dofn_value': 42}

    def process(self):
      pass

  now = datetime.now()
  # pylint: disable=expression-not-assigned
  (p | ptransform.Create([1, 2, 3, 4, 5])
   | 'Do' >> SpecialParDo(SpecialDoFn(), now))
  remote_runner.job = apiclient.Job(p.options)
  # Invoke the base runner's run() directly so the pipeline is translated
  # into a job description without actually submitting it.
  super(DataflowRunner, remote_runner).run(p)
  job_dict = json.loads(str(remote_runner.job))
  # Keep only the steps that actually carry display data.
  steps = [
      step for step in job_dict['steps']
      if len(step['properties'].get('display_data', [])) > 0
  ]
  step = steps[0]
  disp_data = step['properties']['display_data']
  # Sort both actual and expected by (namespace, key) so the comparison is
  # insensitive to serialization order.
  disp_data = sorted(disp_data, key=lambda x: x['namespace'] + x['key'])
  nspace = SpecialParDo.__module__ + '.'
  expected_data = [{
      'type': 'TIMESTAMP',
      'namespace': nspace + 'SpecialParDo',
      'value': DisplayDataItem._format_value(now, 'TIMESTAMP'),
      'key': 'a_time'
  }, {
      'type': 'STRING',
      'namespace': nspace + 'SpecialParDo',
      'value': nspace + 'SpecialParDo',
      'key': 'a_class',
      'shortValue': 'SpecialParDo'
  }, {
      'type': 'INTEGER',
      'namespace': nspace + 'SpecialDoFn',
      'value': 42,
      'key': 'dofn_value'
  }]
  expected_data = sorted(
      expected_data, key=lambda x: x['namespace'] + x['key'])
  self.assertEqual(len(disp_data), 3)
  self.assertEqual(disp_data, expected_data)
def display_data(self):
  """Display data for a Spanner write: connection ids and batching limits."""
  items = {}
  items['project_id'] = DisplayDataItem(self._project_id, label='Project Id')
  items['instance_id'] = DisplayDataItem(
      self._instance_id, label='Instance Id')
  items['pool'] = DisplayDataItem(str(self._pool), label='Pool')
  items['database'] = DisplayDataItem(self._database_id, label='Database')
  items['batch_size'] = DisplayDataItem(
      self._max_batch_size_bytes, label="Batch Size")
  items['max_number_rows'] = DisplayDataItem(
      self._max_number_rows, label="Max Rows")
  items['max_number_cells'] = DisplayDataItem(
      self._max_number_cells, label="Max Cells")
  return items
def display_data(self):
  """Expose the fully-qualified Pub/Sub topic as display data."""
  topic_item = DisplayDataItem(self.full_topic, label='Pubsub Topic')
  return {'topic': topic_item}
def display_data(self):
  """Mark this transform as one that launches BigQuery jobs."""
  marker = DisplayDataItem(
      True, label="This Dataflow job launches bigquery jobs.")
  return {'launchesBigQueryJobs': marker}
def display_data(self):
  """Extend the parent sink's display data with the newline-append flag."""
  items = super(_TextSink, self).display_data()
  items['append_newline'] = DisplayDataItem(
      self._append_trailing_newlines, label='Append Trailing New Lines')
  return items
def display_data(self):
  """Display data holding the target table, when one is configured."""
  if self.table is None:
    return {}
  return {'table': DisplayDataItem(self.table, label='Table')}
def display_data(self):
  """Expose the configured file patterns as display data."""
  patterns = str(self._file_patterns)
  return {'file_patterns': DisplayDataItem(patterns, label='File Patterns')}
def display_data(self):
  """Display data for a Read: the source's class plus the source itself."""
  items = {}
  items['source'] = DisplayDataItem(
      self.source.__class__, label='Read Source')
  # Nest the source so its own display data is picked up as a sub-component.
  items['source_dd'] = self.source
  return items