コード例 #1
0
ファイル: ptransform.py プロジェクト: zahiralward/beam
 def display_data(self):
   """Describe the wrapped callable and the arguments stored with it.

   Returns:
     A dict with three entries: 'fn' is the callable's ``__name__`` when it
     has one and its class otherwise; 'args' and 'kwargs' are items holding
     the stringified arguments, marked with ``drop_if_default`` against
     '()' and '{}' respectively.
   """
   fn = self._fn
   if hasattr(fn, '__name__'):
     fn_label = fn.__name__
   else:
     fn_label = fn.__class__

   args_item = DisplayDataItem(str(self._args)).drop_if_default('()')
   kwargs_item = DisplayDataItem(str(self._kwargs)).drop_if_default('{}')
   return {'fn': fn_label, 'args': args_item, 'kwargs': kwargs_item}
コード例 #2
0
 def display_data(self):
     """Return sample items that exercise the two drop helpers.

     'some_val' and 'non_val' wrap 'something' and None with
     ``drop_if_none()``; 'def_val' and 'nodef_val' wrap True with
     ``drop_if_default(True)`` and ``drop_if_default(False)``.
     """
     items = {}
     items['some_val'] = DisplayDataItem('something').drop_if_none()
     items['non_val'] = DisplayDataItem(None).drop_if_none()
     items['def_val'] = DisplayDataItem(True).drop_if_default(True)
     items['nodef_val'] = DisplayDataItem(True).drop_if_default(False)
     return items
コード例 #3
0
 def display_data(self):  # type: () -> dict
   """Extend the parent's display data with three labelled items.

   Returns:
     The dict produced by the parent ``display_data()`` with 'dd_string',
     'dd_bool' and 'dd_int' entries added.
   """
   display = super(MyPTransform, self).display_data()
   display.update({
       'dd_string': DisplayDataItem(
           'dd_string_value', label='dd_string_label'),
       'dd_bool': DisplayDataItem(False, label='dd_bool_label'),
       'dd_int': DisplayDataItem(1.1, label='dd_int_label'),
   })
   return display
コード例 #4
0
ファイル: textio.py プロジェクト: stallyNon/beam
 def display_data(self):
   """Add this text source's settings to the parent's display data.

   Returns:
     The dict produced by the parent ``display_data()`` with
     'strip_newline', 'buffer_size' and 'coder' items added. The coder is
     reported by its class.
   """
   display = super(_TextSource, self).display_data()
   display.update({
       'strip_newline': DisplayDataItem(
           self._strip_trailing_newlines, label='Strip Trailing New Lines'),
       'buffer_size': DisplayDataItem(self._buffer_size, label='Buffer Size'),
       'coder': DisplayDataItem(self._coder.__class__, label='Coder'),
   })
   return display
コード例 #5
0
 def display_data(self):
     """Report the file pattern and compression type as strings.

     Returns:
       A dict with 'file_pattern' and 'compression' items, each holding
       the ``str()`` of the corresponding attribute.
     """
     pattern_item = DisplayDataItem(str(self._pattern), label='File Pattern')
     compression_item = DisplayDataItem(
         str(self._compression_type), label='Compression Type')
     return {'file_pattern': pattern_item, 'compression': compression_item}
コード例 #6
0
 def display_data(self):
   """Return display data describing this Pub/Sub topic configuration.

   The optional attributes (``id_label``, ``with_attributes`` and
   ``timestamp_attribute``) are all wrapped with ``drop_if_none()`` so that
   an unset (None) attribute is marked for omission instead of being
   reported as an item whose value is None.

   Returns:
     A dict keyed by 'topic', 'id_label', 'with_attributes' and
     'timestamp_attribute', each mapped to a labelled DisplayDataItem.
   """
   return {
       'topic': DisplayDataItem(self.full_topic, label='Pubsub Topic'),
       # Optional attribute: mark for dropping when it was never set.
       'id_label': DisplayDataItem(
           self.id_label, label='ID Label Attribute').drop_if_none(),
       'with_attributes': DisplayDataItem(
           self.with_attributes, label='With Attributes').drop_if_none(),
       # Optional attribute: mark for dropping when it was never set.
       'timestamp_attribute': DisplayDataItem(
           self.timestamp_attribute,
           label='Timestamp Attribute').drop_if_none(),
   }
コード例 #7
0
 def _display_data(num_quantiles, key, reverse):
   """Build the display data dict for a quantiles configuration.

   Args:
     num_quantiles: value reported under the 'Quantile Count' label.
     key: comparer key; reported by its ``__name__`` when it has one,
       otherwise by the name of its class.
     reverse: reported as ``str(reverse)`` under the 'Is reversed' label.

   Returns:
     A dict with 'num_quantiles', 'key' and 'reverse' items.
   """
   if hasattr(key, '__name__'):
     key_name = key.__name__
   else:
     key_name = key.__class__.__name__

   quantiles_item = DisplayDataItem(num_quantiles, label='Quantile Count')
   key_item = DisplayDataItem(key_name, label='Record Comparer Key')
   reverse_item = DisplayDataItem(str(reverse), label='Is reversed')
   return {
       'num_quantiles': quantiles_item,
       'key': key_item,
       'reverse': reverse_item,
   }
コード例 #8
0
 def display_data(self):
   """Report the Bigtable project, instance and table identifiers.

   The values are read from ``self.beam_options`` under the keys
   'project_id', 'instance_id' and 'table_id'.
   """
   options = self.beam_options
   project_item = DisplayDataItem(
       options['project_id'], label='Bigtable Project Id')
   instance_item = DisplayDataItem(
       options['instance_id'], label='Bigtable Instance Id')
   table_item = DisplayDataItem(
       options['table_id'], label='Bigtable Table Id')
   return {
       'projectId': project_item,
       'instanceId': instance_item,
       'tableId': table_item,
   }
コード例 #9
0
ファイル: pubsub.py プロジェクト: Sil1991/gcpdf-demo
 def display_data(self):
   """Report the ID label, topic and subscription of this Pub/Sub object.

   Each item is marked with ``drop_if_none()``.
   """
   id_label_item = DisplayDataItem(
       self.id_label, label='ID Label Attribute').drop_if_none()
   topic_item = DisplayDataItem(
       self.full_topic, label='Pubsub Topic').drop_if_none()
   subscription_item = DisplayDataItem(
       self.full_subscription, label='Pubsub Subscription').drop_if_none()
   return {
       'id_label': id_label_item,
       'topic': topic_item,
       'subscription': subscription_item,
   }
コード例 #10
0
ファイル: fileio.py プロジェクト: gyamxxx/beam
 def display_data(self):
   """Report shard count, compression type and the output file pattern.

   The file pattern is the concatenation of the path prefix, the shard name
   format and the file name suffix. The 'shards' item is marked with
   ``drop_if_default(0)``.
   """
   file_pattern = '{}{}{}'.format(
       self.file_path_prefix, self.shard_name_format, self.file_name_suffix)
   shards_item = DisplayDataItem(
       self.num_shards, label='Number of Shards').drop_if_default(0)
   return {
       'shards': shards_item,
       'compression': DisplayDataItem(str(self.compression_type)),
       'file_pattern': DisplayDataItem(file_pattern, label='File Pattern'),
   }
コード例 #11
0
 def _display_data(num_quantiles, key, reverse, weighted, input_batched):
   """Build the display data dict for a quantiles configuration.

   Args:
     num_quantiles: value reported under the 'Quantile Count' label.
     key: comparer key; reported by its ``__name__`` when it has one,
       otherwise by the name of its class.
     reverse: reported as a string under 'Is Reversed'.
     weighted: reported as a string under 'Is Weighted'.
     input_batched: reported as a string under 'Is Input Batched'.

   Returns:
     A dict with one DisplayDataItem per argument.
   """
   if hasattr(key, '__name__'):
     key_name = key.__name__
   else:
     key_name = key.__class__.__name__

   display = {}
   display['num_quantiles'] = DisplayDataItem(
       num_quantiles, label='Quantile Count')
   display['key'] = DisplayDataItem(key_name, label='Record Comparer Key')
   display['reverse'] = DisplayDataItem(str(reverse), label='Is Reversed')
   display['weighted'] = DisplayDataItem(str(weighted), label='Is Weighted')
   display['input_batched'] = DisplayDataItem(
       str(input_batched), label='Is Input Batched')
   return display
コード例 #12
0
    def display_data(self):
        """Report the target table (when one is set) and the validation flag.

        The table is rendered as 'dataset.table', prefixed with 'project:'
        when the table reference carries a project id.
        """
        display = {}
        ref = self.table_reference
        if ref is not None:
            if ref.projectId is not None:
                table_spec = '{}:{}.{}'.format(
                    ref.projectId, ref.datasetId, ref.tableId)
            else:
                table_spec = '{}.{}'.format(ref.datasetId, ref.tableId)
            display['table'] = DisplayDataItem(table_spec, label='Table')

        display['validation'] = DisplayDataItem(
            self.validate, label='Validation Enabled')
        return display
コード例 #13
0
 def display_data(self):
   """Report ``n`` and the name of the comparison callable.

   The comparer is shown by its ``__name__`` when it has one, otherwise by
   the name of its class; the item is marked with ``drop_if_none()``.
   """
   comparer = self._compare
   if hasattr(comparer, '__name__'):
     comparer_name = comparer.__name__
   else:
     comparer_name = comparer.__class__.__name__

   return {
       'n': self._n,
       'compare': DisplayDataItem(comparer_name).drop_if_none(),
   }
コード例 #14
0
  def test_remote_runner_display_data(self):
    """Check the display data recorded in the job for the 'Do' ParDo step.

    Runs a small pipeline on a DataflowRunner, parses the runner's job as
    JSON, and compares the display data of the second step that carries any
    display data against the expected timestamp, class and integer items.
    """
    runner = DataflowRunner()
    pipeline = Pipeline(
        runner, options=PipelineOptions(self.default_properties))

    timestamp = datetime.now()
    # pylint: disable=expression-not-assigned
    (pipeline
     | ptransform.Create([1, 2, 3, 4, 5])
     | 'Do' >> SpecialParDo(SpecialDoFn(), timestamp))

    # TODO(BEAM-366) Enable runner API on this test.
    pipeline.run(test_runner_api=False)
    job = json.loads(str(runner.job))
    steps_with_display_data = [
        candidate for candidate in job['steps']
        if len(candidate['properties'].get('display_data', [])) > 0
    ]
    # The step under test is the second one that carries display data.
    actual = steps_with_display_data[1]['properties']['display_data']

    module_prefix = SpecialParDo.__module__ + '.'
    pardo_namespace = module_prefix + 'SpecialParDo'
    dofn_namespace = module_prefix + 'SpecialDoFn'
    expected = [
        {
            'type': 'TIMESTAMP',
            'namespace': pardo_namespace,
            'value': DisplayDataItem._format_value(timestamp, 'TIMESTAMP'),
            'key': 'a_time',
        },
        {
            'type': 'STRING',
            'namespace': pardo_namespace,
            'value': pardo_namespace,
            'key': 'a_class',
            'shortValue': 'SpecialParDo',
        },
        {
            'type': 'INTEGER',
            'namespace': dofn_namespace,
            'value': 42,
            'key': 'dofn_value',
        },
    ]
    self.assertUnhashableCountEqual(actual, expected)
0
    def display_data(self):
        """Report either the query or the source table, plus validation.

        When ``self.query`` is set it is reported as 'query'; otherwise the
        table reference is rendered as 'dataset.table', prefixed with
        'project:' when a project id is present, and reported as 'table'.
        """
        if self.query is not None:
            display = {'query': DisplayDataItem(self.query, label='Query')}
        else:
            ref = self.table_reference
            table_spec = '{}.{}'.format(ref.datasetId, ref.tableId)
            if ref.projectId is not None:
                table_spec = '{}:{}'.format(ref.projectId, table_spec)
            display = {'table': DisplayDataItem(table_spec, label='Table')}

        display['validation'] = DisplayDataItem(
            self.validate, label='Validation Enabled')
        return display
コード例 #16
0
ファイル: display_test.py プロジェクト: sbilac/incubator-beam
 def display_data(self):
   """Return a mix of display data value kinds.

   The dict holds a plain integer, a plain string, a DisplayDataItem with a
   URL and label, the ``HasDisplayData`` class itself, and whatever
   ``self.my_display_data`` holds.
   """
   url_item = DisplayDataItem(
       'github.com', url='http://github.com', label='The URL')
   display = {}
   display['static_integer'] = 120
   display['static_string'] = 'static me!'
   display['complex_url'] = url_item
   display['python_class'] = HasDisplayData
   display['my_dd'] = self.my_display_data
   return display
コード例 #17
0
 def display_data(self):
   """Report the dispositions, schema and a BigQuery-jobs marker.

   The dispositions and schema are reported as their ``str()`` form;
   'launchesBigQueryJobs' is a labelled item holding True.
   """
   return {
       'create_disposition': str(self.create_disposition),
       'write_disposition': str(self.write_disposition),
       'schema': str(self.schema),
       'launchesBigQueryJobs': DisplayDataItem(
           True, label='This Dataflow job launches bigquery jobs.'),
   }
コード例 #18
0
 def display_data(self):
   """Report dispositions, extra BigQuery parameters, schema and a marker.

   All attribute values are reported as their ``str()`` form;
   'launchesBigQueryJobs' is a labelled item holding True.
   """
   display = {}
   display['create_disposition'] = str(self.create_disposition)
   display['write_disposition'] = str(self.write_disposition)
   display['additional_bq_params'] = str(self.additional_bq_parameters)
   display['schema'] = str(self.schema)
   display['launchesBigQueryJobs'] = DisplayDataItem(
       True, label='This Dataflow job launches bigquery jobs.')
   return display
コード例 #19
0
ファイル: spannerio.py プロジェクト: mszb/beam
  def display_data(self):
    """Summarize the configured read operations and the transaction.

    Read operations whose ``is_sql`` is True contribute their kwargs to the
    'sql' item; otherwise those whose ``is_table`` is True contribute to the
    'table' item. Each item is only added when its list is non-empty, and
    'transaction' is only added when ``self._transaction`` is truthy.
    """
    sql_kwargs = []
    table_kwargs = []
    operations = self._read_operations
    if operations is not None:
      for operation in operations:
        if operation.is_sql is True:
          sql_kwargs.append(operation.kwargs)
          continue
        if operation.is_table is True:
          table_kwargs.append(operation.kwargs)

    display = {}
    if sql_kwargs:
      display['sql'] = DisplayDataItem(str(sql_kwargs), label='Sql')
    if table_kwargs:
      display['table'] = DisplayDataItem(str(table_kwargs), label='Table')
    if self._transaction:
      display['transaction'] = DisplayDataItem(
          str(self._transaction), label='transaction')
    return display
コード例 #20
0
  def test_remote_runner_display_data(self):
    """Check the display data recorded in the job for a custom ParDo step.

    Builds a pipeline whose ParDo subclass and DoFn both define
    ``display_data``, runs it through the ``run`` of DataflowRunner's parent
    class, and compares the display data of the first step that carries any
    against the expected timestamp, class and integer items.
    """
    remote_runner = DataflowRunner()
    p = Pipeline(remote_runner,
                 options=PipelineOptions(self.default_properties))

    # TODO: Should not subclass ParDo. Switch to PTransform as soon as
    # composite transforms support display data.
    class SpecialParDo(beam.ParDo):
      def __init__(self, fn, now):
        super(SpecialParDo, self).__init__(fn)
        self.fn = fn
        self.now = now

      # Exposes the wrapped fn, this class and the timestamp passed to
      # __init__ as display data values.
      def display_data(self):
        return {'asubcomponent': self.fn,
                'a_class': SpecialParDo,
                'a_time': self.now}

    class SpecialDoFn(beam.DoFn):
      def display_data(self):
        return {'dofn_value': 42}

      def process(self):
        pass

    now = datetime.now()
    # pylint: disable=expression-not-assigned
    (p | ptransform.Create([1, 2, 3, 4, 5])
     | 'Do' >> SpecialParDo(SpecialDoFn(), now))

    # Assign the job by hand and call the parent class's run() rather than
    # DataflowRunner.run().
    remote_runner.job = apiclient.Job(p.options)
    super(DataflowRunner, remote_runner).run(p)
    job_dict = json.loads(str(remote_runner.job))
    # Keep only the steps that carry a non-empty display_data property.
    steps = [step
             for step in job_dict['steps']
             if len(step['properties'].get('display_data', [])) > 0]
    step = steps[0]
    disp_data = step['properties']['display_data']
    # Sort actual and expected items the same way so list equality does not
    # depend on the order in which the items were emitted.
    disp_data = sorted(disp_data, key=lambda x: x['namespace']+x['key'])
    nspace = SpecialParDo.__module__+ '.'
    expected_data = [{'type': 'TIMESTAMP', 'namespace': nspace+'SpecialParDo',
                      'value': DisplayDataItem._format_value(now, 'TIMESTAMP'),
                      'key': 'a_time'},
                     {'type': 'STRING', 'namespace': nspace+'SpecialParDo',
                      'value': nspace+'SpecialParDo', 'key': 'a_class',
                      'shortValue': 'SpecialParDo'},
                     {'type': 'INTEGER', 'namespace': nspace+'SpecialDoFn',
                      'value': 42, 'key': 'dofn_value'}]
    expected_data = sorted(expected_data, key=lambda x: x['namespace']+x['key'])
    self.assertEqual(len(disp_data), 3)
    self.assertEqual(disp_data, expected_data)
コード例 #21
0
ファイル: runner_test.py プロジェクト: sbilac/incubator-beam
  def test_remote_runner_display_data(self):
    """Check the display data recorded in the job for a custom ParDo step.

    Builds a pipeline whose ParDo subclass and DoFn both define
    ``display_data``, runs it through the ``run`` of DataflowRunner's parent
    class, and compares the display data of the first step that carries any
    against the expected timestamp, class and integer items.
    """
    remote_runner = DataflowRunner()
    p = Pipeline(remote_runner,
                 options=PipelineOptions(self.default_properties))

    # TODO: Should not subclass ParDo. Switch to PTransform as soon as
    # composite transforms support display data.
    class SpecialParDo(beam.ParDo):
      def __init__(self, fn, now):
        super(SpecialParDo, self).__init__(fn)
        self.fn = fn
        self.now = now

      # Exposes the wrapped fn, this class and the timestamp passed to
      # __init__ as display data values.
      def display_data(self):
        return {'asubcomponent': self.fn,
                'a_class': SpecialParDo,
                'a_time': self.now}

    class SpecialDoFn(beam.DoFn):
      def display_data(self):
        return {'dofn_value': 42}

      def process(self):
        pass

    now = datetime.now()
    # pylint: disable=expression-not-assigned
    (p | ptransform.Create([1, 2, 3, 4, 5])
     | 'Do' >> SpecialParDo(SpecialDoFn(), now))

    # Assign the job by hand and call the parent class's run() rather than
    # DataflowRunner.run().
    remote_runner.job = apiclient.Job(p.options)
    super(DataflowRunner, remote_runner).run(p)
    job_dict = json.loads(str(remote_runner.job))
    # Keep only the steps that carry a non-empty display_data property.
    steps = [step
             for step in job_dict['steps']
             if len(step['properties'].get('display_data', [])) > 0]
    step = steps[0]
    disp_data = step['properties']['display_data']
    # Sort actual and expected items the same way so list equality does not
    # depend on the order in which the items were emitted.
    disp_data = sorted(disp_data, key=lambda x: x['namespace']+x['key'])
    nspace = SpecialParDo.__module__+ '.'
    expected_data = [{'type': 'TIMESTAMP', 'namespace': nspace+'SpecialParDo',
                      'value': DisplayDataItem._format_value(now, 'TIMESTAMP'),
                      'key': 'a_time'},
                     {'type': 'STRING', 'namespace': nspace+'SpecialParDo',
                      'value': nspace+'SpecialParDo', 'key': 'a_class',
                      'shortValue': 'SpecialParDo'},
                     {'type': 'INTEGER', 'namespace': nspace+'SpecialDoFn',
                      'value': 42, 'key': 'dofn_value'}]
    expected_data = sorted(expected_data, key=lambda x: x['namespace']+x['key'])
    self.assertEqual(len(disp_data), 3)
    self.assertEqual(disp_data, expected_data)
コード例 #22
0
ファイル: spannerio.py プロジェクト: mszb/beam
 def display_data(self):
   """Report the Spanner connection settings and batching limits.

   Returns:
     A dict of labelled items for the project, instance, pool (stringified),
     database, and the batch-size, row-count and cell-count limits.
   """
   display = {}
   display['project_id'] = DisplayDataItem(
       self._project_id, label='Project Id')
   display['instance_id'] = DisplayDataItem(
       self._instance_id, label='Instance Id')
   display['pool'] = DisplayDataItem(str(self._pool), label='Pool')
   display['database'] = DisplayDataItem(self._database_id, label='Database')
   display['batch_size'] = DisplayDataItem(
       self._max_batch_size_bytes, label='Batch Size')
   display['max_number_rows'] = DisplayDataItem(
       self._max_number_rows, label='Max Rows')
   display['max_number_cells'] = DisplayDataItem(
       self._max_number_cells, label='Max Cells')
   return display
コード例 #23
0
 def display_data(self):
     """Report the full topic under the 'Pubsub Topic' label."""
     topic_item = DisplayDataItem(self.full_topic, label='Pubsub Topic')
     return {'topic': topic_item}
コード例 #24
0
 def display_data(self):
     """Return a single labelled item holding True under 'launchesBigQueryJobs'."""
     marker = DisplayDataItem(
         True, label='This Dataflow job launches bigquery jobs.')
     return {'launchesBigQueryJobs': marker}
コード例 #25
0
 def display_data(self):
     """Add the trailing-newline setting to the parent's display data.

     Returns:
       The dict produced by the parent ``display_data()`` with an
       'append_newline' item added.
     """
     display = super(_TextSink, self).display_data()
     display.update(
         append_newline=DisplayDataItem(
             self._append_trailing_newlines,
             label='Append Trailing New Lines'))
     return display
コード例 #26
0
ファイル: sql.py プロジェクト: jac2130/pysql-beam
 def display_data(self):
     """Report the table when one is set; otherwise return an empty dict."""
     if self.table is None:
         return {}
     return {'table': DisplayDataItem(self.table, label='Table')}
コード例 #27
0
 def display_data(self):
     """Report the stringified file patterns under 'File Patterns'."""
     patterns_text = str(self._file_patterns)
     patterns_item = DisplayDataItem(patterns_text, label='File Patterns')
     return {'file_patterns': patterns_item}
コード例 #28
0
ファイル: iobase.py プロジェクト: zhangminglei/beam
 def display_data(self):
     """Report the source's class and the source object itself.

     Returns:
       A dict with 'source' (a labelled item holding the source's class)
       and 'source_dd' (the source object, passed through unchanged).
     """
     source = self.source
     class_item = DisplayDataItem(source.__class__, label='Read Source')
     return {'source': class_item, 'source_dd': source}