Example #1
 def test_pcolls_to_pcoll_id(self):
     p = beam.Pipeline(interactive_runner.InteractiveRunner())
     # pylint: disable=range-builtin-not-iterating
     init_pcoll = p | 'Init Create' >> beam.Impulse()
     _, ctx = p.to_runner_api(use_fake_coders=True, return_context=True)
     self.assertEqual(instr.pcolls_to_pcoll_id(p, ctx),
                      {str(init_pcoll): 'ref_PCollection_PCollection_1'})
Example #2
    def source(self, *labels):
        """Returns the StreamingCacheManager source.

        This is beam.Impulse() because unbounded sources will be marked with this
        and then the PipelineInstrument will replace these with a TestStream.
        """
        return beam.Impulse()
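The docstring above explains that this Impulse is only a placeholder that the PipelineInstrument later replaces with a TestStream. As a rough, hypothetical sketch (not part of this listing), a TestStream source of the kind that could stand in for the placeholder might look as follows; the element values and timestamps are made up for illustration:

import apache_beam as beam
from apache_beam.testing.test_stream import TestStream

# Hypothetical TestStream that could replace the beam.Impulse() placeholder:
# emit a few elements at watermark 0, then advance the watermark to infinity.
events = (
    TestStream()
    .advance_watermark_to(0)
    .add_elements(['a', 'b', 'c'])
    .advance_watermark_to_infinity())

with beam.Pipeline() as p:
  _ = p | 'Events' >> events | 'Print' >> beam.Map(print)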
Example #3
    def test_side_inputs(self):
        class SplitNumbersFn(DoFn):
            def process(self, element):
                if element < 0:
                    yield pvalue.TaggedOutput('tag_negative', element)
                else:
                    yield element

        class ProcessNumbersFn(DoFn):
            def process(self, element, negatives):
                yield element

        root_read = beam.Impulse()

        result = (self.pipeline
                  | 'read' >> root_read
                  | ParDo(SplitNumbersFn()).with_outputs('tag_negative',
                                                         main='positive'))
        positive, negative = result
        positive | ParDo(ProcessNumbersFn(), AsList(negative))

        self.pipeline.visit(self.visitor)

        root_transforms = [t.transform for t in self.visitor.root_transforms]
        self.assertEqual(root_transforms, [root_read])
        self.assertEqual(len(self.visitor.step_names), 3)
        self.assertEqual(len(self.visitor.views), 1)
        self.assertTrue(isinstance(self.visitor.views[0], pvalue.AsList))
Example #4
 def test_pcoll_to_pcoll_id(self):
     p = beam.Pipeline(interactive_runner.InteractiveRunner())
     ie.current_env().set_cache_manager(InMemoryCache(), p)
     # pylint: disable=bad-option-value
     init_pcoll = p | 'Init Create' >> beam.Impulse()
     _, ctx = p.to_runner_api(return_context=True)
     self.assertEqual(instr.pcoll_to_pcoll_id(p, ctx),
                      {str(init_pcoll): 'ref_PCollection_PCollection_1'})
Example #5
 def expand(self, pbegin):
   assert isinstance(pbegin, pvalue.PBegin), (
       'Input to transform must be a PBegin but found %s' % pbegin)
   return (
       pbegin
       | 'Impulse' >> beam.Impulse()
       | 'GenerateKeys' >> beam.ParDo(
           StatefulLoadGenerator.GenerateKeys(self.num_keys, self.key_size))
       | 'GenerateLoad' >> beam.ParDo(
           StatefulLoadGenerator.GenerateLoad(
               self.num_records // self.num_keys, self.value_size)))
Example #6
 def build_write_pipeline(self, pipeline):
     _ = (
         pipeline
         | 'Impulse' >> beam.Impulse()
         | 'Generate' >> beam.FlatMap(lambda x: range(NUM_RECORDS))  # pylint: disable=range-builtin-not-iterating
         | 'Reshuffle' >> beam.Reshuffle()
         | 'MakeKV' >> beam.Map(lambda x:
                                (b'', str(x).encode())).with_output_types(
                                    typing.Tuple[bytes, bytes])
         | 'WriteToKafka' >> WriteToKafka(
             producer_config={'bootstrap.servers': self.bootstrap_servers},
             topic=self.topic,
             expansion_service=self.expansion_service))
Example #7
def main(options):
    with beam.Pipeline(options=options) as pipe:
        (pipe
         | 'Impulse' >> beam.Impulse()
         | 'LocalEnv' >> GetEnv('FOO')
         | 'Local' >> Log(color='green')
         | 'RemoteEnv' >> EnvTransform(GetEnv, {'FOO': 'BAR'}, 'FOO')
         | 'Remote' >> Log(color='cyan')
         | 'AssertEnv' >> EnvTransform(AssertEnv, {'FOO': 'BAR'}, FOO='BAR')
         | 'ClsLvlNeedsEnv' >> NeedsEnv()
         # TODO:
         # | 'InstLvlNeedsEnv' >> NeedsEnv().with_env(FOO='BAZ')
         )
Example #8
 def run_write_pipeline(
     self, num_rows, to_row_fn, row_type, spanner_transform=None):
   with TestPipeline(is_integration_test=True) as p:
     p.not_use_test_runner_api = True
     _ = (
         p
         | 'Impulse' >> beam.Impulse()
         | 'Generate' >> beam.FlatMap(lambda x: range(num_rows))  # pylint: disable=range-builtin-not-iterating
         | 'Map to row' >> beam.Map(to_row_fn).with_output_types(row_type)
         | 'Write to Spanner' >> spanner_transform(
             instance_id=self.instance_id,
             database_id=self.database_id,
             project_id=self.project_id,
             table=self.table,
             emulator_host=self.spanner_helper.get_emulator_host(),
         ))
Example #9
def send(pipeline_options):

    with beam.Pipeline(options=pipeline_options) as pipe:
        (pipe
         | beam.Impulse()
         | beam.Map(lambda x: (1, x))
         | Log()
         # | 'KafkaWrite' >> WriteToKafka(
         #       producer_config={
         #           'bootstrap.servers': 'localhost:9092',
         #       },
         #       topic=TOPIC,
         #       # key_serializer='org.apache.kafka.common.serialization.ByteArraySerializer',
         #       # value_serializer='org.apache.kafka.common.serialization.ByteArraySerializer',
         #       expansion_service='localhost:8097',
         #   )
         )
Example #10
  def test_visit_entire_graph(self):
    pipeline = Pipeline()
    pcoll1 = pipeline | 'pcoll' >> beam.Impulse()
    pcoll2 = pcoll1 | 'do1' >> FlatMap(lambda x: [x + 1])
    pcoll3 = pcoll2 | 'do2' >> FlatMap(lambda x: [x + 1])
    pcoll4 = pcoll2 | 'do3' >> FlatMap(lambda x: [x + 1])
    transform = PipelineTest.CustomTransform()
    pcoll5 = pcoll4 | transform

    visitor = PipelineTest.Visitor(visited=[])
    pipeline.visit(visitor)
    self.assertEqual({pcoll1, pcoll2, pcoll3, pcoll4, pcoll5},
                     set(visitor.visited))
    self.assertEqual(set(visitor.enter_composite), set(visitor.leave_composite))
    self.assertEqual(2, len(visitor.enter_composite))
    self.assertEqual(visitor.enter_composite[1].transform, transform)
    self.assertEqual(visitor.leave_composite[0].transform, transform)
Example #11
    def test_pardo_with_unbounded_per_element_dofn(self):
        class UnboundedDoFn(beam.DoFn):
            @beam.DoFn.unbounded_per_element()
            def process(self, element):
                pass

        class BoundedDoFn(beam.DoFn):
            def process(self, element):
                pass

        with TestPipeline() as p:
            source = p | beam.Impulse()
            unbounded_pcoll = source | beam.ParDo(UnboundedDoFn())
            bounded_pcoll = source | beam.ParDo(BoundedDoFn())

            self.assertEqual(unbounded_pcoll.is_bounded, False)
            self.assertEqual(bounded_pcoll.is_bounded, True)
Example #12
 def run_kinesis_write(self):
     with TestPipeline(options=PipelineOptions(self.pipeline_args)) as p:
         p.not_use_test_runner_api = True
         _ = (
             p
             | 'Impulse' >> beam.Impulse()
             | 'Generate' >> beam.FlatMap(lambda x: range(NUM_RECORDS))  # pylint: disable=bad-option-value
             | 'Map to bytes' >> beam.Map(lambda x: RECORD + str(x).encode(
             )).with_output_types(bytes)
             | 'WriteToKinesis' >> WriteToKinesis(
                 stream_name=self.aws_kinesis_stream,
                 aws_access_key=self.aws_access_key,
                 aws_secret_key=self.aws_secret_key,
                 region=self.aws_region,
                 service_endpoint=self.aws_service_endpoint,
                 verify_certificate=(not self.use_localstack),
                 partition_key='1',
                 producer_properties=self.producer_properties,
             ))
Example #13
  def test_schema_autodetect_not_allowed_with_avro_file_loads(self):
    with TestPipeline() as p:
      pc = p | beam.Impulse()

      with self.assertRaisesRegex(ValueError, '^A schema must be provided'):
        _ = (
            pc
            | 'No Schema' >> beam.io.gcp.bigquery.WriteToBigQuery(
                "dataset.table",
                schema=None,
                temp_file_format=bigquery_tools.FileFormat.AVRO))

      with self.assertRaisesRegex(ValueError,
                                  '^Schema auto-detection is not supported'):
        _ = (
            pc
            | 'Schema Autodetected' >> beam.io.gcp.bigquery.WriteToBigQuery(
                "dataset.table",
                schema=beam.io.gcp.bigquery.SCHEMA_AUTODETECT,
                temp_file_format=bigquery_tools.FileFormat.AVRO))
Example #14
    def test_passthrough(self):
        """
        Test that PTransforms which pass through their input PCollection can be
        used with PipelineInfo.
        """
        class Passthrough(beam.PTransform):
            def expand(self, pcoll):
                return pcoll

        p = beam.Pipeline(runner=self.runner)
        p | beam.Impulse() | Passthrough()  # pylint: disable=expression-not-assigned
        proto = to_stable_runner_api(p).components
        info = pipeline_analyzer.PipelineInfo(proto)
        for pcoll_id in info.all_pcollections():
            # FIXME: If PipelineInfo does not support passthrough PTransforms, this
            #        will only fail some of the time, depending on the ordering of
            #        transforms in the Pipeline proto.

            # Should not throw exception
            info.cache_label(pcoll_id)
Example #15
    def test_root_transforms(self):
        root_read = beam.Impulse()
        root_flatten = Flatten(pipeline=self.pipeline)

        pbegin = pvalue.PBegin(self.pipeline)
        pcoll_read = pbegin | 'read' >> root_read
        pcoll_read | FlatMap(lambda x: x)
        [] | 'flatten' >> root_flatten

        self.pipeline.visit(self.visitor)

        root_transforms = [t.transform for t in self.visitor.root_transforms]

        self.assertCountEqual(root_transforms, [root_read, root_flatten])

        pbegin_consumers = [
            c.transform for c in self.visitor.value_to_consumers[pbegin]
        ]
        self.assertCountEqual(pbegin_consumers, [root_read])
        self.assertEqual(len(self.visitor.step_names), 3)
Example #16
    def test_side_inputs(self):
        class SplitNumbersFn(DoFn):
            def process(self, element):
                if element < 0:
                    yield pvalue.TaggedOutput('tag_negative', element)
                else:
                    yield element

        class ProcessNumbersFn(DoFn):
            def process(self, element, negatives):
                yield element

        def _process_numbers(pcoll, negatives):
            first_output = (pcoll
                            | 'process numbers step 1' >> ParDo(
                                ProcessNumbersFn(), negatives))

            second_output = (first_output
                             | 'process numbers step 2' >> ParDo(
                                 ProcessNumbersFn(), negatives))

            output_pc = ((first_output, second_output)
                         | 'flatten results' >> beam.Flatten())
            return output_pc

        root_read = beam.Impulse()

        result = (self.pipeline
                  | 'read' >> root_read
                  | ParDo(SplitNumbersFn()).with_outputs('tag_negative',
                                                         main='positive'))
        positive, negative = result
        _process_numbers(positive, AsList(negative))

        self.pipeline.visit(self.visitor)

        root_transforms = [t.transform for t in self.visitor.root_transforms]
        self.assertEqual(root_transforms, [root_read])
        self.assertEqual(len(self.visitor.step_names), 5)
        self.assertEqual(len(self.visitor.views), 1)
        self.assertTrue(isinstance(self.visitor.views[0], pvalue.AsList))
Example #17
    def test_pardo_state_with_custom_key_coder(self):
        """Tests that state requests work correctly when the key coder is an
        SDK-specific coder, i.e. non standard coder. This is additionally enforced
        by Java's ProcessBundleDescriptorsTest and by Flink's
        ExecutableStageDoFnOperator which detects invalid encoding by checking for
        the correct key group of the encoded key."""
        index_state_spec = userstate.CombiningValueStateSpec('index', sum)

        # Test params
        # Ensure decent amount of elements to serve all partitions
        n = 200
        duplicates = 1

        split = n // (duplicates + 1)
        inputs = [(i % split, str(i % split)) for i in range(0, n)]

        # Use a DoFn which has to use FastPrimitivesCoder because the type cannot
        # be inferred
        class Input(beam.DoFn):
            def process(self, impulse):
                for i in inputs:
                    yield i

        class AddIndex(beam.DoFn):
            def process(self, kv,
                        index=beam.DoFn.StateParam(index_state_spec)):
                k, v = kv
                index.add(1)
                yield k, v, index.read()

        expected = [(i % split, str(i % split), i // split + 1)
                    for i in range(0, n)]

        with self.create_pipeline() as p:
            assert_that(
                p
                | beam.Impulse()
                | beam.ParDo(Input())
                | beam.ParDo(AddIndex()), equal_to(expected))
Example #18
  def run_write(self):
    def user_data_mapper(test_row):
      return [
          str(test_row.number_column).encode('utf-8'),
          str(test_row.boolean_column).encode('utf-8'),
          binascii.hexlify(test_row.bytes_column),
      ]

    with TestPipeline(options=PipelineOptions(self.pipeline_args)) as p:
      p.not_use_test_runner_api = True
      _ = (
          p
          | 'Impulse' >> beam.Impulse()
          | 'Generate' >> beam.FlatMap(lambda x: range(NUM_RECORDS))  # pylint: disable=range-builtin-not-iterating
          | 'Map to TestRow' >> beam.Map(
              lambda num: TestRow(
                  num, num % 2 == 0, b"test" + str(num).encode()))
          | WriteToSnowflake(
              server_name=self.server_name,
              username=self.username,
              password=self.password,
              o_auth_token=self.o_auth_token,
              private_key_path=self.private_key_path,
              raw_private_key=self.raw_private_key,
              private_key_passphrase=self.private_key_passphrase,
              schema=self.schema,
              database=self.database,
              role=self.role,
              warehouse=self.warehouse,
              staging_bucket_name=self.staging_bucket_name,
              storage_integration_name=self.storage_integration_name,
              create_disposition=CreateDisposition.CREATE_IF_NEEDED,
              write_disposition=WriteDisposition.TRUNCATE,
              table_schema=SCHEMA_STRING,
              user_data_mapper=user_data_mapper,
              table=self.table,
              query=None,
              expansion_service=self.expansion_service,
          ))
Example #19
    def test(self):
        class SequenceSideInputTestDoFn(beam.DoFn):
            """Iterate over first n side_input elements."""
            def __init__(self, first_n):
                self._first_n = first_n

            def process(self, unused_element, side_input):
                i = 0
                it = iter(side_input)
                while i < self._first_n:
                    i += 1
                    try:
                        # No-op. We only make sure that the element is accessed.
                        next(it)
                    except StopIteration:
                        return

        class MappingSideInputTestDoFn(beam.DoFn):
            """Take a sequence of keys as an additional side input and for each
            key in the sequence checks the value for key in the dictionary."""
            def process(self, unused_element, dict_side_input, keys_to_check):
                for key in keys_to_check:
                    # No-op. We only make sure that the element is accessed.
                    dict_side_input[key]

        class GetRandomKeys(beam.DoFn):
            def __init__(self, n):
                self._n = n

            def process(self, unused_element, dict_side_input):
                import random
                n = min(self._n, len(dict_side_input))
                return random.sample(dict_side_input.keys(), n)

        class AddEventTimestamps(beam.DoFn):
            """Assign timestamp to each element of PCollection."""
            def setup(self):
                self._timestamp = 0

            def process(self, element):
                from apache_beam.transforms.combiners import window
                yield window.TimestampedValue(element, self._timestamp)
                self._timestamp += 1

        input_pc = (self.pipeline
                    | 'Read synthetic' >> beam.io.Read(
                        SyntheticSource(self.parse_synthetic_source_options()))
                    | 'Collect start time metrics' >> beam.ParDo(
                        MeasureTime(self.metrics_namespace)))

        if self.side_input_size != self.input_options.get('num_records'):
            side_input = (
                input_pc
                | 'Sample {} elements'.format(self.side_input_size) >>
                beam.combiners.Sample.FixedSizeGlobally(self.side_input_size)
                | 'Flatten a sequence' >> beam.FlatMap(lambda x: x))
        else:
            side_input = input_pc

        if self.windows > 0:
            window_size = self.side_input_size / self.windows
            logging.info('Fixed windows of %s seconds will be applied',
                         window_size)
            side_input = (
                side_input
                | 'Add event timestamps' >> beam.ParDo(AddEventTimestamps())
                | 'Apply windows' >> beam.WindowInto(
                    beam.combiners.window.FixedWindows(window_size)))

        side_input_type = self.materialize_as()
        elements_to_access = self.side_input_size * self.access_percentage // 100
        logging.info(
            '%s out of %s total elements in the side input will be '
            'accessed.', elements_to_access, self.side_input_size)
        if side_input_type is beam.pvalue.AsDict:
            random_keys = (self.pipeline
                           | beam.Impulse()
                           | 'Get random keys' >> beam.ParDo(
                               GetRandomKeys(elements_to_access),
                               beam.pvalue.AsDict(side_input)))
            pc = input_pc | beam.ParDo(MappingSideInputTestDoFn(),
                                       side_input_type(side_input),
                                       beam.pvalue.AsList(random_keys))
        else:
            pc = input_pc | beam.ParDo(
                SequenceSideInputTestDoFn(elements_to_access),
                side_input_type(side_input))

        _ = pc | 'Collect end time metrics' >> beam.ParDo(
            MeasureTime(self.metrics_namespace))
Example #20
 def test_impulse(self):
     with test_pipeline.TestPipeline(runner='BundleBasedDirectRunner') as p:
         assert_that(p | beam.Impulse(), equal_to([b'']))
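Example #20 checks that beam.Impulse() emits exactly one empty bytes element. A minimal sketch (not taken from the listing) of the fan-out pattern that several of the write pipelines above build on top of that single seed element:

import apache_beam as beam
from apache_beam.testing.test_pipeline import TestPipeline
from apache_beam.testing.util import assert_that
from apache_beam.testing.util import equal_to

with TestPipeline() as p:
  numbers = (
      p
      | 'Impulse' >> beam.Impulse()                      # single b'' element
      | 'Generate' >> beam.FlatMap(lambda _: range(3)))  # fan out to 0, 1, 2
  assert_that(numbers, equal_to([0, 1, 2]))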
Example #21
def run(argv=None):
    options = PipelineOptions(argv)
    options.view_as(SetupOptions).save_main_session = True
    with Pipeline(options=options) as p:
        (p | beam.Impulse() | beam.ParDo(MysqlDoFn()))