def test_group_by_key_input_visitor_for_non_gbk_transforms(self):
    """The GBK input visitor must not rewrite inputs of non-GBK transforms."""
    pipeline = TestPipeline()
    pcoll = PCollection(pipeline)
    for transform in (beam.Flatten(), beam.Map(lambda x: x)):
        # Reset before each visit; the visitor should leave it untouched.
        pcoll.element_type = typehints.Any
        applied = AppliedPTransform(None, transform, "label", [pcoll])
        DataflowRunner.group_by_key_input_visitor().visit_transform(applied)
        self.assertEqual(pcoll.element_type, typehints.Any)
Example #2
0
 def test_group_by_key_input_visitor_for_non_gbk_transforms(self):
     """Transforms other than GroupByKey keep their input element type."""
     pipeline = TestPipeline()
     input_pcoll = PCollection(pipeline)
     non_gbk_transforms = [beam.Flatten(), beam.Map(lambda x: x)]
     for transform in non_gbk_transforms:
         input_pcoll.element_type = typehints.Any
         applied = AppliedPTransform(None, transform, "label", [input_pcoll])
         DataflowRunner.group_by_key_input_visitor().visit_transform(applied)
         # Element type must be unchanged for non-GBK transforms.
         self.assertEqual(input_pcoll.element_type, typehints.Any)
Example #3
0
 def apply_ReadStringsFromPubSub(self, transform, pcoll):
     """Apply ReadStringsFromPubSub as a native transform.

     Verifies the optional GCP PubSub dependency is importable, then
     returns a fresh output PCollection typed as text.

     Raises:
       ImportError: if google-cloud-pubsub is not installed.
     """
     try:
         # Probe only; the import itself is unused beyond availability check.
         from google.cloud import pubsub as unused_pubsub
     except ImportError:
         raise ImportError('Google Cloud PubSub not available, please install '
                           'apache_beam[gcp]')
     # Execute this as a native transform.
     output = PCollection(pcoll.pipeline)
     # Bug fix: `unicode` does not exist on Python 3 (NameError); `str` is
     # the text type there. The file already uses Python-3-only unittest
     # APIs elsewhere, so Python 3 is the supported runtime.
     output.element_type = str
     return output
 def test_group_by_key_input_visitor_with_invalid_inputs(self):
     """GBK inputs that are not KV-compatible must raise ValueError.

     Neither a plain ``str`` element type nor ``typehints.Set`` is
     compatible with ``KV[Any, Any]``, so the visitor must reject both.
     """
     p = TestPipeline()
     pcoll1 = PCollection(p)
     pcoll2 = PCollection(p)
     for transform in [_GroupByKeyOnly(), beam.GroupByKey()]:
         pcoll1.element_type = str
         pcoll2.element_type = typehints.Set
         err_msg = (
             r"Input to 'label' must be compatible with KV\[Any, Any\]. "
             "Found .*")
         for pcoll in [pcoll1, pcoll2]:
             # assertRaisesRegexp is deprecated since Python 3.2; use the
             # assertRaisesRegex spelling (already used elsewhere in file).
             with self.assertRaisesRegex(ValueError, err_msg):
                 DataflowRunner.group_by_key_input_visitor().visit_transform(
                     AppliedPTransform(None, transform, "label", [pcoll]))
  def _test_flatten_input_visitor(self, input_type, output_type, num_inputs):
      """Helper: run the flatten input visitor and check input coercion.

      Builds ``num_inputs`` input PCollections of ``input_type``, applies
      Flatten with an output of ``output_type``, and asserts that the
      visitor rewrote EVERY input's element type to ``output_type``.
      """
      p = TestPipeline()
      inputs = []
      for _ in range(num_inputs):
          input_pcoll = PCollection(p)
          input_pcoll.element_type = input_type
          inputs.append(input_pcoll)
      output_pcoll = PCollection(p)
      output_pcoll.element_type = output_type

      flatten = AppliedPTransform(None, beam.Flatten(), "label", inputs)
      flatten.add_output(output_pcoll, None)
      DataflowRunner.flatten_input_visitor().visit_transform(flatten)
      # Bug fix: the original loop checked inputs[0] num_inputs times,
      # leaving inputs[1:] unverified; check each input individually.
      for input_pcoll in inputs:
          self.assertEqual(input_pcoll.element_type, output_type)
 def test_group_by_key_input_visitor_with_valid_inputs(self):
     """Valid GBK inputs are coerced to KV[Any, Any] by the visitor."""
     pipeline = TestPipeline()
     unset = PCollection(pipeline)
     any_typed = PCollection(pipeline)
     kv_typed = PCollection(pipeline)
     expected = typehints.KV[typehints.Any, typehints.Any]
     for transform in [_GroupByKeyOnly(), beam.GroupByKey()]:
         unset.element_type = None
         any_typed.element_type = typehints.Any
         kv_typed.element_type = expected
         for pcoll in [unset, any_typed, kv_typed]:
             applied = AppliedPTransform(None, transform, "label", [pcoll])
             applied.outputs[None] = PCollection(None)
             DataflowRunner.group_by_key_input_visitor().visit_transform(applied)
             # Every valid input must end up typed as KV[Any, Any].
             self.assertEqual(pcoll.element_type, expected)
 def test_group_by_key_input_visitor_with_invalid_inputs(self):
     """GBK inputs whose element type is not Tuple-like or Any must raise."""
     p = TestPipeline()
     pcoll1 = PCollection(p)
     pcoll2 = PCollection(p)
     for transform in [_GroupByKeyOnly(), beam.GroupByKey()]:
         # NOTE(review): this assigns the constraint CLASS (not an
         # instance) — presumably deliberate to hit the rejection path;
         # confirm against the visitor implementation.
         pcoll1.element_type = typehints.TupleSequenceConstraint
         pcoll2.element_type = typehints.Set
         err_msg = "Input to GroupByKey must be of Tuple or Any type"
         for pcoll in [pcoll1, pcoll2]:
             # assertRaisesRegexp is deprecated since Python 3.2; use the
             # assertRaisesRegex spelling (already used elsewhere in file).
             with self.assertRaisesRegex(ValueError, err_msg):
                 DataflowRunner.group_by_key_input_visitor().visit_transform(
                     AppliedPTransform(None, transform, "label", [pcoll]))
Example #8
0
    def test_group_by_key_input_visitor_with_invalid_inputs(self):
        """Non-KV-compatible inputs to GroupByKey must be rejected."""
        pipeline = TestPipeline()
        str_typed = PCollection(pipeline)
        set_typed = PCollection(pipeline)

        str_typed.element_type = str
        set_typed.element_type = typehints.Set
        expected_msg = (r"Input to 'label' must be compatible with KV\[Any, Any\]. "
                        "Found .*")
        for bad_input in (str_typed, set_typed):
            applied = AppliedPTransform(None, beam.GroupByKey(), "label",
                                        [bad_input])
            with self.assertRaisesRegex(ValueError, expected_msg):
                DataflowRunner.group_by_key_input_visitor().visit_transform(
                    applied)
 def test_group_by_key_input_visitor_with_valid_inputs(self):
     """The visitor rewrites every acceptable input type to KV[Any, Any]."""
     pipeline = TestPipeline()
     pcolls = [PCollection(pipeline) for _ in range(3)]
     # None, Any, and KV[Any, Any] are all acceptable GBK input types.
     valid_types = [None, typehints.Any,
                    typehints.KV[typehints.Any, typehints.Any]]
     for transform in [_GroupByKeyOnly(), beam.GroupByKey()]:
         for pcoll, element_type in zip(pcolls, valid_types):
             pcoll.element_type = element_type
             DataflowRunner.group_by_key_input_visitor().visit_transform(
                 AppliedPTransform(None, transform, "label", [pcoll]))
             self.assertEqual(pcoll.element_type,
                              typehints.KV[typehints.Any, typehints.Any])
Example #10
0
    def _test_flatten_input_visitor(self, input_type, output_type, num_inputs):
        """Helper: run the flatten input visitor and check input coercion.

        Builds ``num_inputs`` input PCollections of ``input_type``, applies
        Flatten with an output of ``output_type``, and asserts that the
        visitor rewrote EVERY input's element type to ``output_type``.
        """
        p = TestPipeline()
        inputs = []
        for _ in range(num_inputs):
            input_pcoll = PCollection(p)
            input_pcoll.element_type = input_type
            inputs.append(input_pcoll)
        output_pcoll = PCollection(p)
        output_pcoll.element_type = output_type

        flatten = AppliedPTransform(None, beam.Flatten(), "label", inputs)
        flatten.add_output(output_pcoll, None)
        DataflowRunner.flatten_input_visitor().visit_transform(flatten)
        # Bug fix: the original loop checked inputs[0] num_inputs times,
        # leaving inputs[1:] unverified; check each input individually.
        for input_pcoll in inputs:
            self.assertEqual(input_pcoll.element_type, output_type)
Example #11
0
 def expand(self, pvalue):
     """Return the output PCollection; execution itself is native."""
     # Boundedness of the output mirrors the underlying source.
     bounded = self._source.is_bounded()
     return PCollection(self.pipeline, is_bounded=bounded)
Example #12
0
 def expand(self, pcoll):
     """Check the incoming PCollection, then forward it as the output."""
     self._check_pcollection(pcoll)
     result = PCollection.from_(pcoll)
     return result
Example #13
0
 def expand(self, pvalue):
     """Produce the output PCollection; the work runs as a native transform."""
     out = PCollection(self.pipeline)
     return out
Example #14
0
 def expand(self, pcoll):
     """Wrap the input in an output PCollection derived from it."""
     derived = PCollection.from_(pcoll)
     return derived