Esempio n. 1
0
    def test_root_transforms(self):
        """Check the visitor's record of root transforms and PBegin consumers.

        Applies a Create, a Read and an input-less Flatten, plus a FlatMap on
        the Create output, then visits the pipeline and asserts on what the
        visitor collected: three root transforms, two consumers of ``pbegin``
        and four step names.
        """
        root_create = Create('create', [[1, 2, 3]])

        class DummySource(iobase.BoundedSource):
            pass

        root_read = Read('read', DummySource())
        root_flatten = Flatten('flatten', pipeline=self.pipeline)

        pbegin = pvalue.PBegin(self.pipeline)
        pcoll_create = pbegin | root_create
        # The results of the next three applications are intentionally
        # discarded; only the act of applying the transforms matters here.
        pbegin | root_read
        pcoll_create | FlatMap(lambda x: x)
        [] | root_flatten

        self.pipeline.visit(self.visitor)

        # Compare as unordered collections instead of sorting: sorting relies
        # on the transform objects being orderable, which Python 3 does not
        # provide by default (it raises TypeError for unorderable objects).
        root_transforms = [t.transform for t in self.visitor.root_transforms]
        self.assertCountEqual(root_transforms,
                              [root_read, root_create, root_flatten])

        pbegin_consumers = [
            c.transform for c in self.visitor.value_to_consumers[pbegin]
        ]
        self.assertCountEqual(pbegin_consumers, [root_read, root_create])
        self.assertEqual(len(self.visitor.step_names), 4)
Esempio n. 2
0
File: pubsub.py Progetto: mahak/beam
    def expand(self, pcol):
        """Read every configured Pub/Sub source and flatten the results.

        Args:
            pcol: The input value each ``ReadFromPubSub`` is applied to.

        Returns:
            The result of applying ``Flatten`` to the tuple of per-source
            reads.
        """
        reads = []
        for descriptor in self.pubsub_source_descriptors:
            parsed = re.match(PUBSUB_DESCRIPTOR_REGEXP, descriptor.source)
            project, kind, name = parsed.group(1, 2, 3)
            label = 'PubSub %s/project:%s/Read %s' % (kind, project, name)

            # A 'topics' source is passed as ``topic``; any other kind is
            # passed as ``subscription``. All remaining arguments are shared.
            source_arg = 'topic' if kind == 'topics' else 'subscription'
            read_kwargs = {
                source_arg: descriptor.source,
                'id_label': descriptor.id_label,
                'with_attributes': self.with_attributes,
                'timestamp_attribute': descriptor.timestamp_attribute,
            }
            reads.append(pcol | label >> ReadFromPubSub(**read_kwargs))

        return tuple(reads) | Flatten()
    def test_root_transforms(self):
        """Verify the visitor's root-transform and PBegin-consumer bookkeeping.

        An Impulse and an input-less Flatten are applied, followed by a
        FlatMap on the Impulse output; the assertions then check what the
        visitor collected after visiting the pipeline.
        """
        impulse = beam.Impulse()
        empty_flatten = Flatten(pipeline=self.pipeline)

        begin = pvalue.PBegin(self.pipeline)
        impulse_out = begin | 'read' >> impulse
        impulse_out | FlatMap(lambda x: x)
        [] | 'flatten' >> empty_flatten

        self.pipeline.visit(self.visitor)

        # Root transforms seen by the visitor, compared in any order.
        seen_roots = []
        for applied in self.visitor.root_transforms:
            seen_roots.append(applied.transform)
        self.assertCountEqual(seen_roots, [impulse, empty_flatten])

        # Only the Impulse is expected among the consumers of the PBegin.
        begin_consumers = []
        for consumer in self.visitor.value_to_consumers[begin]:
            begin_consumers.append(consumer.transform)
        self.assertCountEqual(begin_consumers, [impulse])

        self.assertEqual(len(self.visitor.step_names), 3)
    def test_root_transforms(self):
        """Check the visitor's record of root transforms and PBegin consumers.

        Applies a Read of a dummy bounded source and an input-less Flatten,
        plus a FlatMap on the Read output, then visits the pipeline and
        asserts on what the visitor collected: two root transforms, one
        consumer of ``pbegin`` and three step names.
        """
        class DummySource(iobase.BoundedSource):
            pass

        root_read = Read(DummySource())
        root_flatten = Flatten(pipeline=self.pipeline)

        pbegin = pvalue.PBegin(self.pipeline)
        pcoll_read = pbegin | 'read' >> root_read
        # The results of the next two applications are intentionally
        # discarded; only the act of applying the transforms matters here.
        pcoll_read | FlatMap(lambda x: x)
        [] | 'flatten' >> root_flatten

        self.pipeline.visit(self.visitor)

        # Compare as unordered collections instead of sorting: sorting relies
        # on the transform objects being orderable, which Python 3 does not
        # provide by default (it raises TypeError for unorderable objects).
        root_transforms = [t.transform for t in self.visitor.root_transforms]
        self.assertCountEqual(root_transforms, [root_read, root_flatten])

        pbegin_consumers = [
            c.transform for c in self.visitor.value_to_consumers[pbegin]
        ]
        self.assertCountEqual(pbegin_consumers, [root_read])
        self.assertEqual(len(self.visitor.step_names), 3)