def test_root_transforms(self):
    """Check which transforms the visitor records as pipeline roots.

    Builds a small graph on ``self.pipeline``: a create and a read applied
    to a ``PBegin``, a ``FlatMap`` applied to the create output, and a
    flatten applied to an empty list. After visiting the pipeline with
    ``self.visitor`` it asserts that:

    * the visitor's root transforms are the read, the create and the
      flatten;
    * the consumers recorded for the ``PBegin`` are the read and the
      create;
    * four step names were recorded in total.
    """
    root_create = Create('create', [[1, 2, 3]])

    class DummySource(iobase.BoundedSource):
        pass

    root_read = Read('read', DummySource())
    root_flatten = Flatten('flatten', pipeline=self.pipeline)

    pbegin = pvalue.PBegin(self.pipeline)
    pcoll_create = pbegin | root_create
    pbegin | root_read
    pcoll_create | FlatMap(lambda x: x)
    [] | root_flatten

    self.pipeline.visit(self.visitor)

    # Compare order-insensitively with assertCountEqual instead of sorting
    # both sides: sorting requires the transform objects to be mutually
    # orderable, which an unordered membership check does not.
    root_transforms = [t.transform for t in self.visitor.root_transforms]
    self.assertCountEqual(
        root_transforms, [root_read, root_create, root_flatten])

    pbegin_consumers = [
        c.transform for c in self.visitor.value_to_consumers[pbegin]
    ]
    self.assertCountEqual(pbegin_consumers, [root_read, root_create])

    self.assertEqual(len(self.visitor.step_names), 4)
def expand(self, pcol):
    """Apply one Pub/Sub read per configured descriptor and flatten them.

    For every entry in ``self.pubsub_source_descriptors`` the descriptor's
    ``source`` string is matched against ``PUBSUB_DESCRIPTOR_REGEXP`` to
    pull out three groups (project, source type, source name), which are
    used to build the step label. A ``ReadFromPubSub`` is then applied to
    ``pcol``: the source is passed as ``topic`` when the source type is
    ``'topics'`` and as ``subscription`` otherwise.

    Args:
        pcol: The value each ``ReadFromPubSub`` step is applied to.

    Returns:
        The result of piping the tuple of all per-source reads into
        ``Flatten()``.
    """
    reads = []
    for descriptor in self.pubsub_source_descriptors:
        project, kind, name = re.match(
            PUBSUB_DESCRIPTOR_REGEXP, descriptor.source).group(1, 2, 3)
        label = 'PubSub %s/project:%s/Read %s' % (kind, project, name)

        # Only the keyword carrying the source differs between the topic
        # and the subscription case; everything else is shared.
        source_kwarg = 'topic' if kind == 'topics' else 'subscription'
        read = ReadFromPubSub(
            id_label=descriptor.id_label,
            with_attributes=self.with_attributes,
            timestamp_attribute=descriptor.timestamp_attribute,
            **{source_kwarg: descriptor.source})

        reads.append(pcol | label >> read)

    return tuple(reads) | Flatten()
def test_root_transforms(self):
    """Verify the root transforms and PBegin consumers seen by the visitor.

    The graph built on ``self.pipeline`` consists of an ``Impulse`` applied
    to a ``PBegin`` under the label ``'read'``, a ``FlatMap`` applied to the
    impulse output, and a ``Flatten`` applied to an empty list under the
    label ``'flatten'``. After the visit, the impulse and the flatten must
    be the recorded root transforms, the impulse must be the only consumer
    of the ``PBegin``, and exactly three step names must have been recorded.
    """
    impulse = beam.Impulse()
    flatten = Flatten(pipeline=self.pipeline)
    pbegin = pvalue.PBegin(self.pipeline)

    # Build the graph; the bare expressions are kept for their side effect
    # of applying the transforms to the pipeline.
    impulse_out = pbegin | ('read' >> impulse)
    impulse_out | FlatMap(lambda x: x)
    [] | ('flatten' >> flatten)

    visitor = self.visitor
    self.pipeline.visit(visitor)

    self.assertCountEqual(
        [node.transform for node in visitor.root_transforms],
        [impulse, flatten])
    self.assertCountEqual(
        [node.transform for node in visitor.value_to_consumers[pbegin]],
        [impulse])
    self.assertEqual(len(visitor.step_names), 3)
def test_root_transforms(self):
    """Check which transforms the visitor records as pipeline roots.

    Builds a small graph on ``self.pipeline``: a ``Read`` of a dummy bounded
    source applied to a ``PBegin`` under the label ``'read'``, a ``FlatMap``
    applied to the read output, and a ``Flatten`` applied to an empty list
    under the label ``'flatten'``. After visiting the pipeline with
    ``self.visitor`` it asserts that:

    * the visitor's root transforms are the read and the flatten;
    * the only consumer recorded for the ``PBegin`` is the read;
    * three step names were recorded in total.
    """
    class DummySource(iobase.BoundedSource):
        pass

    root_read = Read(DummySource())
    root_flatten = Flatten(pipeline=self.pipeline)

    pbegin = pvalue.PBegin(self.pipeline)
    pcoll_read = pbegin | 'read' >> root_read
    pcoll_read | FlatMap(lambda x: x)
    [] | 'flatten' >> root_flatten

    self.pipeline.visit(self.visitor)

    # Compare order-insensitively with assertCountEqual instead of sorting
    # both sides: sorting requires the transform objects to be mutually
    # orderable, which an unordered membership check does not.
    root_transforms = [t.transform for t in self.visitor.root_transforms]
    self.assertCountEqual(root_transforms, [root_read, root_flatten])

    pbegin_consumers = [
        c.transform for c in self.visitor.value_to_consumers[pbegin]
    ]
    self.assertCountEqual(pbegin_consumers, [root_read])

    self.assertEqual(len(self.visitor.step_names), 3)