def expand(self, pvalue):
    """Read from the Pub/Sub source.

    Produces a PCollection of raw serialized messages (``bytes``); when
    ``self.with_attributes`` is set, additionally parses each proto into
    a ``PubsubMessage`` and types the output accordingly.
    """
    raw = pvalue.pipeline | Read(self._source)
    raw.element_type = bytes
    # Without attribute parsing, callers get the raw byte payloads.
    if not self.with_attributes:
        return raw
    messages = raw | Map(PubsubMessage._from_proto_str)
    messages.element_type = PubsubMessage
    return messages
def expand(self, pvalue):
    """Read records, keep partition 1 of the two-way split, and combine per key.

    ``splitBadFiles`` routes each element into one of two partitions;
    only partition 1 flows on (partition 0 is dropped).
    """
    partitions = (
        pvalue.pipeline
        | Read(self._source)
        | beam.Partition(splitBadFiles, 2))
    kept = partitions[1]
    # Identity FlatMap kept from the original pipeline shape.
    return (kept
            | beam.FlatMap(lambda x: [x])
            | beam.CombinePerKey(combineTZ()))
def expand(self, pvalue):
    """Read frames, tag them into multiple outputs, convert frames to
    chunks using globally-combined offsets as a side input, then combine
    the resulting chunks per key.
    """
    # TagFrames emits tagged outputs; 'offsets' and 'frames' tags are
    # consumed below.
    frames = (pvalue.pipeline
              | Read(self._source)
              | beam.ParDo(TagFrames()).with_outputs())
    # Reduce all offsets into one value, fed to FramesToChunks as a
    # singleton side input.
    offsetMap = (frames.offsets
                 | beam.CombineGlobally(CombineOffsets()))
    chunks = (frames.frames
              | beam.ParDo(
                  FramesToChunks(chunkShape=self.chunkShape,
                                 Overlap=self.Overlap,
                                 downSample=self.downSample),
                  beam.pvalue.AsSingleton(offsetMap)).with_outputs())
    # Only the 'chunks' tagged output is combined and returned; any other
    # tags produced by FramesToChunks are ignored here.
    chunksCombined = (chunks.chunks
                      | beam.CombinePerKey(combineTZ()))
    return chunksCombined
def expand(self, pvalue):
    """Expand into a single Read of the configured source."""
    source_read = Read(self._source)
    return pvalue.pipeline | source_read
def expand(self, pvalue):
    """Read raw bytes from the source and decode each element as UTF-8 text."""
    raw = pvalue.pipeline | Read(self._source)
    raw.element_type = bytes
    decoded = raw | 'DecodeString' >> Map(
        lambda raw_bytes: raw_bytes.decode('utf-8'))
    # NOTE: `unicode` is the py2-era text type name used by this file.
    decoded.element_type = unicode
    return decoded
def expand(self, pvalue):
    """Read Arrow tables from the source and emit per-row dictionaries."""
    tables = pvalue | Read(self._source)
    return tables | ParDo(_ArrowTableToRowDictionaries())
def expand(self, pvalue):
    """Expand into a Read of a TFRecord source built from the stored args."""
    tfrecord_source = _TFRecordSource(*self._args)
    return pvalue.pipeline | Read(tfrecord_source)
def expand(self, pvalue):
    """Read from the source, annotating the output with the source's type hint."""
    read = Read(self._source)
    # Ask the source itself what element type it will produce.
    hint = self._source.output_type_hint()
    return pvalue.pipeline | read.with_output_types(hint)
def expand(self, pcoll):
    """Expand this transform into a Read of the Billboard source.

    Implements `apache_beam.transforms.ptransform.PTransform.expand`.
    """
    self.logger.info('Starting Billboard.com scrape.')
    source_read = Read(self._source)
    return pcoll | source_read
def expand(self, pvalue):
    """Read raw bytes and decode every element to text via a UTF-8 DoFn."""
    byte_coll = pvalue.pipeline | Read(self._source)
    byte_coll.element_type = bytes
    text_coll = byte_coll | 'decode string' >> ParDo(_decodeUtf8String)
    # NOTE: `unicode` is the py2-era text type name used by this file.
    text_coll.element_type = unicode
    return text_coll