class PersonCoderImpl(coder_impl.StreamCoderImpl):
    """Stream coder implementation for ``Person`` values.

    A person is written as eight consecutive fields, in this order:
    ``id`` (var-int), ``name``, ``email_address``, ``credit_card``, ``city``,
    ``state`` (all strings), ``date_time`` (timestamp) and ``extra`` (string).
    Decoding reads the fields back in the same order and passes them
    positionally to ``Person``.
    """

    # Component coder implementations shared by all instances.
    _int_coder_impl = coder_impl.VarIntCoderImpl()
    _str_coder_impl = StrUtf8Coder().get_impl()
    _time_coder_impl = coder_impl.TimestampCoderImpl()

    def encode_to_stream(self, value, stream, nested):
        """Write every field of ``value`` to ``stream``.

        The ``nested`` argument is ignored; each component coder is always
        invoked with ``True`` as its third argument.
        """
        layout = (
            (self._int_coder_impl, 'id'),
            (self._str_coder_impl, 'name'),
            (self._str_coder_impl, 'email_address'),
            (self._str_coder_impl, 'credit_card'),
            (self._str_coder_impl, 'city'),
            (self._str_coder_impl, 'state'),
            (self._time_coder_impl, 'date_time'),
            (self._str_coder_impl, 'extra'),
        )
        # Attributes are fetched one at a time so each field is read from
        # ``value`` immediately before it is written.
        for field_coder, attr_name in layout:
            field_coder.encode_to_stream(
                getattr(value, attr_name), stream, True)

    def decode_from_stream(self, stream, nested):
        """Read one ``Person`` from ``stream``.

        The ``nested`` argument is ignored; each component coder is always
        invoked with ``True`` as its second argument.
        """
        int_impl = self._int_coder_impl
        str_impl = self._str_coder_impl
        time_impl = self._time_coder_impl
        # One entry per field, in wire order: id, name, email, credit_card,
        # city, state, date_time, extra.
        field_coders = (
            int_impl,
            str_impl,
            str_impl,
            str_impl,
            str_impl,
            str_impl,
            time_impl,
            str_impl,
        )
        decoded = [
            field_coder.decode_from_stream(stream, True)
            for field_coder in field_coders
        ]
        return Person(*decoded)
def _nonnull_coder_from_type(field_type):
    """Return a coder for the given schema field type.

    The populated member of the ``type_info`` oneof selects the coder:
    atomic types map to a primitive coder, arrays to an ``IterableCoder``,
    maps to a ``MapCoder`` and rows to a ``RowCoder``.

    Raises:
        ValueError: if the type (or the atomic type) is not one of the
            cases handled here.
    """
    type_info = field_type.WhichOneof("type_info")

    if type_info == "atomic_type":
        atomic_coders = {
            schema_pb2.INT32: VarIntCoder,
            schema_pb2.INT64: VarIntCoder,
            schema_pb2.DOUBLE: FloatCoder,
            schema_pb2.STRING: StrUtf8Coder,
            schema_pb2.BOOLEAN: BooleanCoder,
            schema_pb2.BYTES: BytesCoder,
        }
        make_coder = atomic_coders.get(field_type.atomic_type)
        if make_coder is not None:
            return make_coder()
        # Unlisted atomic types fall through to the error below.

    if type_info == "array_type":
        element_coder = _coder_from_type(field_type.array_type.element_type)
        return IterableCoder(element_coder)

    if type_info == "map_type":
        key_coder = _coder_from_type(field_type.map_type.key_type)
        value_coder = _coder_from_type(field_type.map_type.value_type)
        return MapCoder(key_coder, value_coder)

    if type_info == "row_type":
        return RowCoder(field_type.row_type.schema)

    # The Java SDK supports several more types, but the coders are not yet
    # standard, and are not implemented in Python.
    raise ValueError(
        "Encountered a type that is not currently supported by RowCoder: %s" %
        field_type)
class AuctionCoderImpl(coder_impl.StreamCoderImpl):
    """Stream coder implementation for ``Auction`` values.

    An auction is written as ten consecutive fields, in this order:
    ``id`` (var-int), ``item_name``, ``description`` (strings),
    ``initial_bid``, ``reserve`` (var-ints), ``date_time``, ``expires``
    (timestamps), ``seller``, ``category`` (var-ints) and ``extra`` (string).
    Decoding reads the fields back in the same order and passes them
    positionally to ``Auction``.
    """

    # Component coder implementations shared by all instances.
    _int_coder_impl = coder_impl.VarIntCoderImpl()
    _str_coder_impl = StrUtf8Coder().get_impl()
    _time_coder_impl = coder_impl.TimestampCoderImpl()

    def encode_to_stream(self, value, stream, nested):
        """Write every field of ``value`` to ``stream``.

        The ``nested`` argument is ignored; each component coder is always
        invoked with ``True`` as its third argument.
        """
        layout = (
            (self._int_coder_impl, 'id'),
            (self._str_coder_impl, 'item_name'),
            (self._str_coder_impl, 'description'),
            (self._int_coder_impl, 'initial_bid'),
            (self._int_coder_impl, 'reserve'),
            (self._time_coder_impl, 'date_time'),
            (self._time_coder_impl, 'expires'),
            (self._int_coder_impl, 'seller'),
            (self._int_coder_impl, 'category'),
            (self._str_coder_impl, 'extra'),
        )
        # Attributes are fetched one at a time so each field is read from
        # ``value`` immediately before it is written.
        for field_coder, attr_name in layout:
            field_coder.encode_to_stream(
                getattr(value, attr_name), stream, True)

    def decode_from_stream(self, stream, nested):
        """Read one ``Auction`` from ``stream``.

        The ``nested`` argument is ignored; each component coder is always
        invoked with ``True`` as its second argument.
        """
        int_impl = self._int_coder_impl
        str_impl = self._str_coder_impl
        time_impl = self._time_coder_impl
        # One entry per field, in wire order: id, item_name, description,
        # initial_bid, reserve, date_time, expires, seller, category, extra.
        field_coders = (
            int_impl,
            str_impl,
            str_impl,
            int_impl,
            int_impl,
            time_impl,
            time_impl,
            int_impl,
            int_impl,
            str_impl,
        )
        decoded = [
            field_coder.decode_from_stream(stream, True)
            for field_coder in field_coders
        ]
        return Auction(*decoded)
def __init__(self, key_coder_impl, window_coder_impl):
    """Store the caller-supplied coder impls and build the fixed ones.

    Args:
        key_coder_impl: coder implementation kept as ``_key_coder_impl``.
        window_coder_impl: coder implementation for a single window; it is
            wrapped in a ``TupleSequenceCoderImpl`` and kept as
            ``_windows_coder_impl``.
    """
    # Coder implementations derived from the constructor arguments.
    self._key_coder_impl = key_coder_impl
    self._windows_coder_impl = TupleSequenceCoderImpl(window_coder_impl)

    # Fixed component coder implementations.
    self._pane_info_coder_impl = PaneInfoCoderImpl()
    self._boolean_coder_impl = BooleanCoderImpl()
    self._timestamp_coder_impl = TimestampCoderImpl()

    # NOTE(review): imported locally rather than at module level,
    # presumably to avoid a circular import -- confirm before moving it.
    from apache_beam.coders.coders import StrUtf8Coder
    self._tag_coder_impl = StrUtf8Coder().get_impl()
class _RemoveDuplicates(beam.DoFn):
    """DoFn that emits a file's metadata only if its state bag is empty.

    Each element is indexed as ``(path, file_metadata)``. When the ``files``
    bag state holds nothing, the path is added to it and the metadata is
    emitted; otherwise the element is dropped with a debug log entry.
    NOTE(review): how the bag is scoped (e.g. per key) is decided by the
    runner and is not visible here -- confirm against the calling pipeline.
    """

    # Bag of file paths, encoded as UTF-8 strings.
    FILES_STATE = BagStateSpec('files', StrUtf8Coder())

    def process(self, element, file_state=beam.DoFn.StateParam(FILES_STATE)):
        """Yield ``element[1]`` if nothing has been recorded in the bag yet."""
        path, file_metadata = element[0], element[1]

        previously_recorded = list(file_state.read())
        if previously_recorded:
            _LOGGER.debug('File %s was already read', path)
            return

        file_state.add(path)
        _LOGGER.debug('Generated entry for file %s', path)
        yield file_metadata
def MockReadFromText(
        file_pattern=None, coder=StrUtf8Coder(), skip_header_lines=0):
    """Build a ``beam.Create`` transform from content held by the test context.

    The content registered for ``file_pattern`` is fetched from the current
    test context, ``\\r\\n`` sequences are replaced with ``\\n``, and the text
    is split on ``\\n``. Every resulting line is passed through
    ``coder.decode``.

    Args:
        file_pattern: key used to look up the content in the test context.
        coder: object whose ``decode`` method is applied to each line.
        skip_header_lines: number of leading lines to drop; a falsy value
            keeps all lines.

    Returns:
        A ``beam.Create`` transform labelled ``'MockReadFromText'``.

    Raises:
        RuntimeError: if the test context has no content for
            ``file_pattern``.
    """
    content = get_current_test_context().get_file_content(file_pattern)
    if content is None:
        raise RuntimeError('no file content set for %s' % file_pattern)

    normalized = content.replace('\r\n', '\n')
    rows = normalized.split('\n')
    if skip_header_lines:
        rows = rows[skip_header_lines:]

    decoded_rows = [coder.decode(row) for row in rows]
    return 'MockReadFromText' >> beam.Create(decoded_rows)
def coder_from_type(field_type):
    """Return a coder for the given schema field type.

    Handles the atomic types INT32, INT64, DOUBLE and STRING, plus arrays,
    whose element coder is resolved through ``RowCoder.coder_from_type``.

    Raises:
        ValueError: if the type (or the atomic type) is not one of the
            cases handled here.
    """
    type_info = field_type.WhichOneof("type_info")

    if type_info == "atomic_type":
        atomic_coders = {
            schema_pb2.INT32: VarIntCoder,
            schema_pb2.INT64: VarIntCoder,
            schema_pb2.DOUBLE: FloatCoder,
            schema_pb2.STRING: StrUtf8Coder,
        }
        make_coder = atomic_coders.get(field_type.atomic_type)
        if make_coder is not None:
            return make_coder()
        # Unlisted atomic types fall through to the error below.

    if type_info == "array_type":
        element_coder = RowCoder.coder_from_type(
            field_type.array_type.element_type)
        return IterableCoder(element_coder)

    # The Java SDK supports several more types, but the coders are not yet
    # standard, and are not implemented in Python.
    raise ValueError(
        "Encountered a type that is not currently supported by RowCoder: %s" %
        field_type)
class BidCoderImpl(coder_impl.StreamCoderImpl):
    """Stream coder implementation for ``Bid`` values.

    A bid is written as five consecutive fields, in this order:
    ``auction``, ``bidder``, ``price`` (var-ints), ``date_time`` (timestamp)
    and ``extra`` (string). Decoding reads the fields back in the same order
    and passes them positionally to ``Bid``.
    """

    # Component coder implementations shared by all instances.
    _int_coder_impl = coder_impl.VarIntCoderImpl()
    _str_coder_impl = StrUtf8Coder().get_impl()
    _time_coder_impl = coder_impl.TimestampCoderImpl()

    def encode_to_stream(self, value, stream, nested):
        """Write every field of ``value`` to ``stream``.

        The ``nested`` argument is ignored; each component coder is always
        invoked with ``True`` as its third argument.
        """
        layout = (
            (self._int_coder_impl, 'auction'),
            (self._int_coder_impl, 'bidder'),
            (self._int_coder_impl, 'price'),
            (self._time_coder_impl, 'date_time'),
            (self._str_coder_impl, 'extra'),
        )
        # Attributes are fetched one at a time so each field is read from
        # ``value`` immediately before it is written.
        for field_coder, attr_name in layout:
            field_coder.encode_to_stream(
                getattr(value, attr_name), stream, True)

    def decode_from_stream(self, stream, nested):
        """Read one ``Bid`` from ``stream``.

        The ``nested`` argument is ignored; each component coder is always
        invoked with ``True`` as its second argument.
        """
        int_impl = self._int_coder_impl
        str_impl = self._str_coder_impl
        time_impl = self._time_coder_impl
        # One entry per field, in wire order: auction, bidder, price,
        # date_time, extra.
        field_coders = (int_impl, int_impl, int_impl, time_impl, str_impl)
        decoded = [
            field_coder.decode_from_stream(stream, True)
            for field_coder in field_coders
        ]
        return Bid(*decoded)
def _nonnull_coder_from_type(field_type):
    """Return a coder for the given schema field type.

    The populated member of the ``type_info`` oneof selects the coder:
    atomic types map to a primitive coder, arrays to an ``IterableCoder``,
    maps to a ``MapCoder``, logical types to a ``LogicalTypeCoder`` (or the
    registry's coder for ``object`` when the URN is ``PYTHON_ANY_URN``), and
    rows to a ``RowCoder``.

    Raises:
        ValueError: if the type (or the atomic type) is not one of the
            cases handled here.
    """
    type_info = field_type.WhichOneof("type_info")

    if type_info == "atomic_type":
        atomic_coders = {
            schema_pb2.INT32: VarIntCoder,
            schema_pb2.INT64: VarIntCoder,
            schema_pb2.DOUBLE: FloatCoder,
            schema_pb2.STRING: StrUtf8Coder,
            schema_pb2.BOOLEAN: BooleanCoder,
            schema_pb2.BYTES: BytesCoder,
        }
        make_coder = atomic_coders.get(field_type.atomic_type)
        if make_coder is not None:
            return make_coder()
        # Unlisted atomic types fall through to the error below.

    if type_info == "array_type":
        element_coder = _coder_from_type(field_type.array_type.element_type)
        return IterableCoder(element_coder)

    if type_info == "map_type":
        key_coder = _coder_from_type(field_type.map_type.key_type)
        value_coder = _coder_from_type(field_type.map_type.value_type)
        return MapCoder(key_coder, value_coder)

    if type_info == "logical_type":
        logical_proto = field_type.logical_type
        # Special case for the Any logical type. Just use the default coder
        # for an unknown Python object.
        if logical_proto.urn == PYTHON_ANY_URN:
            return typecoders.registry.get_coder(object)
        logical_type = LogicalType.from_runner_api(logical_proto)
        representation_coder = _coder_from_type(logical_proto.representation)
        return LogicalTypeCoder(logical_type, representation_coder)

    if type_info == "row_type":
        return RowCoder(field_type.row_type.schema)

    # The Java SDK supports several more types, but the coders are not yet
    # standard, and are not implemented in Python.
    raise ValueError(
        "Encountered a type that is not currently supported by RowCoder: %s" %
        field_type)