def _get_schema_proto_and_payload(self, *args, **kwargs):
    """Build a schema proto and an encoded row from the given arguments.

    Positional arguments are stored under names generated from
    ``JavaClassLookupPayloadBuilder.IGNORED_ARG_FORMAT`` and their index;
    keyword arguments are stored under their own names. Insertion order is
    positional arguments first, then keyword arguments.

    Returns:
        A ``(schema_proto, payload)`` tuple where ``payload`` is the row of
        argument values encoded with ``RowCoder``.

    Raises:
        ValueError: If any value is ``None`` or a keyword name is empty.
    """
    typed_fields = []
    values_by_name = OrderedDict()

    def _register(name, value):
        # Record the field's type and its value under the same name.
        typed_fields.append(
            (name, convert_to_typing_type(instance_to_type(value))))
        values_by_name[name] = value

    for position, arg in enumerate(args):
        if arg is None:
            raise ValueError(
                'Received value None. None values are currently not supported')
        _register(
            JavaClassLookupPayloadBuilder.IGNORED_ARG_FORMAT % position, arg)

    for name, arg in kwargs.items():
        if not name:
            raise ValueError('Parameter name cannot be empty')
        if arg is None:
            raise ValueError(
                'Received value None for key %s. None values are currently not '
                'supported' % name)
        _register(name, arg)

    schema_proto = named_fields_to_schema(typed_fields)
    row_ctor = named_tuple_from_schema(schema_proto)
    row = row_ctor(**values_by_name)
    # The payload is encoded with the schema derived from the row's own type.
    row_schema = named_tuple_to_schema(type(row))
    return schema_proto, RowCoder(row_schema).encode(row)
def test_user_type_annotated_with_id_after_conversion(self):
    """Converting a NamedTuple type to a schema tags the type with its id."""
    user_type = NamedTuple('MyCuteClass', [('name', str)])
    id_attr = '_beam_schema_id'

    # Before conversion the type carries no schema id.
    self.assertFalse(hasattr(user_type, id_attr))

    converted = named_tuple_to_schema(user_type)

    # After conversion the id is present and matches the schema's id.
    self.assertTrue(hasattr(user_type, id_attr))
    self.assertEqual(user_type._beam_schema_id, converted.id)
def set_encoding_position(type_, values):
    """Assign an encoding position to every field of ``type_``'s schema.

    The schema is fetched from ``SCHEMA_REGISTRY`` by id when ``type_`` already
    has a ``_beam_schema_id`` attribute, and derived with
    ``named_tuple_to_schema`` otherwise. After the positions are set, the
    schema is registered for ``type_`` in ``SCHEMA_REGISTRY``.

    :param type_: The type whose schema fields are updated.
    :param values: Anything ``dict()`` accepts that maps each field name to
      its encoding position.
    :raises KeyError: If a schema field name is missing from ``values``.
    """
    not_set = object()
    schema_id = getattr(type_, "_beam_schema_id", not_set)
    if schema_id is not_set:
        schema = named_tuple_to_schema(type_)
    else:
        schema = SCHEMA_REGISTRY.get_schema_by_id(schema_id)

    positions = dict(values)
    for field in schema.fields:
        field.encoding_position = positions[field.name]

    SCHEMA_REGISTRY.add(type_, schema)
def from_type_hint(type_hint, registry):
    """Return a coder for ``type_hint``.

    A ``RowTypeConstraint`` is converted through its ``_fields``; any other
    hint is converted with ``named_tuple_to_schema``. If building the schema
    from a ``RowTypeConstraint`` raises ``ValueError``, the coder registered
    for ``object`` is returned instead. ``registry`` is not used here.
    """
    if not isinstance(type_hint, row_type.RowTypeConstraint):
        return RowCoder(named_tuple_to_schema(type_hint))

    try:
        row_schema = named_fields_to_schema(type_hint._fields)
    except ValueError:
        # TODO(BEAM-10570): Consider a pythonsdk logical type.
        return typecoders.registry.get_coder(object)
    return RowCoder(row_schema)
def __init__(
    self,
    project_id,
    instance_id,
    database_id,
    row_type=None,
    sql=None,
    table=None,
    host=None,
    emulator_host=None,
    batching=None,
    timestamp_bound_mode=None,
    read_timestamp=None,
    staleness=None,
    time_unit=None,
    expansion_service=None,
):
    """
    Initializes a read operation from Spanner.

    :param project_id: Specifies the Cloud Spanner project.
    :param instance_id: Specifies the Cloud Spanner instance.
    :param database_id: Specifies the Cloud Spanner database.
    :param row_type: Row type that fits the given query or table. Passed as
      NamedTuple, e.g. NamedTuple('name', [('row_name', str)])
    :param sql: An SQL query to execute. Its results must fit the
      provided row_type. Don't use when table is set.
    :param table: A spanner table. When provided all columns from row_type
      will be selected to query. Don't use when sql is set.
    :param batching: By default Batch API is used to read data from Cloud
      Spanner. It is useful to disable batching when the underlying query
      is not root-partitionable.
    :param host: Specifies the Cloud Spanner host.
    :param emulator_host: Specifies Spanner emulator host.
    :param timestamp_bound_mode: Defines how Cloud Spanner will choose a
      timestamp for a read-only transaction or a single read/query.
      Passed as TimestampBoundMode enum. Possible values:
      STRONG: A timestamp bound that will perform reads and queries at a
      timestamp where all previously committed transactions are visible.
      READ_TIMESTAMP: Returns a timestamp bound that will perform reads
      and queries at the given timestamp.
      MIN_READ_TIMESTAMP: Returns a timestamp bound that will perform reads
      and queries at a timestamp chosen to be at least given timestamp
      value.
      EXACT_STALENESS: Returns a timestamp bound that will perform reads
      and queries at an exact staleness. The timestamp is chosen soon after
      the read is started.
      MAX_STALENESS: Returns a timestamp bound that will perform reads and
      queries at a timestamp chosen to be at most time_unit stale.
    :param read_timestamp: Timestamp in string. Use only when
      timestamp_bound_mode is set to READ_TIMESTAMP or MIN_READ_TIMESTAMP.
    :param staleness: Staleness value as int. Use only when
      timestamp_bound_mode is set to EXACT_STALENESS or MAX_STALENESS.
      time_unit has to be set along with this param.
    :param time_unit: Time unit for staleness_value passed as TimeUnit enum.
      Possible values: NANOSECONDS, MICROSECONDS, MILLISECONDS, SECONDS,
      HOURS, DAYS.
    :param expansion_service: The address (host:port) of the
      ExpansionService.
    :raises AssertionError: If row_type is missing, if not exactly one of
      sql/table is given, or if the staleness / read_timestamp arguments do
      not match timestamp_bound_mode. These checks are plain ``assert``
      statements, so they are skipped when Python runs with ``-O``.
    """
    assert row_type, 'row_type is required'
    # Exactly one of sql/table must be given. The parentheses are required:
    # `and` binds tighter than `or`, so without them any truthy `sql` would
    # pass even when `table` is also set.
    assert (sql or table) and not (sql and table), (
        'exactly one of sql or table must be set')

    # Only used for validation; the raw `staleness` is what gets forwarded.
    staleness_value = int(staleness) if staleness else None

    if staleness_value or time_unit:
        # Both values are needed, and only for one of the staleness modes.
        # The mode alternatives are grouped so that EXACT_STALENESS does not
        # bypass the staleness/time_unit requirement.
        assert staleness_value and time_unit and (
            timestamp_bound_mode is TimestampBoundMode.MAX_STALENESS or
            timestamp_bound_mode is TimestampBoundMode.EXACT_STALENESS), (
                'staleness and time_unit must be set together, with '
                'timestamp_bound_mode MAX_STALENESS or EXACT_STALENESS')

    if read_timestamp:
        assert (
            timestamp_bound_mode is TimestampBoundMode.MIN_READ_TIMESTAMP or
            timestamp_bound_mode is TimestampBoundMode.READ_TIMESTAMP), (
                'read_timestamp requires timestamp_bound_mode '
                'READ_TIMESTAMP or MIN_READ_TIMESTAMP')

    super(ReadFromSpanner, self).__init__(
        self.URN,
        NamedTupleBasedPayloadBuilder(
            ReadFromSpannerSchema(
                instance_id=instance_id,
                database_id=database_id,
                sql=sql,
                table=table,
                schema=named_tuple_to_schema(row_type).SerializeToString(),
                project_id=project_id,
                host=host,
                emulator_host=emulator_host,
                batching=batching,
                timestamp_bound_mode=_get_enum_name(timestamp_bound_mode),
                read_timestamp=read_timestamp,
                staleness=staleness,
                time_unit=_get_enum_name(time_unit),
            ),
        ),
        expansion_service or default_io_expansion_service(),
    )
def __reduce__(self):
    """Reduce to ``_from_serialized_schema`` plus the serialized schema.

    The schema is derived from ``self._namedtuple_ctor`` and passed as the
    single argument, in its bytes representation.
    """
    rebuild = self._from_serialized_schema
    # When pickling, use the bytes representation of the schema.
    schema = named_tuple_to_schema(self._namedtuple_ctor)
    serialized_schema = schema.SerializeToString()
    return rebuild, (serialized_schema,)
def from_type_hint(type_hint, registry):
    """Return a ``RowCoder`` for ``type_hint``.

    A ``RowTypeConstraint`` is converted through its ``_fields``; any other
    hint is converted with ``named_tuple_to_schema``. ``registry`` is not
    used here.
    """
    if isinstance(type_hint, row_type.RowTypeConstraint):
        return RowCoder(named_fields_to_schema(type_hint._fields))
    return RowCoder(named_tuple_to_schema(type_hint))
def build(self):
    """Build an ``ExternalConfigurationPayload`` from the named tuple instance.

    The instance comes from ``self._get_named_tuple_instance()``. Its type
    supplies the schema, and the instance itself is encoded with a
    ``RowCoder`` built from that schema.
    """
    instance = self._get_named_tuple_instance()
    row_schema = named_tuple_to_schema(type(instance))
    encoded_row = RowCoder(row_schema).encode(instance)
    return ExternalConfigurationPayload(
        schema=row_schema, payload=encoded_row)
def from_type_hint(named_tuple_type, registry):
    """Return a ``RowCoder`` for the schema of ``named_tuple_type``.

    ``registry`` is not used here.
    """
    schema = named_tuple_to_schema(named_tuple_type)
    return RowCoder(schema)