def to_runner_api_parameter(self, unused_context):
  """Serialize this transform as a fully-qualified-named-transform payload.

  Infers a field type for each positional and keyword constructor argument
  from its runtime value, wraps everything into a row-encoded
  ExternalConfigurationPayload, and returns it with the matching URN.
  """
  args_schema = named_fields_to_schema([
      (f'arg{i}', convert_to_typing_type(instance_to_type(v)))
      for (i, v) in enumerate(self._args)
  ])
  kwargs_schema = named_fields_to_schema([
      (name, convert_to_typing_type(instance_to_type(v)))
      for (name, v) in self._kwargs.items()
  ])
  payload_schema = named_fields_to_schema({
      'constructor': str,
      'args': args_schema,
      'kwargs': kwargs_schema,
  })
  # Positional args are carried as a nested row with synthesized names
  # arg0, arg1, ... matching the schema above.
  args_row = Row(**{f'arg{i}': v for (i, v) in enumerate(self._args)})
  payload_row = Row(
      constructor=self._constructor,
      args=args_row,
      kwargs=Row(**self._kwargs))
  encoded = coders.RowCoder(payload_schema).encode(payload_row)
  return (
      PYTHON_FULLY_QUALIFIED_NAMED_TRANSFORM_URN,
      external_transforms_pb2.ExternalConfigurationPayload(
          schema=payload_schema, payload=encoded))
def _get_named_tuple_instance(self):
  """Build a named tuple for self._values, typed from __init__ annotations.

  Only annotations whose name also appears in self._values contribute a
  schema field.
  """
  annotations = self._transform.__init__.__annotations__
  fields = [(name, convert_to_typing_type(hint))
            for name, hint in annotations.items() if name in self._values]
  schema = named_fields_to_schema(fields)
  return named_tuple_from_schema(schema)(**self._values)
def _get_schema_proto_and_payload(self, *args, **kwargs):
  """Infer a schema for the given args/kwargs and row-encode their values.

  Positional arguments get synthesized field names produced from
  JavaClassLookupPayloadBuilder.IGNORED_ARG_FORMAT; keyword arguments keep
  their own names.

  Returns:
    A (schema_proto, payload) tuple where payload is the row-encoded values.

  Raises:
    ValueError: If any value is None (its type cannot be inferred) or a
      keyword name is empty.
  """
  named_fields = []
  fields_to_values = OrderedDict()
  for ordinal, value in enumerate(args):
    if value is None:
      raise ValueError(
          'Received value None. None values are currently not supported'
      )
    # Synthesize a stable field name for this positional argument.
    field_name = JavaClassLookupPayloadBuilder.IGNORED_ARG_FORMAT % ordinal
    named_fields.append(
        (field_name, convert_to_typing_type(instance_to_type(value))))
    fields_to_values[field_name] = value
  for key, value in kwargs.items():
    if not key:
      raise ValueError('Parameter name cannot be empty')
    if value is None:
      raise ValueError(
          'Received value None for key %s. None values are currently not '
          'supported' % key)
    named_fields.append((key, convert_to_typing_type(instance_to_type(value))))
    fields_to_values[key] = value
  schema_proto = named_fields_to_schema(named_fields)
  row = named_tuple_from_schema(schema_proto)(**fields_to_values)
  schema = named_tuple_to_schema(type(row))
  payload = RowCoder(schema).encode(row)
  return (schema_proto, payload)
def _get_named_tuple_instance(self):
  """Build a typed named tuple from self._values, skipping None entries."""
  # Fields with value=None are dropped since their type can't be inferred.
  values = {k: v for k, v in self._values.items() if v is not None}

  # In python 2 named_fields_to_schema will not accept str because its
  # ambiguous. This converts str hints to ByteString recursively so its clear
  # we intend to use BYTES.
  # TODO(BEAM-7372): Remove coercion to ByteString
  def coerce_str_to_bytes(typ):
    if typ == str:
      return ByteString
    if hasattr(typ, '__args__') and hasattr(typ, '__origin__'):
      # Create a new type rather than modifying the existing one
      typ = typ.__origin__[tuple(map(coerce_str_to_bytes, typ.__args__))]
    return typ

  if sys.version_info[0] >= 3:
    # Python 3's str is unambiguous, so no coercion is applied there.
    coerce_str_to_bytes = lambda x: x

  schema = named_fields_to_schema([
      (k, coerce_str_to_bytes(convert_to_typing_type(instance_to_type(v))))
      for k, v in values.items()
  ])
  return named_tuple_from_schema(schema)(**values)
def _get_named_tuple_instance(self):
  """Build a typed named tuple from self._values, skipping None entries.

  Returns:
    An instance of a named tuple generated from the inferred schema,
    populated with the non-None values.
  """
  # omit fields with value=None since we can't infer their type
  values = {
      key: value
      for key, value in self._values.items() if value is not None
  }

  # TODO(BEAM-7372): Remove coercion to ByteString
  def coerce_str_to_bytes(typ):
    if typ == str:
      return ByteString
    elif hasattr(typ, '__args__') and hasattr(typ, '__origin__'):
      # Build a fresh parameterized type instead of assigning to
      # typ.__args__: typing objects are shared/cached, so mutating them
      # in place would leak the coercion to unrelated code.
      typ = typ.__origin__[tuple(map(coerce_str_to_bytes, typ.__args__))]
    return typ

  # `unicode` does not exist on Python 3 (the old `str == unicode` check
  # would raise NameError there); str is unambiguous on Python 3, so the
  # coercion is only needed on Python 2.
  if sys.version_info[0] >= 3:
    coerce_str_to_bytes = lambda x: x

  schema = named_fields_to_schema([
      (key,
       coerce_str_to_bytes(convert_to_typing_type(instance_to_type(value))))
      for key, value in values.items()
  ])
  return named_tuple_from_schema(schema)(**values)
def _get_named_tuple_instance(self):
  """Build a named tuple mirroring this dataclass transform's fields."""
  import dataclasses
  field_types = [(f.name, convert_to_typing_type(f.type))
                 for f in dataclasses.fields(self._transform)]
  schema = named_fields_to_schema(field_types)
  return named_tuple_from_schema(schema)(
      **dataclasses.asdict(self._transform))
def from_type_hint(type_hint, registry):
  """Create a RowCoder for a row-type constraint or a named-tuple hint.

  Falls back to the generic object coder when the constraint's fields
  cannot be expressed as a schema.
  """
  if not isinstance(type_hint, row_type.RowTypeConstraint):
    return RowCoder(named_tuple_to_schema(type_hint))
  try:
    schema = named_fields_to_schema(type_hint._fields)
  except ValueError:
    # TODO(BEAM-10570): Consider a pythonsdk logical type.
    return typecoders.registry.get_coder(object)
  return RowCoder(schema)
def _get_named_tuple_instance(self):
  """Build a typed named tuple from self._values, skipping None entries."""
  # None values are dropped because their field type can't be inferred.
  values = {k: v for k, v in self._values.items() if v is not None}
  field_types = [(k, convert_to_typing_type(instance_to_type(v)))
                 for k, v in values.items()]
  schema = named_fields_to_schema(field_types)
  return named_tuple_from_schema(schema)(**values)
def element_type_from_dataframe(proxy, include_indexes=False):
  # type: (pd.DataFrame, bool) -> type
  """Generate an element_type for an element-wise PCollection from a proxy
  pandas object.

  Produces a named-tuple type whose fields mirror the proxy's columns and
  their dtypes (and, when include_indexes=True, its named index levels,
  which are placed before the columns).

  Args:
    proxy: The pandas DataFrame whose columns/dtypes define the schema.
    include_indexes: If True, also include the DataFrame's index levels as
      schema fields.

  Raises:
    ValueError: If include_indexes=True and an index level is unnamed,
      shares its name with another index level, or shares its name with a
      column.
  """
  output_columns = []
  if include_indexes:
    index_names = list(proxy.index.names)
    for level, index_name in enumerate(index_names):
      if index_name is None:
        raise ValueError(
            "Encountered an unnamed index. Cannot convert to a "
            "schema-aware PCollection with include_indexes=True. "
            "Please name all indexes or consider not including "
            "indexes.")
      elif index_name in index_names[level + 1:]:
        # A later level reuses this name.
        raise ValueError(
            "Encountered multiple indexes with the name '%s'. "
            "Cannot convert to a schema-aware PCollection with "
            "include_indexes=True. Please ensure all indexes have "
            "unique names or consider not including indexes." % index_name)
      elif index_name in proxy.columns:
        raise ValueError(
            "Encountered an index that has the same name as one "
            "of the columns, '%s'. Cannot convert to a "
            "schema-aware PCollection with include_indexes=True. "
            "Please ensure all indexes have unique names or "
            "consider not including indexes." % index_name)
      else:
        # its ok!
        output_columns.append(
            (index_name, proxy.index.get_level_values(level).dtype))

  output_columns.extend(zip(proxy.columns, proxy.dtypes))

  return named_tuple_from_schema(
      named_fields_to_schema([(column, _dtype_to_fieldtype(dtype))
                              for (column, dtype) in output_columns]))
def from_type_hint(type_hint, registry):
  """Create a RowCoder for a row-type constraint or a named-tuple hint."""
  if isinstance(type_hint, row_type.RowTypeConstraint):
    schema = named_fields_to_schema(type_hint._fields)
    return RowCoder(schema)
  return RowCoder(named_tuple_to_schema(type_hint))
def convert_to_typing_type(type_):
  """Map a type constraint to a typing-style type.

  Row constraints become named tuples generated from their fields; all
  other constraints are delegated to native_type_compatibility.
  """
  if not isinstance(type_, row_type.RowTypeConstraint):
    return native_type_compatibility.convert_to_typing_type(type_)
  return named_tuple_from_schema(named_fields_to_schema(type_._fields))