def typing_to_runner_api(type_):
    """Translate a Python type into a ``schema_pb2.FieldType`` proto.

    The first matching rule wins:

    * NamedTuple types (as decided by ``match_is_named_tuple``) become a
      ``row_type``. The schema is looked up in ``SCHEMA_REGISTRY`` via the id
      stored on the class; if none is found a new schema is generated,
      given a fresh UUID, recorded on the class and added to the registry.
    * Types present in ``PRIMITIVE_TO_ATOMIC_TYPE`` become an ``atomic_type``.
    * Mapping types become a ``map_type``.
    * Optional types are converted as their inner type with
      ``nullable=True``.
    * Sequence types become an ``array_type``.
    * Anything else is handed to ``LogicalType.from_typing``. If that raises
      ``ValueError`` the type is treated as Any and marked nullable.

    Args:
      type_: the Python type (or typing construct) to convert.

    Returns:
      A ``schema_pb2.FieldType`` describing ``type_``.
    """
    if match_is_named_tuple(type_):
        schema = None
        if hasattr(type_, _BEAM_SCHEMA_ID):
            schema = SCHEMA_REGISTRY.get_schema_by_id(
                getattr(type_, _BEAM_SCHEMA_ID))
        if schema is None:
            # typing.NamedTuple dropped `_field_types` in Python 3.9;
            # `__annotations__` holds the same mapping. Prefer the legacy
            # attribute when it exists so older interpreters behave exactly
            # as before.
            field_types = getattr(type_, '_field_types', None)
            if field_types is None:
                field_types = type_.__annotations__
            fields = [
                schema_pb2.Field(
                    name=name, type=typing_to_runner_api(field_types[name]))
                for name in type_._fields
            ]
            type_id = str(uuid4())
            schema = schema_pb2.Schema(fields=fields, id=type_id)
            # Remember the generated id on the class so later calls reuse
            # the registered schema instead of minting a new one.
            setattr(type_, _BEAM_SCHEMA_ID, type_id)
            SCHEMA_REGISTRY.add(type_, schema)

        return schema_pb2.FieldType(row_type=schema_pb2.RowType(schema=schema))

    # All concrete types (other than NamedTuple sub-classes) should map to
    # a supported primitive type.
    elif type_ in PRIMITIVE_TO_ATOMIC_TYPE:
        return schema_pb2.FieldType(
            atomic_type=PRIMITIVE_TO_ATOMIC_TYPE[type_])

    elif _match_is_exactly_mapping(type_):
        key_type, value_type = map(typing_to_runner_api, _get_args(type_))
        return schema_pb2.FieldType(
            map_type=schema_pb2.MapType(
                key_type=key_type, value_type=value_type))

    elif _match_is_optional(type_):
        # It's possible that a user passes us Optional[Optional[T]], but in
        # python typing this is indistinguishable from Optional[T] - both
        # resolve to Union[T, None] - so there's no need to check for that
        # case here.
        result = typing_to_runner_api(extract_optional_type(type_))
        result.nullable = True
        return result

    elif _safe_issubclass(type_, Sequence):
        element_type = typing_to_runner_api(_get_args(type_)[0])
        return schema_pb2.FieldType(
            array_type=schema_pb2.ArrayType(element_type=element_type))

    elif _safe_issubclass(type_, Mapping):
        key_type, value_type = map(typing_to_runner_api, _get_args(type_))
        return schema_pb2.FieldType(
            map_type=schema_pb2.MapType(
                key_type=key_type, value_type=value_type))

    try:
        logical_type = LogicalType.from_typing(type_)
    except ValueError:
        # Unknown type, just treat it like Any. A value typed as Any may be
        # None, so the resulting field has to be nullable.
        return schema_pb2.FieldType(
            logical_type=schema_pb2.LogicalType(urn=PYTHON_ANY_URN),
            nullable=True)
    else:
        # TODO(bhulette): Add support for logical types that require arguments
        return schema_pb2.FieldType(
            logical_type=schema_pb2.LogicalType(
                urn=logical_type.urn(),
                representation=typing_to_runner_api(
                    logical_type.representation_type())))
def test_python_callable_maps_to_logical_type(self):
    # PythonCallableWithSource should convert to, and be recovered from, a
    # logical type carrying the python_callable URN with the `str` field type
    # as its representation.
    from apache_beam.utils.python_callable import PythonCallableWithSource

    def callable_field_type():
        # Built fresh on every call so the two assertions stay independent.
        string_representation = typing_to_runner_api(str)
        callable_logical_type = schema_pb2.LogicalType(
            urn=common_urns.python_callable.urn,
            representation=string_representation)
        return schema_pb2.FieldType(logical_type=callable_logical_type)

    # Python type -> proto.
    expected_proto = callable_field_type()
    self.assertEqual(
        expected_proto, typing_to_runner_api(PythonCallableWithSource))

    # proto -> Python type.
    recovered_type = typing_from_runner_api(callable_field_type())
    self.assertEqual(recovered_type, PythonCallableWithSource)
def test_create_row_coder_from_schema(self):
    # Build the "person" schema directly from protos, create a RowCoder from
    # it, and check that every entry of self.PEOPLE survives an encode/decode
    # round trip unchanged.

    def atomic(atomic_type, **extra):
        # FieldType wrapping a single atomic type; `extra` forwards options
        # such as nullable=True untouched.
        return schema_pb2.FieldType(atomic_type=atomic_type, **extra)

    def field(name, field_type):
        return schema_pb2.Field(name=name, type=field_type)

    string_array = schema_pb2.FieldType(
        array_type=schema_pb2.ArrayType(
            element_type=atomic(schema_pb2.STRING)))

    string_to_int64_map = schema_pb2.FieldType(
        map_type=schema_pb2.MapType(
            key_type=atomic(schema_pb2.STRING),
            value_type=atomic(schema_pb2.INT64),
        ))

    # The micros_instant logical type is represented as a (seconds, micros)
    # row of two INT64 fields.
    micros_instant_schema = schema_pb2.Schema(
        id="micros_instant",
        fields=[
            field("seconds", atomic(schema_pb2.INT64)),
            field("micros", atomic(schema_pb2.INT64)),
        ])
    micros_instant = schema_pb2.FieldType(
        logical_type=schema_pb2.LogicalType(
            urn="beam:logical_type:micros_instant:v1",
            representation=schema_pb2.FieldType(
                row_type=schema_pb2.RowType(schema=micros_instant_schema))))

    schema = schema_pb2.Schema(
        id="person",
        fields=[
            field("name", atomic(schema_pb2.STRING)),
            field("age", atomic(schema_pb2.INT32)),
            field("address", atomic(schema_pb2.STRING, nullable=True)),
            field("aliases", string_array),
            field("knows_javascript", atomic(schema_pb2.BOOLEAN)),
            field("payload", atomic(schema_pb2.BYTES, nullable=True)),
            field("custom_metadata", string_to_int64_map),
            field("favorite_time", micros_instant),
        ])
    coder = RowCoder(schema)

    for person in self.PEOPLE:
        round_tripped = coder.decode(coder.encode(person))
        self.assertEqual(person, round_tripped)
def test_unknown_primitive_maps_to_any(self):
    # np.uint32 is expected to convert to the nullable "any" logical type.
    converted = typing_to_runner_api(np.uint32)

    any_logical_type = schema_pb2.LogicalType(
        urn="beam:logical:pythonsdk_any:v1")
    nullable_any = schema_pb2.FieldType(
        logical_type=any_logical_type, nullable=True)

    self.assertEqual(converted, nullable_any)