def test_typing_survives_proto_roundtrip(self):
  """Type hints must come back unchanged from a typing -> proto -> typing trip.

  Exercises primitive types, their Optional forms, Sequence and Mapping types
  built from them, and two NamedTuple schemas. Every conversion is given a
  freshly constructed SchemaTypeRegistry.
  """
  def roundtrip(type_hint):
    # Each direction of the conversion gets its own new registry.
    as_proto = typing_to_runner_api(
        type_hint, schema_registry=SchemaTypeRegistry())
    return typing_from_runner_api(
        as_proto, schema_registry=SchemaTypeRegistry())

  required = [
      np.int8,
      np.int16,
      np.int32,
      np.int64,
      np.float32,
      np.float64,
      bool,
      bytes,
      str,
  ]
  nullable = [Optional[typ] for typ in required]
  primitives = required + nullable

  sequences = [Sequence[typ] for typ in primitives]
  mappings = [
      Mapping[key_type, value_type]
      for key_type, value_type in itertools.product(primitives, repeat=2)
  ]

  all_primitives_schema = NamedTuple(
      'AllPrimitives',
      [('field%d' % i, typ) for i, typ in enumerate(primitives)])
  complex_schema = NamedTuple(
      'ComplexSchema',
      [
          ('id', np.int64),
          ('name', str),
          ('optional_map', Optional[Mapping[str, Optional[np.float64]]]),
          ('optional_array', Optional[Sequence[np.float32]]),
          ('array_optional', Sequence[Optional[bool]]),
          ('timestamp', Timestamp),
      ])

  for type_hint in primitives + sequences + mappings:
    self.assertEqual(type_hint, roundtrip(type_hint))

  # NamedTuple types are checked separately with the dedicated
  # equivalence assertion rather than plain equality.
  for schema in (all_primitives_schema, complex_schema):
    self.assert_namedtuple_equivalent(schema, roundtrip(schema))
def test_typing_survives_proto_roundtrip(self):
  """Type hints must come back unchanged from a typing -> proto -> typing trip.

  Exercises primitive types, their Optional forms, Sequence and Mapping types
  built from them, and two NamedTuple schemas, all compared with assertEqual.
  """
  required = [
      np.int8,
      np.int16,
      np.int32,
      np.int64,
      np.float32,
      np.float64,
      unicode,
      bool,
  ]

  # The bytes type cannot survive a roundtrip to/from proto in Python 2.
  # In order to use BYTES a user type has to use typing.ByteString (because
  # bytes == str, and we map str to STRING).
  # TODO(BEAM-7372)
  if IS_PYTHON_3:
    required.append(bytes)

  nullable = [Optional[typ] for typ in required]
  primitives = required + nullable

  sequences = [Sequence[typ] for typ in primitives]
  mappings = [
      Mapping[key_type, value_type]
      for key_type, value_type in itertools.product(primitives, repeat=2)
  ]

  all_primitives_schema = NamedTuple(
      'AllPrimitives',
      [('field%d' % i, typ) for i, typ in enumerate(primitives)])
  complex_schema = NamedTuple(
      'ComplexSchema',
      [
          ('id', np.int64),
          ('name', unicode),
          ('optional_map', Optional[Mapping[unicode, Optional[np.float64]]]),
          ('optional_array', Optional[Sequence[np.float32]]),
          ('array_optional', Sequence[Optional[bool]]),
          ('timestamp', Timestamp),
      ])
  schemas = [all_primitives_schema, complex_schema]

  for type_hint in primitives + sequences + mappings + schemas:
    as_proto = typing_to_runner_api(type_hint)
    self.assertEqual(type_hint, typing_from_runner_api(as_proto))
def test_typing_survives_proto_roundtrip(self):
  """Type hints must come back unchanged from a typing -> proto -> typing trip.

  Exercises primitive types, their Optional forms, Sequence and Mapping types
  built from them, and two NamedTuple schemas, all compared with assertEqual.
  """
  required = [
      np.int8,
      np.int16,
      np.int32,
      np.int64,
      np.float32,
      np.float64,
      unicode,
      bool,
      bytes,
      str,
  ]
  nullable = [Optional[typ] for typ in required]
  primitives = required + nullable

  sequences = [Sequence[typ] for typ in primitives]
  mappings = [
      Mapping[key_type, value_type]
      for key_type, value_type in itertools.product(primitives, repeat=2)
  ]

  all_primitives_schema = NamedTuple(
      'AllPrimitives',
      [('field%d' % i, typ) for i, typ in enumerate(primitives)])
  complex_schema = NamedTuple(
      'ComplexSchema',
      [
          ('id', np.int64),
          ('name', unicode),
          ('optional_map', Optional[Mapping[unicode, Optional[np.float64]]]),
          ('optional_array', Optional[Sequence[np.float32]]),
          ('array_optional', Sequence[Optional[bool]]),
          ('timestamp', Timestamp),
      ])
  schemas = [all_primitives_schema, complex_schema]

  for type_hint in primitives + sequences + mappings + schemas:
    as_proto = typing_to_runner_api(type_hint)
    self.assertEqual(type_hint, typing_from_runner_api(as_proto))
def test_python_callable_maps_to_logical_type(self):
  """PythonCallableWithSource converts to and from the python_callable type.

  Checks both directions against a FieldType whose logical type carries the
  python_callable URN and uses the proto form of `str` as its representation.
  """
  from apache_beam.utils.python_callable import PythonCallableWithSource

  def callable_field_type():
    # Built anew for each assertion so neither check shares a proto instance.
    logical = schema_pb2.LogicalType(
        urn=common_urns.python_callable.urn,
        representation=typing_to_runner_api(str))
    return schema_pb2.FieldType(logical_type=logical)

  # typing -> proto
  self.assertEqual(
      callable_field_type(), typing_to_runner_api(PythonCallableWithSource))

  # proto -> typing
  self.assertEqual(
      typing_from_runner_api(callable_field_type()), PythonCallableWithSource)
def test_unknown_atomic_raise_valueerror(self):
  """Converting a FieldType with the UNSPECIFIED atomic type raises ValueError."""
  with self.assertRaises(ValueError):
    # The proto is constructed inside the guarded region, as in the
    # callable-based form of this assertion.
    typing_from_runner_api(
        schema_pb2.FieldType(atomic_type=schema_pb2.UNSPECIFIED))
def test_proto_survives_typing_roundtrip(self):
  """FieldType protos must be unchanged by a proto -> typing -> proto trip.

  Exercises every atomic type except UNSPECIFIED (required and nullable),
  array and map types built from them, and two row-type schemas.
  """
  # Enum numbers are compared by value. An identity check (`is not`) on
  # integers only works by accident of interpreter small-int caching.
  atomic_types = [
      typ for typ in schema_pb2.AtomicType.values()
      if typ != schema_pb2.UNSPECIFIED
  ]

  all_nonoptional_primitives = [
      schema_pb2.FieldType(atomic_type=typ) for typ in atomic_types
  ]

  # The bytes type cannot survive a roundtrip to/from proto in Python 2.
  # In order to use BYTES a user type has to use typing.ByteString (because
  # bytes == str, and we map str to STRING).
  if not IS_PYTHON_3:
    all_nonoptional_primitives.remove(
        schema_pb2.FieldType(atomic_type=schema_pb2.BYTES))

  all_optional_primitives = [
      schema_pb2.FieldType(nullable=True, atomic_type=typ)
      for typ in atomic_types
  ]

  all_primitives = all_nonoptional_primitives + all_optional_primitives

  basic_array_types = [
      schema_pb2.FieldType(
          array_type=schema_pb2.ArrayType(element_type=typ))
      for typ in all_primitives
  ]

  basic_map_types = [
      schema_pb2.FieldType(
          map_type=schema_pb2.MapType(
              key_type=key_type, value_type=value_type))
      for key_type, value_type in itertools.product(
          all_primitives, all_primitives)
  ]

  selected_schemas = [
      # One field per primitive type.
      schema_pb2.FieldType(
          row_type=schema_pb2.RowType(
              schema=schema_pb2.Schema(
                  id='32497414-85e8-46b7-9c90-9a9cc62fe390',
                  fields=[
                      schema_pb2.Field(name='field%d' % i, type=typ)
                      for i, typ in enumerate(all_primitives)
                  ]))),
      # A mix of atomic, nullable map, nullable array and array-of-nullable
      # fields.
      schema_pb2.FieldType(
          row_type=schema_pb2.RowType(
              schema=schema_pb2.Schema(
                  id='dead1637-3204-4bcb-acf8-99675f338600',
                  fields=[
                      schema_pb2.Field(
                          name='id',
                          type=schema_pb2.FieldType(
                              atomic_type=schema_pb2.INT64)),
                      schema_pb2.Field(
                          name='name',
                          type=schema_pb2.FieldType(
                              atomic_type=schema_pb2.STRING)),
                      schema_pb2.Field(
                          name='optional_map',
                          type=schema_pb2.FieldType(
                              nullable=True,
                              map_type=schema_pb2.MapType(
                                  key_type=schema_pb2.FieldType(
                                      atomic_type=schema_pb2.STRING),
                                  value_type=schema_pb2.FieldType(
                                      atomic_type=schema_pb2.DOUBLE)))),
                      schema_pb2.Field(
                          name='optional_array',
                          type=schema_pb2.FieldType(
                              nullable=True,
                              array_type=schema_pb2.ArrayType(
                                  element_type=schema_pb2.FieldType(
                                      atomic_type=schema_pb2.FLOAT)))),
                      schema_pb2.Field(
                          name='array_optional',
                          type=schema_pb2.FieldType(
                              array_type=schema_pb2.ArrayType(
                                  element_type=schema_pb2.FieldType(
                                      nullable=True,
                                      atomic_type=schema_pb2.BYTES)))),
                  ]))),
  ]

  test_cases = (
      all_primitives + basic_array_types + basic_map_types +
      selected_schemas)

  for test_case in test_cases:
    self.assertEqual(
        test_case, typing_to_runner_api(typing_from_runner_api(test_case)))
def test_proto_survives_typing_roundtrip(self):
  """FieldType protos must be unchanged by a proto -> typing -> proto trip.

  Exercises every atomic type except UNSPECIFIED (required and nullable),
  array and map types built from them, and two row-type schemas. Every
  conversion is given a freshly constructed SchemaTypeRegistry.
  """
  # Enum numbers are compared by value. An identity check (`is not`) on
  # integers only works by accident of interpreter small-int caching.
  atomic_types = [
      typ for typ in schema_pb2.AtomicType.values()
      if typ != schema_pb2.UNSPECIFIED
  ]

  all_nonoptional_primitives = [
      schema_pb2.FieldType(atomic_type=typ) for typ in atomic_types
  ]

  all_optional_primitives = [
      schema_pb2.FieldType(nullable=True, atomic_type=typ)
      for typ in atomic_types
  ]

  all_primitives = all_nonoptional_primitives + all_optional_primitives

  basic_array_types = [
      schema_pb2.FieldType(
          array_type=schema_pb2.ArrayType(element_type=typ))
      for typ in all_primitives
  ]

  basic_map_types = [
      schema_pb2.FieldType(
          map_type=schema_pb2.MapType(
              key_type=key_type, value_type=value_type))
      for key_type, value_type in itertools.product(
          all_primitives, all_primitives)
  ]

  selected_schemas = [
      # One field per primitive type.
      schema_pb2.FieldType(
          row_type=schema_pb2.RowType(
              schema=schema_pb2.Schema(
                  id='32497414-85e8-46b7-9c90-9a9cc62fe390',
                  fields=[
                      schema_pb2.Field(name='field%d' % i, type=typ)
                      for i, typ in enumerate(all_primitives)
                  ]))),
      # A mix of atomic, nullable map, nullable array and array-of-nullable
      # fields.
      schema_pb2.FieldType(
          row_type=schema_pb2.RowType(
              schema=schema_pb2.Schema(
                  id='dead1637-3204-4bcb-acf8-99675f338600',
                  fields=[
                      schema_pb2.Field(
                          name='id',
                          type=schema_pb2.FieldType(
                              atomic_type=schema_pb2.INT64)),
                      schema_pb2.Field(
                          name='name',
                          type=schema_pb2.FieldType(
                              atomic_type=schema_pb2.STRING)),
                      schema_pb2.Field(
                          name='optional_map',
                          type=schema_pb2.FieldType(
                              nullable=True,
                              map_type=schema_pb2.MapType(
                                  key_type=schema_pb2.FieldType(
                                      atomic_type=schema_pb2.STRING),
                                  value_type=schema_pb2.FieldType(
                                      atomic_type=schema_pb2.DOUBLE)))),
                      schema_pb2.Field(
                          name='optional_array',
                          type=schema_pb2.FieldType(
                              nullable=True,
                              array_type=schema_pb2.ArrayType(
                                  element_type=schema_pb2.FieldType(
                                      atomic_type=schema_pb2.FLOAT)))),
                      schema_pb2.Field(
                          name='array_optional',
                          type=schema_pb2.FieldType(
                              array_type=schema_pb2.ArrayType(
                                  element_type=schema_pb2.FieldType(
                                      nullable=True,
                                      atomic_type=schema_pb2.BYTES)))),
                  ]))),
  ]

  test_cases = (
      all_primitives + basic_array_types + basic_map_types +
      selected_schemas)

  for test_case in test_cases:
    # Each direction of the conversion gets its own new registry.
    self.assertEqual(
        test_case,
        typing_to_runner_api(
            typing_from_runner_api(
                test_case, schema_registry=SchemaTypeRegistry()),
            schema_registry=SchemaTypeRegistry()))