def test_from_rfc3339(self):
  """Round-trips epoch values through RFC 3339 parsing and formatting."""
  cases = [
      (10000000, '1970-04-26T17:46:40Z'),
      (10000000.000001, '1970-04-26T17:46:40.000001Z'),
      (1458343379.123456, '2016-03-18T23:22:59.123456Z'),
  ]
  for seconds, encoded in cases:
    parsed = Timestamp.from_rfc3339(encoded)
    # Parsing must reproduce the exact Timestamp, and formatting must
    # reproduce the exact input string.
    self.assertEqual(Timestamp(seconds), parsed)
    self.assertEqual(encoded, parsed.to_rfc3339())
def test_from_rfc3339(self):
  """RFC 3339 strings parse to the expected Timestamps and format back."""
  for expected_seconds, rfc3339_text in (
      (10000000, '1970-04-26T17:46:40Z'),
      (10000000.000001, '1970-04-26T17:46:40.000001Z'),
      (1458343379.123456, '2016-03-18T23:22:59.123456Z'),
  ):
    self.assertEqual(
        Timestamp(expected_seconds), Timestamp.from_rfc3339(rfc3339_text))
    # to_rfc3339 is the exact inverse of from_rfc3339 for these inputs.
    self.assertEqual(
        rfc3339_text, Timestamp.from_rfc3339(rfc3339_text).to_rfc3339())
def _get_element(message):
  """Returns (event timestamp, parsed message) for a raw Pub/Sub message.

  If `timestamp_attribute` (closure variable) names an attribute present on
  the message, that attribute supplies the event time: it is parsed as an
  RFC 3339 string first, then as integer milliseconds-since-epoch.
  Otherwise the service-assigned timestamp on the message is used.
  """
  element = PubsubMessage._from_message(message)
  attributes = element.attributes
  if timestamp_attribute and timestamp_attribute in attributes:
    raw_value = attributes[timestamp_attribute]
    try:
      event_ts = Timestamp.from_rfc3339(raw_value)
    except ValueError:
      # Fall back to interpreting the attribute as epoch milliseconds.
      try:
        event_ts = Timestamp(micros=int(raw_value) * 1000)
      except ValueError as e:
        raise ValueError('Bad timestamp value: %s' % e)
  else:
    event_ts = Timestamp.from_rfc3339(message.service_timestamp)
  return event_ts, element
def test_from_rfc3339_with_timezone(self):
  """UTC-offset suffixes in RFC 3339 strings shift the parsed epoch value."""
  for expected_seconds, encoded in [
      (1458328979.123456, '2016-03-18T23:22:59.123456+04:00'),
      (1458357779.123456, '2016-03-18T23:22:59.123456-04:00'),
  ]:
    self.assertEqual(
        Timestamp(expected_seconds), Timestamp.from_rfc3339(encoded))
def _get_element(message):
  """Returns (event timestamp, parsed message) for a raw Pub/Sub message.

  When `timestamp_attribute` (closure variable) is set, the attribute MUST
  be present on the message (KeyError otherwise); its value is parsed as
  RFC 3339 first, then as integer milliseconds-since-epoch. When it is not
  set, the service-assigned timestamp is used.
  """
  element = PubsubMessage._from_message(message)
  if not timestamp_attribute:
    return Timestamp.from_rfc3339(message.service_timestamp), element
  try:
    raw_value = element.attributes[timestamp_attribute]
  except KeyError as e:
    raise KeyError('Timestamp attribute not found: %s' % e)
  try:
    event_ts = Timestamp.from_rfc3339(raw_value)
  except ValueError:
    # Fall back to interpreting the attribute as epoch milliseconds.
    try:
      event_ts = Timestamp(micros=int(raw_value) * 1000)
    except ValueError as e:
      raise ValueError('Bad timestamp value: %s' % e)
  return event_ts, element
def _get_element(message):
  """Returns (event timestamp, parsed message) for a raw Pub/Sub message.

  If `timestamp_attribute` (closure variable) names an attribute present on
  the message, that attribute supplies the event time (RFC 3339 first, then
  epoch milliseconds). Otherwise the message's publish time is converted
  from seconds/nanos to a Timestamp.
  """
  parsed = PubsubMessage._from_message(message)
  use_attribute = (
      timestamp_attribute and timestamp_attribute in parsed.attributes)
  if use_attribute:
    raw_value = parsed.attributes[timestamp_attribute]
    try:
      event_ts = Timestamp.from_rfc3339(raw_value)
    except ValueError:
      # Fall back to interpreting the attribute as epoch milliseconds.
      try:
        event_ts = Timestamp(micros=int(raw_value) * 1000)
      except ValueError as e:
        raise ValueError('Bad timestamp value: %s' % e)
  else:
    publish_time = message.publish_time
    # nanos are truncated to microsecond precision.
    event_ts = Timestamp(publish_time.seconds, publish_time.nanos // 1000)
  return event_ts, parsed
def _get_element(message):
  """Returns (event timestamp, parsed message) for a raw Pub/Sub message.

  If `timestamp_attribute` (closure variable) names an attribute present on
  the message, that attribute supplies the event time: integer epoch
  milliseconds are tried first, then RFC 3339. Otherwise the message's
  publish time (required to be present) is converted to a Timestamp.
  """
  parsed = PubsubMessage._from_message(message)
  if timestamp_attribute and timestamp_attribute in parsed.attributes:
    raw_value = parsed.attributes[timestamp_attribute]
    # Note: this revision prefers the milliseconds interpretation and
    # falls back to RFC 3339 parsing.
    try:
      event_ts = Timestamp(micros=int(raw_value) * 1000)
    except ValueError:
      try:
        event_ts = Timestamp.from_rfc3339(raw_value)
      except ValueError as e:
        raise ValueError('Bad timestamp value: %s' % e)
  else:
    if message.publish_time is None:
      raise ValueError('No publish time present in message: %s' % message)
    event_ts = Timestamp.from_utc_datetime(message.publish_time)
  return event_ts, parsed
def test_from_rfc3339_failure(self):
  """Malformed RFC 3339 inputs raise ValueError mentioning 'parse'."""
  for bad_input in ('not rfc3339', '2016-03-18T23:22:59.123456Z unparseable'):
    with self.assertRaisesRegex(ValueError, 'parse'):
      Timestamp.from_rfc3339(bad_input)
class RowCoderTest(unittest.TestCase):
  """Tests for RowCoder encode/decode round-trips and schema evolution.

  Fix: the four schema-evolution tests used the Python-2-only name
  `unicode`, which is a NameError on Python 3; replaced with `str`
  (consistent with the newer revision of this class in this file).
  """

  # A fixture exercising nullable, list, map, bytes and logical-type fields.
  JON_SNOW = Person(
      name="Jon Snow",
      age=np.int32(23),
      address=None,
      aliases=["crow", "wildling"],
      knows_javascript=False,
      payload=None,
      custom_metadata={},
      favorite_time=Timestamp.from_rfc3339('2016-03-18T23:22:59.123456Z'),
  )
  PEOPLE = [
      JON_SNOW,
      Person(
          "Daenerys Targaryen",
          np.int32(25),
          "Westeros", ["Mother of Dragons"],
          False,
          None, {"dragons": 3},
          Timestamp.from_rfc3339('1970-04-26T17:46:40Z'),
      ),
      Person(
          "Michael Bluth",
          np.int32(30),
          None, [],
          True,
          b"I've made a huge mistake", {},
          Timestamp.from_rfc3339('2020-08-12T15:51:00.032Z'))
  ]

  def test_create_row_coder_from_named_tuple(self):
    """The registry coder for Person matches one built from its schema."""
    expected_coder = RowCoder(typing_to_runner_api(Person).row_type.schema)
    real_coder = coders_registry.get_coder(Person)
    for test_case in self.PEOPLE:
      self.assertEqual(
          expected_coder.encode(test_case), real_coder.encode(test_case))
      self.assertEqual(
          test_case, real_coder.decode(real_coder.encode(test_case)))

  def test_create_row_coder_from_schema(self):
    """A RowCoder built from an explicit schema proto round-trips PEOPLE."""
    schema = schema_pb2.Schema(
        id="person",
        fields=[
            schema_pb2.Field(
                name="name",
                type=schema_pb2.FieldType(atomic_type=schema_pb2.STRING)),
            schema_pb2.Field(
                name="age",
                type=schema_pb2.FieldType(atomic_type=schema_pb2.INT32)),
            schema_pb2.Field(
                name="address",
                type=schema_pb2.FieldType(
                    atomic_type=schema_pb2.STRING, nullable=True)),
            schema_pb2.Field(
                name="aliases",
                type=schema_pb2.FieldType(
                    array_type=schema_pb2.ArrayType(
                        element_type=schema_pb2.FieldType(
                            atomic_type=schema_pb2.STRING)))),
            schema_pb2.Field(
                name="knows_javascript",
                type=schema_pb2.FieldType(atomic_type=schema_pb2.BOOLEAN)),
            schema_pb2.Field(
                name="payload",
                type=schema_pb2.FieldType(
                    atomic_type=schema_pb2.BYTES, nullable=True)),
            schema_pb2.Field(
                name="custom_metadata",
                type=schema_pb2.FieldType(
                    map_type=schema_pb2.MapType(
                        key_type=schema_pb2.FieldType(
                            atomic_type=schema_pb2.STRING),
                        value_type=schema_pb2.FieldType(
                            atomic_type=schema_pb2.INT64),
                    ))),
            # Timestamps are represented with the micros_instant logical
            # type: a row of (seconds, micros) INT64 fields.
            schema_pb2.Field(
                name="favorite_time",
                type=schema_pb2.FieldType(
                    logical_type=schema_pb2.LogicalType(
                        urn="beam:logical_type:micros_instant:v1",
                        representation=schema_pb2.FieldType(
                            row_type=schema_pb2.RowType(
                                schema=schema_pb2.Schema(
                                    id="micros_instant",
                                    fields=[
                                        schema_pb2.Field(
                                            name="seconds",
                                            type=schema_pb2.FieldType(
                                                atomic_type=schema_pb2.INT64)),
                                        schema_pb2.Field(
                                            name="micros",
                                            type=schema_pb2.FieldType(
                                                atomic_type=schema_pb2.INT64)),
                                    ])))))),
        ])
    coder = RowCoder(schema)
    for test_case in self.PEOPLE:
      self.assertEqual(test_case, coder.decode(coder.encode(test_case)))

  @unittest.skip(
      "BEAM-8030 - Overflow behavior in VarIntCoder is currently inconsistent")
  def test_overflows(self):
    """Out-of-range int32/int64 values should raise OverflowError."""
    IntTester = typing.NamedTuple(
        'IntTester',
        [
            # TODO(BEAM-7996): Test int8 and int16 here as well when those
            # types are supported
            # ('i8', typing.Optional[np.int8]),
            # ('i16', typing.Optional[np.int16]),
            ('i32', typing.Optional[np.int32]),
            ('i64', typing.Optional[np.int64]),
        ])
    c = RowCoder.from_type_hint(IntTester, None)
    no_overflow = chain(
        (IntTester(i32=i, i64=None) for i in (-2**31, 2**31 - 1)),
        (IntTester(i32=None, i64=i) for i in (-2**63, 2**63 - 1)),
    )
    # Encode max/min ints to make sure they don't throw any error
    for case in no_overflow:
      c.encode(case)
    overflow = chain(
        (IntTester(i32=i, i64=None) for i in (-2**31 - 1, 2**31)),
        (IntTester(i32=None, i64=i) for i in (-2**63 - 1, 2**63)),
    )
    # Encode max+1/min-1 ints to make sure they DO throw an error.
    # The lambda is invoked within the same iteration, so late binding of
    # `case` is harmless here.
    # pylint: disable=cell-var-from-loop
    for case in overflow:
      self.assertRaises(OverflowError, lambda: c.encode(case))

  def test_none_in_non_nullable_field_throws(self):
    """Encoding None into a non-nullable field raises ValueError."""
    Test = typing.NamedTuple('Test', [('foo', str)])
    c = RowCoder.from_type_hint(Test, None)
    self.assertRaises(ValueError, lambda: c.encode(Test(foo=None)))

  def test_schema_remove_column(self):
    """A coder for a narrower schema can decode the wider encoding."""
    fields = [("field1", str), ("field2", str)]
    # new schema is missing one field that was in the old schema
    Old = typing.NamedTuple('Old', fields)
    New = typing.NamedTuple('New', fields[:-1])
    old_coder = RowCoder.from_type_hint(Old, None)
    new_coder = RowCoder.from_type_hint(New, None)
    self.assertEqual(
        New("foo"), new_coder.decode(old_coder.encode(Old("foo", "bar"))))

  def test_schema_add_column(self):
    """A new optional column decodes as None from the old encoding."""
    fields = [("field1", str), ("field2", typing.Optional[str])]
    # new schema has one (optional) field that didn't exist in the old schema
    Old = typing.NamedTuple('Old', fields[:-1])
    New = typing.NamedTuple('New', fields)
    old_coder = RowCoder.from_type_hint(Old, None)
    new_coder = RowCoder.from_type_hint(New, None)
    self.assertEqual(
        New("bar", None), new_coder.decode(old_coder.encode(Old("bar"))))

  def test_schema_add_column_with_null_value(self):
    """Adding an optional column works even when existing fields are null."""
    fields = [("field1", typing.Optional[str]), ("field2", str),
              ("field3", typing.Optional[str])]
    # new schema has one (optional) field that didn't exist in the old schema
    Old = typing.NamedTuple('Old', fields[:-1])
    New = typing.NamedTuple('New', fields)
    old_coder = RowCoder.from_type_hint(Old, None)
    new_coder = RowCoder.from_type_hint(New, None)
    self.assertEqual(
        New(None, "baz", None),
        new_coder.decode(old_coder.encode(Old(None, "baz"))))

  def test_row_coder_picklable(self):
    # occasionally coders can get pickled, RowCoder should be able to handle it
    coder = coders_registry.get_coder(Person)
    roundtripped = pickler.loads(pickler.dumps(coder))
    self.assertEqual(roundtripped, coder)

  def test_row_coder_in_pipeine(self):
    """RowCoder-encoded elements survive a real pipeline round-trip."""
    with TestPipeline() as p:
      res = (
          p
          | beam.Create(self.PEOPLE)
          | beam.Filter(lambda person: person.name == "Jon Snow"))
      assert_that(res, equal_to([self.JON_SNOW]))

  def test_row_coder_nested_struct(self):
    """Rows nested inside rows round-trip through RowCoder."""
    Pair = typing.NamedTuple('Pair', [('left', Person), ('right', Person)])
    value = Pair(self.PEOPLE[0], self.PEOPLE[1])
    coder = RowCoder(typing_to_runner_api(Pair).row_type.schema)
    self.assertEqual(value, coder.decode(coder.encode(value)))
class RowCoderTest(unittest.TestCase):
  """Tests for RowCoder: round-trips, schema evolution, encoding positions."""

  # A fixture exercising nullable, list, map, bytes and logical-type fields.
  JON_SNOW = Person(
      name="Jon Snow",
      age=np.int32(23),
      address=None,
      aliases=["crow", "wildling"],
      knows_javascript=False,
      payload=None,
      custom_metadata={},
      favorite_time=Timestamp.from_rfc3339('2016-03-18T23:22:59.123456Z'),
  )
  PEOPLE = [
      JON_SNOW,
      Person(
          "Daenerys Targaryen",
          np.int32(25),
          "Westeros", ["Mother of Dragons"],
          False,
          None, {"dragons": 3},
          Timestamp.from_rfc3339('1970-04-26T17:46:40Z'),
      ),
      Person(
          "Michael Bluth",
          np.int32(30),
          None, [],
          True,
          b"I've made a huge mistake", {},
          Timestamp.from_rfc3339('2020-08-12T15:51:00.032Z'))
  ]

  def test_row_accepts_trailing_zeros_truncated(self):
    """Decoding accepts payloads whose trailing null bytes were truncated.

    Rebuilds the encoded payload with a hand-written header (field count,
    null-mask byte count, null-mask bits) and checks it still decodes to
    the same value.
    """
    expected_coder = RowCoder(
        typing_to_runner_api(NullablePerson).row_type.schema)
    person = NullablePerson(
        None,
        np.int32(25),
        "Westeros", ["Mother of Dragons"],
        False,
        None, {"dragons": 3},
        None,
        "NotNull")
    out = expected_coder.encode(person)
    # 9 fields, 1 null byte, field 0, 5, 7 are null
    new_payload = bytes([9, 1, 1 | 1 << 5 | 1 << 7]) + out[4:]
    new_value = expected_coder.decode(new_payload)
    self.assertEqual(person, new_value)

  def test_create_row_coder_from_named_tuple(self):
    """The registry coder for Person matches one built from its schema."""
    expected_coder = RowCoder(typing_to_runner_api(Person).row_type.schema)
    real_coder = coders_registry.get_coder(Person)
    for test_case in self.PEOPLE:
      self.assertEqual(
          expected_coder.encode(test_case), real_coder.encode(test_case))
      self.assertEqual(
          test_case, real_coder.decode(real_coder.encode(test_case)))

  def test_create_row_coder_from_schema(self):
    """A RowCoder built from an explicit schema proto round-trips PEOPLE."""
    schema = schema_pb2.Schema(
        id="person",
        fields=[
            schema_pb2.Field(
                name="name",
                type=schema_pb2.FieldType(atomic_type=schema_pb2.STRING)),
            schema_pb2.Field(
                name="age",
                type=schema_pb2.FieldType(atomic_type=schema_pb2.INT32)),
            schema_pb2.Field(
                name="address",
                type=schema_pb2.FieldType(
                    atomic_type=schema_pb2.STRING, nullable=True)),
            schema_pb2.Field(
                name="aliases",
                type=schema_pb2.FieldType(
                    array_type=schema_pb2.ArrayType(
                        element_type=schema_pb2.FieldType(
                            atomic_type=schema_pb2.STRING)))),
            schema_pb2.Field(
                name="knows_javascript",
                type=schema_pb2.FieldType(atomic_type=schema_pb2.BOOLEAN)),
            schema_pb2.Field(
                name="payload",
                type=schema_pb2.FieldType(
                    atomic_type=schema_pb2.BYTES, nullable=True)),
            schema_pb2.Field(
                name="custom_metadata",
                type=schema_pb2.FieldType(
                    map_type=schema_pb2.MapType(
                        key_type=schema_pb2.FieldType(
                            atomic_type=schema_pb2.STRING),
                        value_type=schema_pb2.FieldType(
                            atomic_type=schema_pb2.INT64),
                    ))),
            # Timestamps are represented with the micros_instant logical
            # type: a row of (seconds, micros) INT64 fields.
            schema_pb2.Field(
                name="favorite_time",
                type=schema_pb2.FieldType(
                    logical_type=schema_pb2.LogicalType(
                        urn="beam:logical_type:micros_instant:v1",
                        representation=schema_pb2.FieldType(
                            row_type=schema_pb2.RowType(
                                schema=schema_pb2.Schema(
                                    id="micros_instant",
                                    fields=[
                                        schema_pb2.Field(
                                            name="seconds",
                                            type=schema_pb2.FieldType(
                                                atomic_type=schema_pb2.INT64)),
                                        schema_pb2.Field(
                                            name="micros",
                                            type=schema_pb2.FieldType(
                                                atomic_type=schema_pb2.INT64)),
                                    ])))))),
        ])
    coder = RowCoder(schema)
    for test_case in self.PEOPLE:
      self.assertEqual(test_case, coder.decode(coder.encode(test_case)))

  @unittest.skip(
      "BEAM-8030 - Overflow behavior in VarIntCoder is currently inconsistent")
  def test_overflows(self):
    """Out-of-range int32/int64 values should raise OverflowError."""
    IntTester = typing.NamedTuple(
        'IntTester',
        [
            # TODO(BEAM-7996): Test int8 and int16 here as well when those
            # types are supported
            # ('i8', typing.Optional[np.int8]),
            # ('i16', typing.Optional[np.int16]),
            ('i32', typing.Optional[np.int32]),
            ('i64', typing.Optional[np.int64]),
        ])
    c = RowCoder.from_type_hint(IntTester, None)
    no_overflow = chain(
        (IntTester(i32=i, i64=None) for i in (-2**31, 2**31 - 1)),
        (IntTester(i32=None, i64=i) for i in (-2**63, 2**63 - 1)),
    )
    # Encode max/min ints to make sure they don't throw any error
    for case in no_overflow:
      c.encode(case)
    overflow = chain(
        (IntTester(i32=i, i64=None) for i in (-2**31 - 1, 2**31)),
        (IntTester(i32=None, i64=i) for i in (-2**63 - 1, 2**63)),
    )
    # Encode max+1/min-1 ints to make sure they DO throw an error
    # pylint: disable=cell-var-from-loop
    for case in overflow:
      self.assertRaises(OverflowError, lambda: c.encode(case))

  def test_none_in_non_nullable_field_throws(self):
    """Encoding None into a non-nullable field raises ValueError."""
    Test = typing.NamedTuple('Test', [('foo', str)])
    c = RowCoder.from_type_hint(Test, None)
    self.assertRaises(ValueError, lambda: c.encode(Test(foo=None)))

  def test_schema_remove_column(self):
    """A coder for a narrower schema can decode the wider encoding."""
    fields = [("field1", str), ("field2", str)]
    # new schema is missing one field that was in the old schema
    Old = typing.NamedTuple('Old', fields)
    New = typing.NamedTuple('New', fields[:-1])
    old_coder = RowCoder.from_type_hint(Old, None)
    new_coder = RowCoder.from_type_hint(New, None)
    self.assertEqual(
        New("foo"), new_coder.decode(old_coder.encode(Old("foo", "bar"))))

  def test_schema_add_column(self):
    """A new optional column decodes as None from the old encoding."""
    fields = [("field1", str), ("field2", typing.Optional[str])]
    # new schema has one (optional) field that didn't exist in the old schema
    Old = typing.NamedTuple('Old', fields[:-1])
    New = typing.NamedTuple('New', fields)
    old_coder = RowCoder.from_type_hint(Old, None)
    new_coder = RowCoder.from_type_hint(New, None)
    self.assertEqual(
        New("bar", None), new_coder.decode(old_coder.encode(Old("bar"))))

  def test_schema_add_column_with_null_value(self):
    """Adding an optional column works even when existing fields are null."""
    fields = [("field1", typing.Optional[str]), ("field2", str),
              ("field3", typing.Optional[str])]
    # new schema has one (optional) field that didn't exist in the old schema
    Old = typing.NamedTuple('Old', fields[:-1])
    New = typing.NamedTuple('New', fields)
    old_coder = RowCoder.from_type_hint(Old, None)
    new_coder = RowCoder.from_type_hint(New, None)
    self.assertEqual(
        New(None, "baz", None),
        new_coder.decode(old_coder.encode(Old(None, "baz"))))

  def test_row_coder_picklable(self):
    # occasionally coders can get pickled, RowCoder should be able to handle it
    coder = coders_registry.get_coder(Person)
    roundtripped = pickler.loads(pickler.dumps(coder))
    self.assertEqual(roundtripped, coder)

  def test_row_coder_in_pipeine(self):
    # NOTE(review): "pipeine" is a typo for "pipeline"; the name is kept
    # because renaming would change the test's public identifier.
    with TestPipeline() as p:
      res = (
          p
          | beam.Create(self.PEOPLE)
          | beam.Filter(lambda person: person.name == "Jon Snow"))
      assert_that(res, equal_to([self.JON_SNOW]))

  def test_row_coder_nested_struct(self):
    """Rows nested inside rows round-trip through RowCoder."""
    Pair = typing.NamedTuple('Pair', [('left', Person), ('right', Person)])
    value = Pair(self.PEOPLE[0], self.PEOPLE[1])
    coder = RowCoder(typing_to_runner_api(Pair).row_type.schema)
    self.assertEqual(value, coder.decode(coder.encode(value)))

  def test_encoding_position_reorder_fields(self):
    """Explicit encoding positions let a reordered schema decode correctly."""
    schema1 = schema_pb2.Schema(
        id="reorder_test_schema1",
        fields=[
            schema_pb2.Field(
                name="f_int32",
                type=schema_pb2.FieldType(atomic_type=schema_pb2.INT32),
            ),
            schema_pb2.Field(
                name="f_str",
                type=schema_pb2.FieldType(atomic_type=schema_pb2.STRING),
            ),
        ])
    # Same fields in the opposite declaration order, with encoding_position
    # pinning the wire order to match schema1.
    schema2 = schema_pb2.Schema(
        id="reorder_test_schema2",
        encoding_positions_set=True,
        fields=[
            schema_pb2.Field(
                name="f_str",
                type=schema_pb2.FieldType(atomic_type=schema_pb2.STRING),
                encoding_position=1,
            ),
            schema_pb2.Field(
                name="f_int32",
                type=schema_pb2.FieldType(atomic_type=schema_pb2.INT32),
                encoding_position=0,
            ),
        ])
    RowSchema1 = named_tuple_from_schema(schema1)
    RowSchema2 = named_tuple_from_schema(schema2)
    roundtripped = RowCoder(schema2).decode(
        RowCoder(schema1).encode(RowSchema1(42, "Hello World!")))
    self.assertEqual(RowSchema2(f_int32=42, f_str="Hello World!"), roundtripped)

  def test_encoding_position_add_fields_and_reorder(self):
    """A reordered schema with an added nullable field decodes old data."""
    old_schema = schema_pb2.Schema(
        id="add_test_old",
        fields=[
            schema_pb2.Field(
                name="f_int32",
                type=schema_pb2.FieldType(atomic_type=schema_pb2.INT32),
            ),
            schema_pb2.Field(
                name="f_str",
                type=schema_pb2.FieldType(atomic_type=schema_pb2.STRING),
            ),
        ])
    # New field f_new_str is declared first but encodes last (position 2),
    # so old payloads still decode; the missing field becomes None.
    new_schema = schema_pb2.Schema(
        encoding_positions_set=True,
        id="add_test_new",
        fields=[
            schema_pb2.Field(
                name="f_new_str",
                type=schema_pb2.FieldType(
                    atomic_type=schema_pb2.STRING, nullable=True),
                encoding_position=2,
            ),
            schema_pb2.Field(
                name="f_int32",
                type=schema_pb2.FieldType(atomic_type=schema_pb2.INT32),
                encoding_position=0,
            ),
            schema_pb2.Field(
                name="f_str",
                type=schema_pb2.FieldType(atomic_type=schema_pb2.STRING),
                encoding_position=1,
            ),
        ])
    Old = named_tuple_from_schema(old_schema)
    New = named_tuple_from_schema(new_schema)
    roundtripped = RowCoder(new_schema).decode(
        RowCoder(old_schema).encode(Old(42, "Hello World!")))
    self.assertEqual(
        New(f_new_str=None, f_int32=42, f_str="Hello World!"), roundtripped)

  def test_row_coder_fail_early_bad_schema(self):
    """Constructing a RowCoder for a field with no type info fails fast."""
    schema_proto = schema_pb2.Schema(
        fields=[
            schema_pb2.Field(
                name="type_with_no_typeinfo", type=schema_pb2.FieldType())
        ])
    # Should raise an exception referencing the problem field
    self.assertRaisesRegex(
        ValueError, "type_with_no_typeinfo", lambda: RowCoder(schema_proto))

  def test_row_coder_cloud_object_schema(self):
    """as_cloud_object embeds the schema proto as JSON-encoded bytes."""
    schema_proto = schema_pb2.Schema()
    schema_proto_json = json_format.MessageToJson(schema_proto).encode('utf-8')
    coder = RowCoder(schema_proto)
    cloud_object = coder.as_cloud_object()
    self.assertEqual(schema_proto_json, cloud_object['schema'])
def test_from_rfc3339_failure(self):
  """Malformed RFC 3339 inputs raise ValueError mentioning 'parse'.

  Fix: replaced `assertRaisesRegexp` — a deprecated alias removed in
  Python 3.12 — with `assertRaisesRegex`, matching the other revision of
  this test in the file.
  """
  with self.assertRaisesRegex(ValueError, 'parse'):
    Timestamp.from_rfc3339('not rfc3339')
  with self.assertRaisesRegex(ValueError, 'parse'):
    Timestamp.from_rfc3339('2016-03-18T23:22:59.123456Z unparseable')