def test_proto_coder(self): # For instructions on how these test proto message were generated, # see coders_test.py ma = test_message.MessageA() mab = ma.field2.add() mab.field1 = True ma.field1 = u'hello world' mb = test_message.MessageA() mb.field1 = u'beam' proto_coder = coders.ProtoCoder(ma.__class__) self.check_coder(proto_coder, ma) self.check_coder(coders.TupleCoder((proto_coder, coders.BytesCoder())), (ma, b'a'), (mb, b'b'))
def test_deterministic_coder(self): coder = coders.FastPrimitivesCoder() deterministic_coder = coders.DeterministicFastPrimitivesCoder( coder, 'step') self.check_coder(deterministic_coder, *self.test_values_deterministic) for v in self.test_values_deterministic: self.check_coder(coders.TupleCoder((deterministic_coder, )), (v, )) self.check_coder( coders.TupleCoder( (deterministic_coder, ) * len(self.test_values_deterministic)), tuple(self.test_values_deterministic)) with self.assertRaises(TypeError): self.check_coder(deterministic_coder, dict()) with self.assertRaises(TypeError): self.check_coder(deterministic_coder, [1, dict()]) self.check_coder(coders.TupleCoder((deterministic_coder, coder)), (1, dict()), ('a', [dict()])) self.check_coder(deterministic_coder, test_message.MessageA(field1='value')) if dataclasses is not None: self.check_coder(deterministic_coder, [FrozenDataClass(1, 2), MyNamedTuple(1, 2)]) with self.assertRaises(TypeError): self.check_coder(deterministic_coder, UnFrozenDataClass(1, 2)) with self.assertRaises(TypeError): self.check_coder(deterministic_coder, FrozenDataClass(UnFrozenDataClass(1, 2), 3)) with self.assertRaises(TypeError): self.check_coder(deterministic_coder, MyNamedTuple(UnFrozenDataClass(1, 2), 3))
def test_iterable_coder(self): iterable_coder = coders.IterableCoder(coders.VarIntCoder()) # Verify cloud object representation self.assertEqual({ '@type': 'kind:stream', 'is_stream_like': True, 'component_encodings': [coders.VarIntCoder().as_cloud_object()] }, iterable_coder.as_cloud_object()) # Test unnested self.check_coder(iterable_coder, [1], [-1, 0, 100]) # Test nested self.check_coder( coders.TupleCoder( (coders.VarIntCoder(), coders.IterableCoder(coders.VarIntCoder()))), (1, [1, 2, 3]))
def test_length_prefix_coder(self): coder = coders.LengthPrefixCoder(coders.BytesCoder()) # Verify cloud object representation self.assertEqual( { '@type': 'kind:length_prefix', 'component_encodings': [coders.BytesCoder().as_cloud_object()] }, coder.as_cloud_object()) # Test binary representation self.assertEqual('\x00', coder.encode('')) self.assertEqual('\x01a', coder.encode('a')) self.assertEqual('\x02bc', coder.encode('bc')) self.assertEqual('\xff\x7f' + 'z' * 16383, coder.encode('z' * 16383)) # Test unnested self.check_coder(coder, '', 'a', 'bc', 'def') # Test nested self.check_coder(coders.TupleCoder((coder, coder)), ('', 'a'), ('bc', 'def'))
def test_windowedvalue_coder_paneinfo(self): coder = coders.WindowedValueCoder(coders.VarIntCoder(), coders.GlobalWindowCoder()) test_paneinfo_values = [ windowed_value.PANE_INFO_UNKNOWN, windowed_value.PaneInfo(True, True, windowed_value.PaneInfoTiming.EARLY, 0, -1), windowed_value.PaneInfo(True, False, windowed_value.PaneInfoTiming.ON_TIME, 0, 0), windowed_value.PaneInfo(True, False, windowed_value.PaneInfoTiming.ON_TIME, 10, 0), windowed_value.PaneInfo(False, True, windowed_value.PaneInfoTiming.ON_TIME, 0, 23), windowed_value.PaneInfo(False, True, windowed_value.PaneInfoTiming.ON_TIME, 12, 23), windowed_value.PaneInfo(False, False, windowed_value.PaneInfoTiming.LATE, 0, 123), ] test_values = [ windowed_value.WindowedValue(123, 234, (GlobalWindow(), ), p) for p in test_paneinfo_values ] # Test unnested. self.check_coder( coder, windowed_value.WindowedValue(123, 234, (GlobalWindow(), ), windowed_value.PANE_INFO_UNKNOWN)) for value in test_values: self.check_coder(coder, value) # Test nested. for value1 in test_values: for value2 in test_values: self.check_coder(coders.TupleCoder((coder, coder)), (value1, value2))
def test_windowed_value_coder(self): coder = coders.WindowedValueCoder( coders.VarIntCoder(), coders.GlobalWindowCoder()) # Verify cloud object representation self.assertEqual({ '@type': 'kind:windowed_value', 'is_wrapper': True, 'component_encodings': [ coders.VarIntCoder().as_cloud_object(), coders.GlobalWindowCoder().as_cloud_object(), ], }, coder.as_cloud_object()) # Test binary representation self.assertEqual( b'\x7f\xdf;dZ\x1c\xac\t\x00\x00\x00\x01\x0f\x01', coder.encode(window.GlobalWindows.windowed_value(1))) # Test decoding large timestamp self.assertEqual( coder.decode(b'\x7f\xdf;dZ\x1c\xac\x08\x00\x00\x00\x01\x0f\x00'), windowed_value.create(0, MIN_TIMESTAMP.micros, (GlobalWindow(), ))) # Test unnested self.check_coder( coders.WindowedValueCoder(coders.VarIntCoder()), windowed_value.WindowedValue(3, -100, ()), windowed_value.WindowedValue(-1, 100, (1, 2, 3))) # Test Global Window self.check_coder( coders.WindowedValueCoder( coders.VarIntCoder(), coders.GlobalWindowCoder()), window.GlobalWindows.windowed_value(1)) # Test nested self.check_coder( coders.TupleCoder(( coders.WindowedValueCoder(coders.FloatCoder()), coders.WindowedValueCoder(coders.StrUtf8Coder()))), ( windowed_value.WindowedValue(1.5, 0, ()), windowed_value.WindowedValue("abc", 10, ('window', ))))
def test_nested_observables(self): class FakeObservableIterator(observable.ObservableMixin): def __iter__(self): return iter([1, 2, 3]) # Coder for elements from the observable iterator. elem_coder = coders.VarIntCoder() iter_coder = coders.TupleSequenceCoder(elem_coder) # Test nested WindowedValue observable. coder = coders.WindowedValueCoder(iter_coder) observ = FakeObservableIterator() value = windowed_value.WindowedValue(observ, 0, ()) self.assertEqual( coder.get_impl().get_estimated_size_and_observables(value)[1], [(observ, elem_coder.get_impl())]) # Test nested tuple observable. coder = coders.TupleCoder((coders.StrUtf8Coder(), iter_coder)) value = (u'123', observ) self.assertEqual( coder.get_impl().get_estimated_size_and_observables(value)[1], [(observ, elem_coder.get_impl())])
def test_custom_coder(self): self.check_coder(CustomCoder(), 1, -10, 5) self.check_coder( coders.TupleCoder((CustomCoder(), coders.BytesCoder())), (1, 'a'), (-10, 'b'), (5, 'c'))
def test_tuple_sequence_coder(self): int_tuple_coder = coders.TupleSequenceCoder(coders.VarIntCoder()) self.check_coder(int_tuple_coder, (1, -1, 0), (), tuple(range(1000))) self.check_coder( coders.TupleCoder((coders.VarIntCoder(), int_tuple_coder)), (1, (1, 2, 3)))
def test_dill_coder(self): cell_value = (lambda x: lambda: x)(0).func_closure[0] self.check_coder(coders.DillCoder(), 'a', 1, cell_value) self.check_coder( coders.TupleCoder((coders.VarIntCoder(), coders.DillCoder())), (1, cell_value))
def test_fast_primitives_coder(self): coder = coders.FastPrimitivesCoder(coders.SingletonCoder(len)) self.check_coder(coder, *self.test_values) for v in self.test_values: self.check_coder(coders.TupleCoder((coder, )), (v, ))
class StandardCodersTest(unittest.TestCase): _urn_to_coder_class = { 'urn:beam:coders:bytes:0.1': coders.BytesCoder, 'urn:beam:coders:varint:0.1': coders.VarIntCoder, 'urn:beam:coders:kv:0.1': lambda k, v: coders.TupleCoder((k, v)), 'urn:beam:coders:interval_window:0.1': coders.IntervalWindowCoder, 'urn:beam:coders:stream:0.1': lambda t: coders.IterableCoder(t), 'urn:beam:coders:global_window:0.1': coders.GlobalWindowCoder, 'urn:beam:coders:windowed_value:0.1': lambda v, w: coders.WindowedValueCoder(v, w) } _urn_to_json_value_parser = { 'urn:beam:coders:bytes:0.1': lambda x: x, 'urn:beam:coders:varint:0.1': lambda x: x, 'urn:beam:coders:kv:0.1': lambda x, key_parser, value_parser: (key_parser(x['key']), value_parser(x['value'])), 'urn:beam:coders:interval_window:0.1': lambda x: IntervalWindow( start=Timestamp(micros=(x['end'] - x['span']) * 1000), end=Timestamp(micros=x['end'] * 1000)), 'urn:beam:coders:stream:0.1': lambda x, parser: map(parser, x), 'urn:beam:coders:global_window:0.1': lambda x: window.GlobalWindow(), 'urn:beam:coders:windowed_value:0.1': lambda x, value_parser, window_parser: windowed_value.create( value_parser(x['value']), x['timestamp'] * 1000, tuple([window_parser(w) for w in x['windows']])) } def test_standard_coders(self): for name, spec in _load_test_cases(STANDARD_CODERS_YAML): logging.info('Executing %s test.', name) self._run_standard_coder(name, spec) def _run_standard_coder(self, name, spec): coder = self.parse_coder(spec['coder']) parse_value = self.json_value_parser(spec['coder']) nested_list = [spec['nested']] if 'nested' in spec else [True, False] for nested in nested_list: for expected_encoded, json_value in spec['examples'].items(): value = parse_value(json_value) expected_encoded = expected_encoded.encode('latin1') if not spec['coder'].get('non_deterministic', False): actual_encoded = encode_nested(coder, value, nested) if self.fix and actual_encoded != expected_encoded: self.to_fix[spec['index'], expected_encoded] = actual_encoded else: self.assertEqual(expected_encoded, actual_encoded) self.assertEqual(decode_nested(coder, expected_encoded, nested), value) else: # Only verify decoding for a non-deterministic coder self.assertEqual(decode_nested(coder, expected_encoded, nested), value) def parse_coder(self, spec): return self._urn_to_coder_class[spec['urn']]( *[self.parse_coder(c) for c in spec.get('components', ())]) def json_value_parser(self, coder_spec): component_parsers = [ self.json_value_parser(c) for c in coder_spec.get('components', ())] return lambda x: self._urn_to_json_value_parser[coder_spec['urn']]( x, *component_parsers) # Used when --fix is passed. fix = False to_fix = {} @classmethod def tearDownClass(cls): if cls.fix and cls.to_fix: print "FIXING", len(cls.to_fix), "TESTS" doc_sep = '\n---\n' docs = open(STANDARD_CODERS_YAML).read().split(doc_sep) def quote(s): return json.dumps(s.decode('latin1')).replace(r'\u0000', r'\0') for (doc_ix, expected_encoded), actual_encoded in cls.to_fix.items(): print quote(expected_encoded), "->", quote(actual_encoded) docs[doc_ix] = docs[doc_ix].replace( quote(expected_encoded) + ':', quote(actual_encoded) + ':') open(STANDARD_CODERS_YAML, 'w').write(doc_sep.join(docs))
def test_fake_deterministic_fast_primitives_coder(self): coder = coders.FakeDeterministicFastPrimitivesCoder( coders.PickleCoder()) self.check_coder(coder, *self.test_values) for v in self.test_values: self.check_coder(coders.TupleCoder((coder, )), (v, ))