def run_coder_benchmarks(num_runs, input_size, seed, verbose):
    """Build the coder benchmark suite and run it.

    Args:
      num_runs: passed to utils.BenchmarkConfig for every benchmark.
      input_size: passed to utils.BenchmarkConfig for every benchmark.
      seed: value handed to random.seed() before the benchmarks are built.
      verbose: forwarded to utils.run_benchmarks.
    """
    random.seed(seed)

    # Each helper builds a fresh coder per call, so no coder instance is
    # shared between two benchmarks.
    def primitives(generate_fn):
        return coder_benchmark_factory(
            coders.FastPrimitivesCoder(), generate_fn)

    def iterable(generate_fn):
        return coder_benchmark_factory(
            coders.IterableCoder(coders.FastPrimitivesCoder()), generate_fn)

    def windowed(generate_fn):
        return coder_benchmark_factory(
            coders.WindowedValueCoder(coders.FastPrimitivesCoder()),
            generate_fn)

    # TODO(BEAM-4441): Pick coders using type hints, for example:
    # tuple_coder = typecoders.registry.get_coder(typehints.Tuple[int, ...])
    benchmarks = [
        primitives(small_int),
        primitives(large_int),
        primitives(small_string),
        primitives(large_string),
        primitives(small_list),
        iterable(small_list),
        primitives(large_list),
        iterable(large_list),
        primitives(small_tuple),
        primitives(large_tuple),
        primitives(small_dict),
        primitives(large_dict),
        windowed(wv_with_one_window),
        windowed(wv_with_multiple_windows),
    ]

    suite = []
    for benchmark in benchmarks:
        suite.append(utils.BenchmarkConfig(benchmark, input_size, num_runs))
    utils.run_benchmarks(suite, verbose=verbose)
def run_coder_benchmarks(
        num_runs, input_size, seed, verbose, filter_regex='.*'):
    """Build the coder benchmark suite, filter it by name, and run it.

    Args:
      num_runs: passed to utils.BenchmarkConfig for every selected benchmark.
      input_size: passed to utils.BenchmarkConfig for every selected
        benchmark.
      seed: value handed to random.seed() before the benchmarks are built.
      verbose: forwarded to utils.run_benchmarks.
      filter_regex: pattern searched (case-insensitively) in each benchmark's
        __name__; only matching benchmarks are run. The default matches all.
    """
    random.seed(seed)

    # Short local aliases; every benchmark still gets its own coder instance.
    make = coder_benchmark_factory
    fast = coders.FastPrimitivesCoder
    iterable = coders.IterableCoder
    windowed = coders.WindowedValueCoder
    message_type = test_message.MessageWithMap

    # TODO(BEAM-4441): Pick coders using type hints, for example:
    # tuple_coder = typecoders.registry.get_coder(typing.Tuple[int, ...])
    benchmarks = [
        make(fast(), small_int),
        make(fast(), large_int),
        make(fast(), small_string),
        make(fast(), large_string),
        make(fast(), small_list),
        make(iterable(fast()), small_list),
        make(fast(), large_list),
        make(iterable(fast()), large_list),
        make(iterable(fast()), large_iterable),
        make(fast(), small_tuple),
        make(fast(), large_tuple),
        make(fast(), small_dict),
        make(fast(), large_dict),
        make(coders.ProtoCoder(message_type), small_message_with_map),
        make(coders.ProtoCoder(message_type), large_message_with_map),
        make(coders.DeterministicProtoCoder(message_type),
             small_message_with_map),
        make(coders.DeterministicProtoCoder(message_type),
             large_message_with_map),
        make(windowed(fast()), wv_with_one_window),
        make(windowed(fast(), coders.IntervalWindowCoder()),
             wv_with_multiple_windows),
        make(windowed(fast(), coders.GlobalWindowCoder()),
             globally_windowed_value),
        make(coders.LengthPrefixCoder(fast()), small_int),
    ]

    # Keep only the benchmarks whose name matches the filter.
    name_filter = re.compile(filter_regex, flags=re.I)
    suite = []
    for benchmark in benchmarks:
        if name_filter.search(benchmark.__name__):
            suite.append(
                utils.BenchmarkConfig(benchmark, input_size, num_runs))
    utils.run_benchmarks(suite, verbose=verbose)
def test_iterable_coder(self):
    # The coder registered for typehints.Iterable[str] must equal
    # IterableCoder(BytesCoder()) and produce the same encoding.
    registered = typecoders.registry.get_coder(typehints.Iterable[str])
    reference = coders.IterableCoder(coders.BytesCoder())
    samples = ['abc', 'xyz']

    self.assertEqual(reference, registered)
    encoded_by_registered = registered.encode(samples)
    encoded_by_reference = reference.encode(samples)
    self.assertEqual(encoded_by_registered, encoded_by_reference)
def test_iterable_coder(self):
    varint_iterable = coders.IterableCoder(coders.VarIntCoder())

    # Verify cloud object representation
    expected_cloud_object = {
        '@type': 'kind:stream',
        'is_stream_like': True,
        'component_encodings': [coders.VarIntCoder().as_cloud_object()],
    }
    self.assertEqual(expected_cloud_object, varint_iterable.as_cloud_object())

    # Test unnested
    self.check_coder(varint_iterable, [1], [-1, 0, 100])

    # Test nested: the iterable coder as the second component of a tuple.
    tuple_components = (
        coders.VarIntCoder(),
        coders.IterableCoder(coders.VarIntCoder()),
    )
    nested_coder = coders.TupleCoder(tuple_components)
    self.check_coder(nested_coder, (1, [1, 2, 3]))
def _test_iterable_coder_of_unknown_length(self, count):
    """Round-trip a generator of `count` ints through an IterableCoder.

    The input is a generator rather than a list; the decoded result is
    compared with the generated values regardless of order.
    """
    def generated_values():
        for value in range(count):
            yield value

    varint_iterable = coders.IterableCoder(coders.VarIntCoder())
    expected = list(generated_values())
    encoded = varint_iterable.encode(generated_values())
    decoded = varint_iterable.decode(encoded)
    self.assertCountEqual(expected, decoded)
def test_list_coder(self):
    # The coder registered for typehints.List[bytes] must equal
    # IterableCoder(BytesCoder()) and produce the same encoding.
    registered = typecoders.registry.get_coder(typehints.List[bytes])
    reference = coders.IterableCoder(coders.BytesCoder())
    samples = [b'abc', b'xyz']

    self.assertEqual(reference, registered)
    encoded_by_registered = registered.encode(samples)
    encoded_by_reference = reference.encode(samples)
    self.assertEqual(encoded_by_registered, encoded_by_reference)

    # IterableCoder.decode() always returns a list. Its implementation,
    # IterableCoderImpl, *can* return a non-list if it is provided a read_state
    # object, but this is not possible using the atomic IterableCoder interface.
    round_tripped = reference.decode(reference.encode(samples))
    self.assertIs(list, type(round_tripped))
def __init__(self, num_elements_per_benchmark):
    """Prepare the coder and the input list for one benchmark instance.

    `coder` and `generate_fn` are not parameters; they are resolved from an
    enclosing scope that is not visible in this block.

    Args:
      num_elements_per_benchmark: how many times generate_fn() is called to
        populate the input list.
    """
    self._coder = coders.IterableCoder(coder)
    elements = []
    for _ in range(num_elements_per_benchmark):
        elements.append(generate_fn())
    self._list = elements
class StandardCodersTest(unittest.TestCase):
    """Checks coders against the test cases loaded from STANDARD_CODERS_YAML.

    Each spec names a coder by URN (optionally with component coders) and maps
    expected encodings to JSON values. When `fix` is set, mismatching
    encodings are recorded instead of failing, and written back to the YAML
    file in tearDownClass.
    """

    # Maps a coder URN to a callable that builds the coder from its already
    # constructed component coders (passed positionally).
    _urn_to_coder_class = {
        'urn:beam:coders:bytes:0.1': coders.BytesCoder,
        'urn:beam:coders:varint:0.1': coders.VarIntCoder,
        'urn:beam:coders:kv:0.1': lambda k, v: coders.TupleCoder((k, v)),
        'urn:beam:coders:interval_window:0.1': coders.IntervalWindowCoder,
        'urn:beam:coders:stream:0.1': lambda t: coders.IterableCoder(t),
        'urn:beam:coders:global_window:0.1': coders.GlobalWindowCoder,
        'urn:beam:coders:windowed_value:0.1':
            lambda v, w: coders.WindowedValueCoder(v, w)
    }

    # Maps a coder URN to a callable that turns the JSON value from the spec
    # into the Python value to encode; component parsers follow the value.
    _urn_to_json_value_parser = {
        'urn:beam:coders:bytes:0.1': lambda x: x,
        'urn:beam:coders:varint:0.1': lambda x: x,
        'urn:beam:coders:kv:0.1':
            lambda x, key_parser, value_parser: (
                key_parser(x['key']), value_parser(x['value'])),
        'urn:beam:coders:interval_window:0.1':
            lambda x: IntervalWindow(
                start=Timestamp(micros=(x['end'] - x['span']) * 1000),
                end=Timestamp(micros=x['end'] * 1000)),
        # Build a real list: on Python 3 map() returns a one-shot iterator,
        # which cannot be compared for equality with a decoded value.
        'urn:beam:coders:stream:0.1':
            lambda x, parser: [parser(element) for element in x],
        'urn:beam:coders:global_window:0.1': lambda x: window.GlobalWindow(),
        'urn:beam:coders:windowed_value:0.1':
            lambda x, value_parser, window_parser: windowed_value.create(
                value_parser(x['value']), x['timestamp'] * 1000,
                tuple(window_parser(w) for w in x['windows']))
    }

    def test_standard_coders(self):
        # Run every test case found in the YAML file.
        for name, spec in _load_test_cases(STANDARD_CODERS_YAML):
            logging.info('Executing %s test.', name)
            self._run_standard_coder(name, spec)

    def _run_standard_coder(self, name, spec):
        """Check every example of one spec, nested and/or unnested.

        Args:
          name: test case name (not used by the checks themselves).
          spec: dict with a 'coder' spec, an 'examples' mapping of expected
            encoding -> JSON value, an optional 'nested' flag, and (when
            fixing) an 'index' identifying the YAML document.
        """
        coder = self.parse_coder(spec['coder'])
        parse_value = self.json_value_parser(spec['coder'])
        # Without an explicit 'nested' entry both contexts are exercised.
        nested_list = [spec['nested']] if 'nested' in spec else [True, False]
        for nested in nested_list:
            for expected_encoded, json_value in spec['examples'].items():
                value = parse_value(json_value)
                expected_encoded = expected_encoded.encode('latin1')
                if not spec['coder'].get('non_deterministic', False):
                    actual_encoded = encode_nested(coder, value, nested)
                    if self.fix and actual_encoded != expected_encoded:
                        # Record the mismatch for tearDownClass to rewrite.
                        self.to_fix[spec['index'], expected_encoded] = (
                            actual_encoded)
                    else:
                        self.assertEqual(expected_encoded, actual_encoded)
                        self.assertEqual(
                            decode_nested(coder, expected_encoded, nested),
                            value)
                else:
                    # Only verify decoding for a non-deterministic coder
                    self.assertEqual(
                        decode_nested(coder, expected_encoded, nested),
                        value)

    def parse_coder(self, spec):
        """Recursively build the coder described by `spec`."""
        return self._urn_to_coder_class[spec['urn']](
            *[self.parse_coder(c) for c in spec.get('components', ())])

    def json_value_parser(self, coder_spec):
        """Return a one-argument parser for JSON values of `coder_spec`."""
        component_parsers = [
            self.json_value_parser(c)
            for c in coder_spec.get('components', ())]
        return lambda x: self._urn_to_json_value_parser[coder_spec['urn']](
            x, *component_parsers)

    # Used when --fix is passed.

    fix = False
    # Shared across the class: (document index, expected encoding) -> actual.
    to_fix = {}

    @classmethod
    def tearDownClass(cls):
        # Rewrite the YAML file with the encodings recorded in to_fix.
        if not (cls.fix and cls.to_fix):
            return

        # A single formatted argument prints the same text on Python 2 and 3.
        print('FIXING %d TESTS' % len(cls.to_fix))
        doc_sep = '\n---\n'
        # Context managers make sure both handles are closed (and the write
        # flushed) even if an error occurs part-way through.
        with open(STANDARD_CODERS_YAML) as yaml_file:
            docs = yaml_file.read().split(doc_sep)

        def quote(s):
            return json.dumps(s.decode('latin1')).replace(r'\u0000', r'\0')

        for (doc_ix, expected_encoded), actual_encoded in cls.to_fix.items():
            print('%s -> %s' % (quote(expected_encoded),
                                quote(actual_encoded)))
            docs[doc_ix] = docs[doc_ix].replace(
                quote(expected_encoded) + ':', quote(actual_encoded) + ':')

        with open(STANDARD_CODERS_YAML, 'w') as yaml_file:
            yaml_file.write(doc_sep.join(docs))