コード例 #1
0
def run_coder_benchmarks(num_runs, input_size, seed, verbose):
    """Assemble the coder benchmarks and hand them to the benchmark runner.

    Args:
      num_runs: passed to ``utils.BenchmarkConfig`` for every benchmark.
      input_size: passed to ``utils.BenchmarkConfig`` for every benchmark.
      seed: seeds the ``random`` module before any benchmark is created.
      verbose: forwarded as the ``verbose`` keyword of
        ``utils.run_benchmarks``.
    """
    random.seed(seed)

    # Zero-argument builders: each benchmark gets its own freshly constructed
    # coder, created right before its factory call is made.
    primitives = coders.FastPrimitivesCoder

    def iterable_of_primitives():
        return coders.IterableCoder(coders.FastPrimitivesCoder())

    def windowed_primitives():
        return coders.WindowedValueCoder(coders.FastPrimitivesCoder())

    # TODO(BEAM-4441): Pick coders using type hints, for example:
    # tuple_coder = typecoders.registry.get_coder(typehints.Tuple[int, ...])
    cases = [
        (primitives, small_int),
        (primitives, large_int),
        (primitives, small_string),
        (primitives, large_string),
        (primitives, small_list),
        (iterable_of_primitives, small_list),
        (primitives, large_list),
        (iterable_of_primitives, large_list),
        (primitives, small_tuple),
        (primitives, large_tuple),
        (primitives, small_dict),
        (primitives, large_dict),
        (windowed_primitives, wv_with_one_window),
        (windowed_primitives, wv_with_multiple_windows),
    ]

    benchmarks = []
    for build_coder, generate_fn in cases:
        benchmarks.append(coder_benchmark_factory(build_coder(), generate_fn))

    suite = []
    for benchmark in benchmarks:
        suite.append(utils.BenchmarkConfig(benchmark, input_size, num_runs))
    utils.run_benchmarks(suite, verbose=verbose)
コード例 #2
0
def run_coder_benchmarks(num_runs,
                         input_size,
                         seed,
                         verbose,
                         filter_regex='.*'):
    """Create the coder benchmarks, filter them by name, and run them.

    Args:
      num_runs: passed to ``utils.BenchmarkConfig`` for every benchmark.
      input_size: passed to ``utils.BenchmarkConfig`` for every benchmark.
      seed: seeds the ``random`` module before any benchmark is created.
      verbose: forwarded as the ``verbose`` keyword of
        ``utils.run_benchmarks``.
      filter_regex: only benchmarks whose ``__name__`` matches this pattern
        (``re.search``, case-insensitive) are run.
    """
    random.seed(seed)

    benchmarks = []

    def register(coder, generate_fn):
        # Appends in call order, so the list order equals the order below.
        benchmarks.append(coder_benchmark_factory(coder, generate_fn))

    # TODO(BEAM-4441): Pick coders using type hints, for example:
    # tuple_coder = typecoders.registry.get_coder(typing.Tuple[int, ...])
    register(coders.FastPrimitivesCoder(), small_int)
    register(coders.FastPrimitivesCoder(), large_int)
    register(coders.FastPrimitivesCoder(), small_string)
    register(coders.FastPrimitivesCoder(), large_string)
    register(coders.FastPrimitivesCoder(), small_list)
    register(coders.IterableCoder(coders.FastPrimitivesCoder()), small_list)
    register(coders.FastPrimitivesCoder(), large_list)
    register(coders.IterableCoder(coders.FastPrimitivesCoder()), large_list)
    register(
        coders.IterableCoder(coders.FastPrimitivesCoder()), large_iterable)
    register(coders.FastPrimitivesCoder(), small_tuple)
    register(coders.FastPrimitivesCoder(), large_tuple)
    register(coders.FastPrimitivesCoder(), small_dict)
    register(coders.FastPrimitivesCoder(), large_dict)
    register(
        coders.ProtoCoder(test_message.MessageWithMap),
        small_message_with_map)
    register(
        coders.ProtoCoder(test_message.MessageWithMap),
        large_message_with_map)
    register(
        coders.DeterministicProtoCoder(test_message.MessageWithMap),
        small_message_with_map)
    register(
        coders.DeterministicProtoCoder(test_message.MessageWithMap),
        large_message_with_map)
    register(
        coders.WindowedValueCoder(coders.FastPrimitivesCoder()),
        wv_with_one_window)
    register(
        coders.WindowedValueCoder(
            coders.FastPrimitivesCoder(), coders.IntervalWindowCoder()),
        wv_with_multiple_windows)
    register(
        coders.WindowedValueCoder(
            coders.FastPrimitivesCoder(), coders.GlobalWindowCoder()),
        globally_windowed_value)
    register(
        coders.LengthPrefixCoder(coders.FastPrimitivesCoder()), small_int)

    suite = []
    for benchmark in benchmarks:
        if not re.search(filter_regex, benchmark.__name__, flags=re.I):
            continue
        suite.append(utils.BenchmarkConfig(benchmark, input_size, num_runs))
    utils.run_benchmarks(suite, verbose=verbose)
コード例 #3
0
 def test_iterable_coder(self):
     """Registry coder for ``Iterable[str]`` equals an IterableCoder of bytes.

     Checks both coder equality and that the two coders produce the same
     encoding for a small sample.
     """
     from_registry = typecoders.registry.get_coder(typehints.Iterable[str])
     reference = coders.IterableCoder(coders.BytesCoder())
     sample = ['abc', 'xyz']

     self.assertEqual(reference, from_registry)

     registry_bytes = from_registry.encode(sample)
     reference_bytes = reference.encode(sample)
     self.assertEqual(registry_bytes, reference_bytes)
コード例 #4
0
 def test_iterable_coder(self):
   """Exercise IterableCoder over VarIntCoder, standalone and nested."""
   iterable_coder = coders.IterableCoder(coders.VarIntCoder())

   # The cloud object must describe a stream whose single component is the
   # element coder's own cloud object.
   expected_cloud_object = {
       '@type': 'kind:stream',
       'is_stream_like': True,
       'component_encodings': [coders.VarIntCoder().as_cloud_object()],
   }
   self.assertEqual(expected_cloud_object, iterable_coder.as_cloud_object())

   # Unnested: the iterable coder is checked on its own.
   self.check_coder(iterable_coder, [1], [-1, 0, 100])

   # Nested: the iterable coder is the second component of a tuple coder.
   component_coders = (
       coders.VarIntCoder(), coders.IterableCoder(coders.VarIntCoder()))
   nested_coder = coders.TupleCoder(component_coders)
   self.check_coder(nested_coder, (1, [1, 2, 3]))
コード例 #5
0
ファイル: coders_test_common.py プロジェクト: yjshen/beam
  def _test_iterable_coder_of_unknown_length(self, count):
    """Round-trip a generator of ``count`` ints through an IterableCoder.

    The input is a generator, which has no ``len()``, so the coder has to
    encode it without knowing the element count up front.
    """
    iterable_coder = coders.IterableCoder(coders.VarIntCoder())
    expected = list(range(count))
    encoded = iterable_coder.encode(i for i in range(count))
    decoded = iterable_coder.decode(encoded)
    self.assertCountEqual(expected, decoded)
コード例 #6
0
 def test_list_coder(self):
   """Registry coder for ``List[bytes]`` equals an IterableCoder of bytes."""
   from_registry = typecoders.registry.get_coder(typehints.List[bytes])
   reference = coders.IterableCoder(coders.BytesCoder())
   sample = [b'abc', b'xyz']

   self.assertEqual(reference, from_registry)

   registry_bytes = from_registry.encode(sample)
   reference_bytes = reference.encode(sample)
   self.assertEqual(registry_bytes, reference_bytes)

   # IterableCoder.decode() always returns a list.  Its implementation,
   # IterableCoderImpl, *can* return a non-list if it is provided a read_state
   # object, but this is not possible using the atomic IterableCoder interface.
   round_tripped = reference.decode(reference.encode(sample))
   self.assertIs(list, type(round_tripped))
コード例 #7
0
 def __init__(self, num_elements_per_benchmark):
   """Prepare the coder and the input list used by this benchmark.

   ``coder`` and ``generate_fn`` are not parameters; they are resolved from
   the enclosing scope.

   Args:
     num_elements_per_benchmark: number of times ``generate_fn`` is called to
       populate the input list.
   """
   self._coder = coders.IterableCoder(coder)
   elements = []
   for _ in range(num_elements_per_benchmark):
     elements.append(generate_fn())
   self._list = elements
コード例 #8
0
class StandardCodersTest(unittest.TestCase):
  """Checks coders against the examples listed in STANDARD_CODERS_YAML.

  Every test case supplies a coder spec (a URN plus optional component specs)
  and a mapping from expected encoded strings to JSON values.  The coder and a
  JSON-value parser are built from the spec, then encoding and decoding are
  compared against the examples.

  When ``fix`` is true, encoding mismatches are collected in ``to_fix``
  instead of failing, and ``tearDownClass`` rewrites the YAML file with the
  encodings that were actually produced.
  """

  # Maps a coder URN to a callable that builds the coder; the already-built
  # component coders are passed positionally.
  _urn_to_coder_class = {
      'urn:beam:coders:bytes:0.1': coders.BytesCoder,
      'urn:beam:coders:varint:0.1': coders.VarIntCoder,
      'urn:beam:coders:kv:0.1': lambda k, v: coders.TupleCoder((k, v)),
      'urn:beam:coders:interval_window:0.1': coders.IntervalWindowCoder,
      'urn:beam:coders:stream:0.1': lambda t: coders.IterableCoder(t),
      'urn:beam:coders:global_window:0.1': coders.GlobalWindowCoder,
      'urn:beam:coders:windowed_value:0.1':
          lambda v, w: coders.WindowedValueCoder(v, w)
  }

  # Maps a coder URN to a callable that converts the JSON form of an example
  # into the Python value to compare against.  The JSON value comes first,
  # followed by one parser per component coder.
  _urn_to_json_value_parser = {
      'urn:beam:coders:bytes:0.1': lambda x: x,
      'urn:beam:coders:varint:0.1': lambda x: x,
      'urn:beam:coders:kv:0.1':
          lambda x, key_parser, value_parser: (key_parser(x['key']),
                                               value_parser(x['value'])),
      # NOTE(review): the * 1000 suggests the JSON values are milliseconds
      # being converted to micros -- confirm against the YAML spec.
      'urn:beam:coders:interval_window:0.1':
          lambda x: IntervalWindow(
              start=Timestamp(micros=(x['end'] - x['span']) * 1000),
              end=Timestamp(micros=x['end'] * 1000)),
      # Materialized as a list: on Python 3 map() returns a one-shot iterator
      # that would never compare equal to a decoded value.
      'urn:beam:coders:stream:0.1':
          lambda x, parser: list(map(parser, x)),
      'urn:beam:coders:global_window:0.1': lambda x: window.GlobalWindow(),
      'urn:beam:coders:windowed_value:0.1':
          lambda x, value_parser, window_parser: windowed_value.create(
              value_parser(x['value']), x['timestamp'] * 1000,
              tuple([window_parser(w) for w in x['windows']]))
  }

  def test_standard_coders(self):
    """Run every test case loaded from STANDARD_CODERS_YAML."""
    for name, spec in _load_test_cases(STANDARD_CODERS_YAML):
      logging.info('Executing %s test.', name)
      self._run_standard_coder(name, spec)

  def _run_standard_coder(self, name, spec):
    """Check one test case's examples, for each requested nesting mode.

    Args:
      name: the test case name (not used by the checks themselves).
      spec: the test case; reads ``coder``, ``examples``, the optional
        ``nested`` flag and, when fixing, ``index``.
    """
    coder = self.parse_coder(spec['coder'])
    parse_value = self.json_value_parser(spec['coder'])
    # Without an explicit 'nested' entry both modes are exercised.
    nested_list = [spec['nested']] if 'nested' in spec else [True, False]
    for nested in nested_list:
      for expected_encoded, json_value in spec['examples'].items():
        value = parse_value(json_value)
        expected_encoded = expected_encoded.encode('latin1')
        if not spec['coder'].get('non_deterministic', False):
          actual_encoded = encode_nested(coder, value, nested)
          if self.fix and actual_encoded != expected_encoded:
            # Record the mismatch so tearDownClass can patch the YAML file.
            self.to_fix[spec['index'], expected_encoded] = actual_encoded
          else:
            self.assertEqual(expected_encoded, actual_encoded)
            self.assertEqual(decode_nested(coder, expected_encoded, nested),
                             value)
        else:
          # Only verify decoding for a non-deterministic coder
          self.assertEqual(decode_nested(coder, expected_encoded, nested),
                           value)

  def parse_coder(self, spec):
    """Build a coder from ``spec``, recursing into its ``components``."""
    return self._urn_to_coder_class[spec['urn']](
        *[self.parse_coder(c) for c in spec.get('components', ())])

  def json_value_parser(self, coder_spec):
    """Return a one-argument parser for JSON values of ``coder_spec``.

    The returned callable applies the parser registered for the spec's URN,
    passing it the parsers built recursively for the spec's ``components``.
    """
    component_parsers = [
        self.json_value_parser(c) for c in coder_spec.get('components', ())]
    return lambda x: self._urn_to_json_value_parser[coder_spec['urn']](
        x, *component_parsers)

  # Used when --fix is passed.

  fix = False
  # Maps (document index, expected encoding) to the encoding actually
  # produced.  Class-level, so it is shared by every instance.
  to_fix = {}

  @classmethod
  def tearDownClass(cls):
    """Rewrite STANDARD_CODERS_YAML with the recorded encodings when fixing."""
    if not (cls.fix and cls.to_fix):
      return

    # One pre-formatted string prints the same text under the Python 2 print
    # statement and the Python 3 print function.
    print('FIXING %d TESTS' % len(cls.to_fix))
    doc_sep = '\n---\n'
    with open(STANDARD_CODERS_YAML) as yaml_file:
      docs = yaml_file.read().split(doc_sep)

    def quote(s):
      # JSON-quote the bytes, spelling NUL as \0 rather than \u0000
      # (presumably to match how the YAML file writes it -- confirm).
      return json.dumps(s.decode('latin1')).replace(r'\u0000', r'\0')

    for (doc_ix, expected_encoded), actual_encoded in cls.to_fix.items():
      print('%s -> %s' % (quote(expected_encoded), quote(actual_encoded)))
      docs[doc_ix] = docs[doc_ix].replace(
          quote(expected_encoded) + ':', quote(actual_encoded) + ':')

    with open(STANDARD_CODERS_YAML, 'w') as yaml_file:
      yaml_file.write(doc_sep.join(docs))