Ejemplo n.º 1
0
 def test_list_coder(self):
     # Runs check_coder on a ListCoder of varints, first on its own and then
     # as a component of a TupleCoder.
     int_list_coder = coders.ListCoder(coders.VarIntCoder())

     # Test unnested
     unnested_samples = ([1], [-1, 0, 100])
     self.check_coder(int_list_coder, *unnested_samples)

     # Test nested
     pair_coder = coders.TupleCoder((coders.VarIntCoder(), int_list_coder))
     self.check_coder(pair_coder, (1, [1, 2, 3]))
Ejemplo n.º 2
0
 def test_tuple_coder(self):
     # Checks TupleCoder's cloud object form, one exact encoding, and
     # check_coder runs on flat and nested tuples.
     pair_coder = coders.TupleCoder(
         (coders.VarIntCoder(), coders.BytesCoder()))

     # Verify cloud object representation
     expected_cloud_object = {
         '@type': 'kind:pair',
         'is_pair_like': True,
         'component_encodings': [
             coders.VarIntCoder().as_cloud_object(),
             coders.BytesCoder().as_cloud_object(),
         ],
     }
     self.assertEqual(expected_cloud_object, pair_coder.as_cloud_object())

     # Test binary representation
     encoded_pair = pair_coder.encode((4, 'abc'))
     self.assertEqual('\x04abc', encoded_pair)

     # Test unnested
     self.check_coder(
         pair_coder, (1, 'a'), (-2, 'a' * 100), (300, 'abc\0' * 5))

     # Test nested: the first component is itself a tuple coder.
     inner_coder = coders.TupleCoder(
         (coders.PickleCoder(), coders.VarIntCoder()))
     outer_coder = coders.TupleCoder((inner_coder, coders.StrUtf8Coder()))
     nested_samples = [
         ((1, 2), 'a'),
         ((-2, 5), u'a\u0101' * 100),
         ((300, 1), 'abc\0' * 5),
     ]
     self.check_coder(outer_coder, *nested_samples)
Ejemplo n.º 3
0
 def test_varint_coder(self):
   # Runs check_coder on VarIntCoder over three groups of ints.

   # Small ints.
   small_values = range(-10, 10)
   self.check_coder(coders.VarIntCoder(), *small_values)

   # Multi-byte encoding starts at 128
   boundary_values = range(120, 140)
   self.check_coder(coders.VarIntCoder(), *boundary_values)

   # Large values: alternating-sign ints of magnitude e**k, for every k
   # below ln(MAX_64_BIT_INT).
   MAX_64_BIT_INT = 0x7fffffffffffffff
   exponent_limit = int(math.log(MAX_64_BIT_INT))
   large_values = [
       int(math.pow(-1, exponent) * math.exp(exponent))
       for exponent in range(0, exponent_limit)
   ]
   self.check_coder(coders.VarIntCoder(), *large_values)
Ejemplo n.º 4
0
  def test_param_windowed_value_coder(self):
    # Checks ParamWindowedValueCoder, which is constructed here from an
    # encoded windowed value (the payload) plus a list of component coders.
    from apache_beam.transforms.window import IntervalWindow
    from apache_beam.utils.windowed_value import PaneInfo

    payload_coder = coders.WindowedValueCoder(
        coders.BytesCoder(), coders.IntervalWindowCoder())
    payload = payload_coder.encode(
        windowed_value.create(
            b'',
            # Milliseconds to microseconds
            1000 * 1000,
            (IntervalWindow(11, 21),),
            PaneInfo(True, False, 1, 2, 3)))

    def make_coder(value_coder):
      # Every coder under test shares the payload and the window coder type.
      return coders.ParamWindowedValueCoder(
          payload, [value_coder, coders.IntervalWindowCoder()])

    def make_value(value):
      # Every sample shares timestamp, window and pane info; only the value
      # differs.
      return windowed_value.WindowedValue(
          value,
          1,
          (window.IntervalWindow(11, 21),),
          PaneInfo(True, False, 1, 2, 3))

    # Test binary representation
    encoded = make_coder(coders.VarIntCoder()).encode(
        window.GlobalWindows.windowed_value(1))
    self.assertEqual(b'\x01', encoded)

    # Test unnested
    self.check_coder(
        make_coder(coders.VarIntCoder()), make_value(3), make_value(1))

    # Test nested
    nested_coder = coders.TupleCoder((
        make_coder(coders.FloatCoder()),
        make_coder(coders.StrUtf8Coder())))
    self.check_coder(nested_coder, (make_value(1.5), make_value("abc")))
Ejemplo n.º 5
0
    def test_sharded_key_coder(self):
        # Checks ShardedKeyCoder for several key types: cloud object form,
        # exact encodings, and check_coder runs both bare and nested in a
        # TupleCoder.
        #
        # Each case is (key, expected encoding of the key, coder for the key).
        cases = [
            (b'', b'\x00', coders.BytesCoder()),
            (b'key', b'\x03key', coders.BytesCoder()),
            ('key', b'\03\x6b\x65\x79', coders.StrUtf8Coder()),
            (('k', 1), b'\x01\x6b\x01',
             coders.TupleCoder(
                 (coders.StrUtf8Coder(), coders.VarIntCoder()))),
        ]

        for key, encoded_key, key_coder in cases:
            sharded_coder = coders.ShardedKeyCoder(key_coder)

            # Verify cloud object representation
            expected_cloud_object = {
                '@type': 'kind:sharded_key',
                'component_encodings': [key_coder.as_cloud_object()],
            }
            self.assertEqual(expected_cloud_object,
                             sharded_coder.as_cloud_object())

            # Expected bytes: the encoding of ShardedKey's second argument
            # followed by the encoding of the key.
            self.assertEqual(
                b'\x00' + encoded_key,
                sharded_coder.encode(ShardedKey(key, b'')))
            self.assertEqual(
                b'\x03123' + encoded_key,
                sharded_coder.encode(ShardedKey(key, b'123')))

            # Test unnested
            for second_arg in (b'', b'123'):
                self.check_coder(sharded_coder, ShardedKey(key, second_arg))

            # Test nested: pair this coder with the coder of every case.
            for other_key, _, other_key_coder in cases:
                other_coder = coders.ShardedKeyCoder(other_key_coder)
                for second_arg in (b'', b'123'):
                    self.check_coder(
                        coders.TupleCoder((sharded_coder, other_coder)),
                        (ShardedKey(key, second_arg),
                         ShardedKey(other_key, b'')))
Ejemplo n.º 6
0
def int64_user_gauge(namespace, name, metric, ptransform=None, tag=None):
    # type: (...) -> metrics_pb2.MonitoringInfo
    """Build the monitoring info for a user-defined int64 gauge.

    Args:
      namespace: User-defined namespace of the gauge, used as a label.
      name: Name of the gauge, used as a label.
      metric: The GaugeData whose value and timestamp are encoded.
      ptransform: The ptransform/step name used as a label.
      tag: The output tag name, used as a label.

    Returns:
      The result of create_monitoring_info for USER_GAUGE_URN with type
      LATEST_INT64_TYPE.

    Raises:
      TypeError: If metric is not a GaugeData instance.
    """
    labels = create_labels(
        ptransform=ptransform, tag=tag, namespace=namespace, name=name)
    # Reject unsupported metric types before touching any of their fields.
    if not isinstance(metric, GaugeData):
        raise TypeError(
            'Expected GaugeData metric type but received %s with value %s' %
            (type(metric), metric))
    int64_coder = coders.VarIntCoder()
    gauge_value, gauge_timestamp = metric.value, metric.timestamp
    payload = _encode_gauge(int64_coder, gauge_timestamp, gauge_value)
    return create_monitoring_info(
        USER_GAUGE_URN, LATEST_INT64_TYPE, payload, labels)
Ejemplo n.º 7
0
    def test_state_backed_iterable_coder(self):
        # Checks that StateBackedIterableCoder encodes and decodes lists of
        # ints through the read/write callbacks below, both on its own and as
        # both components of a TupleCoder.
        #
        # pylint: disable=global-variable-undefined
        # required for pickling by reference
        global state
        # In-memory store written by iterable_state_write and read by
        # iterable_state_read: token -> list of encoded elements.
        state = {}

        def iterable_state_write(values, element_coder_impl):
            # Store each element encoded under a fresh token and return the
            # token. Tokens are numbered by the current size of `state`.
            token = b'state_token_%d' % len(state)
            state[token] = [element_coder_impl.encode(e) for e in values]
            return token

        def iterable_state_read(token, element_coder_impl):
            # Decode and return the elements stored under `token`.
            return [element_coder_impl.decode(s) for s in state[token]]

        # NOTE(review): write_state_threshold=1 presumably makes even these
        # short lists go through write_state; the assertNotEqual below checks
        # that state was populated.
        coder = coders.StateBackedIterableCoder(
            coders.VarIntCoder(),
            read_state=iterable_state_read,
            write_state=iterable_state_write,
            write_state_threshold=1)
        # Note: do not use check_coder
        # see https://github.com/cloudpipe/cloudpickle/issues/452
        self._observe(coder)
        self.assertEqual([1, 2, 3], coder.decode(coder.encode([1, 2, 3])))
        # Ensure that state was actually used.
        self.assertNotEqual(state, {})
        # Same check with the coder used for both components of a tuple.
        tupleCoder = coders.TupleCoder((coder, coder))
        self._observe(tupleCoder)
        self.assertEqual(([1], [2, 3]),
                         tupleCoder.decode(tupleCoder.encode(([1], [2, 3]))))
Ejemplo n.º 8
0
 def test_iterable_coder(self):
   # Checks IterableCoder over varints: cloud object form, then check_coder
   # runs on its own and inside a TupleCoder.
   int_stream_coder = coders.IterableCoder(coders.VarIntCoder())

   # Verify cloud object representation
   expected_cloud_object = {
       '@type': 'kind:stream',
       'is_stream_like': True,
       'component_encodings': [coders.VarIntCoder().as_cloud_object()],
   }
   self.assertEqual(expected_cloud_object, int_stream_coder.as_cloud_object())

   # Test unnested
   unnested_samples = ([1], [-1, 0, 100])
   self.check_coder(int_stream_coder, *unnested_samples)

   # Test nested
   nested_coder = coders.TupleCoder(
       (coders.VarIntCoder(), coders.IterableCoder(coders.VarIntCoder())))
   self.check_coder(nested_coder, (1, [1, 2, 3]))
Ejemplo n.º 9
0
def _decode_gauge(coder, payload):
    """Decode a gauge payload into a (timestamp, value) tuple.

    The payload is read as a varint followed by one value decoded with
    ``coder``. The varint is divided by 1000.0 before being returned.

    Args:
      coder: Coder whose impl decodes the value that follows the timestamp.
      payload: The encoded gauge bytes.
    """
    stream = coder_impl.create_InputStream(payload)
    # Read order matters: the timestamp precedes the value in the stream.
    time_ms = coders.VarIntCoder().get_impl().decode_from_stream(stream, True)
    value = coder.get_impl().decode_from_stream(stream, True)
    return (time_ms / 1000.0, value)
Ejemplo n.º 10
0
def extract_gauge_value(monitoring_info_proto):
    """Return the (timestamp, value) tuple decoded from a gauge monitoring info.

    Args:
      monitoring_info_proto: The monitoring info whose payload is decoded.

    Raises:
      ValueError: If is_gauge() rejects the monitoring info.
    """
    if is_gauge(monitoring_info_proto):
        # Only LATEST_INT64_TYPE is currently supported.
        return _decode_gauge(
            coders.VarIntCoder(), monitoring_info_proto.payload)
    raise ValueError('Unsupported type %s' % monitoring_info_proto.type)
Ejemplo n.º 11
0
  def test_windowedvalue_coder_paneinfo(self):
    # Runs check_coder on WindowedValueCoder for windowed values that differ
    # only in their PaneInfo, bare and as both components of a TupleCoder.
    coder = coders.WindowedValueCoder(
        coders.VarIntCoder(), coders.GlobalWindowCoder())

    timing = windowed_value.PaneInfoTiming
    pane_info_args = [
        (True, True, timing.EARLY, 0, -1),
        (True, False, timing.ON_TIME, 0, 0),
        (True, False, timing.ON_TIME, 10, 0),
        (False, True, timing.ON_TIME, 0, 23),
        (False, True, timing.ON_TIME, 12, 23),
        (False, False, timing.LATE, 0, 123),
    ]
    test_paneinfo_values = [windowed_value.PANE_INFO_UNKNOWN]
    test_paneinfo_values.extend(
        windowed_value.PaneInfo(*args) for args in pane_info_args)

    test_values = [
        windowed_value.WindowedValue(123, 234, (GlobalWindow(),), pane_info)
        for pane_info in test_paneinfo_values
    ]

    # Test unnested.
    unknown_pane_value = windowed_value.WindowedValue(
        123, 234, (GlobalWindow(),), windowed_value.PANE_INFO_UNKNOWN)
    self.check_coder(coder, unknown_pane_value)
    for value in test_values:
      self.check_coder(coder, value)

    # Test nested: every ordered pair of test values.
    value_pairs = [(first, second)
                   for first in test_values
                   for second in test_values]
    for value_pair in value_pairs:
      self.check_coder(coders.TupleCoder((coder, coder)), value_pair)
Ejemplo n.º 12
0
    def test_state_backed_iterable_coder(self):
        # Runs check_coder on StateBackedIterableCoder, on its own and as
        # both components of a TupleCoder, with the read/write callbacks
        # below supplied both to the coder and to the pipeline context.
        #
        # pylint: disable=global-variable-undefined
        # required for pickling by reference
        global state
        # In-memory store written by iterable_state_write and read by
        # iterable_state_read: token -> list of encoded elements.
        state = {}

        def iterable_state_write(values, element_coder_impl):
            # Store each element encoded under a fresh token and return the
            # token. Tokens are numbered by the current size of `state`.
            token = b'state_token_%d' % len(state)
            state[token] = [element_coder_impl.encode(e) for e in values]
            return token

        def iterable_state_read(token, element_coder_impl):
            # Decode and return the elements stored under `token`.
            return [element_coder_impl.decode(s) for s in state[token]]

        # NOTE(review): write_state_threshold=1 presumably makes even these
        # short lists go through write_state; the assertNotEqual below checks
        # that state was populated.
        coder = coders.StateBackedIterableCoder(
            coders.VarIntCoder(),
            read_state=iterable_state_read,
            write_state=iterable_state_write,
            write_state_threshold=1)
        # The same callbacks are handed to the context passed to check_coder.
        context = pipeline_context.PipelineContext(
            iterable_state_read=iterable_state_read,
            iterable_state_write=iterable_state_write)
        self.check_coder(coder, [1, 2, 3],
                         context=context,
                         test_size_estimation=False)
        # Ensure that state was actually used.
        self.assertNotEqual(state, {})
        self.check_coder(coders.TupleCoder((coder, coder)), ([1], [2, 3]),
                         context=context,
                         test_size_estimation=False)
Ejemplo n.º 13
0
def extract_counter_value(monitoring_info_proto):
    """Return the counter value decoded from the monitoring info's payload.

    Args:
      monitoring_info_proto: The monitoring info whose payload is decoded.

    Raises:
      ValueError: If is_counter() rejects the monitoring info.
    """
    if is_counter(monitoring_info_proto):
        # Only SUM_INT64_TYPE is currently supported.
        int64_coder = coders.VarIntCoder()
        return int64_coder.decode(monitoring_info_proto.payload)
    raise ValueError('Unsupported type %s' % monitoring_info_proto.type)
Ejemplo n.º 14
0
 def test_map_coder(self):
     # Runs check_coder on a MapCoder (varint keys, UTF-8 string values) with
     # a small, an empty and a 5000-entry dict.
     int_to_str_coder = coders.MapCoder(
         coders.VarIntCoder(), coders.StrUtf8Coder())
     small_map = {1: "one", 300: "three hundred"}
     empty_map = {}
     large_map = {i: str(i) for i in range(5000)}
     self.check_coder(int_to_str_coder, small_map, empty_map, large_map)
Ejemplo n.º 15
0
 def test_timer_coder(self):
   # Runs check_coder on _TimerCoder with bytes payloads, then on a
   # one-element TupleCoder wrapping a _TimerCoder with a varint payload.
   bytes_timers = [
       {'timestamp': timestamp.Timestamp(micros=micros), 'payload': b'xyz'}
       for micros in (-3000, 0, 3000)
   ]
   self.check_coder(coders._TimerCoder(coders.BytesCoder()), *bytes_timers)

   nested_coder = coders.TupleCoder(
       (coders._TimerCoder(coders.VarIntCoder()),))
   int_timer = {'timestamp': timestamp.Timestamp.of(37000), 'payload': 389}
   self.check_coder(nested_coder, (int_timer,))
Ejemplo n.º 16
0
def _decode_distribution(value_coder, payload):
    """Decode a distribution payload into a (count, sum, min, max) tuple.

    The count is read with a VarIntCoder impl; the three remaining fields
    are read with ``value_coder``'s impl, in stream order.

    Args:
      value_coder: Coder for the sum, min and max fields.
      payload: The encoded distribution bytes.
    """
    stream = coder_impl.create_InputStream(payload)
    count_impl = coders.VarIntCoder().get_impl()
    value_impl = value_coder.get_impl()
    count = count_impl.decode_from_stream(stream, True)
    # The three value fields are read one after another from the same stream.
    total, minimum, maximum = [
        value_impl.decode_from_stream(stream, True) for _ in range(3)
    ]
    return (count, total, minimum, maximum)
Ejemplo n.º 17
0
  def _test_iterable_coder_of_unknown_length(self, count):
    # Encodes a generator yielding 0..count-1 with an IterableCoder of
    # varints and asserts that decoding gives back the same items.
    iterable_coder = coders.IterableCoder(coders.VarIntCoder())
    expected_items = list(range(count))
    # A generator object has no len(), so its length is not known up front.
    lazy_items = (i for i in range(count))
    decoded_items = iterable_coder.decode(iterable_coder.encode(lazy_items))
    self.assertCountEqual(expected_items, decoded_items)
Ejemplo n.º 18
0
def _encode_distribution(value_coder, count, sum, min, max):
    """Encode a distribution as count followed by sum, min and max.

    The count is written with a VarIntCoder impl; sum, min and max are
    written with ``value_coder``'s impl, in that order.

    Args:
      value_coder: Coder for the sum, min and max fields.
      count: The count field.
      sum: The sum field.
      min: The min field.
      max: The max field.

    Returns:
      Whatever the output stream's get() returns after the four writes.
    """
    count_impl = coders.VarIntCoder().get_impl()
    value_impl = value_coder.get_impl()
    out = coder_impl.create_OutputStream()
    count_impl.encode_to_stream(count, out, True)
    # sum/min/max shadow the builtins here; the names belong to the signature.
    for field in (sum, min, max):
        value_impl.encode_to_stream(field, out, True)
    return out.get()
Ejemplo n.º 19
0
 def test_standard_int_coder(self):
     # Checks that the coder the registry returns for int encodes like
     # VarIntCoder and decodes its own output, for two sample values.
     registered_coder = typecoders.registry.get_coder(int)
     reference_coder = coders.VarIntCoder()
     for sample in (0x0404, 0x040404040404):
         self.assertEqual(registered_coder.encode(sample),
                          reference_coder.encode(sample))
         self.assertEqual(
             sample,
             registered_coder.decode(registered_coder.encode(sample)))
Ejemplo n.º 20
0
  def test_windowed_value_coder(self):
    # Checks WindowedValueCoder: cloud object form, exact bytes in both
    # directions, and check_coder runs (unnested, global window, nested).
    global_window_coder = coders.WindowedValueCoder(
        coders.VarIntCoder(), coders.GlobalWindowCoder())

    # Verify cloud object representation
    expected_cloud_object = {
        '@type': 'kind:windowed_value',
        'is_wrapper': True,
        'component_encodings': [
            coders.VarIntCoder().as_cloud_object(),
            coders.GlobalWindowCoder().as_cloud_object(),
        ],
    }
    self.assertEqual(expected_cloud_object,
                     global_window_coder.as_cloud_object())

    # Test binary representation
    expected_bytes = b'\x7f\xdf;dZ\x1c\xac\t\x00\x00\x00\x01\x0f\x01'
    self.assertEqual(
        expected_bytes,
        global_window_coder.encode(window.GlobalWindows.windowed_value(1)))

    # Test decoding large timestamp
    decoded = global_window_coder.decode(
        b'\x7f\xdf;dZ\x1c\xac\x08\x00\x00\x00\x01\x0f\x00')
    self.assertEqual(
        decoded,
        windowed_value.create(0, MIN_TIMESTAMP.micros, (GlobalWindow(), )))

    # Test unnested
    unnested_values = [
        windowed_value.WindowedValue(3, -100, ()),
        windowed_value.WindowedValue(-1, 100, (1, 2, 3)),
    ]
    self.check_coder(
        coders.WindowedValueCoder(coders.VarIntCoder()), *unnested_values)

    # Test Global Window
    fresh_global_window_coder = coders.WindowedValueCoder(
        coders.VarIntCoder(), coders.GlobalWindowCoder())
    self.check_coder(
        fresh_global_window_coder, window.GlobalWindows.windowed_value(1))

    # Test nested
    nested_coder = coders.TupleCoder((
        coders.WindowedValueCoder(coders.FloatCoder()),
        coders.WindowedValueCoder(coders.StrUtf8Coder())))
    nested_value = (
        windowed_value.WindowedValue(1.5, 0, ()),
        windowed_value.WindowedValue("abc", 10, ('window', )))
    self.check_coder(nested_coder, nested_value)
Ejemplo n.º 21
0
def distribution_payload_combiner(payload_a, payload_b):
  """Combine two encoded distribution payloads into one encoded payload.

  Both payloads are decoded with a VarIntCoder. Counts and sums are added,
  the smaller min and the larger max are kept, and the result is re-encoded
  with the same coder.

  Args:
    payload_a: First encoded distribution.
    payload_b: Second encoded distribution.
  """
  coder = coders.VarIntCoder()
  count_a, sum_a, min_a, max_a = _decode_distribution(coder, payload_a)
  count_b, sum_b, min_b, max_b = _decode_distribution(coder, payload_b)
  merged_count = count_a + count_b
  merged_sum = sum_a + sum_b
  merged_min = min(min_a, min_b)
  merged_max = max(max_a, max_b)
  return _encode_distribution(
      coder, merged_count, merged_sum, merged_min, merged_max)
Ejemplo n.º 22
0
def extract_distribution(monitoring_info_proto):
    """Return the (count, sum, min, max) tuple of a distribution monitoring info.

    Args:
      monitoring_info_proto: The monitoring info for the distribution.

    Raises:
      ValueError: If is_distribution() rejects the monitoring info.
    """
    if is_distribution(monitoring_info_proto):
        # Only DISTRIBUTION_INT64_TYPE is currently supported.
        return _decode_distribution(
            coders.VarIntCoder(), monitoring_info_proto.payload)
    raise ValueError('Unsupported type %s' % monitoring_info_proto.type)
Ejemplo n.º 23
0
    def test_sharded_key_coder(self):
        # Checks ShardedKeyCoder for several key types: cloud object form,
        # string form, exact encodings, type hints, and check_coder runs both
        # bare and nested in a TupleCoder.
        #
        # Each case is (key, expected encoding of the key, coder for the key).
        cases = [
            (b'', b'\x00', coders.BytesCoder()),
            (b'key', b'\x03key', coders.BytesCoder()),
            ('key', b'\03\x6b\x65\x79', coders.StrUtf8Coder()),
            (('k', 1), b'\x01\x6b\x01',
             coders.TupleCoder(
                 (coders.StrUtf8Coder(), coders.VarIntCoder()))),
        ]

        for key, encoded_key, key_coder in cases:
            sharded_coder = coders.ShardedKeyCoder(key_coder)

            # Verify cloud object representation
            expected_cloud_object = {
                '@type': 'kind:sharded_key',
                'component_encodings': [key_coder.as_cloud_object()],
            }
            self.assertEqual(expected_cloud_object,
                             sharded_coder.as_cloud_object())

            # Test str repr
            self.assertEqual('%s' % sharded_coder,
                             'ShardedKeyCoder[%s]' % key_coder)

            # Expected bytes: the encoding of ShardedKey's second argument
            # followed by the encoding of the key.
            self.assertEqual(
                b'\x00' + encoded_key,
                sharded_coder.encode(ShardedKey(key, b'')))
            self.assertEqual(
                b'\x03123' + encoded_key,
                sharded_coder.encode(ShardedKey(key, b'123')))

            # Test unnested
            for second_arg in (b'', b'123'):
                self.check_coder(sharded_coder, ShardedKey(key, second_arg))

            # Test type hints
            self.assertTrue(
                isinstance(sharded_coder.to_type_hint(),
                           sharded_key_type.ShardedKeyTypeConstraint))
            key_type = sharded_coder.to_type_hint().key_type
            if not isinstance(key_type, typehints.TupleConstraint):
                self.assertEqual(key_type, type(key))
            else:
                # Tuple keys are compared component by component.
                self.assertEqual(key_type.tuple_types,
                                 (type(key[0]), type(key[1])))
            coder_from_hint = coders.ShardedKeyCoder.from_type_hint(
                sharded_coder.to_type_hint(), typecoders.CoderRegistry())
            self.assertEqual(coder_from_hint, sharded_coder)

            # Test nested: pair this coder with the coder of every case.
            for other_key, _, other_key_coder in cases:
                other_coder = coders.ShardedKeyCoder(other_key_coder)
                for second_arg in (b'', b'123'):
                    self.check_coder(
                        coders.TupleCoder((sharded_coder, other_coder)),
                        (ShardedKey(key, second_arg),
                         ShardedKey(other_key, b'')))
Ejemplo n.º 24
0
def int64_user_distribution(namespace, name, metric, ptransform=None):
  """Return the distribution monitoring info for a user-defined metric.

  Args:
    namespace: Namespace of the metric, used as a label.
    name: Name of the metric, used as a label.
    metric: The DistributionData for the metric; its count, sum, min and
      max attributes are encoded into the payload.
    ptransform: The ptransform id used as a label.
  """
  labels = create_labels(ptransform=ptransform, namespace=namespace, name=name)
  int64_coder = coders.VarIntCoder()
  distribution_fields = (metric.count, metric.sum, metric.min, metric.max)
  payload = _encode_distribution(int64_coder, *distribution_fields)
  return create_monitoring_info(
      USER_DISTRIBUTION_URN, DISTRIBUTION_INT64_TYPE, payload, labels)
Ejemplo n.º 25
0
 def test_map_coder(self):
     # Runs check_coder on a MapCoder (varint keys, UTF-8 string values) and
     # on the coder returned by its as_deterministic_coder, with the same
     # three dicts.
     map_coder = coders.MapCoder(
         coders.VarIntCoder(), coders.StrUtf8Coder())
     small_map = {1: "one", 300: "three hundred"}
     large_map = {i: str(i) for i in range(5000)}
     values = [small_map, {}, large_map]

     self.check_coder(map_coder, *values)
     deterministic_coder = map_coder.as_deterministic_coder("label")
     self.check_coder(deterministic_coder, *values)
Ejemplo n.º 26
0
def int64_counter(urn, metric, ptransform=None, tag=None):
    # type: (...) -> metrics_pb2.MonitoringInfo
    """Return the counter monitoring info for the specified URN, metric and labels.

    Args:
      urn: The URN of the monitoring info/metric.
      metric: The payload field to use in the monitoring info or an int value.
      ptransform: The ptransform/step name used as a label.
      tag: The output tag name, used as a label.
    """
    labels = create_labels(ptransform=ptransform, tag=tag)
    # An int is encoded with VarIntCoder; anything else is used as the
    # payload unchanged.
    payload = (
        coders.VarIntCoder().encode(metric)
        if isinstance(metric, int) else metric)
    return create_monitoring_info(urn, SUM_INT64_TYPE, payload, labels)
Ejemplo n.º 27
0
def int64_user_counter(namespace, name, metric, ptransform=None):
  # type: (...) -> metrics_pb2.MonitoringInfo

  """Return the counter monitoring info for a user-defined counter.

  Args:
    namespace: Namespace of the counter, used as a label.
    name: Name of the counter, used as a label.
    metric: The payload field to use in the monitoring info or an int value.
    ptransform: The ptransform id used as a label.
  """
  labels = create_labels(ptransform=ptransform, namespace=namespace, name=name)
  # An int is encoded with VarIntCoder; anything else is used as the payload
  # unchanged.
  payload = (
      coders.VarIntCoder().encode(metric)
      if isinstance(metric, int) else metric)
  return create_monitoring_info(
      USER_COUNTER_URN, SUM_INT64_TYPE, payload, labels)
Ejemplo n.º 28
0
def int64_distribution(urn, metric, ptransform=None, pcollection=None):
    # type: (...) -> metrics_pb2.MonitoringInfo
    """Return a distribution monitoring info for the URN, metric and labels.

    Args:
      urn: The URN of the monitoring info/metric.
      metric: The DistributionData for the metric; its count, sum, min and
        max attributes are encoded into the payload.
      ptransform: The ptransform id used as a label.
      pcollection: The pcollection id used as a label.
    """
    labels = create_labels(ptransform=ptransform, pcollection=pcollection)
    int64_coder = coders.VarIntCoder()
    distribution_fields = (metric.count, metric.sum, metric.min, metric.max)
    payload = _encode_distribution(int64_coder, *distribution_fields)
    return create_monitoring_info(
        urn, DISTRIBUTION_INT64_TYPE, payload, labels)
Ejemplo n.º 29
0
def int64_counter(urn, metric, ptransform=None, pcollection=None, labels=None):
    # type: (...) -> metrics_pb2.MonitoringInfo
    """Return the counter monitoring info for the specified URN, metric and labels.

    Args:
      urn: The URN of the monitoring info/metric.
      metric: The payload field to use in the monitoring info or an int value.
      ptransform: The ptransform id used as a label.
      pcollection: The pcollection id used as a label.
      labels: Optional dict of additional labels. It is copied, never
        modified; entries produced from ptransform/pcollection take
        precedence over entries with the same key.
    """
    # Work on a copy: updating the caller's dict in place would leak the
    # ptransform/pcollection labels into whatever the caller does with the
    # dict afterwards (e.g. reusing it for the next metric).
    merged_labels = dict(labels) if labels else {}
    merged_labels.update(
        create_labels(ptransform=ptransform, pcollection=pcollection))
    if isinstance(metric, int):
        metric = coders.VarIntCoder().encode(metric)
    return create_monitoring_info(urn, SUM_INT64_TYPE, metric, merged_labels)
Ejemplo n.º 30
0
def int64_gauge(urn, metric, ptransform=None):
    # type: (...) -> metrics_pb2.MonitoringInfo
    """Return the gauge monitoring info for the URN, metric and labels.

    The payload is the current time in milliseconds followed by the value,
    each encoded with VarIntCoder.

    Args:
      urn: The URN of the monitoring info/metric.
      metric: An int representing the value. The current time will be used
        for the timestamp.
      ptransform: The ptransform id used as a label.

    Raises:
      TypeError: If metric is not an int.
    """
    labels = create_labels(ptransform=ptransform)
    if not isinstance(metric, int):
        raise TypeError(
            'Expected int metric type but received %s with value %s' %
            (type(metric), metric))
    # Scale to milliseconds BEFORE truncating. int(time.time()) * 1000 drops
    # the fractional second first, so the result is always a multiple of 1000.
    time_ms = int(time.time() * 1000)
    coder = coders.VarIntCoder()
    payload = coder.encode(time_ms) + coder.encode(metric)
    return create_monitoring_info(urn, LATEST_INT64_TYPE, payload, labels)