Ejemplo n.º 1
0
    def _handle_data_tuple(self, data_tuple, stream):
        """Deserialize one incoming data tuple, run the bolt on it and report latencies.

        :param data_tuple: incoming tuple message; its ``values``, ``key`` and
                           ``roots`` attributes are read.
        :param stream: stream descriptor; its ``id`` and ``component_name``
                       attributes are read.
        """
        began_at = time.time()

        # Decode every serialized value, then wrap the result in a HeronTuple.
        decoded = [self.serializer.deserialize(raw) for raw in data_tuple.values]
        tup = TupleHelper.make_tuple(
            stream, data_tuple.key, decoded, roots=data_tuple.roots)

        decoded_at = time.time()
        self.bolt_impl.process(tup)
        # Time spent inside the user's process() call.
        execute_latency_ns = (time.time() - decoded_at) * constants.SEC_TO_NS
        # Time spent deserializing values and constructing the tuple.
        deserialize_latency_ns = (decoded_at - began_at) * constants.SEC_TO_NS

        hook_context = self.pplan_helper.context
        hook_context.invoke_hook_bolt_execute(tup, execute_latency_ns)

        stream_id = stream.id
        source_component = stream.component_name
        self.bolt_metrics.deserialize_data_tuple(
            stream_id, source_component, deserialize_latency_ns)
        self.bolt_metrics.execute_tuple(
            stream_id, source_component, execute_latency_ns)
Ejemplo n.º 2
0
  def test_root_tuple_info(self):
    """make_root_tuple_info should expose the stream id and tuple id it was given."""
    expected_stream, expected_tuple = "stream id", "tuple_id"

    info = TupleHelper.make_root_tuple_info(expected_stream, expected_tuple)

    self.assertEqual(info.stream_id, expected_stream)
    self.assertEqual(info.tuple_id, expected_tuple)
Ejemplo n.º 3
0
    def test_root_tuple_info(self):
        """make_root_tuple_info should expose the stream id and tuple id it was given."""
        expected_stream, expected_tuple = "stream id", "tuple_id"

        info = TupleHelper.make_root_tuple_info(expected_stream, expected_tuple)

        self.assertEqual(info.stream_id, expected_stream)
        self.assertEqual(info.tuple_id, expected_tuple)
Ejemplo n.º 4
0
 def send_tick():
   """Create a tick tuple, run the bolt's tick handler on it and record the latency.

   Takes no arguments; ``self`` is resolved from the enclosing scope.
   """
   tick_tuple = TupleHelper.make_tick_tuple()

   began_at = time.time()
   self.bolt_impl.process_tick(tick_tuple)
   elapsed_sec = time.time() - began_at
   tick_execute_latency_ns = elapsed_sec * system_constants.SEC_TO_NS

   self.bolt_metrics.execute_tuple(
       tick_tuple.id, tick_tuple.component, tick_execute_latency_ns)

   self.output_helper.send_out_tuples()
   # Wake the looper so emitted tuples would be added to buffer now.
   self.looper.wake_up()
   # Presumably schedules the next tick; the helper is defined outside this block.
   self._prepare_tick_tup_timer()
Ejemplo n.º 5
0
 def test_tick_tuple(self):
   """A tick tuple should carry the fixed tick identifiers and no payload fields."""
   tick = TupleHelper.make_tick_tuple()

   # Fixed identifiers of a tick tuple.
   self.assertEqual(tick.id, "__tick")
   self.assertEqual(tick.component, "__system")
   self.assertEqual(tick.stream, "__tick")

   # Fields that are expected to be unset on a tick tuple.
   for unset_field in ("task", "values", "roots"):
     self.assertIsNone(getattr(tick, unset_field))

   # The creation timestamp should be within 10 ms of the current time.
   self.assertAlmostEqual(tick.creation_time, time.time(), delta=0.01)
Ejemplo n.º 6
0
 def send_tick():
   """Create a tick tuple, run the bolt's tick handler on it and record the latency.

   Takes no arguments; ``self`` is resolved from the enclosing scope.
   """
   tick_tuple = TupleHelper.make_tick_tuple()

   began_at = time.time()
   self.bolt_impl.process_tick(tick_tuple)
   elapsed_sec = time.time() - began_at
   tick_execute_latency_ns = elapsed_sec * system_constants.SEC_TO_NS

   self.bolt_metrics.execute_tuple(
       tick_tuple.id, tick_tuple.component, tick_execute_latency_ns)

   self.output_helper.send_out_tuples()
   # Wake the looper so emitted tuples would be added to buffer now.
   self.looper.wake_up()
   # Presumably schedules the next tick; the helper is defined outside this block.
   self._prepare_tick_tup_timer()
Ejemplo n.º 7
0
 def test_tick_tuple(self):
     """A tick tuple should carry the fixed tick identifiers and no payload fields."""
     tick = TupleHelper.make_tick_tuple()

     # Fixed identifiers of a tick tuple.
     self.assertEqual(tick.id, "__tick")
     self.assertEqual(tick.component, "__system")
     self.assertEqual(tick.stream, "__tick")

     # Fields that are expected to be unset on a tick tuple.
     for unset_field in ("task", "values", "roots"):
         self.assertIsNone(getattr(tick, unset_field))

     # The creation timestamp should be within 10 ms of the current time.
     self.assertAlmostEqual(tick.creation_time, time.time(), delta=0.01)
Ejemplo n.º 8
0
  def test_normal_tuple(self):
    """make_tuple without roots should mirror its inputs and leave task/roots unset."""
    stream_id = mock_protobuf.get_mock_stream_id(
        id="stream_id", component_name="comp_name")
    tuple_key = "tuple_key"
    payload = mock_generator.prim_list

    # Build the tuple without passing any roots.
    built = TupleHelper.make_tuple(stream_id, tuple_key, payload)

    self.assertEqual(built.id, tuple_key)
    self.assertEqual(built.component, stream_id.component_name)
    self.assertEqual(built.stream, stream_id.id)
    self.assertIsNone(built.task)
    self.assertEqual(built.values, payload)
    # The creation timestamp should be within 10 ms of the current time.
    self.assertAlmostEqual(built.creation_time, time.time(), delta=0.01)
    self.assertIsNone(built.roots)
Ejemplo n.º 9
0
    def test_normal_tuple(self):
        """make_tuple without roots should mirror its inputs and leave task/roots unset."""
        stream_id = mock_protobuf.get_mock_stream_id(
            id="stream_id", component_name="comp_name")
        tuple_key = "tuple_key"
        payload = mock_generator.prim_list

        # Build the tuple without passing any roots.
        built = TupleHelper.make_tuple(stream_id, tuple_key, payload)

        self.assertEqual(built.id, tuple_key)
        self.assertEqual(built.component, stream_id.component_name)
        self.assertEqual(built.stream, stream_id.id)
        self.assertIsNone(built.task)
        self.assertEqual(built.values, payload)
        # The creation timestamp should be within 10 ms of the current time.
        self.assertAlmostEqual(built.creation_time, time.time(), delta=0.01)
        self.assertIsNone(built.roots)
Ejemplo n.º 10
0
  def _handle_data_tuple(self, data_tuple, stream):
    """Deserialize one incoming data tuple, run the bolt on it and report latencies.

    :param data_tuple: incoming tuple message; its ``values``, ``key`` and
                       ``roots`` attributes are read.
    :param stream: stream descriptor; its ``id`` and ``component_name``
                   attributes are read.
    """
    began_at = time.time()

    # Decode every serialized value, then wrap the result in a HeronTuple.
    decoded = [self.serializer.deserialize(raw) for raw in data_tuple.values]
    tup = TupleHelper.make_tuple(
        stream, data_tuple.key, decoded, roots=data_tuple.roots)

    decoded_at = time.time()
    self.bolt_impl.process(tup)
    # Time spent inside the user's process() call.
    execute_latency_ns = (time.time() - decoded_at) * system_constants.SEC_TO_NS
    # Time spent deserializing values and constructing the tuple.
    deserialize_latency_ns = (decoded_at - began_at) * system_constants.SEC_TO_NS

    hook_context = self.pplan_helper.context
    hook_context.invoke_hook_bolt_execute(tup, execute_latency_ns)

    stream_id = stream.id
    source_component = stream.component_name
    self.bolt_metrics.deserialize_data_tuple(
        stream_id, source_component, deserialize_latency_ns)
    self.bolt_metrics.execute_tuple(
        stream_id, source_component, execute_latency_ns)
Ejemplo n.º 11
0
    def emit(self,
             tup,
             tup_id=None,
             stream=Stream.DEFAULT_STREAM_ID,
             direct_task=None,
             need_task_ids=False):
        """Emits a new tuple from this Spout

        It is compatible with StreamParse API.

        :type tup: list or tuple
        :param tup: the new output Tuple to send from this spout,
                    should contain only serializable data.
        :type tup_id: str or object
        :param tup_id: the ID for the Tuple. Leave this blank for an unreliable emit.
                       (Same as messageId in Java)
        :type stream: str
        :param stream: the ID of the stream this Tuple should be emitted to.
                       Leave empty to emit to the default stream.
        :type direct_task: int
        :param direct_task: the task to send the Tuple to if performing a direct emit.
        :type need_task_ids: bool
        :param need_task_ids: whether the task IDs the Tuple was emitted to
                              should be returned.
        :rtype: list or None
        :return: when ``need_task_ids`` is true, a new list holding the custom
                 grouping task IDs followed by ``direct_task`` (if given);
                 otherwise ``None``.
        :raises TypeError: if ``direct_task`` is given but is not an integer.
        """
        # first check whether this tuple is sane
        self.pplan_helper.check_output_schema(stream, tup)

        # target task ids chosen by custom grouping; treated below as
        # possibly None or empty when the stream has no custom grouping
        custom_target_task_ids = self.pplan_helper.choose_tasks_for_custom_grouping(
            stream, tup)

        self.pplan_helper.context.invoke_hook_emit(tup, stream, None)

        data_tuple = tuple_pb2.HeronDataTuple()
        data_tuple.key = 0

        if direct_task is not None:
            if not isinstance(direct_task, int):
                raise TypeError(
                    "direct_task argument needs to be an integer, given: %s" %
                    str(type(direct_task)))
            # performing emit-direct
            data_tuple.dest_task_ids.append(direct_task)
        elif custom_target_task_ids is not None:
            # for custom grouping
            for task_id in custom_target_task_ids:
                data_tuple.dest_task_ids.append(task_id)

        if tup_id is not None:
            tuple_info = TupleHelper.make_root_tuple_info(stream, tup_id)
            if self.acking_enabled:
                # this message is rooted: remember it until it is acked/failed
                root = data_tuple.roots.add()
                root.taskid = self.pplan_helper.my_task_id
                root.key = tuple_info.key
                self.in_flight_tuples[tuple_info.key] = tuple_info
            else:
                # acking is disabled: queue the tuple info on immediate_acks instead
                self.immediate_acks.append(tuple_info)

        tuple_size_in_bytes = 0

        start_time = time.time()

        # Serialize each value and keep a running total of the payload size
        for obj in tup:
            serialized = self.serializer.serialize(obj)
            data_tuple.values.append(serialized)
            tuple_size_in_bytes += len(serialized)

        serialize_latency_ns = (time.time() - start_time) * constants.SEC_TO_NS
        self.spout_metrics.serialize_data_tuple(stream, serialize_latency_ns)

        super(SpoutInstance,
              self).admit_data_tuple(stream_id=stream,
                                     data_tuple=data_tuple,
                                     tuple_size_in_bytes=tuple_size_in_bytes)
        self.total_tuples_emitted += 1
        self.spout_metrics.update_emit_count(stream)
        if need_task_ids:
            # Copy before appending so the list handed back by the grouping
            # helper is never mutated through this return value.
            # NOTE(review): on a direct emit the tuple is addressed only to
            # direct_task, yet custom grouping ids are still reported here --
            # confirm this is intended.
            sent_task_ids = list(custom_target_task_ids or [])
            if direct_task is not None:
                sent_task_ids.append(direct_task)
            return sent_task_ids
Ejemplo n.º 12
0
  def emit(self, tup, tup_id=None, stream=Stream.DEFAULT_STREAM_ID,
           direct_task=None, need_task_ids=False):
    """Emits a new tuple from this Spout

    It is compatible with StreamParse API.

    :type tup: list or tuple
    :param tup: the new output Tuple to send from this spout,
                should contain only serializable data.
    :type tup_id: str or object
    :param tup_id: the ID for the Tuple. Leave this blank for an unreliable emit.
                   (Same as messageId in Java)
    :type stream: str
    :param stream: the ID of the stream this Tuple should be emitted to.
                   Leave empty to emit to the default stream.
    :type direct_task: int
    :param direct_task: the task to send the Tuple to if performing a direct emit.
    :type need_task_ids: bool
    :param need_task_ids: whether the task IDs the Tuple was emitted to
                          should be returned.
    :rtype: list or None
    :return: when ``need_task_ids`` is true, a new list holding the custom
             grouping task IDs followed by ``direct_task`` (if given);
             otherwise ``None``.
    :raises TypeError: if ``direct_task`` is given but is not an integer.
    """
    # first check whether this tuple is sane
    self.pplan_helper.check_output_schema(stream, tup)

    # target task ids chosen by custom grouping; treated below as
    # possibly None or empty when the stream has no custom grouping
    custom_target_task_ids = self.pplan_helper.choose_tasks_for_custom_grouping(stream, tup)

    self.pplan_helper.context.invoke_hook_emit(tup, stream, None)

    data_tuple = tuple_pb2.HeronDataTuple()
    data_tuple.key = 0

    if direct_task is not None:
      if not isinstance(direct_task, int):
        raise TypeError("direct_task argument needs to be an integer, given: %s"
                        % str(type(direct_task)))
      # performing emit-direct
      data_tuple.dest_task_ids.append(direct_task)
    elif custom_target_task_ids is not None:
      # for custom grouping
      for task_id in custom_target_task_ids:
        data_tuple.dest_task_ids.append(task_id)

    if tup_id is not None:
      tuple_info = TupleHelper.make_root_tuple_info(stream, tup_id)
      if self.acking_enabled:
        # this message is rooted: remember it until it is acked/failed
        root = data_tuple.roots.add()
        root.taskid = self.pplan_helper.my_task_id
        root.key = tuple_info.key
        self.in_flight_tuples[tuple_info.key] = tuple_info
      else:
        # acking is disabled: queue the tuple info on immediate_acks instead
        self.immediate_acks.append(tuple_info)

    tuple_size_in_bytes = 0

    start_time = time.time()

    # Serialize each value and keep a running total of the payload size
    for obj in tup:
      serialized = self.serializer.serialize(obj)
      data_tuple.values.append(serialized)
      tuple_size_in_bytes += len(serialized)

    serialize_latency_ns = (time.time() - start_time) * system_constants.SEC_TO_NS
    self.spout_metrics.serialize_data_tuple(stream, serialize_latency_ns)

    super(SpoutInstance, self).admit_data_tuple(stream_id=stream, data_tuple=data_tuple,
                                                tuple_size_in_bytes=tuple_size_in_bytes)
    self.total_tuples_emitted += 1
    self.spout_metrics.update_emit_count(stream)
    if need_task_ids:
      # Copy before appending so the list handed back by the grouping
      # helper is never mutated through this return value.
      # NOTE(review): on a direct emit the tuple is addressed only to
      # direct_task, yet custom grouping ids are still reported here --
      # confirm this is intended.
      sent_task_ids = list(custom_target_task_ids or [])
      if direct_task is not None:
        sent_task_ids.append(direct_task)
      return sent_task_ids