Example #1
0
  def test_sanitize_outputs(self):
    """Check ``_sanitize_outputs()`` on missing, malformed and well-formed outputs."""
    def new_spec():
      # every scenario starts from an identical, freshly built spec
      return HeronComponentSpec("spout", "class", True, 1)

    # nothing was ever assigned to outputs -> the sanitizer hands back None
    untouched = new_spec()
    self.assertIsNone(untouched._sanitize_outputs())

    # a bare string is neither a list nor a tuple
    not_a_sequence = new_spec()
    not_a_sequence.outputs = "string"
    self.assertRaises(TypeError, not_a_sequence._sanitize_outputs)

    # list items have to be strings or Stream objects
    bad_items = new_spec()
    bad_items.outputs = ["string", False, 123]
    self.assertRaises(TypeError, bad_items._sanitize_outputs)

    # plain field names all end up under the "default" stream
    only_names = new_spec()
    only_names.outputs = ["string", "hello", "heron"]
    self.assertEqual(only_names._sanitize_outputs(),
                     {"default": ["string", "hello", "heron"]})

    # names mixed with Stream objects: fields of the Stream named "default" are
    # appended after the bare names, other streams get their own entry
    mixed = new_spec()
    mixed.outputs = [
        "string",
        "hello",
        Stream(fields=["abc", "def"], name="another_stream"),
        Stream(fields=["another", "default"], name="default"),
    ]
    self.assertEqual(
        mixed._sanitize_outputs(),
        {
            "default": ["string", "hello", "another", "default"],
            "another_stream": ["abc", "def"],
        })
class TestWordSpout(Spout):
    """Spout that emits randomly chosen words on two named streams.

    ``stream1`` carries one-field tuples, ``stream2`` carries two-field tuples.
    """
    words = ('nathan', 'mike', 'jackson', 'golda', 'bertels')
    outputs = (
        Stream(fields=('field1', ), name='stream1'),
        Stream(fields=('field2', 'field3'), name='stream2'),
    )

    def next_tuple(self):
        """Call ``sleep(5)``, then log and emit one tuple on each declared stream."""
        sleep(5)
        # all three words are drawn up front, before anything is logged or emitted
        single, left, right = (choice(self.words) for _ in range(3))

        stream1_msg = 'next_tuple stream1 ' + single
        self.log(stream1_msg)
        self.emit(tup=(single, ), stream="stream1")

        stream2_msg = 'next_tuple stream2 ' + left + ' ' + right
        self.log(stream2_msg)
        self.emit(tup=(left, right), stream="stream2")
Example #3
0
class TestSane(Topology):
    """Well-formed topology fixture: one spout spec and one bolt spec reading from it."""
    # topology-level settings
    config = {
        "topology.wide.config.1": "value",
        "spout.overriden.config": True,
    }

    # Spout declaring the bare fields "word" and "count" plus a separate
    # 'error_stream'. Its own config repeats the "spout.overriden.config" key
    # with a different value than the topology-level config above.
    spout = HeronComponentSpec(
        None,
        "sp_class",
        True,
        3,
        inputs=None,
        outputs=[
            "word",
            "count",
            Stream(fields=['error_msg'], name='error_stream'),
        ],
        config={
            "spout.specific.config.1": "value",
            "spout.specific.config.2": True,
            "spout.specific.config.3": -12.4,
            "spout.specific.config.4": [1, 2, 3],
            "spout.overriden.config": False,
        },
    )

    # Bolt subscribing to the spout itself (shuffle grouping) and to the
    # spout's 'error_stream' (all grouping).
    bolt = HeronComponentSpec(
        None,
        "bl_class",
        False,
        4,
        inputs={
            spout: Grouping.SHUFFLE,
            spout['error_stream']: Grouping.ALL,
        },
    )
Example #4
0
    def test_constructor(self):
        """Verify Stream construction: defaults, explicit values and argument checks."""
        # fields only -> the stream id falls back to "default"
        unnamed = Stream(fields=['word', 'count'])
        self.assertEqual(unnamed.fields, ['word', 'count'])
        self.assertEqual(unnamed.stream_id, "default")

        # an explicit name becomes the stream id
        named = Stream(fields=['error', 'message'], name='error_stream')
        self.assertEqual(named.fields, ['error', 'message'])
        self.assertEqual(named.stream_id, "error_stream")

        # no arguments at all -> empty field list on the "default" stream
        bare = Stream()
        self.assertEqual(bare.fields, [])
        self.assertEqual(bare.stream_id, "default")

        # fields is neither a list, a tuple nor None
        self.assertRaises(TypeError, Stream, fields={"key": "value"})

        # fields holds a non-string entry
        self.assertRaises(TypeError, Stream, fields=["hello", 123, "world"])

        # the stream name has to be a string
        for bad_name in (True, None):
            self.assertRaises(TypeError, Stream,
                              fields=["hello", "world"], name=bad_name)
Example #5
0
  def test_get_out_streamids(self):
    """Check ``get_out_streamids()`` for unset, malformed and mixed outputs."""
    # outputs never assigned -> empty set of stream ids
    no_outputs = HeronComponentSpec("spout", "class", True, 1)
    self.assertEqual(no_outputs.get_out_streamids(), set())

    # a plain string is neither a list nor a tuple
    string_outputs = HeronComponentSpec("spout", "class", True, 1)
    string_outputs.outputs = "string"
    self.assertRaises(TypeError, string_outputs.get_out_streamids)

    # bare field names plus two Stream objects -> both stream ids are reported
    mixed = HeronComponentSpec("spout", "class", True, 1)
    mixed.outputs = [
        "string",
        "hello",
        Stream(fields=["abc", "def"], name="another_stream"),
        Stream(fields=["another", "default"], name="default"),
    ]
    stream_ids = mixed.get_out_streamids()
    self.assertEqual(stream_ids, {"default", "another_stream"})
Example #6
0
    def test_get_item(self):
        """Check ``spec[stream_id]`` for named specs, unnamed specs and unknown ids."""
        def mixed_outputs():
            # a brand-new list (with brand-new Stream objects) for each spec
            return [
                "string", "hello",
                Stream(fields=["abc", "def"], name="another_stream"),
                Stream(fields=["another", "default"], name="default"),
            ]

        # name is set: the resulting id refers to the component by name
        named = HeronComponentSpec("spout", "class", True, 1)
        named.outputs = mixed_outputs()
        self.assertEqual(named['another_stream'],
                         GlobalStreamId("spout", "another_stream"))

        # name is not set: the resulting id refers to the spec object itself
        unnamed = HeronComponentSpec(None, "class", True, 1)
        unnamed.outputs = mixed_outputs()
        self.assertEqual(unnamed['default'],
                         GlobalStreamId(unnamed, "default"))

        # asking for a stream id that was never declared
        unregistered = HeronComponentSpec(None, "class", True, 1)
        unregistered.outputs = mixed_outputs()
        with self.assertRaises(ValueError):
            unregistered['non_existent_stream']
Example #7
0
class MultiStreamSpout(Spout):
    """Spout cycling through a fixed word list, with an additional 'error' stream.

    Each call to ``next_tuple()`` emits one word without naming a stream; on
    every 100000th emission a fixed message is also emitted on 'error'.
    """
    # output field declarer: the bare field 'word' plus a named 'error' stream
    outputs = [
        'word',
        Stream(fields=['error_msg'], name='error'),
    ]

    def initialize(self, config, context):
        """Reset the emission counter, build the word cycle and log config/context."""
        self.logger.info("In initialize() of WordSpout")

        self.emit_count = 0
        self.words = cycle(["hello", "bye", "good", "bad", "heron", "storm"])

        self.logger.info(f"Component-specific config: \n{str(config)}")
        self.logger.info(f"Context: \n{str(context)}")

    def next_tuple(self):
        """Emit the next word; every 100000 emissions also emit on 'error'."""
        self.emit([next(self.words)])
        self.emit_count += 1

        # nothing more to do unless a multiple of 100000 has just been reached
        if self.emit_count % 100000 != 0:
            return

        self.logger.info(f"Emitted {str(self.emit_count)}")
        self.logger.info("Emitting to error stream")
        self.emit(["test error message"], stream='error')
Example #8
0
    def test_sanitize_inputs(self):
        """Cover ``_sanitize_inputs()`` for every accepted input form and its failures.

        ``_sanitize_inputs()`` should only be called after the HeronComponentSpec's
        name attribute is set.
        """
        # inputs of an unsupported type (valid ones are dict, list, tuple or None)
        for unsupported in ("string", 100):
            rejected = HeronComponentSpec("name", "classpath", True, 1,
                                          inputs=unsupported)
            self.assertRaises(TypeError, rejected._sanitize_inputs)

        # dict <HeronComponentSpec -> Grouping>: a spec key stands for its "default" stream
        upstream = HeronComponentSpec("spout", "sp_clspath", True, 1)
        downstream = HeronComponentSpec("bolt", "bl_clspath", False, 1,
                                        inputs={upstream: Grouping.SHUFFLE})
        self.assertEqual(
            downstream._sanitize_inputs(),
            {GlobalStreamId("spout", "default"): Grouping.SHUFFLE})

        # dict keyed by a specific stream looked up on the upstream spec
        upstream = HeronComponentSpec("spout", "sp_clspath", True, 1)
        upstream.outputs = [Stream(name='another_stream')]
        downstream = HeronComponentSpec(
            "bolt", "bl_clspath", False, 1,
            inputs={upstream['another_stream']: Grouping.ALL})
        self.assertEqual(
            downstream._sanitize_inputs(),
            {GlobalStreamId("spout", "another_stream"): Grouping.ALL})

        # dict key is a spec whose name attribute is not set
        nameless = HeronComponentSpec(None, "sp_clspath", True, 1)
        downstream = HeronComponentSpec("bolt", "bl_clspath", False, 1,
                                        inputs={nameless: Grouping.ALL})
        self.assertRaises(RuntimeError, downstream._sanitize_inputs)

        # dict <GlobalStreamId -> Grouping> comes back unchanged
        by_stream_id = {
            GlobalStreamId("some_spout", "some_stream"): Grouping.NONE,
            GlobalStreamId("another_spout", "default"): Grouping.fields(['word', 'count']),
        }
        downstream = HeronComponentSpec("bolt", "classpath", False, 1,
                                        inputs=by_stream_id)
        self.assertEqual(downstream._sanitize_inputs(), by_stream_id)

        # list of HeronComponentSpec: each one is subscribed with SHUFFLE grouping
        first_spout = HeronComponentSpec("spout1", "sp1_cls", True, 1)
        second_spout = HeronComponentSpec("spout2", "sp2_cls", True, 1)
        downstream = HeronComponentSpec("bolt", "bl_cls", False, 1,
                                        inputs=[first_spout, second_spout])
        self.assertEqual(
            downstream._sanitize_inputs(),
            {
                GlobalStreamId("spout1", "default"): Grouping.SHUFFLE,
                GlobalStreamId("spout2", "default"): Grouping.SHUFFLE,
            })

        # list entry is a spec whose name attribute is not set
        nameless = HeronComponentSpec(None, "sp_clspath", True, 1)
        downstream = HeronComponentSpec("bolt", "bl_clspath", False, 1,
                                        inputs=[nameless])
        self.assertRaises(RuntimeError, downstream._sanitize_inputs)

        # list of GlobalStreamId: every id is paired with SHUFFLE grouping
        stream_ids = [
            GlobalStreamId("spout1", "default"),
            GlobalStreamId("spout2", "some_stream"),
        ]
        downstream = HeronComponentSpec("bolt", "bl_cls", False, 1,
                                        inputs=stream_ids)
        self.assertEqual(
            downstream._sanitize_inputs(),
            {stream_id: Grouping.SHUFFLE for stream_id in stream_ids})

        # list of neither GlobalStreamId nor HeronComponentSpec
        junk = [None, 123, "string", [GlobalStreamId("sp", "default")]]
        downstream = HeronComponentSpec("bolt", "bl_cls", False, 1, inputs=junk)
        self.assertRaises(ValueError, downstream._sanitize_inputs)
class IntegrationTestSpout(Spout):
    """Base spout for integration tests.

    Every spout of an integration test topology is an instance of this class
    that delegates to the user's spout. It caps how many times the user's
    ``next_tuple()`` is invoked, counts emitted tuples that have not yet been
    acked or failed, and emits a terminal tuple on the control stream once the
    cap is exhausted and no tuple is pending.
    """
    # control stream carrying the terminal marker
    outputs = [
        Stream(fields=[integ_const.INTEGRATION_TEST_TERMINAL],
               name=integ_const.INTEGRATION_TEST_CONTROL_STREAM_ID)
    ]

    @classmethod
    def spec(cls,
             name,
             par,
             config,
             user_spout_classpath,
             user_output_fields=None):
        """Build the HeronComponentSpec for a wrapped user spout.

        Args:
            name: component name passed through to HeronComponentSpec.
            par: parallelism passed through to HeronComponentSpec.
            config: mapping of component config, or None. It is shallow-copied
                before the user spout classpath is stored, so the caller's
                object is never modified.
            user_spout_classpath: stored in the config under
                ``integ_const.USER_SPOUT_CLASSPATH`` and read in ``initialize()``.
            user_output_fields: optional iterable of extra output declarations
                appended after this class's control stream.

        Returns:
            A HeronComponentSpec with ``is_spout=True`` pointing at this class.
        """
        python_class_path = "%s.%s" % (cls.__module__, cls.__name__)

        # work on a private copy: writing into the caller's mapping would leak
        # the classpath entry into whatever else shares that object
        config = {} if config is None else copy.copy(config)
        config[integ_const.USER_SPOUT_CLASSPATH] = user_spout_classpath

        # list() always yields a new, extendable list, so cls.outputs is left
        # untouched even when a subclass declares it as a tuple
        _outputs = list(cls.outputs)
        if user_output_fields is not None:
            _outputs.extend(user_output_fields)
        return HeronComponentSpec(name,
                                  python_class_path,
                                  is_spout=True,
                                  par=par,
                                  inputs=None,
                                  outputs=_outputs,
                                  config=config)

    def initialize(self, config, context):
        """Load and instantiate the user's spout, then initialize it.

        Raises:
            RuntimeError: if the config holds no user spout classpath.
        """
        user_spout_classpath = config.get(integ_const.USER_SPOUT_CLASSPATH,
                                          None)
        if user_spout_classpath is None:
            raise RuntimeError(
                "User defined integration test spout was not found")
        user_spout_cls = self._load_user_spout(context.get_topology_pex_path(),
                                               user_spout_classpath)
        # the user's spout emits through this instance (see emit())
        self.user_spout = user_spout_cls(delegate=self)

        self.max_executions = config.get(integ_const.USER_MAX_EXECUTIONS,
                                         integ_const.MAX_EXECUTIONS)
        # NOTE(review): assert is stripped under -O; kept as-is so the exception
        # type seen by existing callers does not change
        assert isinstance(self.max_executions, int) and self.max_executions > 0
        Log.info("Max executions: %d" % self.max_executions)
        # number of tuples emitted via emit() that were not yet acked/failed
        self.tuples_to_complete = 0

        self.user_spout.initialize(config, context)

    @staticmethod
    def _load_user_spout(pex_file, classpath):
        """Load ``pex_file`` and return the class found at ``classpath``."""
        pex_loader.load_pex(pex_file)
        cls = pex_loader.import_and_get_class(pex_file, classpath)
        return cls

    @property
    def is_done(self):
        """True once the remaining execution budget has reached zero."""
        return self.max_executions == 0

    def next_tuple(self):
        """Run the user's ``next_tuple()`` once, unless the budget is used up."""
        if self.is_done:
            return

        self.max_executions -= 1
        Log.info("max executions: %d" % self.max_executions)

        self.user_spout.next_tuple()

        # this call may have consumed the last execution
        if self.is_done:
            self._emit_terminal_if_needed()
            Log.info("This topology is finished.")

    def ack(self, tup_id):
        """Record a completed tuple and forward the ack to the user's spout.

        Tuples carrying the mock message id were tagged by ``emit()`` rather
        than by the user, so they are not forwarded.
        """
        Log.info("Received an ack with tuple id: %s" % str(tup_id))
        self.tuples_to_complete -= 1
        if tup_id != integ_const.INTEGRATION_TEST_MOCK_MESSAGE_ID:
            self.user_spout.ack(tup_id)
        self._emit_terminal_if_needed()

    def fail(self, tup_id):
        """Record a failed tuple and forward the failure to the user's spout.

        Tuples carrying the mock message id were tagged by ``emit()`` rather
        than by the user, so they are not forwarded.
        """
        Log.info("Received a fail message with tuple id: %s" % str(tup_id))
        self.tuples_to_complete -= 1
        if tup_id != integ_const.INTEGRATION_TEST_MOCK_MESSAGE_ID:
            self.user_spout.fail(tup_id)
        self._emit_terminal_if_needed()

    def emit(self,
             tup,
             tup_id=None,
             stream=Stream.DEFAULT_STREAM_ID,
             direct_task=None,
             need_task_ids=None):
        """Emit from this integration test spout.

        Overridden method which will be called when the user's spout calls
        emit(). Every tuple is sent with a tuple id: the caller's id when one
        is given, otherwise the mock message id.
        """
        # Every emit routed through this override counts as pending. The
        # terminal tuple is sent with super().emit() directly and is therefore
        # not counted.
        self.tuples_to_complete += 1

        if tup_id is None:
            Log.info("Add tup_id for tuple: %s" % str(tup))
            _tup_id = integ_const.INTEGRATION_TEST_MOCK_MESSAGE_ID
        else:
            _tup_id = tup_id

        super(IntegrationTestSpout, self).emit(tup, _tup_id, stream,
                                               direct_task, need_task_ids)

    def _emit_terminal_if_needed(self):
        """Emit the terminal tuple once the spout is done and nothing is pending."""
        Log.info("is_done: %s, tuples_to_complete: %s" %
                 (self.is_done, self.tuples_to_complete))
        if self.is_done and self.tuples_to_complete == 0:
            Log.info("Emitting terminals to downstream")
            super(IntegrationTestSpout, self).emit(
                [integ_const.INTEGRATION_TEST_TERMINAL],
                stream=integ_const.INTEGRATION_TEST_CONTROL_STREAM_ID)
Example #10
0
class StreamletSpoutBase(object):
    """Base class that declares a single output stream for its subclasses."""
    # output declarer: one stream named 'output' with the single field '_output_'
    outputs = [
        Stream(name='output', fields=['_output_']),
    ]
Example #11
0
class IntegrationTestBolt(Bolt):
    """Base bolt for integration tests.

    Every bolt of an integration test topology is an instance of this class
    that delegates to the user's bolt. It counts the terminal tuples still
    expected on the control stream and, once all have arrived, emits its own
    terminal tuple on that stream.
    """
    # control stream carrying the terminal marker
    outputs = [
        Stream(fields=[integ_const.INTEGRATION_TEST_TERMINAL],
               name=integ_const.INTEGRATION_TEST_CONTROL_STREAM_ID)
    ]

    @classmethod
    def spec(cls,
             name,
             par,
             inputs,
             config,
             user_bolt_classpath,
             user_output_fields=None):
        """Build the HeronComponentSpec for a wrapped user bolt.

        Args:
            name: component name passed through to HeronComponentSpec.
            par: parallelism passed through to HeronComponentSpec.
            inputs: input declaration passed through to HeronComponentSpec.
            config: mapping of component config, or None. It is shallow-copied
                before the user bolt classpath is stored, so the caller's
                object is never modified.
            user_bolt_classpath: stored in the config under
                ``integ_const.USER_BOLT_CLASSPATH`` and read in ``initialize()``.
            user_output_fields: optional iterable of extra output declarations
                appended after this class's control stream.

        Returns:
            A HeronComponentSpec with ``is_spout=False`` pointing at this class.
        """
        python_class_path = "%s.%s" % (cls.__module__, cls.__name__)

        # work on a private copy: writing into the caller's mapping would leak
        # the classpath entry into whatever else shares that object
        config = {} if config is None else copy.copy(config)
        config[integ_const.USER_BOLT_CLASSPATH] = user_bolt_classpath

        # list() always yields a new, extendable list, so cls.outputs is left
        # untouched even when a subclass declares it as a tuple
        _outputs = list(cls.outputs)
        if user_output_fields is not None:
            _outputs.extend(user_output_fields)
        return HeronComponentSpec(name,
                                  python_class_path,
                                  is_spout=False,
                                  par=par,
                                  inputs=inputs,
                                  outputs=_outputs,
                                  config=config)

    def initialize(self, config, context):
        """Load the user's bolt, count expected terminals and initialize the bolt.

        Raises:
            RuntimeError: if the config holds no user bolt classpath.
        """
        user_bolt_classpath = config.get(integ_const.USER_BOLT_CLASSPATH, None)
        if user_bolt_classpath is None:
            raise RuntimeError("User defined integration bolt was not found")
        user_bolt_cls = self._load_user_bolt(context.get_topology_pex_path(),
                                             user_bolt_classpath)
        # the user's bolt emits through this instance (see emit())
        self.user_bolt = user_bolt_cls(delegate=self)

        # Distinct upstream component names; each key of get_this_sources() is a
        # topology_pb2.StreamId protobuf message.
        upstream_components = {
            stream_id.component_name
            for stream_id in context.get_this_sources().keys()
        }
        # one terminal is expected per task of every upstream component
        self.terminal_to_receive = sum(
            len(context.get_component_tasks(comp_name))
            for comp_name in upstream_components)

        self.tuple_received = 0
        self.tuples_processed = 0
        self.current_tuple_processing = None

        Log.info("Terminals to receive: %d" % self.terminal_to_receive)
        self.user_bolt.initialize(config, context)

    @staticmethod
    def _load_user_bolt(pex_file, classpath):
        """Load ``pex_file`` and return the class found at ``classpath``."""
        pex_loader.load_pex(pex_file)
        cls = pex_loader.import_and_get_class(pex_file, classpath)
        return cls

    @property
    def is_done(self):
        """True once no more terminal tuples are expected."""
        return self.terminal_to_receive == 0

    def process(self, tup):
        """Handle one incoming tuple.

        Control-stream tuples decrement the expected terminal count; when it
        reaches zero a BatchBolt user bolt gets ``finish_batch()`` and a
        terminal is emitted downstream. Any other tuple is handed to the user's
        bolt and then acked.
        """
        self.tuple_received += 1
        stream_id = tup.stream

        Log.info("Received a tuple: %s from %s" % (tup, stream_id))
        if stream_id == integ_const.INTEGRATION_TEST_CONTROL_STREAM_ID:
            self.terminal_to_receive -= 1
            if self.is_done:
                if isinstance(self.user_bolt, BatchBolt):
                    Log.info("Invoke bolt to do finish batch")
                    self.user_bolt.finish_batch()

                Log.info("Populating the terminals to downstream")
                # bypass the emit() override below and use the base implementation
                super(IntegrationTestBolt, self).emit(
                    [integ_const.INTEGRATION_TEST_TERMINAL],
                    stream=integ_const.INTEGRATION_TEST_CONTROL_STREAM_ID)
        else:
            # remembered so that emit(None) can re-emit the tuple being processed
            self.current_tuple_processing = tup
            self.user_bolt.process(tup)
            self.ack(tup)

    def emit(self,
             tup,
             stream=Stream.DEFAULT_STREAM_ID,
             anchors=None,
             direct_task=None,
             need_task_ids=False):
        """Emit from this integration test bolt.

        When ``tup`` is None, the tuple currently being processed is emitted
        instead (converted to a list); otherwise ``tup`` is emitted as given.
        """
        if tup is None:
            super(IntegrationTestBolt,
                  self).emit(list(self.current_tuple_processing),
                             stream=stream,
                             anchors=anchors,
                             direct_task=direct_task,
                             need_task_ids=need_task_ids)
        else:
            super(IntegrationTestBolt, self).emit(tup, stream, anchors,
                                                  direct_task, need_task_ids)

    def ack(self, tup):
        """Ack ``tup`` unless every received tuple has already been acked or failed."""
        Log.info("Trying to do an ack. tuples processed: %d, received: %d" %
                 (self.tuples_processed, self.tuple_received))
        if self.tuples_processed < self.tuple_received:
            super(IntegrationTestBolt, self).ack(tup)
            self.tuples_processed += 1

    def fail(self, tup):
        """Fail ``tup`` unless every received tuple has already been acked or failed."""
        Log.info("Trying to do a fail. tuples processed: %d, received: %d" %
                 (self.tuples_processed, self.tuple_received))
        if self.tuples_processed < self.tuple_received:
            super(IntegrationTestBolt, self).fail(tup)
            self.tuples_processed += 1
Example #12
0
class DslSpoutBase(object):
    """DslSpoutBase: declares a single output stream for its subclasses."""
    # output declarer: one stream named 'output' with the single field '_output_'
    outputs = [
        Stream(name='output', fields=['_output_']),
    ]
Example #13
0
class StreamletBoltBase:
    """Base class that declares a single output stream for its subclasses."""
    # output declarer: one stream named 'output' with the single field '_output_'
    outputs = [
        Stream(name='output', fields=['_output_']),
    ]