Ejemplo n.º 1
0
class TestSane(Topology):
    """Topology declaring one spout and one bolt together with topology-wide config."""
    # Topology-wide configuration. "spout.overriden.config" is set again, with
    # the opposite value, in the spout's own config below.
    # NOTE(review): presumably this pair exists to exercise component-level
    # override of a topology-level value -- confirm against the consuming test.
    config = {
        "topology.wide.config.1": "value",
        "spout.overriden.config": True
    }
    # Spout spec created without a name (first argument is None). It declares
    # two bare field names plus an explicitly named 'error_stream' stream, and
    # a component config holding str, bool, float and list values.
    spout = HeronComponentSpec(None,
                               "sp_class",
                               True,
                               3,
                               inputs=None,
                               outputs=[
                                   "word", "count",
                                   Stream(fields=['error_msg'],
                                          name='error_stream')
                               ],
                               config={
                                   "spout.specific.config.1": "value",
                                   "spout.specific.config.2": True,
                                   "spout.specific.config.3": -12.4,
                                   "spout.specific.config.4": [1, 2, 3],
                                   "spout.overriden.config": False
                               })
    # Bolt spec, also unnamed. Its inputs map the spout itself to SHUFFLE
    # grouping and the spout's 'error_stream' to ALL grouping.
    bolt = HeronComponentSpec(None,
                              "bl_class",
                              False,
                              4,
                              inputs={
                                  spout: Grouping.SHUFFLE,
                                  spout['error_stream']: Grouping.ALL
                              })
Ejemplo n.º 2
0
class MapBolt(Bolt, StatefulComponent):
    """Bolt that applies a configured callable to the first value of each tuple.

    The callable is read from the component config under ``MapBolt.FUNCTION``;
    its result is emitted as a one-element tuple on the 'output' stream.
    """
    # Single declared output stream, named 'output', with one field.
    outputs = [Stream(name='output', fields=['_output_'])]
    # Config key under which the map callable must be supplied.
    FUNCTION = 'function'

    def initState(self, stateful_state):
        """No-op: MapBolt does not have any state."""
        return None

    def preSave(self, checkpoint_id):
        """No-op: MapBolt does not have any state."""
        return None

    def initialize(self, config, context):
        """Reset the counters and load the map callable from ``config``.

        Raises:
            RuntimeError: if the callable is missing from ``config`` or is not
                callable.
        """
        self.logger.debug("MapBolt's Component-specific config: \n%s" %
                          str(config))
        self.processed = 0
        self.emitted = 0

        if MapBolt.FUNCTION not in config:
            raise RuntimeError("MapBolt needs to be passed map function")

        self.map_function = config[MapBolt.FUNCTION]
        if not callable(self.map_function):
            raise RuntimeError("Map function has to be callable")

    def process(self, tup):
        """Map the tuple's first value, emit the result, then ack the tuple."""
        mapped = self.map_function(tup.values[0])
        self.emit([mapped], stream='output')
        self.processed += 1
        self.emitted += 1
        self.ack(tup)
Ejemplo n.º 3
0
class JoinBolt(SlidingWindowBolt):
    """Sliding-window bolt that groups the (key, value) pairs of a window by key.

    For every window, the first value of each tuple must be an iterable of
    length 2, read as ``(key, value)``. One tuple ``[(key, [values...])]`` is
    emitted on the 'output' stream per distinct key.
    """
    # output declarer
    outputs = [Stream(fields=['_output_'], name='output')]
    # Aliases for the window config keys defined by SlidingWindowBolt.
    WINDOWDURATION = SlidingWindowBolt.WINDOW_DURATION_SECS
    SLIDEINTERVAL = SlidingWindowBolt.WINDOW_SLIDEINTERVAL_SECS

    @staticmethod
    def _add(key, value, mymap):
        """Append ``value`` to the list stored under ``key``, creating it if absent."""
        mymap.setdefault(key, []).append(value)

    def processWindow(self, window_config, tuples):
        """Group the window's (key, value) pairs by key and emit one tuple per key.

        Args:
            window_config: window configuration; not used by this method.
            tuples: the tuples that fall in the current window.

        Raises:
            RuntimeError: if a tuple's first value is not an iterable of
                length 2.
        """
        # The ABC aliases were removed from ``collections`` in Python 3.10;
        # import from ``collections.abc`` and fall back for old interpreters.
        try:
            from collections.abc import Iterable
        except ImportError:  # pragma: no cover - legacy interpreters only
            from collections import Iterable

        # our temporary map: key -> list of values seen in this window
        mymap = {}
        for tup in tuples:
            userdata = tup.values[0]
            if not isinstance(userdata, Iterable) or len(userdata) != 2:
                raise RuntimeError("Join tuples must be iterable of length 2")
            self._add(userdata[0], userdata[1], mymap)
        for (key, values) in mymap.items():
            self.emit([(key, values)], stream='output')
Ejemplo n.º 4
0
class SampleBolt(Bolt, StatefulComponent):
    """Bolt configured with a sampling fraction; processing is not implemented yet.

    ``initialize`` validates the fraction supplied under
    ``SampleBolt.FRACTION``. ``process`` counts and acks the tuple and then
    raises, because sampling itself is not implemented.
    """
    # output declarer
    outputs = [Stream(fields=['_output_'], name='output')]
    # Config key under which the sample fraction must be supplied.
    FRACTION = 'fraction'

    def initState(self, stateful_state):
        # sample does not have any state
        pass

    def preSave(self, checkpoint_id):
        # sample does not have any state
        pass

    def initialize(self, config, context):
        """Reset the counters and load the sample fraction from ``config``.

        Raises:
            RuntimeError: if the fraction is missing, is not a float, or is
                greater than 1.0.
        """
        self.logger.debug("SampleBolt's Component-specific config: \n%s" %
                          str(config))
        self.processed = 0
        self.emitted = 0
        if SampleBolt.FRACTION not in config:
            # The message used to mention a "filter function", which this
            # bolt does not take; the missing item is the sample fraction.
            raise RuntimeError("SampleBolt needs to be passed sample fraction")
        self.sample_fraction = config[SampleBolt.FRACTION]
        if not isinstance(self.sample_fraction, float):
            raise RuntimeError("Sample fraction has to be a float")
        if self.sample_fraction > 1.0:
            raise RuntimeError("Sample fraction has to be <= 1.0")

    def process(self, tup):
        """Count and ack ``tup``, then raise because sampling is unimplemented.

        Raises:
            RuntimeError: always.
        """
        self.processed += 1
        self.ack(tup)
        raise RuntimeError("SampleBolt not fully functional")
Ejemplo n.º 5
0
  def test_constructor(self):
    """Checks Stream construction: defaults, explicit values and rejected arguments."""
    # sane: (constructor kwargs, expected fields, expected stream id)
    accepted = [
        ({'fields': ['word', 'count']}, ['word', 'count'], "default"),
        ({'fields': ['error', 'message'], 'name': 'error_stream'},
         ['error', 'message'], "error_stream"),
        ({}, [], "default"),
    ]
    for kwargs, expected_fields, expected_id in accepted:
      stream = Stream(**kwargs)
      self.assertEqual(stream.fields, expected_fields)
      self.assertEqual(stream.stream_id, expected_id)

    rejected = [
        # fields not list, tuple nor None
        {'fields': {"key": "value"}},
        # fields contains non-string
        {'fields': ["hello", 123, "world"]},
        # stream name not string
        {'fields': ["hello", "world"], 'name': True},
        {'fields': ["hello", "world"], 'name': None},
    ]
    for kwargs in rejected:
      with self.assertRaises(TypeError):
        Stream(**kwargs)
Ejemplo n.º 6
0
    def test_get_out_streamids(self):
        """Checks get_out_streamids() for absent, invalid and mixed outputs."""
        # outputs is none -> empty set of stream ids
        without_outputs = HeronComponentSpec("spout", "class", True, 1)
        self.assertEqual(without_outputs.get_out_streamids(), set())

        # outputs neither list nor tuple -> rejected
        invalid_outputs = HeronComponentSpec("spout", "class", True, 1)
        invalid_outputs.outputs = "string"
        with self.assertRaises(TypeError):
            invalid_outputs.get_out_streamids()

        # outputs sane: bare names plus two Stream objects yield two stream ids
        mixed_outputs = HeronComponentSpec("spout", "class", True, 1)
        mixed_outputs.outputs = [
            "string",
            "hello",
            Stream(fields=["abc", "def"], name="another_stream"),
            Stream(fields=["another", "default"], name="default"),
        ]
        self.assertEqual(mixed_outputs.get_out_streamids(),
                         {"default", "another_stream"})
Ejemplo n.º 7
0
    def test_get_item(self):
        """Checks spec[stream_id] for named specs, unnamed specs and unknown ids."""

        def fresh_outputs():
            # A new list per spec, so no spec shares its outputs with another.
            return [
                "string",
                "hello",
                Stream(fields=["abc", "def"], name="another_stream"),
                Stream(fields=["another", "default"], name="default"),
            ]

        # HeronComponentSpec name set -> the id carries the component name
        named = HeronComponentSpec("spout", "class", True, 1)
        named.outputs = fresh_outputs()
        self.assertEqual(named['another_stream'],
                         GlobalStreamId("spout", "another_stream"))

        # HeronComponentSpec name not set -> the id carries the spec itself
        unnamed = HeronComponentSpec(None, "class", True, 1)
        unnamed.outputs = fresh_outputs()
        self.assertEqual(unnamed['default'], GlobalStreamId(unnamed, "default"))

        # stream id not registered
        unregistered = HeronComponentSpec(None, "class", True, 1)
        unregistered.outputs = fresh_outputs()
        with self.assertRaises(ValueError):
            unregistered['non_existent_stream']
Ejemplo n.º 8
0
    def test_sanitize_outputs(self):
        """Checks _sanitize_outputs() for None, invalid, all-string and mixed outputs."""
        # outputs is None (no argument to outputs)
        untouched = HeronComponentSpec("spout", "class", True, 1)
        self.assertIsNone(untouched._sanitize_outputs())

        # rejected values: first is neither list nor tuple, second is a list
        # containing a non-string and non-Stream object
        for bad_outputs in ("string", ["string", False, 123]):
            broken = HeronComponentSpec("spout", "class", True, 1)
            broken.outputs = bad_outputs
            with self.assertRaises(TypeError):
                broken._sanitize_outputs()

        # output list is all string -> everything lands on the default stream
        plain = HeronComponentSpec("spout", "class", True, 1)
        plain.outputs = ["string", "hello", "heron"]
        self.assertEqual(plain._sanitize_outputs(),
                         {"default": ["string", "hello", "heron"]})

        # output list has mixed stream -> bare names merge with the Stream
        # named "default"; other streams keep their own fields
        mixed = HeronComponentSpec("spout", "class", True, 1)
        mixed.outputs = [
            "string",
            "hello",
            Stream(fields=["abc", "def"], name="another_stream"),
            Stream(fields=["another", "default"], name="default"),
        ]
        expected = {
            "default": ["string", "hello", "another", "default"],
            "another_stream": ["abc", "def"],
        }
        self.assertEqual(mixed._sanitize_outputs(), expected)
Ejemplo n.º 9
0
class MultiStreamSpout(Spout):
    """Spout that emits words from a fixed list repeatedly, on two streams.

    Every call emits the next word on the default stream; on every 100000th
    emit an extra message is also sent on the 'error' stream.
    """
    # output field declarer: one bare field plus a named 'error' stream
    outputs = ['word', Stream(name='error', fields=['error_msg'])]

    def initialize(self, config, context):
        """Set up the endless word iterator and the emit counter."""
        self.logger.info("In initialize() of WordSpout")
        self.emit_count = 0
        self.words = cycle(["hello", "bye", "good", "bad", "heron", "storm"])

        self.logger.info("Component-specific config: \n%s" % str(config))
        self.logger.info("Context: \n%s" % str(context))

    def next_tuple(self):
        """Emit the next word; every 100000 emits also write to the error stream."""
        self.emit([next(self.words)])
        self.emit_count += 1

        if self.emit_count % 100000 != 0:
            return

        self.logger.info("Emitted %s" % str(self.emit_count))
        self.logger.info("Emitting to error stream")
        self.emit(["test error message"], stream='error')
Ejemplo n.º 10
0
class FixedLinesSpout(Spout):
    """FixedLinesSpout: emits lines from a fixed set of static lines, over and over."""
    outputs = [Stream(name='output', fields=['_output_'])]

    # pylint: disable=unused-argument
    def initialize(self, config, context):
        """Set up the static lines, the read position and the counters."""
        self.logger.info("Initializing FixedLinesSpout with the following")
        self.logger.info("Component-specific config: \n%s" % str(config))
        self.index = 0
        self.emit_count = 0
        self.ack_count = 0
        self.fail_count = 0
        self.words = [
            "Mary had a little lamb",
            "Humpy Dumpy sat on a wall",
            "Here we round the Moulberry bush",
        ]

    def _get_next_line(self):
        """Return the line at the current position and advance, wrapping at the end."""
        line = self.words[self.index]
        self.index = (self.index + 1) % len(self.words)
        return line

    def next_tuple(self):
        """Emit the next line on the 'output' stream and count it."""
        line = self._get_next_line()
        self.emit([line], stream='output')
        self.emit_count += 1

    def ack(self, tup_id):
        """Count an acked tuple and log its id."""
        self.ack_count += 1
        self.logger.debug("Acked tuple %s" % str(tup_id))

    def fail(self, tup_id):
        """Count a failed tuple and log its id."""
        self.fail_count += 1
        self.logger.debug("Failed tuple %s" % str(tup_id))
Ejemplo n.º 11
0
class RepartitionBolt(Bolt, StatefulComponent):
    """Bolt that forwards each tuple's values unchanged on the 'output' stream."""
    # output declarer
    outputs = [Stream(name='output', fields=['_output_'])]

    def initState(self, stateful_state):
        """No-op: repartition does not have any state."""
        return None

    def preSave(self, checkpoint_id):
        """No-op: repartition does not have any state."""
        return None

    def initialize(self, config, context):
        """Log the component config and reset the counters."""
        self.logger.debug("RepartitionBolt's Component-specific config: \n%s" %
                          str(config))
        self.processed, self.emitted = 0, 0

    def process(self, tup):
        """Re-emit the tuple's values as they are, then ack the tuple."""
        self.emit(tup.values, stream='output')
        self.processed += 1
        self.emitted += 1
        self.ack(tup)
Ejemplo n.º 12
0
class ReduceByKeyAndWindowBolt(SlidingWindowBolt):
  """Sliding-window bolt that reduces the values of each key within a window.

  For every window, the first value of each tuple must be an iterable of
  length 2, read as ``(key, value)``. The values of each key are folded left to
  right with the configured reduce function, and one tuple ``[(key, result)]``
  is emitted on the 'output' stream per distinct key.
  """
  # output declarer
  outputs = [Stream(fields=['_output_'], name='output')]
  # Config key under which the reduce callable must be supplied.
  FUNCTION = 'function'
  # Aliases for the window config keys defined by SlidingWindowBolt.
  WINDOWDURATION = SlidingWindowBolt.WINDOW_DURATION_SECS
  SLIDEINTERVAL = SlidingWindowBolt.WINDOW_SLIDEINTERVAL_SECS

  def initialize(self, config, context):
    """Run the base initialization and load the reduce callable from ``config``.

    Raises:
      RuntimeError: if the callable is missing from ``config`` or is not
        callable.
    """
    super(ReduceByKeyAndWindowBolt, self).initialize(config, context)
    if ReduceByKeyAndWindowBolt.FUNCTION not in config:
      raise RuntimeError("FUNCTION not specified in reducebywindow operator")
    self.reduce_function = config[ReduceByKeyAndWindowBolt.FUNCTION]
    if not callable(self.reduce_function):
      raise RuntimeError("Reduce Function has to be callable")

  @staticmethod
  def _add(key, value, mymap):
    """Append ``value`` to the list stored under ``key``, creating it if absent."""
    mymap.setdefault(key, []).append(value)

  def processWindow(self, window_config, tuples):
    """Group the window's (key, value) pairs by key, reduce and emit per key.

    Args:
      window_config: window configuration; not used by this method.
      tuples: the tuples that fall in the current window.

    Raises:
      RuntimeError: if a tuple's first value is not an iterable of length 2.
    """
    # The ABC aliases were removed from ``collections`` in Python 3.10;
    # import from ``collections.abc`` and fall back for old interpreters.
    try:
      from collections.abc import Iterable
    except ImportError:  # pragma: no cover - legacy interpreters only
      from collections import Iterable

    # our temporary map: key -> list of values seen in this window
    mymap = {}
    for tup in tuples:
      userdata = tup.values[0]
      if not isinstance(userdata, Iterable) or len(userdata) != 2:
        raise RuntimeError("ReduceByWindow tuples must be iterable of length 2")
      self._add(userdata[0], userdata[1], mymap)
    for (key, values) in mymap.items():
      result = values[0]
      for value in values[1:]:
        # Keep the returned accumulator. It used to be discarded, so the
        # emitted result was always just the first value of the key.
        result = self.reduce_function(result, value)
      self.emit([(key, result)], stream='output')
Ejemplo n.º 13
0
class IntegrationTestBolt(Bolt):
    """Base bolt for integration test.

    Every bolt of an integration test topology is an instance of this class,
    each delegating to the user's bolt. It counts the terminal markers that
    arrive on the integration test control stream and forwards one downstream
    once all expected markers have been received.
    """
    outputs = [
        Stream(name=integ_const.INTEGRATION_TEST_CONTROL_STREAM_ID,
               fields=[integ_const.INTEGRATION_TEST_TERMINAL])
    ]

    @classmethod
    def spec(cls,
             name,
             par,
             inputs,
             config,
             user_bolt_classpath,
             user_output_fields=None):
        """Build the HeronComponentSpec for this wrapper around a user bolt.

        The user bolt's classpath is stored in ``config`` (which is modified
        in place); ``user_output_fields``, when given, are appended to a copy
        of the class-level outputs.
        """
        config[integ_const.USER_BOLT_CLASSPATH] = user_bolt_classpath

        # work on a copy to avoid modification to cls.outputs
        declared_outputs = copy.copy(cls.outputs)
        if user_output_fields is not None:
            declared_outputs.extend(user_output_fields)

        wrapper_classpath = ".".join((cls.__module__, cls.__name__))
        return HeronComponentSpec(name,
                                  wrapper_classpath,
                                  is_spout=False,
                                  par=par,
                                  inputs=inputs,
                                  outputs=declared_outputs,
                                  config=config)

    def initialize(self, config, context):
        """Load the user bolt, count the expected terminals and initialize it.

        Raises:
            RuntimeError: if the user bolt classpath is not in ``config``.
        """
        classpath = config.get(integ_const.USER_BOLT_CLASSPATH, None)
        if classpath is None:
            raise RuntimeError("User defined integration bolt was not found")
        bolt_cls = self._load_user_bolt(context.get_topology_pex_path(),
                                        classpath)
        self.user_bolt = bolt_cls(delegate=self)

        # each key of get_this_sources() is a topology_pb2.StreamId protobuf
        # message; collect the distinct upstream component names
        upstream_names = {
            source_id.component_name
            for source_id in context.get_this_sources().keys()
        }
        # one terminal is expected from every task of every upstream component
        self.terminal_to_receive = sum(
            len(context.get_component_tasks(component))
            for component in upstream_names)

        self.tuple_received = 0
        self.tuples_processed = 0
        self.current_tuple_processing = None

        Log.info("Terminals to receive: %d" % self.terminal_to_receive)
        self.user_bolt.initialize(config, context)

    @staticmethod
    def _load_user_bolt(pex_file, classpath):
        """Load ``pex_file`` and return the class found at ``classpath``."""
        pex_loader.load_pex(pex_file)
        return pex_loader.import_and_get_class(pex_file, classpath)

    @property
    def is_done(self):
        """True once no further terminals are expected."""
        return self.terminal_to_receive == 0

    def process(self, tup):
        """Route a tuple: data goes to the user bolt, control tuples are counted.

        A tuple from any stream other than the control stream is handed to the
        user bolt and acked. A control tuple decrements the outstanding
        terminal count; when it reaches zero, a BatchBolt user bolt gets
        ``finish_batch()`` and a terminal is emitted downstream.
        """
        self.tuple_received += 1
        stream_id = tup.stream

        Log.info("Received a tuple: %s from %s" % (tup, stream_id))
        if stream_id != integ_const.INTEGRATION_TEST_CONTROL_STREAM_ID:
            self.current_tuple_processing = tup
            self.user_bolt.process(tup)
            self.ack(tup)
            return

        self.terminal_to_receive -= 1
        if not self.is_done:
            return

        if isinstance(self.user_bolt, BatchBolt):
            Log.info("Invoke bolt to do finish batch")
            self.user_bolt.finish_batch()

        Log.info("Populating the terminals to downstream")
        # call the base emit directly, bypassing this class's override
        super(IntegrationTestBolt, self).emit(
            [integ_const.INTEGRATION_TEST_TERMINAL],
            stream=integ_const.INTEGRATION_TEST_CONTROL_STREAM_ID)

    def emit(self,
             tup,
             stream=Stream.DEFAULT_STREAM_ID,
             anchors=None,
             direct_task=None,
             need_task_ids=False):
        """Emit ``tup``; when it is None, emit the tuple currently being processed."""
        if tup is None:
            payload = list(self.current_tuple_processing)
        else:
            payload = tup
        super(IntegrationTestBolt, self).emit(payload,
                                              stream=stream,
                                              anchors=anchors,
                                              direct_task=direct_task,
                                              need_task_ids=need_task_ids)

    def ack(self, tup):
        """Ack ``tup`` unless every received tuple has already been processed."""
        Log.info("Trying to do an ack. tuples processed: %d, received: %d" %
                 (self.tuples_processed, self.tuple_received))
        if self.tuples_processed >= self.tuple_received:
            return
        super(IntegrationTestBolt, self).ack(tup)
        self.tuples_processed += 1

    def fail(self, tup):
        """Fail ``tup`` unless every received tuple has already been processed."""
        Log.info("Trying to do a fail. tuples processed: %d, received: %d" %
                 (self.tuples_processed, self.tuple_received))
        if self.tuples_processed >= self.tuple_received:
            return
        super(IntegrationTestBolt, self).fail(tup)
        self.tuples_processed += 1
Ejemplo n.º 14
0
class PulsarSpout(Spout):
  """PulsarSpout: reads from a pulsar topic

  Messages received from the subscribed topic are passed through a
  deserializer and emitted with the Pulsar message id as the tuple id, so that
  ``ack`` can acknowledge the message on the consumer.
  """

  # pylint: disable=too-many-instance-attributes
  # pylint: disable=no-self-use

  outputs = [Stream(fields=['_output_'], name='output')]

  def default_deserializer(self, msg):
    """Fallback deserializer: return a one-element list holding ``str(msg)``."""
    return [str(msg)]

  # TopologyBuilder uses these constants to set
  # cluster/topicname
  serviceUrl = "PULSAR_SERVICE_URL"
  topicName = "PULSAR_TOPIC"
  receiveTimeoutMs = "PULSAR_RECEIVE_TIMEOUT_MS"
  deserializer = "PULSAR_MESSAGE_DESERIALIZER"

  def initialize(self, config, context):
    """Implements Pulsar Spout's initialize method

    Reads the service URL, topic, timeouts and deserializer from ``config``,
    creates the Pulsar client and subscribes to the topic, using the topology
    name as the subscription name.

    Raises:
      RuntimeError: if the service URL or topic name is missing, or if the
        configured deserializer is not callable.
      Exception: whatever the Pulsar client raises when subscribing fails
        (logged, then re-raised).
    """
    self.logger.info("Initializing PulsarSpout with the following")
    self.logger.info("Component-specific config: \n%s" % str(config))
    self.logger.info("Context: \n%s" % str(context))

    self.emit_count = 0
    self.ack_count = 0
    self.fail_count = 0

    if PulsarSpout.serviceUrl not in config or PulsarSpout.topicName not in config:
      # logger.fatal() only logs. Stop here instead of failing just below
      # with a bare KeyError that hides the actual problem.
      self.logger.fatal("Need to specify both serviceUrl and topicName")
      raise RuntimeError("Need to specify both serviceUrl and topicName")
    self.pulsar_cluster = str(config[PulsarSpout.serviceUrl])
    self.topic = str(config[PulsarSpout.topicName])

    # Timeout handed to the consumer as unacked_messages_timeout_ms: the
    # topology message timeout in at-least-once mode, else 30 seconds.
    mode = config[api_constants.TOPOLOGY_RELIABILITY_MODE]
    if mode == api_constants.TopologyReliabilityMode.ATLEAST_ONCE:
      self.acking_timeout = 1000 * int(config[api_constants.TOPOLOGY_MESSAGE_TIMEOUT_SECS])
    else:
      self.acking_timeout = 30000

    if PulsarSpout.receiveTimeoutMs in config:
      self.receive_timeout_ms = config[PulsarSpout.receiveTimeoutMs]
    else:
      self.receive_timeout_ms = 10

    # Note: the instance attribute set here shadows the class-level config
    # key of the same name; the key stays reachable as PulsarSpout.deserializer.
    if PulsarSpout.deserializer in config:
      self.deserializer = config[PulsarSpout.deserializer]
      if not callable(self.deserializer):
        # Without this raise every next_tuple() would fail and only log.
        self.logger.fatal("Pulsar Message Deserializer needs to be callable")
        raise RuntimeError("Pulsar Message Deserializer needs to be callable")
    else:
      self.deserializer = self.default_deserializer

    # First generate the config
    self.logConfFileName = GenerateLogConfig(context)
    self.logger.info("Generated LogConf at %s" % self.logConfFileName)

    # We currently use the high level consumer api
    # For supporting exactly once, we will need to switch
    # to using lower level Reader api, when it becomes
    # available in python
    self.client = pulsar.Client(self.pulsar_cluster, log_conf_file_path=self.logConfFileName)
    self.logger.info("Setup Client with cluster %s" % self.pulsar_cluster)
    try:
      self.consumer = self.client.subscribe(self.topic, context.get_topology_name(),
                                            consumer_type=pulsar.ConsumerType.Failover,
                                            unacked_messages_timeout_ms=self.acking_timeout)
    except Exception as e:
      # Re-raise after logging: with no consumer, next_tuple() would hit an
      # AttributeError on every call and swallow it at debug level.
      self.logger.fatal("Pulsar client subscription failed: %s" % str(e))
      raise

    self.logger.info("Subscribed to topic %s" % self.topic)

  def next_tuple(self):
    """Receive one message (bounded by the receive timeout) and emit it.

    Receive errors, including timeouts, are logged at debug level and the
    call returns without emitting. Deserialize/emit errors are logged at info
    level.
    """
    try:
      msg = self.consumer.receive(timeout_millis=self.receive_timeout_ms)
    except Exception as e:
      self.logger.debug("Exception during recieve: %s" % str(e))
      return

    try:
      self.emit(self.deserializer(msg.data()), tup_id=msg.message_id())
      self.emit_count += 1
    except Exception as e:
      self.logger.info("Exception during emit: %s" % str(e))

  def ack(self, tup_id):
    """Count the ack and acknowledge the message on the Pulsar consumer."""
    self.ack_count += 1
    self.consumer.acknowledge(tup_id)

  def fail(self, tup_id):
    """Count and log the failure; nothing is sent to the consumer here.

    NOTE(review): presumably redelivery relies on the consumer's
    unacked-message timeout -- confirm.
    """
    self.fail_count += 1
    self.logger.debug("Failed tuple %s" % str(tup_id))
Ejemplo n.º 15
0
class DslBoltBase(object):
    """Base class that declares the single 'output' stream for DSL bolts."""
    # output declarer: one stream named 'output' carrying one field
    outputs = [
        Stream(name='output', fields=['_output_']),
    ]
Ejemplo n.º 16
0
class IntegrationTestSpout(Spout):
    """Base spout for integration test

    Every spout of integration test topology consists of this instance, each
    delegating user's spout. The wrapper limits how many times the user spout's
    ``next_tuple()`` is invoked, tracks emitted tuples that have not yet been
    acked or failed, and emits a terminal marker on the integration test
    control stream once both counts are exhausted.
    """
    outputs = [
        Stream(fields=[integ_const.INTEGRATION_TEST_TERMINAL],
               name=integ_const.INTEGRATION_TEST_CONTROL_STREAM_ID)
    ]

    @classmethod
    def spec(cls,
             name,
             par,
             config,
             user_spout_classpath,
             user_output_fields=None):
        """Build the HeronComponentSpec for this wrapper around a user spout.

        The user spout's classpath is stored in ``config`` (which is modified
        in place); ``user_output_fields``, when given, are appended to a copy
        of the class-level outputs.
        """
        python_class_path = "%s.%s" % (cls.__module__, cls.__name__)

        config[integ_const.USER_SPOUT_CLASSPATH] = user_spout_classpath
        # avoid modification to cls.outputs
        _outputs = copy.copy(cls.outputs)
        if user_output_fields is not None:
            _outputs.extend(user_output_fields)
        return HeronComponentSpec(name,
                                  python_class_path,
                                  is_spout=True,
                                  par=par,
                                  inputs=None,
                                  outputs=_outputs,
                                  config=config)

    def initialize(self, config, context):
        """Load the user spout, read the execution limit and initialize it.

        Raises:
            RuntimeError: if the user spout classpath is not in ``config``, or
                if the configured maximum number of executions is not a
                positive integer.
        """
        user_spout_classpath = config.get(integ_const.USER_SPOUT_CLASSPATH,
                                          None)
        if user_spout_classpath is None:
            raise RuntimeError(
                "User defined integration test spout was not found")
        user_spout_cls = self._load_user_spout(context.get_topology_pex_path(),
                                               user_spout_classpath)
        self.user_spout = user_spout_cls(delegate=self)

        self.max_executions = config.get(integ_const.USER_MAX_EXECUTIONS,
                                         integ_const.MAX_EXECUTIONS)
        # Explicit check rather than ``assert``: asserts are stripped under
        # ``python -O``, and a non-positive limit would then mean the terminal
        # is never emitted.
        if not isinstance(self.max_executions,
                          int) or self.max_executions <= 0:
            raise RuntimeError(
                "Max executions has to be a positive integer, got: %r" %
                (self.max_executions, ))
        Log.info("Max executions: %d" % self.max_executions)
        self.tuples_to_complete = 0

        self.user_spout.initialize(config, context)

    @staticmethod
    def _load_user_spout(pex_file, classpath):
        """Load ``pex_file`` and return the class found at ``classpath``."""
        pex_loader.load_pex(pex_file)
        cls = pex_loader.import_and_get_class(pex_file, classpath)
        return cls

    @property
    def is_done(self):
        """True once the execution budget has been used up."""
        return self.max_executions == 0

    def next_tuple(self):
        """Run the user spout once per call until the execution budget is spent.

        After the last execution, a terminal is emitted if nothing is left
        outstanding.
        """
        if self.is_done:
            return

        self.max_executions -= 1
        Log.info("max executions: %d" % self.max_executions)

        self.user_spout.next_tuple()

        if self.is_done:
            self._emit_terminal_if_needed()
            Log.info("This topology is finished.")

    def ack(self, tup_id):
        """Record a completed tuple and forward the ack to the user spout.

        Tuples carrying the mock message id were given their id by this
        wrapper, so the user spout is not notified about them.
        """
        Log.info("Received an ack with tuple id: %s" % str(tup_id))
        self.tuples_to_complete -= 1
        if tup_id != integ_const.INTEGRATION_TEST_MOCK_MESSAGE_ID:
            self.user_spout.ack(tup_id)
        self._emit_terminal_if_needed()

    def fail(self, tup_id):
        """Record a completed tuple and forward the failure to the user spout.

        Tuples carrying the mock message id were given their id by this
        wrapper, so the user spout is not notified about them.
        """
        Log.info("Received a fail message with tuple id: %s" % str(tup_id))
        self.tuples_to_complete -= 1
        if tup_id != integ_const.INTEGRATION_TEST_MOCK_MESSAGE_ID:
            self.user_spout.fail(tup_id)
        self._emit_terminal_if_needed()

    def emit(self,
             tup,
             tup_id=None,
             stream=Stream.DEFAULT_STREAM_ID,
             direct_task=None,
             need_task_ids=None):
        """Emits from this integration test spout

        Overridden method which will be called when user's spout calls emit().
        Every emit is counted as outstanding; a tuple emitted without an id is
        given the mock message id.
        """
        # The terminal marker is emitted through super().emit() in
        # _emit_terminal_if_needed(), so it never passes through this counter.
        self.tuples_to_complete += 1

        if tup_id is None:
            Log.info("Add tup_id for tuple: %s" % str(tup))
            _tup_id = integ_const.INTEGRATION_TEST_MOCK_MESSAGE_ID
        else:
            _tup_id = tup_id

        super(IntegrationTestSpout, self).emit(tup, _tup_id, stream,
                                               direct_task, need_task_ids)

    def _emit_terminal_if_needed(self):
        """Emit the terminal once executions are spent and nothing is outstanding."""
        Log.info("is_done: %s, tuples_to_complete: %s" %
                 (self.is_done, self.tuples_to_complete))
        if self.is_done and self.tuples_to_complete == 0:
            Log.info("Emitting terminals to downstream")
            super(IntegrationTestSpout, self).emit(
                [integ_const.INTEGRATION_TEST_TERMINAL],
                stream=integ_const.INTEGRATION_TEST_CONTROL_STREAM_ID)
Ejemplo n.º 17
0
    def test_sanitize_inputs(self):
        """Checks _sanitize_inputs() for dict, list and invalid ``inputs`` values."""
        # Note that _sanitize_inputs() should only be called after HeronComponentSpec's
        # name attribute is set

        # invalid inputs given as argument (valid ones are either dict, list, tuple or None)
        for bad_inputs in ("string", 100):
            invalid_spec = HeronComponentSpec("name",
                                              "classpath",
                                              True,
                                              1,
                                              inputs=bad_inputs)
            with self.assertRaises(TypeError):
                invalid_spec._sanitize_inputs()

        # dict <HeronComponentSpec -> Grouping>: a spec key resolves to its default stream
        source = HeronComponentSpec("spout", "sp_clspath", True, 1)
        sink = HeronComponentSpec("bolt",
                                  "bl_clspath",
                                  False,
                                  1,
                                  inputs={source: Grouping.SHUFFLE})
        self.assertEqual(
            sink._sanitize_inputs(),
            {GlobalStreamId("spout", "default"): Grouping.SHUFFLE})

        # dict keyed by a specific stream of the source spec
        source = HeronComponentSpec("spout", "sp_clspath", True, 1)
        source.outputs = [Stream(name='another_stream')]
        sink = HeronComponentSpec(
            "bolt",
            "bl_clspath",
            False,
            1,
            inputs={source['another_stream']: Grouping.ALL})
        self.assertEqual(
            sink._sanitize_inputs(),
            {GlobalStreamId("spout", "another_stream"): Grouping.ALL})

        # HeronComponentSpec's name attribute not set (dict form)
        nameless = HeronComponentSpec(None, "sp_clspath", True, 1)
        sink = HeronComponentSpec("bolt",
                                  "bl_clspath",
                                  False,
                                  1,
                                  inputs={nameless: Grouping.ALL})
        with self.assertRaises(RuntimeError):
            sink._sanitize_inputs()

        # dict <GlobalStreamId -> Grouping> is returned as given
        by_stream_id = {
            GlobalStreamId("some_spout", "some_stream"):
            Grouping.NONE,
            GlobalStreamId("another_spout", "default"):
            Grouping.fields(['word', 'count'])
        }
        sink = HeronComponentSpec("bolt",
                                  "classpath",
                                  False,
                                  1,
                                  inputs=by_stream_id)
        self.assertEqual(sink._sanitize_inputs(), by_stream_id)

        # list of HeronComponentSpec: each gets SHUFFLE on its default stream
        first_source = HeronComponentSpec("spout1", "sp1_cls", True, 1)
        second_source = HeronComponentSpec("spout2", "sp2_cls", True, 1)
        sink = HeronComponentSpec("bolt",
                                  "bl_cls",
                                  False,
                                  1,
                                  inputs=[first_source, second_source])
        self.assertEqual(
            sink._sanitize_inputs(), {
                GlobalStreamId("spout1", "default"): Grouping.SHUFFLE,
                GlobalStreamId("spout2", "default"): Grouping.SHUFFLE
            })

        # HeronComponentSpec's name attribute not set (list form)
        nameless = HeronComponentSpec(None, "sp_clspath", True, 1)
        sink = HeronComponentSpec("bolt",
                                  "bl_clspath",
                                  False,
                                  1,
                                  inputs=[nameless])
        with self.assertRaises(RuntimeError):
            sink._sanitize_inputs()

        # list of GlobalStreamId: each gets SHUFFLE
        stream_ids = [
            GlobalStreamId("spout1", "default"),
            GlobalStreamId("spout2", "some_stream")
        ]
        sink = HeronComponentSpec("bolt",
                                  "bl_cls",
                                  False,
                                  1,
                                  inputs=stream_ids)
        self.assertEqual(
            sink._sanitize_inputs(),
            {stream_id: Grouping.SHUFFLE for stream_id in stream_ids})

        # list of neither GlobalStreamId nor HeronComponentSpec
        junk = [None, 123, "string", [GlobalStreamId("sp", "default")]]
        sink = HeronComponentSpec("bolt",
                                  "bl_cls",
                                  False,
                                  1,
                                  inputs=junk)
        with self.assertRaises(ValueError):
            sink._sanitize_inputs()