Beispiel #1
0
 def _calculate_inputs(self):
     """Set ``self._inputs`` based on ``self._operation`` and ``self._parents``.

     ``Input`` stores ``None``. Every other handled operation stores a dict
     keyed by ``GlobalStreamId(parent._stage_name, parent._output)`` whose
     values are groupings. An operation that matches none of the branches
     leaves ``self._inputs`` untouched.
     """
     op = self._operation

     if op == OperationType.Input:
         self._inputs = None
         return

     # Operations that read their first parent with SHUFFLE grouping.
     shuffled_ops = (OperationType.Map, OperationType.FlatMap, OperationType.Filter,
                     OperationType.Sample, OperationType.Repartition)
     if op in shuffled_ops:
         self._inputs = {
             GlobalStreamId(self._parents[0]._stage_name,
                            self._parents[0]._output): Grouping.SHUFFLE
         }
         return

     if op == OperationType.Join:
         # One entry per parent, each with its own JoinGrouping custom grouping.
         self._inputs = {}
         for upstream in self._parents:
             join_grouping = Grouping.custom("heron.dsl.src.python.joinbolt.JoinGrouping")
             self._inputs[GlobalStreamId(upstream._stage_name,
                                         upstream._output)] = join_grouping
         return

     if op == OperationType.ReduceByKeyAndWindow:
         upstream_id = GlobalStreamId(self._parents[0]._stage_name,
                                      self._parents[0]._output)
         self._inputs = {
             upstream_id: Grouping.custom(
                 "heron.dsl.src.python.reducebykeyandwindowbolt.ReduceGrouping")
         }
         return

     if op == OperationType.Output:
         # FIXME:- is this correct
         self._inputs = {
             GlobalStreamId(self._parents[0]._stage_name,
                            self._parents[0]._output): Grouping.SHUFFLE
         }
Beispiel #2
0
  def test_custom(self):
    """Check Grouping.custom() with a grouping instance and with rejected arguments."""
    # a DummyCustomGrouping instance yields a CUSTOM grouping with a bytes payload
    grouping = Grouping.custom(DummyCustomGrouping())
    expected_type = topology_pb2.Grouping.Value("CUSTOM")
    self.assertEqual(grouping.gtype, expected_type)
    self.assertIsInstance(grouping.python_serialized, bytes)

    # None and True are not acceptable arguments
    for rejected in (None, True):
      with self.assertRaises(TypeError):
        Grouping.custom(rejected)
Beispiel #3
0
  def test_custom(self):
    """Check Grouping.custom() with a class-path string and with rejected arguments."""
    # a string argument yields a CUSTOM grouping with a bytes payload
    grouping = Grouping.custom("class.path")
    expected_type = topology_pb2.Grouping.Value("CUSTOM")
    self.assertEqual(grouping.gtype, expected_type)
    self.assertIsInstance(grouping.python_serialized, bytes)

    # arguments that are not strings must raise TypeError
    for rejected in (None, True):
      with self.assertRaises(TypeError):
        Grouping.custom(rejected)
Beispiel #4
0
  def test_is_grouping_sane(self):
    """Check which groupings Grouping.is_grouping_sane() accepts and which it rejects."""
    # groupings that are accepted as-is
    for ready_made in (Grouping.ALL, Grouping.SHUFFLE, Grouping.LOWEST, Grouping.NONE):
      self.assertTrue(Grouping.is_grouping_sane(ready_made))

    # bare FIELDS is rejected; a grouping built by Grouping.fields() is accepted
    self.assertFalse(Grouping.is_grouping_sane(Grouping.FIELDS))
    self.assertTrue(Grouping.is_grouping_sane(Grouping.fields(['hello', 'world'])))

    # bare CUSTOM is rejected; a grouping built by Grouping.custom() is accepted
    self.assertFalse(Grouping.is_grouping_sane(Grouping.CUSTOM))
    self.assertTrue(Grouping.is_grouping_sane(Grouping.custom(DummyCustomGrouping())))
Beispiel #5
0
  def test_is_grouping_sane(self):
    """Check which groupings Grouping.is_grouping_sane() accepts and which it rejects."""
    check = Grouping.is_grouping_sane

    # groupings that are accepted as-is
    for ready_made in (Grouping.ALL, Grouping.SHUFFLE, Grouping.LOWEST, Grouping.NONE):
      self.assertTrue(check(ready_made))

    # bare FIELDS is rejected; a grouping built by Grouping.fields() is accepted
    self.assertFalse(check(Grouping.FIELDS))
    with_fields = Grouping.fields(['hello', 'world'])
    self.assertTrue(check(with_fields))

    # bare CUSTOM is rejected; a grouping built from a class-path string is accepted
    self.assertFalse(check(Grouping.CUSTOM))
    with_classpath = Grouping.custom("class.path")
    self.assertTrue(check(with_classpath))
Beispiel #6
0
def fields_grouping_builder(topology_name, http_server_url):
  """Build the test topology ab-spout -> count-bolt -> sum-bolt and return it.

  count-bolt reads the spout with fields grouping on 'word'; sum-bolt reads
  count-bolt with NONE grouping. The result of ``create_topology()`` on the
  ``TestTopologyBuilder`` is returned.
  """
  topology = TestTopologyBuilder(topology_name, http_server_url)

  source = topology.add_spout("ab-spout", ABSpout, 1, max_executions=400)

  counter_inputs = {source: Grouping.fields('word')}
  counter = topology.add_bolt("count-bolt", WordCountBolt, inputs=counter_inputs, par=2)

  aggregator_inputs = {counter: Grouping.NONE}
  topology.add_bolt("sum-bolt", CountAggregatorBolt, inputs=aggregator_inputs, par=1)

  return topology.create_topology()
Beispiel #7
0
  def test_fields(self):
    """Check Grouping.fields() for accepted field specs and for rejected arguments."""
    fields_type = topology_pb2.Grouping.Value("FIELDS")

    # a list of names comes back unchanged
    from_list = Grouping.fields(['word', 'count'])
    self.assertEqual(from_list.gtype, fields_type)
    self.assertEqual(from_list.fields, ['word', 'count'])

    # a single name comes back as a one-element list
    from_string = Grouping.fields("just_a_word")
    self.assertEqual(from_string.gtype, fields_type)
    self.assertEqual(from_string.fields, ['just_a_word'])

    # non-string arguments (or non-string list items) must raise TypeError
    for rejected in (['word', 'count', True], 123, None):
      with self.assertRaises(TypeError):
        Grouping.fields(rejected)

    # calling without any field must raise ValueError
    with self.assertRaises(ValueError):
      Grouping.fields()
Beispiel #8
0
  def test_fields(self):
    """Check Grouping.fields() for accepted field specs and for rejected arguments."""
    # (argument, expected ``fields`` attribute) pairs that must be accepted
    accepted = (
        (['word', 'count'], ['word', 'count']),
        ("just_a_word", ['just_a_word']),
    )
    for argument, expected_fields in accepted:
      grouping = Grouping.fields(argument)
      self.assertEqual(grouping.gtype, topology_pb2.Grouping.Value("FIELDS"))
      self.assertEqual(grouping.fields, expected_fields)

    # non-string arguments (or non-string list items) must raise TypeError
    for rejected in (['word', 'count', True], 123, None):
      with self.assertRaises(TypeError):
        Grouping.fields(rejected)

    # fields not specified
    with self.assertRaises(ValueError):
      Grouping.fields()
Beispiel #9
0
    def _sanitize_inputs(self):
        """Sanitizes input fields and returns a map <GlobalStreamId -> Grouping>"""
        ret = {}
        if self.inputs is None:
            return

        if isinstance(self.inputs, dict):
            # inputs are dictionary, must be either <HeronComponentSpec -> Grouping> or
            # <GlobalStreamId -> Grouping>
            for key, grouping in self.inputs.items():
                if not Grouping.is_grouping_sane(grouping):
                    raise ValueError('A given grouping is not supported')
                if isinstance(key, HeronComponentSpec):
                    # use default streamid
                    if key.name is None:
                        # should not happen as TopologyType metaclass sets name attribute
                        # before calling this method
                        raise RuntimeError(
                            "In _sanitize_inputs(): HeronComponentSpec doesn't have a name"
                        )
                    global_streamid = GlobalStreamId(key.name,
                                                     Stream.DEFAULT_STREAM_ID)
                    ret[global_streamid] = grouping
                elif isinstance(key, GlobalStreamId):
                    ret[key] = grouping
                else:
                    raise ValueError("%s is not supported as a key to inputs" %
                                     str(key))
        elif isinstance(self.inputs, (list, tuple)):
            # inputs are lists, must be either a list of HeronComponentSpec or GlobalStreamId
            # will use SHUFFLE grouping
            for input_obj in self.inputs:
                if isinstance(input_obj, HeronComponentSpec):
                    if input_obj.name is None:
                        # should not happen as TopologyType metaclass sets name attribute
                        # before calling this method
                        raise RuntimeError(
                            "In _sanitize_inputs(): HeronComponentSpec doesn't have a name"
                        )
                    global_streamid = GlobalStreamId(input_obj.name,
                                                     Stream.DEFAULT_STREAM_ID)
                    ret[global_streamid] = Grouping.SHUFFLE
                elif isinstance(input_obj, GlobalStreamId):
                    ret[input_obj] = Grouping.SHUFFLE
                else:
                    raise ValueError("%s is not supported as an input" %
                                     str(input_obj))
        else:
            raise TypeError("Inputs must be a list, dict, or None, given: %s" %
                            str(self.inputs))

        return ret
Beispiel #10
0
class MultiStream(Topology):
    """Topology wiring one spout into a counting bolt and a stream-aggregating bolt."""
    # Source component, declared with par=2.
    spout = MultiStreamSpout.spec(par=2)
    # Reads the spout with fields grouping on 'word'.
    count_bolt = CountBolt.spec(
        par=2,
        inputs={spout: Grouping.fields('word')},
        config={constants.TOPOLOGY_TICK_TUPLE_FREQ_SECS: 10})
    # Subscribes to both the spout itself and its 'error' stream, each with ALL grouping.
    stream_aggregator = StreamAggregateBolt.spec(
        par=1,
        inputs={
            spout: Grouping.ALL,
            spout['error']: Grouping.ALL
        },
        config={constants.TOPOLOGY_TICK_TUPLE_FREQ_SECS: 15})
Beispiel #11
0
class CustomGrouping(Topology):
    """Topology in which a consuming bolt reads a word spout through a custom grouping."""
    # Source component, declared with par=1.
    word_spout = WordSpout.spec(par=1)
    # The spout is consumed using a grouping built from a SampleCustomGrouping instance.
    consume_bolt = ConsumeBolt.spec(
        par=3,
        inputs={word_spout: Grouping.custom(SampleCustomGrouping())},
        config={constants.TOPOLOGY_TICK_TUPLE_FREQ_SECS: 10})
 def _calculate_inputs(self):
   """Return a one-entry map from the first parent's stream to a custom grouping.

   The key is ``GlobalStreamId(parent._stage_name, parent._output)`` for
   ``self._parents[0]``; the value is the custom grouping created from the
   ReduceGrouping class path.
   """
   upstream = self._parents[0]
   stream_id = GlobalStreamId(upstream._stage_name, upstream._output)
   grouping = Grouping.custom("heron.dsl.src.python.reducebykeyandwindowbolt.ReduceGrouping")
   return {stream_id: grouping}
Beispiel #13
0
    def test_sanitize_inputs(self):
        """Exercise HeronComponentSpec._sanitize_inputs() with bad, dict and list inputs.

        Note that _sanitize_inputs() should only be called after HeronComponentSpec's
        name attribute is set; the specs created with ``None`` as name verify the
        RuntimeError raised otherwise.
        """
        # anything other than dict, list, tuple or None must raise TypeError
        for unsupported in ("string", 100):
            bad_spec = HeronComponentSpec("name", "classpath", True, 1, inputs=unsupported)
            with self.assertRaises(TypeError):
                bad_spec._sanitize_inputs()

        # dict keyed by a HeronComponentSpec: mapped to that component's "default" stream
        source = HeronComponentSpec("spout", "sp_clspath", True, 1)
        sink = HeronComponentSpec("bolt", "bl_clspath", False, 1,
                                  inputs={source: Grouping.SHUFFLE})
        self.assertEqual(sink._sanitize_inputs(),
                         {GlobalStreamId("spout", "default"): Grouping.SHUFFLE})

        # dict keyed by a named output stream of a HeronComponentSpec
        source = HeronComponentSpec("spout", "sp_clspath", True, 1)
        source.outputs = [Stream(name='another_stream')]
        sink = HeronComponentSpec("bolt", "bl_clspath", False, 1,
                                  inputs={source['another_stream']: Grouping.ALL})
        self.assertEqual(sink._sanitize_inputs(),
                         {GlobalStreamId("spout", "another_stream"): Grouping.ALL})

        # dict keyed by a HeronComponentSpec whose name attribute is not set
        nameless = HeronComponentSpec(None, "sp_clspath", True, 1)
        sink = HeronComponentSpec("bolt", "bl_clspath", False, 1,
                                  inputs={nameless: Grouping.ALL})
        with self.assertRaises(RuntimeError):
            sink._sanitize_inputs()

        # dict keyed by GlobalStreamId comes back equal to what was given
        by_stream_id = {
            GlobalStreamId("some_spout", "some_stream"): Grouping.NONE,
            GlobalStreamId("another_spout", "default"): Grouping.fields(['word', 'count']),
        }
        sink = HeronComponentSpec("bolt", "classpath", False, 1, inputs=by_stream_id)
        self.assertEqual(sink._sanitize_inputs(), by_stream_id)

        # list of HeronComponentSpec: SHUFFLE on each component's "default" stream
        first = HeronComponentSpec("spout1", "sp1_cls", True, 1)
        second = HeronComponentSpec("spout2", "sp2_cls", True, 1)
        sink = HeronComponentSpec("bolt", "bl_cls", False, 1, inputs=[first, second])
        self.assertEqual(sink._sanitize_inputs(),
                         {GlobalStreamId("spout1", "default"): Grouping.SHUFFLE,
                          GlobalStreamId("spout2", "default"): Grouping.SHUFFLE})

        # list containing a HeronComponentSpec whose name attribute is not set
        nameless = HeronComponentSpec(None, "sp_clspath", True, 1)
        sink = HeronComponentSpec("bolt", "bl_clspath", False, 1, inputs=[nameless])
        with self.assertRaises(RuntimeError):
            sink._sanitize_inputs()

        # list of GlobalStreamId: SHUFFLE for each of them
        stream_ids = [GlobalStreamId("spout1", "default"),
                      GlobalStreamId("spout2", "some_stream")]
        sink = HeronComponentSpec("bolt", "bl_cls", False, 1, inputs=stream_ids)
        self.assertEqual(sink._sanitize_inputs(),
                         {stream_id: Grouping.SHUFFLE for stream_id in stream_ids})

        # list of neither GlobalStreamId nor HeronComponentSpec
        junk = [None, 123, "string", [GlobalStreamId("sp", "default")]]
        sink = HeronComponentSpec("bolt", "bl_cls", False, 1, inputs=junk)
        with self.assertRaises(ValueError):
            sink._sanitize_inputs()
Beispiel #14
0
from heron.examples.src.python.spout import WordSpout
from heron.examples.src.python.bolt import HalfAckBolt

# Topology is defined using a topology builder
# Refer to multi_stream_topology for defining a topology by subclassing Topology
if __name__ == '__main__':
    # The topology name is the single required command-line argument.
    if len(sys.argv) != 2:
        # Parenthesised single-argument print behaves the same as the Python 2
        # print statement and is also valid as the Python 3 print function.
        print("Topology's name is not specified")
        sys.exit(1)

    builder = TopologyBuilder(name=sys.argv[1])

    # word_spout -> half_ack_bolt, with fields grouping on 'word'.
    word_spout = builder.add_spout("word_spout", WordSpout, par=2)
    half_ack_bolt = builder.add_bolt(
        "half_ack_bolt",
        HalfAckBolt,
        par=2,
        inputs={word_spout: Grouping.fields('word')},
        config={constants.TOPOLOGY_TICK_TUPLE_FREQ_SECS: 10})

    # Topology-wide settings: reliability mode, max spout pending and message timeout.
    topology_config = {
        constants.TOPOLOGY_RELIABILITY_MODE:
        constants.TopologyReliabilityMode.ATLEAST_ONCE,
        constants.TOPOLOGY_MAX_SPOUT_PENDING: 100000000,
        constants.TOPOLOGY_MESSAGE_TIMEOUT_SECS: 300
    }

    builder.set_config(topology_config)

    builder.build_and_submit()
Beispiel #15
0
 def _calculate_inputs(self):
     """Return a map with one entry per parent, each using a JoinGrouping custom grouping.

     Keys are ``GlobalStreamId(parent._stage_name, parent._output)`` for every
     element of ``self._parents``; each value is a separate custom grouping
     created from the JoinGrouping class path.
     """
     return {
         GlobalStreamId(upstream._stage_name, upstream._output):
             Grouping.custom("heron.dsl.src.python.joinbolt.JoinGrouping")
         for upstream in self._parents
     }
Beispiel #16
0
  def test_sanitize_inputs(self):
    """Run _sanitize_inputs() over unsupported, dict-shaped and list-shaped inputs.

    Note that _sanitize_inputs() should only be called after HeronComponentSpec's
    name attribute is set; the specs built with ``None`` as name check the
    RuntimeError raised otherwise.
    """
    def spout_spec(name, classpath):
      # spec declared as a spout, without inputs
      return HeronComponentSpec(name, classpath, True, 1)

    def bolt_spec(classpath, inputs):
      # spec named "bolt" that consumes ``inputs``
      return HeronComponentSpec("bolt", classpath, False, 1, inputs=inputs)

    # invalid inputs given as argument (valid ones are either dict, list, tuple or None)
    for unsupported in ("string", 100):
      invalid_spec = HeronComponentSpec("name", "classpath", True, 1, inputs=unsupported)
      with self.assertRaises(TypeError):
        invalid_spec._sanitize_inputs()

    # dict <HeronComponentSpec -> Grouping>
    upstream = spout_spec("spout", "sp_clspath")
    sanitized = bolt_spec("bl_clspath", {upstream: Grouping.SHUFFLE})._sanitize_inputs()
    self.assertEqual(sanitized, {GlobalStreamId("spout", "default"): Grouping.SHUFFLE})

    upstream = spout_spec("spout", "sp_clspath")
    upstream.outputs = [Stream(name='another_stream')]
    consumer = bolt_spec("bl_clspath", {upstream['another_stream']: Grouping.ALL})
    sanitized = consumer._sanitize_inputs()
    self.assertEqual(sanitized, {GlobalStreamId("spout", "another_stream"): Grouping.ALL})

    # HeronComponentSpec's name attribute not set
    unnamed = spout_spec(None, "sp_clspath")
    consumer = bolt_spec("bl_clspath", {unnamed: Grouping.ALL})
    with self.assertRaises(RuntimeError):
      consumer._sanitize_inputs()

    # dict <GlobalStreamId -> Grouping>
    keyed_by_stream = {
        GlobalStreamId("some_spout", "some_stream"): Grouping.NONE,
        GlobalStreamId("another_spout", "default"): Grouping.fields(['word', 'count']),
    }
    sanitized = bolt_spec("classpath", keyed_by_stream)._sanitize_inputs()
    self.assertEqual(sanitized, keyed_by_stream)

    # list of HeronComponentSpec
    upstream_one = spout_spec("spout1", "sp1_cls")
    upstream_two = spout_spec("spout2", "sp2_cls")
    sanitized = bolt_spec("bl_cls", [upstream_one, upstream_two])._sanitize_inputs()
    expected = {
        GlobalStreamId("spout1", "default"): Grouping.SHUFFLE,
        GlobalStreamId("spout2", "default"): Grouping.SHUFFLE,
    }
    self.assertEqual(sanitized, expected)

    # HeronComponentSpec's name attribute not set
    unnamed = spout_spec(None, "sp_clspath")
    consumer = bolt_spec("bl_clspath", [unnamed])
    with self.assertRaises(RuntimeError):
      consumer._sanitize_inputs()

    # list of GlobalStreamId
    listed_streams = [
        GlobalStreamId("spout1", "default"),
        GlobalStreamId("spout2", "some_stream"),
    ]
    sanitized = bolt_spec("bl_cls", listed_streams)._sanitize_inputs()
    self.assertEqual(sanitized, dict((stream, Grouping.SHUFFLE) for stream in listed_streams))

    # list of neither GlobalStreamId nor HeronComponentSpec
    unsupported_items = [None, 123, "string", [GlobalStreamId("sp", "default")]]
    consumer = bolt_spec("bl_cls", unsupported_items)
    with self.assertRaises(ValueError):
      consumer._sanitize_inputs()