def _calculate_inputs(self):
    """Populate ``self._inputs`` according to ``self._operation``.

    * Input: ``self._inputs`` is set to ``None``.
    * Map / FlatMap / Filter / Sample / Repartition / Output: a single entry
      keyed by the first parent's stream, using ``Grouping.SHUFFLE``.
    * Join: one entry per parent, each using the join custom grouping.
    * ReduceByKeyAndWindow: a single entry keyed by the first parent's
      stream, using the reduce custom grouping.

    Any other operation leaves ``self._inputs`` untouched.
    """
    operation = self._operation

    def first_parent_stream():
        # Only evaluated in branches that need a parent, so stages handled by
        # the Input branch never index into self._parents.
        upstream = self._parents[0]
        return GlobalStreamId(upstream._stage_name, upstream._output)

    if operation == OperationType.Input:
        self._inputs = None
    elif operation in (OperationType.Map,
                       OperationType.FlatMap,
                       OperationType.Filter,
                       OperationType.Sample,
                       OperationType.Repartition):
        self._inputs = {first_parent_stream(): Grouping.SHUFFLE}
    elif operation == OperationType.Join:
        joined = {}
        for upstream in self._parents:
            stream_id = GlobalStreamId(upstream._stage_name, upstream._output)
            joined[stream_id] = Grouping.custom("heron.dsl.src.python.joinbolt.JoinGrouping")
        self._inputs = joined
    elif operation == OperationType.ReduceByKeyAndWindow:
        reduce_grouping = Grouping.custom(
            "heron.dsl.src.python.reducebykeyandwindowbolt.ReduceGrouping")
        self._inputs = {first_parent_stream(): reduce_grouping}
    elif operation == OperationType.Output:
        # FIXME:- is this correct
        self._inputs = {first_parent_stream(): Grouping.SHUFFLE}
def test_custom(self):
    """Check ``Grouping.custom`` with a grouping instance and with rejected arguments."""
    # sane: an instance of a custom grouping class
    grouping = Grouping.custom(DummyCustomGrouping())
    custom_gtype = topology_pb2.Grouping.Value("CUSTOM")
    self.assertEqual(grouping.gtype, custom_gtype)
    self.assertIsInstance(grouping.python_serialized, bytes)

    # arguments of an unsupported type must be rejected
    for rejected in (None, True):
        with self.assertRaises(TypeError):
            Grouping.custom(rejected)
def test_custom(self):
    """Check ``Grouping.custom`` with a class-path string and with non-string arguments."""
    # sane: the grouping is identified by a class path
    from_classpath = Grouping.custom("class.path")
    self.assertEqual(from_classpath.gtype, topology_pb2.Grouping.Value("CUSTOM"))
    self.assertIsInstance(from_classpath.python_serialized, bytes)

    # arg not string
    for not_a_string in (None, True):
        with self.assertRaises(TypeError):
            Grouping.custom(not_a_string)
def test_is_grouping_sane(self):
    """Check which groupings ``Grouping.is_grouping_sane`` accepts."""
    # groupings that need no extra information are accepted as-is
    for plain in (Grouping.ALL, Grouping.SHUFFLE, Grouping.LOWEST, Grouping.NONE):
        self.assertTrue(Grouping.is_grouping_sane(plain))

    # the bare FIELDS marker is rejected; one built with field names is accepted
    self.assertFalse(Grouping.is_grouping_sane(Grouping.FIELDS))
    with_fields = Grouping.fields(['hello', 'world'])
    self.assertTrue(Grouping.is_grouping_sane(with_fields))

    # likewise for CUSTOM versus a grouping built from a custom grouping instance
    self.assertFalse(Grouping.is_grouping_sane(Grouping.CUSTOM))
    with_custom = Grouping.custom(DummyCustomGrouping())
    self.assertTrue(Grouping.is_grouping_sane(with_custom))
def test_is_grouping_sane(self):
    """Check which groupings ``Grouping.is_grouping_sane`` accepts."""
    is_sane = Grouping.is_grouping_sane

    # groupings that need no extra information
    self.assertTrue(is_sane(Grouping.ALL))
    self.assertTrue(is_sane(Grouping.SHUFFLE))
    self.assertTrue(is_sane(Grouping.LOWEST))
    self.assertTrue(is_sane(Grouping.NONE))

    # FIELDS is only accepted once built with field names
    self.assertFalse(is_sane(Grouping.FIELDS))
    self.assertTrue(is_sane(Grouping.fields(['hello', 'world'])))

    # CUSTOM is only accepted once built from a class path
    self.assertFalse(is_sane(Grouping.CUSTOM))
    self.assertTrue(is_sane(Grouping.custom("class.path")))
def fields_grouping_builder(topology_name, http_server_url):
    """Build the fields-grouping test topology and return it.

    Wires ``ab-spout`` -> ``count-bolt`` (fields grouping on ``'word'``,
    parallelism 2) -> ``sum-bolt`` (``Grouping.NONE``, parallelism 1).

    Args:
      topology_name: passed through to ``TestTopologyBuilder``.
      http_server_url: passed through to ``TestTopologyBuilder``.

    Returns:
      Whatever ``TestTopologyBuilder.create_topology()`` returns.
    """
    topology = TestTopologyBuilder(topology_name, http_server_url)

    source = topology.add_spout("ab-spout", ABSpout, 1, max_executions=400)

    counter_inputs = {source: Grouping.fields('word')}
    counter = topology.add_bolt("count-bolt", WordCountBolt, inputs=counter_inputs, par=2)

    aggregator_inputs = {counter: Grouping.NONE}
    topology.add_bolt("sum-bolt", CountAggregatorBolt, inputs=aggregator_inputs, par=1)

    return topology.create_topology()
def test_fields(self):
    """Check ``Grouping.fields`` with valid, wrongly-typed and missing arguments."""
    fields_gtype = topology_pb2.Grouping.Value("FIELDS")

    # sane: a list of names is expected back unchanged, a bare string is
    # expected back as a one-element list
    sane_cases = (
        (['word', 'count'], ['word', 'count']),
        ("just_a_word", ['just_a_word']),
    )
    for given, expected_fields in sane_cases:
        grouping = Grouping.fields(given)
        self.assertEqual(grouping.gtype, fields_gtype)
        self.assertEqual(grouping.fields, expected_fields)

    # non-string
    for not_strings in (['word', 'count', True], 123, None):
        with self.assertRaises(TypeError):
            Grouping.fields(not_strings)

    # fields not specified
    with self.assertRaises(ValueError):
        Grouping.fields()
def _sanitize_inputs(self): """Sanitizes input fields and returns a map <GlobalStreamId -> Grouping>""" ret = {} if self.inputs is None: return if isinstance(self.inputs, dict): # inputs are dictionary, must be either <HeronComponentSpec -> Grouping> or # <GlobalStreamId -> Grouping> for key, grouping in self.inputs.items(): if not Grouping.is_grouping_sane(grouping): raise ValueError('A given grouping is not supported') if isinstance(key, HeronComponentSpec): # use default streamid if key.name is None: # should not happen as TopologyType metaclass sets name attribute # before calling this method raise RuntimeError( "In _sanitize_inputs(): HeronComponentSpec doesn't have a name" ) global_streamid = GlobalStreamId(key.name, Stream.DEFAULT_STREAM_ID) ret[global_streamid] = grouping elif isinstance(key, GlobalStreamId): ret[key] = grouping else: raise ValueError("%s is not supported as a key to inputs" % str(key)) elif isinstance(self.inputs, (list, tuple)): # inputs are lists, must be either a list of HeronComponentSpec or GlobalStreamId # will use SHUFFLE grouping for input_obj in self.inputs: if isinstance(input_obj, HeronComponentSpec): if input_obj.name is None: # should not happen as TopologyType metaclass sets name attribute # before calling this method raise RuntimeError( "In _sanitize_inputs(): HeronComponentSpec doesn't have a name" ) global_streamid = GlobalStreamId(input_obj.name, Stream.DEFAULT_STREAM_ID) ret[global_streamid] = Grouping.SHUFFLE elif isinstance(input_obj, GlobalStreamId): ret[input_obj] = Grouping.SHUFFLE else: raise ValueError("%s is not supported as an input" % str(input_obj)) else: raise TypeError("Inputs must be a list, dict, or None, given: %s" % str(self.inputs)) return ret
# Topology declared by subclassing Topology: one spout feeding two bolts.
class MultiStream(Topology):
    # Source component, parallelism 2.
    spout = MultiStreamSpout.spec(par=2)

    # Consumes the spout with a fields grouping on 'word'; tick-tuple
    # frequency is configured to 10 seconds.
    count_bolt = CountBolt.spec(
        par=2,
        inputs={spout: Grouping.fields('word')},
        config={constants.TOPOLOGY_TICK_TUPLE_FREQ_SECS: 10})

    # Single-instance bolt subscribed with ALL grouping to two inputs: the
    # spout itself and spout['error'] (presumably the spout's stream named
    # 'error' -- confirm against MultiStreamSpout's declared outputs).
    stream_aggregator = StreamAggregateBolt.spec(
        par=1,
        inputs={
            spout: Grouping.ALL,
            spout['error']: Grouping.ALL
        },
        config={constants.TOPOLOGY_TICK_TUPLE_FREQ_SECS: 15})
# Topology declared by subclassing Topology: a word spout routed to a bolt
# through a user-supplied custom grouping.
class CustomGrouping(Topology):
    # Source component, parallelism 1.
    word_spout = WordSpout.spec(par=1)

    # Three bolt instances; routing is delegated to a SampleCustomGrouping
    # instance passed to Grouping.custom. Tick-tuple frequency is configured
    # to 10 seconds.
    consume_bolt = ConsumeBolt.spec(
        par=3,
        inputs={word_spout: Grouping.custom(SampleCustomGrouping())},
        config={constants.TOPOLOGY_TICK_TUPLE_FREQ_SECS: 10})
def _calculate_inputs(self):
    """Return the input map for this stage.

    The map has a single entry: the first parent's stream, routed with the
    reduce-by-key-and-window custom grouping.
    """
    upstream = self._parents[0]
    stream_id = GlobalStreamId(upstream._stage_name, upstream._output)
    grouping = Grouping.custom(
        "heron.dsl.src.python.reducebykeyandwindowbolt.ReduceGrouping")
    return {stream_id: grouping}
def test_sanitize_inputs(self):
    """Exercise ``HeronComponentSpec._sanitize_inputs`` over each accepted input shape.

    Note that _sanitize_inputs() should only be called after
    HeronComponentSpec's name attribute is set.
    """
    # invalid inputs given as argument (valid ones are either dict, list, tuple or None)
    for bad_inputs in ("string", 100):
        invalid_spec = HeronComponentSpec("name", "classpath", True, 1, inputs=bad_inputs)
        with self.assertRaises(TypeError):
            invalid_spec._sanitize_inputs()

    # dict <HeronComponentSpec -> Grouping>: a spec key maps to its default stream
    upstream = HeronComponentSpec("spout", "sp_clspath", True, 1)
    downstream = HeronComponentSpec("bolt", "bl_clspath", False, 1,
                                    inputs={upstream: Grouping.SHUFFLE})
    self.assertEqual(downstream._sanitize_inputs(),
                     {GlobalStreamId("spout", "default"): Grouping.SHUFFLE})

    # dict keyed by a named stream selected from a spec
    upstream = HeronComponentSpec("spout", "sp_clspath", True, 1)
    upstream.outputs = [Stream(name='another_stream')]
    downstream = HeronComponentSpec("bolt", "bl_clspath", False, 1,
                                    inputs={upstream['another_stream']: Grouping.ALL})
    self.assertEqual(downstream._sanitize_inputs(),
                     {GlobalStreamId("spout", "another_stream"): Grouping.ALL})

    # HeronComponentSpec's name attribute not set
    nameless = HeronComponentSpec(None, "sp_clspath", True, 1)
    downstream = HeronComponentSpec("bolt", "bl_clspath", False, 1,
                                    inputs={nameless: Grouping.ALL})
    with self.assertRaises(RuntimeError):
        downstream._sanitize_inputs()

    # dict <GlobalStreamId -> Grouping>: expected back unchanged
    by_stream_id = {
        GlobalStreamId("some_spout", "some_stream"): Grouping.NONE,
        GlobalStreamId("another_spout", "default"): Grouping.fields(['word', 'count'])
    }
    downstream = HeronComponentSpec("bolt", "classpath", False, 1, inputs=by_stream_id)
    self.assertEqual(downstream._sanitize_inputs(), by_stream_id)

    # list of HeronComponentSpec: every entry gets SHUFFLE on its default stream
    first_spout = HeronComponentSpec("spout1", "sp1_cls", True, 1)
    second_spout = HeronComponentSpec("spout2", "sp2_cls", True, 1)
    downstream = HeronComponentSpec("bolt", "bl_cls", False, 1,
                                    inputs=[first_spout, second_spout])
    self.assertEqual(
        downstream._sanitize_inputs(),
        {
            GlobalStreamId("spout1", "default"): Grouping.SHUFFLE,
            GlobalStreamId("spout2", "default"): Grouping.SHUFFLE
        })

    # HeronComponentSpec's name attribute not set
    nameless = HeronComponentSpec(None, "sp_clspath", True, 1)
    downstream = HeronComponentSpec("bolt", "bl_clspath", False, 1, inputs=[nameless])
    with self.assertRaises(RuntimeError):
        downstream._sanitize_inputs()

    # list of GlobalStreamId
    stream_ids = [
        GlobalStreamId("spout1", "default"),
        GlobalStreamId("spout2", "some_stream")
    ]
    downstream = HeronComponentSpec("bolt", "bl_cls", False, 1, inputs=stream_ids)
    self.assertEqual(downstream._sanitize_inputs(),
                     dict.fromkeys(stream_ids, Grouping.SHUFFLE))

    # list of neither GlobalStreamId nor HeronComponentSpec
    unsupported = [None, 123, "string", [GlobalStreamId("sp", "default")]]
    downstream = HeronComponentSpec("bolt", "bl_cls", False, 1, inputs=unsupported)
    with self.assertRaises(ValueError):
        downstream._sanitize_inputs()
from heron.examples.src.python.spout import WordSpout
from heron.examples.src.python.bolt import HalfAckBolt

# Topology is defined using a topology builder
# Refer to multi_stream_topology for defining a topology by subclassing Topology
if __name__ == '__main__':
    # Exactly one command-line argument is expected: the topology's name.
    if len(sys.argv) != 2:
        # Parenthesized single-argument form: prints the same text under
        # Python 2 and, unlike the bare print statement, parses under Python 3.
        print("Topology's name is not specified")
        sys.exit(1)

    builder = TopologyBuilder(name=sys.argv[1])

    # word_spout -> half_ack_bolt, fields-grouped on 'word', parallelism 2 each.
    word_spout = builder.add_spout("word_spout", WordSpout, par=2)
    half_ack_bolt = builder.add_bolt(
        "half_ack_bolt",
        HalfAckBolt,
        par=2,
        inputs={word_spout: Grouping.fields('word')},
        config={constants.TOPOLOGY_TICK_TUPLE_FREQ_SECS: 10})

    # Topology-wide settings: at-least-once reliability mode, the max spout
    # pending limit and the message timeout in seconds.
    topology_config = {
        constants.TOPOLOGY_RELIABILITY_MODE: constants.TopologyReliabilityMode.ATLEAST_ONCE,
        constants.TOPOLOGY_MAX_SPOUT_PENDING: 100000000,
        constants.TOPOLOGY_MESSAGE_TIMEOUT_SECS: 300
    }
    builder.set_config(topology_config)

    builder.build_and_submit()
def _calculate_inputs(self):
    """Return the input map for this stage.

    Every parent's stream is included, each routed with its own join
    custom grouping object.
    """
    return {
        GlobalStreamId(upstream._stage_name, upstream._output):
            Grouping.custom("heron.dsl.src.python.joinbolt.JoinGrouping")
        for upstream in self._parents
    }
def test_sanitize_inputs(self):
    """Exercise ``HeronComponentSpec._sanitize_inputs`` over each accepted input shape.

    Note that _sanitize_inputs() should only be called after
    HeronComponentSpec's name attribute is set.
    """
    shuffle = Grouping.SHUFFLE

    # invalid inputs given as argument (valid ones are either dict, list, tuple or None)
    for wrong_type in ("string", 100):
        spec = HeronComponentSpec("name", "classpath", True, 1, inputs=wrong_type)
        with self.assertRaises(TypeError):
            spec._sanitize_inputs()

    # dict <HeronComponentSpec -> Grouping>
    producer = HeronComponentSpec("spout", "sp_clspath", True, 1)
    consumer = HeronComponentSpec("bolt", "bl_clspath", False, 1, inputs={producer: shuffle})
    sanitized = consumer._sanitize_inputs()
    self.assertEqual(sanitized, {GlobalStreamId("spout", "default"): shuffle})

    # same, but keyed by a non-default stream of the producer
    producer = HeronComponentSpec("spout", "sp_clspath", True, 1)
    producer.outputs = [Stream(name='another_stream')]
    named_stream = producer['another_stream']
    consumer = HeronComponentSpec("bolt", "bl_clspath", False, 1,
                                  inputs={named_stream: Grouping.ALL})
    sanitized = consumer._sanitize_inputs()
    self.assertEqual(sanitized, {GlobalStreamId("spout", "another_stream"): Grouping.ALL})

    # HeronComponentSpec's name attribute not set
    unnamed = HeronComponentSpec(None, "sp_clspath", True, 1)
    consumer = HeronComponentSpec("bolt", "bl_clspath", False, 1,
                                  inputs={unnamed: Grouping.ALL})
    with self.assertRaises(RuntimeError):
        consumer._sanitize_inputs()

    # dict <GlobalStreamId -> Grouping>
    inputs_dict = {GlobalStreamId("some_spout", "some_stream"): Grouping.NONE,
                   GlobalStreamId("another_spout", "default"): Grouping.fields(['word', 'count'])}
    consumer = HeronComponentSpec("bolt", "classpath", False, 1, inputs=inputs_dict)
    sanitized = consumer._sanitize_inputs()
    self.assertEqual(sanitized, inputs_dict)

    # list of HeronComponentSpec
    producers = [HeronComponentSpec("spout1", "sp1_cls", True, 1),
                 HeronComponentSpec("spout2", "sp2_cls", True, 1)]
    consumer = HeronComponentSpec("bolt", "bl_cls", False, 1, inputs=producers)
    sanitized = consumer._sanitize_inputs()
    self.assertEqual(sanitized, {GlobalStreamId("spout1", "default"): shuffle,
                                 GlobalStreamId("spout2", "default"): shuffle})

    # HeronComponentSpec's name attribute not set
    unnamed = HeronComponentSpec(None, "sp_clspath", True, 1)
    consumer = HeronComponentSpec("bolt", "bl_clspath", False, 1, inputs=[unnamed])
    with self.assertRaises(RuntimeError):
        consumer._sanitize_inputs()

    # list of GlobalStreamId
    inputs_list = [GlobalStreamId("spout1", "default"),
                   GlobalStreamId("spout2", "some_stream")]
    consumer = HeronComponentSpec("bolt", "bl_cls", False, 1, inputs=inputs_list)
    sanitized = consumer._sanitize_inputs()
    self.assertEqual(sanitized, dict.fromkeys(inputs_list, shuffle))

    # list of neither GlobalStreamId nor HeronComponentSpec
    inputs_list = [None, 123, "string", [GlobalStreamId("sp", "default")]]
    consumer = HeronComponentSpec("bolt", "bl_cls", False, 1, inputs=inputs_list)
    with self.assertRaises(ValueError):
        consumer._sanitize_inputs()