def test_normalize(self):
    self.assertIsNone(_sch._normalize(None))
    for cs in _sch.CommonSchema:
        self.assertEqual(cs, _sch._normalize(cs))

    s = _sch.StreamSchema('tuple<int32 a>')
    self.assertEqual(s, _sch._normalize(s))
    s = _sch.StreamSchema('MyCoolSchema')
    self.assertEqual(s, _sch._normalize(s))

    self.assertEqual(_sch.CommonSchema.Python, _sch._normalize(object))
    _u = str
    self.assertEqual(_sch.CommonSchema.String, _sch._normalize(_u))
    import json
    self.assertEqual(_sch.CommonSchema.Json, _sch._normalize(json))

    self.assertIsInstance(_sch._normalize('tuple<int32 b>'), _sch.StreamSchema)
    self.assertIsInstance(_sch._normalize('MyCoolSchema'), _sch.StreamSchema)

    self.assertRaises(ValueError, _sch._normalize, False)

    import typing
    Employee = typing.NamedTuple('Employee', [('name', str), ('id', int)])
    nts = _sch._normalize(Employee)
    self.assertIsInstance(nts, _sch.StreamSchema)
    self.assertEqual('tuple<rstring name, int64 id>', nts._schema)

    AllSPLTypes = typing.NamedTuple('AllSPLTypes', [
        ('b', bool),
        ('i64', int),
        ('f64', float),
        ('c64', complex),
        ('d128', decimal.Decimal),
        ('s', _u),
        ('li64', typing.List[int]),
        ('lf64', typing.List[float]),
        ('mi64b', typing.Mapping[int, bool]),
        ('llf64', typing.List[typing.List[float]]),
        ('mi64li64', typing.Mapping[int, typing.List[int]]),
        ('sc64', typing.Set[complex]),
        ('sli64', typing.Set[typing.List[int]]),
        ('ts_spl', streamsx.spl.types.Timestamp),
        ('ts_dt', datetime.datetime),
        ('binary', bytes),
        ('oi64', typing.Optional[int]),
        ('of64', typing.Union[float, None]),
        ('ob', typing.Union[None, bool]),
    ])
    nts = _sch._normalize(AllSPLTypes)
    self.assertIsInstance(nts, _sch.StreamSchema)
    self.assertEqual('tuple<boolean b, int64 i64, float64 f64, complex64 c64, decimal128 d128, rstring s, list<int64> li64, list<float64> lf64, map<int64, boolean> mi64b, list<list<float64>> llf64, map<int64, list<int64>> mi64li64, set<complex64> sc64, set<list<int64>> sli64, timestamp ts_spl, timestamp ts_dt, blob binary, optional<int64> oi64, optional<float64> of64, optional<boolean> ob>', nts._schema)
    self.assertEqual('AllSPLTypes', nts.style.__name__)
    ont = nts.style
    self.assertEqual(ont._fields, AllSPLTypes._fields)
    for n in ont._field_types:
        if n == 'ts_dt':
            self.assertEqual(streamsx.spl.types.Timestamp, ont._field_types[n])
        else:
            self.assertEqual(ont._field_types[n], AllSPLTypes._field_types[n])
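
# The mapping exercised by test_normalize() is what lets a typing.NamedTuple
# be used directly as a schema. A minimal sketch, assuming the public
# Stream.map(..., schema=...) entry point; names and data are illustrative:
import typing
from streamsx.topology.topology import Topology

Employee = typing.NamedTuple('Employee', [('name', str), ('id', int)])

topo = Topology('Employees')
raw = topo.source([('alice', 1), ('bob', 2)])
# Passing the NamedTuple class normalizes it to
# tuple<rstring name, int64 id>, as asserted in the test above.
emps = raw.map(lambda t: Employee(*t), schema=Employee)
emps.print()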
def test_primitive_foreach(self):
    iterations = 3000
    topo = Topology()
    topo.checkpoint_period = timedelta(seconds=1)
    streamsx.spl.toolkit.add_toolkit(topo, stu._tk_dir('testtkpy'))
    timeCounter = op.Source(
        topo,
        "com.ibm.streamsx.topology.pytest.checkpoint::TimeCounter",
        schema.StreamSchema('tuple<int32 f>').as_tuple(),
        params={'iterations': iterations, 'period': 0.01})
    timeCounter.stream.set_consistent(
        ConsistentRegionConfig.periodic(5, drain_timeout=40, reset_timeout=40,
                                        max_consecutive_attempts=6))
    fizzbuzz = op.Map(
        "com.ibm.streamsx.topology.pytest.checkpoint::FizzBuzzPrimitive",
        timeCounter.stream,
        schema.StreamSchema('tuple<int32 f, rstring c>').as_tuple())
    verify = op.Sink("com.ibm.streamsx.topology.pytest.checkpoint::Verify",
                     fizzbuzz.stream)
    s = fizzbuzz.stream
    tester = Tester(topo)
    tester.resets()
    tester.tuple_count(s, iterations)
    tester.test(self.test_ctxtype, self.test_config)
def main():
    # Define needed variables
    COMMANDS_TOPIC = "streamsx/iot/device/commands/send"  # topic to publish commands to
    EVENTS_TOPIC = "streamsx/iot/device/events"  # topic to subscribe to for events
    incoming_schema = schema.StreamSchema(
        "tuple<rstring typeId, rstring deviceId, rstring eventId, rstring jsonString>")
    cmd_schema = schema.StreamSchema(
        'tuple<rstring typeId, rstring deviceId, rstring cmdId, rstring jsonString>')

    topo = Topology('ReadingsFromIot')

    # Subscribe to events
    events = topo.subscribe(EVENTS_TOPIC, incoming_schema)
    sensor_events = events.filter(lambda tuple: tuple["eventId"] == "sensors")
    readings = sensor_events.map(get_event_data)
    readings.print()

    # Send a command
    cmd_stream = sensor_events.map(get_cmd)
    # Convert the commands stream to an SPL structured schema
    commands_to_publish = cmd_stream.map(
        lambda x: (x["typeId"], x["deviceId"], x["cmdId"], x["jsonString"]),
        schema=cmd_schema)
    commands_to_publish.publish(COMMANDS_TOPIC, cmd_schema)
    commands_to_publish.print()

    result = submit_to_service(topo)
    print("Submitted job to the service, job id = " + str(result.job.id))
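
# main() above relies on a get_cmd() callable that is not shown in this
# snippet (get_event_data() appears in a later snippet). A minimal sketch of
# what it might look like; the "display" command id and the message payload
# are assumptions, not part of the original sample:
import json

def get_cmd(tuple_):
    # Hypothetical: build a command dict matching cmd_schema, echoing an
    # acknowledgement back to the device that produced the event.
    msg = {"d": {"msg": "Event received"}}
    return {"typeId": tuple_["typeId"],
            "deviceId": tuple_["deviceId"],
            "cmdId": "display",
            "jsonString": json.dumps(msg)}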
def test_source(self):
    topo = Topology()
    streamsx.spl.toolkit.add_toolkit(topo, stu._tk_dir('testtkpy'))
    bop = op.Source(
        topo,
        "com.ibm.streamsx.topology.pytest.checkpoint::TimeCounter",
        schema.StreamSchema('tuple<int32 f>').as_tuple(),
        params={'iterations': 30, 'period': 0.1})
    s = bop.stream
    s.set_consistent(
        ConsistentRegionConfig.operator_driven(drain_timeout=40,
                                               reset_timeout=40,
                                               max_consecutive_attempts=3))
    tester = Tester(topo)
    self.assertFalse(
        tester.test(self.test_ctxtype, self.test_config, assert_on_fail=False))
def test_filter_map(self):
    topo = Topology()
    topo.checkpoint_period = timedelta(seconds=1)
    streamsx.spl.toolkit.add_toolkit(topo, stu._tk_dir('testtkpy'))
    timeCounter = op.Source(
        topo,
        "com.ibm.streamsx.topology.pytest.checkpoint::TimeCounter",
        schema.StreamSchema('tuple<int32 f>').as_tuple(),
        params={'iterations': 30, 'period': 0.1})
    evenFilter = op.Map(
        "com.ibm.streamsx.topology.pytest.checkpoint::StatefulEvenFilter",
        timeCounter.stream, None, params={})
    hpo = op.Map(
        "com.ibm.streamsx.topology.pytest.checkpoint::StatefulHalfPlusOne",
        evenFilter.stream, None, params={})
    s = hpo.stream
    tester = Tester(topo)
    tester.tuple_count(s, 15)
    tester.contents(s, list(zip(range(1, 16))))
    tester.test(self.test_ctxtype, self.test_config)
def test_beacon(self):
    # An operator-driven consistent region can be used with a source
    # that supports it, such as Beacon.
    iterations = 5000
    topo = Topology()
    beacon = op.Source(
        topo, "spl.utility::Beacon",
        schema.StreamSchema('tuple<int32 f>').as_tuple(),
        params={'iterations': iterations, 'period': 0.01,
                'triggerCount': streamsx.spl.types.uint32(500)})
    beacon.f = beacon.output('(int32)IterationCount()')
    s = beacon.stream
    s.set_consistent(
        ConsistentRegionConfig.operator_driven(drain_timeout=40,
                                               reset_timeout=40,
                                               max_consecutive_attempts=4))
    tester = Tester(topo)
    # For operator-driven regions, the resetter uses a random interval
    # from 10-40 seconds for resets. Only one is likely to be completed
    # while processing tuples for this test.
    tester.resets(1)
    tester.tuple_count(s, iterations)
    tester.contents(s, list(zip(range(0, iterations))))
    tester.test(self.test_ctxtype, self.test_config)
def test_source(self):
    iterations = 3000
    topo = Topology()
    streamsx.spl.toolkit.add_toolkit(topo, stu._tk_dir('testtkpy'))
    bop = op.Source(
        topo,
        "com.ibm.streamsx.topology.pytest.checkpoint::TimeCounter",
        schema.StreamSchema('tuple<int32 f>').as_tuple(),
        params={'iterations': iterations, 'period': 0.01})
    s = bop.stream
    s.set_consistent(
        ConsistentRegionConfig.periodic(5, drain_timeout=40, reset_timeout=40,
                                        max_consecutive_attempts=6))
    tester = Tester(topo)
    tester.resets()
    tester.tuple_count(s, iterations)
    tester.contents(s, list(zip(range(0, iterations))))
    # job_config = streamsx.topology.context.JobConfig(tracing='debug')
    # job_config.add(self.test_config)
    tester.test(self.test_ctxtype, self.test_config)
def test_enter_exit(self):
    topo = Topology()
    streamsx.spl.toolkit.add_toolkit(topo, stu._tk_dir('testtkpy'))
    source = op.Source(
        topo,
        'com.ibm.streamsx.topology.pytest.checkpoint::EnterExitSource',
        schema.StreamSchema('tuple<rstring from, int32 enter, int32 exit>').as_tuple(),
        params={'period': 0.1})
    source.stream.set_consistent(
        ConsistentRegionConfig.periodic(5, drain_timeout=40, reset_timeout=40,
                                        max_consecutive_attempts=6))
    transit = op.Map(
        'com.ibm.streamsx.topology.pytest.checkpoint::EnterExitMap',
        source.stream,
        schema.StreamSchema('tuple<rstring from, int32 enter, int32 exit>').as_tuple())

    tester = Tester(topo)
    tester.resets(10)

    # On each operator, __enter__ and __exit__ should be called once for
    # each reset. Also __enter__ should be called at startup and __exit__
    # at shutdown. It is hard to verify the final __exit__ call (and that
    # is handled by python rather than our code), so the test is valid if
    # the number of __enter__ calls is one more than the number of resets,
    # and the number of __exit__ calls is equal to the number of resets.
    # The tuples on the two streams indicate the number of times __enter__
    # and __exit__ have been called.
    # We are looking for two specific tuples:
    # ('source', 6, 5) and ('transit', 6, 5)
    tester.eventual_result(
        source.stream,
        lambda tuple_: True if tuple_[1] >= 6 and tuple_[1] == tuple_[2] + 1
        else False if tuple_[1] != tuple_[2] + 1 else None)
    tester.eventual_result(
        transit.stream,
        lambda tuple_: True if tuple_[1] >= 6 and tuple_[1] == tuple_[2] + 1
        else False if tuple_[1] != tuple_[2] + 1 else None)
    job_config = streamsx.topology.context.JobConfig(tracing='debug')
    job_config.add(self.test_config)
    tester.test(self.test_ctxtype, self.test_config)
def main():
    local = sys.argv[1] == "local"

    # Define needed variables
    COMMANDS_TOPIC = "streamsx/iot/device/commands/send"  # topic to publish commands to
    EVENTS_TOPIC = "streamsx/iot/device/events"  # topic to subscribe to for events
    incoming_schema = schema.StreamSchema(
        "tuple<rstring typeId, rstring deviceId, rstring eventId, rstring jsonString>")
    cmd_schema = schema.StreamSchema(
        'tuple<rstring typeId, rstring deviceId, rstring cmdId, rstring jsonString>')

    topo = Topology('ReadingsFromIot')

    # Subscribe to events
    events = topo.subscribe(EVENTS_TOPIC, incoming_schema, "AllEventsAsJSON")
    sensor_events = events.filter(lambda tuple: tuple["eventId"] == "sensors",
                                  "SensorEventsAsJSON")
    readings = sensor_events.map(get_event_data, "ReadingsStream")
    readings.print()

    # Send a command
    cmd_stream = sensor_events.map(get_cmd, "CommandsAsJSON")
    # Convert the commands stream to an SPL structured schema
    commands_to_publish = cmd_stream.map(
        lambda x: (x["typeId"], x["deviceId"], x["cmdId"], x["jsonString"]),
        schema=cmd_schema,
        name="CommandsToPublish")
    commands_to_publish.publish(COMMANDS_TOPIC, cmd_schema)

    if local and len(sys.argv) > 2:
        username = sys.argv[2]
        password = sys.argv[3]
        result = submit_to_service(topo, local, username, password)
    else:
        result = submit_to_service(topo, local)
    print("Submitted job to the service, job id = " + str(result.job.id))
def test_class_source(self):
    count = 43
    topo = Topology()
    streamsx.spl.toolkit.add_toolkit(topo, '../testtkpy')
    bop = op.Source(
        topo,
        "com.ibm.streamsx.topology.pysamples.sources::Range",
        schema.StreamSchema('tuple<int64 c>').as_tuple(),
        params={'count': count})
    r = bop.stream
    self.tester = Tester(topo)
    self.tester.tuple_count(r, count)
    self.tester.contents(r, list(zip(range(count))))
    self.tester.test(self.test_ctxtype, self.test_config)
def test_named_schema(self):
    s = _sch.StreamSchema('tuple<int32 a, boolean alert>')
    nt1 = s._namedtuple()
    nt2 = s._namedtuple()
    # The named tuple class is created once and cached by the schema.
    self.assertIs(nt1, nt2)
    t = nt1(345, False)
    self.assertEqual(345, t.a)
    self.assertFalse(t.alert)
    self.assertEqual(345, t[0])
    self.assertFalse(t[1])
def test_fn_source(self):
    count = 37
    topo = Topology()
    streamsx.spl.toolkit.add_toolkit(topo, stu._tk_dir('testtkpy'))
    bop = op.Source(
        topo,
        "com.ibm.streamsx.topology.pysamples.sources::Range37",
        schema.StreamSchema('tuple<int64 c>').as_tuple())
    r = bop.stream
    self.tester = Tester(topo)
    self.tester.tuple_count(r, count)
    self.tester.contents(r, list(zip(range(count))))
    self.tester.test(self.test_ctxtype, self.test_config)
def test_map_foreach(self):
    iterations = 3000
    topo = Topology()
    streamsx.spl.toolkit.add_toolkit(topo, stu._tk_dir('testtkpy'))
    timeCounter = op.Source(
        topo,
        "com.ibm.streamsx.topology.pytest.checkpoint::TimeCounter",
        schema.StreamSchema('tuple<int32 f>').as_tuple(),
        params={'iterations': iterations, 'period': 0.01})
    timeCounter.stream.set_consistent(
        ConsistentRegionConfig.periodic(5, drain_timeout=40, reset_timeout=40,
                                        max_consecutive_attempts=6))
    fizzbuzz = op.Map(
        "com.ibm.streamsx.topology.pytest.checkpoint::FizzBuzzMap",
        timeCounter.stream,
        schema.StreamSchema('tuple<int32 f, rstring c>').as_tuple())
    verify = op.Sink("com.ibm.streamsx.topology.pytest.checkpoint::Verify",
                     fizzbuzz.stream)
    s = fizzbuzz.stream
    tester = Tester(topo)
    tester.resets()
    tester.tuple_count(s, iterations)
    # Find the expected results.
    fizz = lambda x: (x[0], x[1] + 'fizz' if x[0] % 3 == 0 else x[1])
    buzz = lambda x: (x[0], x[1] + 'buzz' if x[0] % 5 == 0 else x[1])
    expected = list(map(buzz, map(fizz, map(lambda x: (x, ''), range(iterations)))))
    tester.contents(s, expected)
    tester.test(self.test_ctxtype, self.test_config)
def test_primitive_foreach(self):
    topo = Topology()
    topo.checkpoint_period = timedelta(seconds=1)
    streamsx.spl.toolkit.add_toolkit(topo, stu._tk_dir('testtkpy'))
    timeCounter = op.Source(
        topo,
        "com.ibm.streamsx.topology.pytest.checkpoint::TimeCounter",
        schema.StreamSchema('tuple<int32 f>').as_tuple(),
        params={'iterations': 30, 'period': 0.1})
    fizzbuzz = op.Map(
        "com.ibm.streamsx.topology.pytest.checkpoint::FizzBuzzPrimitive",
        timeCounter.stream,
        schema.StreamSchema('tuple<int32 f, rstring c>').as_tuple())
    verify = op.Sink("com.ibm.streamsx.topology.pytest.checkpoint::Verify",
                     fizzbuzz.stream)
    s = fizzbuzz.stream
    tester = Tester(topo)
    tester.tuple_count(s, 30)
    tester.test(self.test_ctxtype, self.test_config)
def test_mt(self):
    topo = Topology()
    N = 1000
    streamsx.spl.toolkit.add_toolkit(topo, stu._tk_dir('testtkpy'))
    b1 = op.Source(topo, "spl.utility::Beacon",
                   schema.StreamSchema('tuple<int32 f>').as_tuple(),
                   params={'iterations': N})
    b1.f = b1.output('(int32)IterationCount()')
    b2 = op.Source(topo, "spl.utility::Beacon",
                   schema.StreamSchema('tuple<int32 f>').as_tuple(),
                   params={'iterations': N})
    b2.f = b2.output(str(N) + ' + (int32)IterationCount()')
    b3 = op.Source(topo, "spl.utility::Beacon",
                   schema.StreamSchema('tuple<int32 f>').as_tuple(),
                   params={'iterations': N})
    b3.f = b3.output(str(2 * N) + ' + (int32)IterationCount()')

    s1 = b1.stream.low_latency()
    s2 = b2.stream.low_latency()
    s3 = b3.stream.low_latency()

    s = s1.union({s2, s3})

    f = op.Map("com.ibm.streamsx.topology.pytest.mt::MTFilter", s)
    m = op.Map("com.ibm.streamsx.topology.pytest.mt::MTMap", f.stream)
    op.Sink("com.ibm.streamsx.topology.pytest.mt::MTForEach", f.stream)

    cr = m.stream.flat_map()

    tester = Tester(topo)
    tester.tuple_count(m.stream, 3 * N)
    tester.contents(cr, range(3 * N), ordered=False)
    tester.test(self.test_ctxtype, self.test_config)
def test_styles(self):
    s = _sch.StreamSchema('tuple<int32 a, boolean alert>')
    self.assertEqual(dict, s.style)

    st = s.as_tuple()
    self.assertIsNot(s, st)
    self.assertEqual(tuple, st.style)

    sd = s.as_dict()
    self.assertIs(s, sd)
    self.assertEqual(dict, sd.style)

    sd2 = st.as_dict()
    self.assertIsNot(st, sd2)
    self.assertEqual(dict, sd2.style)

    self.assertEqual(object, _sch.CommonSchema.Python.value.style)
    self.assertEqual(unicode if sys.version_info.major == 2 else str,
                     _sch.CommonSchema.String.value.style)
    self.assertEqual(dict, _sch.CommonSchema.Json.value.style)

    snt = s.as_tuple(named='Alert')
    self.assertIsNot(s, snt)
    self.assertTrue(issubclass(snt.style, tuple))
    self.assertTrue(hasattr(snt.style, '_fields'))
    self.assertTrue(hasattr(snt.style, '_splpy_namedtuple'))
    self.assertEqual('Alert', snt.style._splpy_namedtuple)
    tv = snt.style(23, True)
    self.assertEqual(23, tv[0])
    self.assertEqual(23, tv.a)
    self.assertTrue(tv[1])
    self.assertTrue(tv.alert)
    self.assertTrue(str(tv).startswith('Alert('))

    snt2 = s.as_tuple(named=True)
    self.assertIsNot(s, snt2)
    self.assertIsNot(snt, snt2)
    self.assertTrue(issubclass(snt2.style, tuple))
    self.assertTrue(hasattr(snt2.style, '_fields'))
    self.assertTrue(hasattr(snt2.style, '_splpy_namedtuple'))
    self.assertEqual('StreamTuple', snt2.style._splpy_namedtuple)
    tv = snt2.style(83, False)
    self.assertEqual(83, tv[0])
    self.assertEqual(83, tv.a)
    self.assertFalse(tv[1])
    self.assertFalse(tv.alert)
    self.assertTrue(str(tv).startswith('StreamTuple('))
def test_equality(self):
    s1 = _sch.StreamSchema('tuple<int32 a, int64 b>')
    s2 = _sch.StreamSchema('tuple<int32 a, int64 b>')
    sn = _sch.StreamSchema('tuple<int32 a, int32 b>')
    self.assertTrue(s1 == s2)
    self.assertFalse(s1 != s2)
    self.assertFalse(s1 == sn)
    self.assertTrue(s1 != sn)

    s1t = s1.as_tuple()
    s2t = s1.as_tuple()
    self.assertTrue(s1t == s2t)
    self.assertFalse(s1t != s2t)
    self.assertFalse(s1 == s1t)
    self.assertTrue(s1 != s1t)

    s1nt = s1.as_tuple(named=True)
    s2nt = s2.as_tuple(named=True)
    self.assertTrue(s1nt == s2nt)
    self.assertFalse(s1nt != s2nt)
    self.assertFalse(s1 == s1nt)
    self.assertFalse(s1t == s1nt)
def test_styles(self):
    s = _sch.StreamSchema('tuple<int32 a, boolean alert>')
    self.assertEqual(dict, s.style)

    st = s.as_tuple()
    self.assertIsNot(s, st)
    self.assertEqual(tuple, st.style)

    sd = s.as_dict()
    self.assertIs(s, sd)
    self.assertEqual(dict, sd.style)

    sd2 = st.as_dict()
    self.assertIsNot(st, sd2)
    self.assertEqual(dict, sd2.style)

    self.assertEqual(object, _sch.CommonSchema.Python.value.style)
    self.assertEqual(str, _sch.CommonSchema.String.value.style)
    self.assertEqual(dict, _sch.CommonSchema.Json.value.style)
def subscribe(self, topic):
    parms = self.config.copy()
    if parms.get('retain') is not None:
        del parms['retain']
    parms['topics'] = topic
    parms['topicOutAttrName'] = "topic"
    parms['dataAttributeName'] = "string"
    self.opCnt += 1
    if self.opCnt > 1:
        # each op requires its own clientID
        clientId = parms.get('clientID')
        if clientId is not None and len(clientId) > 0:
            parms['clientID'] = clientId + "-" + str(id(self)) + "-" + str(self.opCnt)
    op = self.topology.graph.addOperator(
        kind="com.ibm.streamsx.messaging.mqtt::MQTTSource")
    oport = op.addOutputPort(
        schema=schema.StreamSchema("tuple<rstring topic, rstring string>"))
    op.setParameters(parms)
    pop = self.topology.graph.addPassThruOperator()
    pop.addInputPort(outputPort=oport)
    pOport = pop.addOutputPort(schema=schema.CommonSchema.String)
    return Stream(self.topology, pOport)
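
# A minimal usage sketch for the subscribe() helper above. The wrapper class
# name MqttStreams is an assumption based on the method body (it must supply
# self.topology, self.config and self.opCnt); the broker settings are
# illustrative only:
topo = Topology('MqttRead')
config = {
    'serverURI': 'tcp://localhost:1883',  # hypothetical broker address
    'clientID': 'reader',
    'retain': None,  # stripped by subscribe() before the operator is configured
}
mqtt = MqttStreams(topo, config)    # hypothetical wrapper
msgs = mqtt.subscribe('sensors/#')  # returns a Stream of message payload strings
msgs.print()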
def test_source(self):
    topo = Topology()
    topo.checkpoint_period = timedelta(seconds=1)
    streamsx.spl.toolkit.add_toolkit(topo, stu._tk_dir('testtkpy'))
    bop = op.Source(
        topo,
        "com.ibm.streamsx.topology.pytest.checkpoint::TimeCounter",
        schema.StreamSchema('tuple<int32 f>').as_tuple(),
        params={'iterations': 30, 'period': 0.1})
    # streamsx.topology.context.submit('TOOLKIT', topo)
    s = bop.stream
    tester = Tester(topo)
    tester.tuple_count(s, 30)
    tester.contents(s, list(zip(range(0, 30))))
    tester.test(self.test_ctxtype, self.test_config)
def test_filter_map(self):
    iterations = 3000
    topo = Topology()
    streamsx.spl.toolkit.add_toolkit(topo, stu._tk_dir('testtkpy'))
    timeCounter = op.Source(
        topo,
        "com.ibm.streamsx.topology.pytest.checkpoint::TimeCounter",
        schema.StreamSchema('tuple<int32 f>').as_tuple(),
        params={'iterations': iterations, 'period': 0.01})
    timeCounter.stream.set_consistent(
        ConsistentRegionConfig.periodic(5, drain_timeout=40, reset_timeout=40,
                                        max_consecutive_attempts=6))
    evenFilter = op.Map(
        "com.ibm.streamsx.topology.pytest.checkpoint::StatefulEvenFilter",
        timeCounter.stream, None, params={})
    hpo = op.Map(
        "com.ibm.streamsx.topology.pytest.checkpoint::StatefulHalfPlusOne",
        evenFilter.stream, None, params={})
    s = hpo.stream
    tester = Tester(topo)
    tester.resets()
    tester.tuple_count(s, iterations // 2)
    tester.contents(s, list(zip(range(1, iterations // 2 + 1))))
    tester.test(self.test_ctxtype, self.test_config)
def parallel(self, width, routing=None, func=None):
    """
    Parallelizes the stream into `width` parallel channels. Tuples are routed
    to parallel channels such that an even distribution is maintained. Each
    parallel channel can be thought of as being assigned its own thread. As
    such, the parallelized stream functions are separate instances and operate
    independently from one another.

    parallel() will only parallelize the stream operations performed after the
    call to parallel() and before the call to end_parallel().

    Parallel regions aren't required to have an output stream, and thus may be
    used as sinks. In other words, a parallel sink is created by calling
    parallel() and creating a sink operation. It is not necessary to invoke
    end_parallel() on parallel sinks.

    Nested parallelism is not currently supported. A call to parallel() should
    never be made immediately after another call to parallel() without having
    an end_parallel() in between. Every call to end_parallel() must have a
    call to parallel() preceding it.

    Args:
        width (int): Degree of parallelism.
        routing (Routing): Denotes what type of tuple routing to use.
            ROUND_ROBIN delivers tuples in round robin fashion to downstream
            operators. HASH_PARTITIONED delivers to downstream operators based
            on the hash of the tuples being sent, or, if a function is
            provided, the function will be called to provide the hash.
        func: Optional function called when HASH_PARTITIONED routing is
            specified. The function provides an int32 value to be used as the
            hash that determines the tuple routing to downstream operators.

    Returns:
        Stream
    """
    if routing is None or routing == Routing.ROUND_ROBIN:
        iop = self.isolate()
        op2 = self.topology.graph.addOperator("$Parallel$")
        op2.addInputPort(outputPort=iop.oport)
        oport = op2.addOutputPort(width)
        return Stream(self.topology, oport)
    elif routing == Routing.HASH_PARTITIONED:
        if func is None:
            func = hash
        op = self.topology.graph.addOperator(
            "com.ibm.streamsx.topology.functional.python::PyFunctionHashAdder",
            func)
        hash_schema = self.oport.schema.extend(
            schema.StreamSchema("tuple<int32 __spl_hash>"))
        parentOp = op.addOutputPort(schema=hash_schema)
        op.addInputPort(outputPort=self.oport)
        iop = self.topology.graph.addOperator("$Isolate$")
        oport = iop.addOutputPort(schema=hash_schema)
        iop.addInputPort(outputPort=parentOp)
        op2 = self.topology.graph.addOperator("$Parallel$")
        op2.addInputPort(outputPort=oport)
        o2port = op2.addOutputPort(oWidth=width, schema=hash_schema,
                                   partitioned=True)
        # Use the Functor passthru operator to effectively remove the hash
        # attribute by removing it from the output port schema.
        hrop = self.topology.graph.addPassThruOperator()
        hrop.addInputPort(outputPort=o2port)
        hrOport = hrop.addOutputPort(schema=self.oport.schema)
        return Stream(self.topology, hrOport)
    else:
        raise TypeError("Invalid routing type supplied to the parallel operator")
def test_source(self): topo = Topology("test") topo.checkpoint_period = timedelta(seconds=1) streamsx.spl.toolkit.add_toolkit(topo, stu._tk_dir('testtkpy')) bop = op.Source(topo, "com.ibm.streamsx.topology.pytest.checkpoint::TimeCounter", schema.StreamSchema('tuple<int32 f>').as_tuple(), params={'iterations':30,'period':0.1}) s = bop.stream tester = Tester(topo) tester.tuple_count(s, 30) #tester.contents(s, range(0,30)) # why doesn't this work? tester.contents(s, list(zip(range(0,30)))) tester.test(self.test_ctxtype, self.test_config, always_collect_logs=True)
def get_event_data(tuple_):
    # Extract the device data ("d") from the event's JSON payload.
    payload_json = tuple_["jsonString"]
    payload = json.loads(payload_json)
    return payload["d"]


# Create linear regression object
#regr = linear_model.LinearRegression()
# Train the model using the training sets
#regr.fit(diabetes_X_train, diabetes_y_train)

# Define needed variables
COMMANDS_TOPIC = "streamsx/iot/device/commands/send"  # topic to publish commands to
#COMMANDS_TOPIC = "iot-2/type/SMARTBIN_PI_V2/id/SMARTBIN001/cmd/command/fmt/json"
#EVENTS_TOPIC = "iot-2/type/SMARTBIN_PI_V2/id/SMARTBIN001/evt/status/fmt/json"
EVENTS_TOPIC = "streamsx/iot/device/events"
incoming_schema = schema.StreamSchema(
    "tuple<rstring typeId, rstring deviceId, rstring eventId, rstring jsonString>")
cmd_schema = schema.StreamSchema(
    'tuple<rstring typeId, rstring deviceId, rstring cmdId, rstring jsonString>')
#cmd_schema = schema.StreamSchema('tuple<rstring d>')

# Topology object is the Streams application graph
topology = Topology('ReadingsFromIot')

# Subscribe to events
events = topology.subscribe(EVENTS_TOPIC, incoming_schema, "AllEventsAsJSON")
sensor_events = events.filter(lambda tuple: tuple["eventId"] == "status",
                              "SensorEventsAsJSON")
# sensor_events passes each tuple to the get_event_data() function.
# Use flat_map() to split data such as GPS coordinates into x and y, and
# likewise sensor data into sensor1, sensor2, ...
def parallel(self, width, routing=Routing.ROUND_ROBIN, func=None):
    """
    Parallelizes the stream into `width` parallel channels. Tuples are routed
    to parallel channels such that an even distribution is maintained. Each
    parallel channel can be thought of as being assigned its own thread. As
    such, the parallelized stream functions are separate instances and operate
    independently from one another.

    parallel() will only parallelize the stream operations performed after the
    call to parallel() and before the call to :py:meth:`~Stream.end_parallel`.

    Parallel regions aren't required to have an output stream, and thus may be
    used as sinks. In other words, a parallel sink is created by calling
    parallel() and creating a sink operation. It is not necessary to invoke
    end_parallel() on parallel sinks.

    Nested parallelism is not currently supported. A call to parallel() should
    never be made immediately after another call to parallel() without having
    an end_parallel() in between. Every call to end_parallel() must have a
    call to parallel() preceding it.

    Args:
        width (int): Degree of parallelism.
        routing (Routing): Denotes what type of tuple routing to use.
        func: Optional function called when :py:const:`Routing.HASH_PARTITIONED`
            routing is specified. The function provides an integer value to be
            used as the hash that determines the tuple channel routing.

    Returns:
        Stream: A stream for which subsequent transformations will be
        executed in parallel.
    """
    if routing is None or routing == Routing.ROUND_ROBIN:
        op2 = self.topology.graph.addOperator("$Parallel$")
        op2.addInputPort(outputPort=self.oport)
        oport = op2.addOutputPort(width)
        return Stream(self.topology, oport)
    elif routing == Routing.HASH_PARTITIONED:
        if func is None:
            if self.oport.schema == schema.CommonSchema.String:
                keys = ['string']
                parallel_input = self.oport
            elif self.oport.schema == schema.CommonSchema.Python:
                func = hash
            else:
                raise NotImplementedError(
                    "HASH_PARTITIONED for schema {0} requires a hash function."
                    .format(self.oport.schema))
        if func is not None:
            keys = ['__spl_hash']
            hash_adder = self.topology.graph.addOperator(
                self.topology.opnamespace + "::HashAdder", func)
            hash_schema = self.oport.schema.extend(
                schema.StreamSchema("tuple<int64 __spl_hash>"))
            hash_adder.addInputPort(outputPort=self.oport)
            parallel_input = hash_adder.addOutputPort(schema=hash_schema)
        parallel_op = self.topology.graph.addOperator("$Parallel$")
        parallel_op.addInputPort(outputPort=parallel_input)
        parallel_op_port = parallel_op.addOutputPort(
            oWidth=width, schema=parallel_input.schema, partitioned_keys=keys)
        if func is not None:
            # Use the Functor passthru operator to remove the hash attribute
            # by removing it from the output port schema.
            hrop = self.topology.graph.addPassThruOperator()
            hrop.addInputPort(outputPort=parallel_op_port)
            parallel_op_port = hrop.addOutputPort(schema=self.oport.schema)
        return Stream(self.topology, parallel_op_port)
    else:
        raise TypeError("Invalid routing type supplied to the parallel operator")
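
# A minimal usage sketch for parallel() above, assuming a stream of Python
# objects (CommonSchema.Python) so the built-in hash() drives the
# HASH_PARTITIONED routing; the names and data here are illustrative only:
from streamsx.topology.topology import Topology, Routing

topo = Topology('ParallelWords')
words = topo.source(['a', 'b', 'c', 'a', 'b', 'a'])
# Tuples with equal hashes always land on the same of the 3 channels.
pairs = (words
         .parallel(3, routing=Routing.HASH_PARTITIONED)
         .map(lambda w: (w, 1))
         .end_parallel())
pairs.print()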
def test_bounded_schema(self):
    s = _sch.StreamSchema('tuple<rstring[1] a, boolean alert>')
    s = _sch.StreamSchema('tuple<map<int32,rstring>[8] a>')
    s = _sch.StreamSchema('tuple<list<int32>[100] a>')
    s = _sch.StreamSchema('tuple<set<list<int32>[9]>[100] a>')