def _test_submit_sab(self):
        topo = Topology('SabTest', namespace='mynamespace')
        s = topo.source([1, 2])
        es = s.for_each(lambda x: None)
        bb = streamsx.topology.context.submit('BUNDLE', topo, {})
        self.assertIn('bundlePath', bb)
        self.assertIn('jobConfigPath', bb)

        sas = self.sc.get_streaming_analytics()

        sr = sas.submit_job(bundle=bb['bundlePath'])
        job_id = sr.get('id', sr.get('jobId'))
        self.assertIsNotNone(job_id)
        self.assertIn('name', sr)
        self.assertIn('application', sr)
        self.assertEqual('mynamespace::SabTest', sr['application'])
        cr = sas.cancel_job(job_id=job_id)

        jn = 'SABTEST:' + str(time.time())
        jc = streamsx.topology.context.JobConfig(job_name=jn)
        sr = sas.submit_job(bundle=bb['bundlePath'], job_config=jc)
        job_id = sr.get('id', sr.get('jobId'))
        self.assertIsNotNone(job_id)
        self.assertIn('application', sr)
        self.assertEqual('mynamespace::SabTest', sr['application'])
        self.assertIn('name', sr)
        self.assertEqual(jn, sr['name'])
        cr = sas.cancel_job(job_id=job_id)
       
        os.remove(bb['bundlePath'])
        os.remove(bb['jobConfigPath'])
Example 2
    def test_schemas_bad(self):
        topo = Topology()
        pyObjStream = topo.source(['Hello', 'World!'])
        binStream = pyObjStream.map(func=lambda s: bytes("ABC", 'utf-8'),
                                    schema=CommonSchema.Binary)
        xmlStream = pyObjStream.map(schema=CommonSchema.XML)
        binMsgMetaStream = pyObjStream.map(func=lambda s: {
            'message': bytes(s, 'utf-8'),
            'key': s
        },
                                           schema=MsgSchema.BinaryMessageMeta)
        strMsgMetaStream = pyObjStream.map(func=lambda s: {
            'message': s,
            'key': s
        },
                                           schema=MsgSchema.StringMessageMeta)
        otherSplTupleStream1 = pyObjStream.map(
            schema=StreamSchema('tuple<int32 a>'))
        otherSplTupleStream2 = pyObjStream.map(schema='tuple<int32 a>')

        self.assertRaises(TypeError, evstr.publish, pyObjStream, "Topic")
        self.assertRaises(TypeError, evstr.publish, binStream, "Topic")
        self.assertRaises(TypeError, evstr.publish, xmlStream, "Topic")
        self.assertRaises(TypeError, evstr.publish, binMsgMetaStream, "Topic")
        self.assertRaises(TypeError, evstr.publish, strMsgMetaStream, "Topic")
        self.assertRaises(TypeError, evstr.publish, otherSplTupleStream1,
                          "Topic")
        self.assertRaises(TypeError, evstr.publish, otherSplTupleStream2,
                          "Topic")
    def test_batch_aggregate(self):
        topo = Topology()
        s = topo.source(U.Sequence(iterations=122))

        w = s.batch(size=10)

        a = R.Aggregate.invoke(w, ARSCHEMA)
        a.acount = a.count()
        a.acount_all = a.count_all()
        a.amax = a.max('seq')

        r = a.stream

        tester = Tester(topo)
        # Mimic the aggregate processing
        expected = []
        for i in range(0, 120, 10):
            expected.append({
                'acount': 10,
                'acount_all': 10,
                'amax': i + 10 - 1
            })
        tester.contents(r, expected)
        tester.tuple_count(r, 12)
        tester.test(self.test_ctxtype, self.test_config)
Example 4
    def test_close_on_tuples(self):
        ae_service_creds_file = os.environ['ANALYTICS_ENGINE']
        with open(ae_service_creds_file) as data_file:
            credentials = json.load(data_file)

        topo = Topology('test_hdfs_uri')

        if self.hdfs_toolkit_location is not None:
            tk.add_toolkit(topo, self.hdfs_toolkit_location)

        s = topo.source([
            'Hello World!', 'Hello', 'World', 'Hello World!', 'Hello', 'World'
        ]).as_string()
        result = hdfs.write(s,
                            credentials=credentials,
                            file='pytest/write_test%FILENUM.txt',
                            tuples_per_file=3)
        result.print()

        tester = Tester(topo)
        tester.tuple_count(result, 2, exact=True)
        #tester.run_for(60)

        cfg = {}
        job_config = streamsx.topology.context.JobConfig(tracing='info')
        job_config.add(cfg)
        cfg[streamsx.topology.context.ConfigParams.SSL_VERIFY] = False

        # Run the test
        tester.test(self.test_ctxtype, cfg, always_collect_logs=True)
def main():
    """
    Sample transform application.  This Python application builds a topology that
    * transforms a stream of string tuples from a source operator to a stream of integer tuples 
    * uses `map` to perform addition on the integer tuples
    * prints the stream to stdout
    * submits the topology in standalone mode (compiles and executes it as a standalone application)
    
    Example:
        > python3 transform_sample.py
    Output:
        342
        474
        9342
    """

    # create the container for the topology that will hold the streams
    topo = Topology("transform_sample")

    # declare a source stream (`source`) that contains string tuples
    source = topo.source(transform_sample_functions.int_strings_transform)

    # transform the stream of string tuples (`source`) to a stream of integer tuples (`i1`)
    i1 = source.map(transform_sample_functions.string_to_int)

    # adds 17 to each integer tuple
    i2 = i1.map(transform_sample_functions.AddNum(17))

    # terminate the stream by printing each tuple to stdout
    i2.print()

    # execute the application in standalone mode
    streamsx.topology.context.submit("STANDALONE", topo)
    def test_fn(self):
        topo = Topology()

        s = fn_ecruos(topo)
        self._csl_stream(s, 'source', 'fn_ecruos')

        s = fn_retlif(s)
        self._csl_stream(s, 'filter', 'fn_retlif')

        s = fn_pam(s)
        self._csl_stream(s, 'map', 'fn_pam')

        s = fn_pam_talf(s)
        self._csl_stream(s, 'flat_map', 'fn_pam_talf')
        
        s = fn_gnirts_sa(s)
        self._csl_stream(s, 'as_string', 'fn_gnirts_sa')

        s = fn_nosj_sa(s)
        self._csl_stream(s, 'as_json', 'fn_nosj_sa')

        st = fn_ebircsbus(topo)
        self._csl_stream(st, 'subscribe', 'fn_ebircsbus')

        e = fn_hcae_rof(s)
        self._csl_sink(e, 'for_each', 'fn_hcae_rof')

        e = fn_hsilbup(s)
        self._csl_sink(e, 'publish', 'fn_hsilbup')

        e = fn_hsilbup(topo.source([]), schema=CommonSchema.Json)
        self._csl_sink(e, 'publish', 'fn_hsilbup')

        e = fn_tnirp(s)
        self._csl_sink(e, 'print', 'fn_tnirp')
Example 7
def main():
    #define needed variables
    COMMANDS_TOPIC = "streamsx/iot/device/commands/send"  #topic to publish commands to
    EVENTS_TOPIC = "streamsx/iot/device/events"  #topic to subscribe to for events
    incoming_schema = schema.StreamSchema(
        "tuple <rstring typeId, rstring deviceId, rstring eventId,rstring jsonString>"
    )
    cmd_schema = schema.StreamSchema(
        'tuple<rstring typeId, rstring deviceId, rstring cmdId, rstring jsonString>'
    )

    topo = Topology('ReadingsFromIot')

    #Subscribe to  events
    events = topo.subscribe(EVENTS_TOPIC, incoming_schema)
    sensor_events = events.filter(lambda tuple: tuple["eventId"] == "sensors")
    readings = sensor_events.map(get_event_data)
    readings.print()

    #send a command
    cmd_stream = sensor_events.map(get_cmd)
    #convert the commands stream to a SPL structured schema
    commands_to_publish = cmd_stream.map(lambda x: (
        x["typeId"],
        x["deviceId"],
        x["cmdId"],
        x["jsonString"],
    ),
                                         schema=cmd_schema)

    commands_to_publish.publish(COMMANDS_TOPIC, cmd_schema)
    commands_to_publish.print()
    result = submit_to_service(topo)
    print("Submitted job to the service, job id = " + str(result.job.id))
Example 8
def main():
   local = sys.argv[1] == "local"


   #define needed variables
   COMMANDS_TOPIC = "streamsx/iot/device/commands/send" #topic to publish commands to
   EVENTS_TOPIC = "streamsx/iot/device/events" #topic to subscribe to for events
   incoming_schema =  schema.StreamSchema("tuple <rstring typeId, rstring deviceId, rstring eventId,rstring jsonString>")
   cmd_schema = schema.StreamSchema('tuple<rstring typeId, rstring deviceId, rstring cmdId, rstring jsonString>')


   topo = Topology('ReadingsFromIot')

   #Subscribe to  events
   events = topo.subscribe(EVENTS_TOPIC, incoming_schema, "AllEventsAsJSON")
   sensor_events = events.filter(lambda tuple: tuple["eventId"] == "sensors", "SensorEventsAsJSON")
   readings = sensor_events.map(get_event_data, "ReadingsStream")
   readings.print()

   #send a command
   cmd_stream = sensor_events.map(get_cmd, "CommandsAsJSON")
   #convert the commands stream to a SPL structured schema
   commands_to_publish = cmd_stream.map(lambda x: (x["typeId"], x["deviceId"], x["cmdId"], x["jsonString"],), schema=cmd_schema, name="CommandsToPublish")

   commands_to_publish.publish(COMMANDS_TOPIC, cmd_schema)

   if local and len(sys.argv) > 2:
      username = sys.argv[2]
      password = sys.argv[3]
      result = submit_to_service(topo, local, username, password)
   else:
      result = submit_to_service(topo, local)

   print("Submitted job to the service, job id = " + str(result.job.id))
Example 9
def main():
    """
    Finds outliers from a sequence of floats (e.g. simulating a sensor reading).
    Demonstrates function logic that maintains state across tuples.
    
    Example:
        python3 find_outliers.py
    Example Output:
        2.753064082105016
        -2.210758753960355
        1.9847958795117937
        2.661689193901883
        2.468061723082693
        ...
    """
    topo = Topology("find_outliers")

    # Produce a stream of random float values with a normal
    # distribution, mean 0.0 and standard deviation 1.
    values = topo.source(find_outliers_functions.readings)

    # Filters the values based on calculating the mean and standard
    # deviation from the incoming data. In this case only outliers are
    # present in the output stream outliers. An outlier is defined as
    # more than (threshold * standard deviation) from the mean.  The
    # threshold in this example is 2.0.
    # This demonstrates a functional logic class that is
    # stateful. The threshold, sum_x, and sum_x_squared maintain
    # their values across multiple invocations.
    outliers = values.filter(find_outliers_functions.IsOutlier(2.0))

    outliers.print()

    streamsx.topology.context.submit("STANDALONE", topo.graph)
Example 10
    def test_fetch_logs_on_failure(self):
        topo = Topology("fetch_logs_on_failure")
        s = topo.source(["foo"])

        tester = Tester(topo)
        # Causes test to fail
        tester.contents(s, ["bar"])

        try:
            self.tester = tester
            tester.local_check = self._can_retrieve_logs
            tester.test(self.test_ctxtype, self.test_config)
        except AssertionError:
            # This test is expected to fail, do nothing.
            pass

        # Check if logs were downloaded
        if self.can_retrieve_logs:
            logs = tester.result['application_logs']
            exists = os.path.isfile(logs)

            self.assertTrue(
                exists, "Application logs were not downloaded on test failure")

            if exists:
                os.remove(logs)
Example 11
def main():
    """
    Sample filtering echo topology application. This Python application builds a
    simple topology that echoes its command line arguments to standard output.

    This demonstrates use of Python functional logic to filter the tuples.
    A user-defined function implements the filtering logic, in this
    case echoing only tuples that start with the letter `d`.

    Args:
        a list of values
        
    Example:
        python3 filter_echo.py cat dog mouse door
    Output:
        dog
        door
    """

    topo = Topology("filter_echo")
    source = topo.source(sys.argv[1:])

    # Declare a stream that will execute functional logic
    # against tuples on the echo stream.
    # For each tuple that will appear on echo, the
    # lambda function will be called, passing the tuple.
    # If it returns True then the tuple will appear on the filtered
    # stream, otherwise the tuple is discarded.
    filtered = source.filter(lambda tuple: tuple.startswith("d"))

    filtered.print()

    streamsx.topology.context.submit("STANDALONE", topo)
Example 12
def main():
    t = Topology("FFT_Sample")
    readings = t.source(signal_generator.Readings(50)).transform(TumblingWindow(10))
    fftStream = readings.transform(fftpack.fft)
    fftStream.sink(print)

    streamsx.topology.context.submit("STANDALONE", t.graph)
Example 13
def main():
    """
    Sample echo topology application. This Python application builds a
    simple topology that echoes its command line arguments to standard output.

    The application implements the typical pattern
    of code that declares a topology followed by
    submission of the topology to a Streams context.
    
    Args:
        a list of values to print to stdout
        
    Example:
        python3 echo.py hello1 hello2 hello3
    Output:
        hello1
        hello2
        hello3
    """

    topo = Topology("echo")
    # The command line arguments (sys.argv) are captured by the SysArgv
    # callable class and will be used at runtime as the contents of the
    # echo stream.
    echo = topo.source(echo_functions.SysArgv(sys.argv[1:]))

    # print the echo stream to stdout
    echo.print()

    # At this point the topology is declared with a single
    # stream that is printed to stdout

    # execute the topology by submitting to a standalone context
    streamsx.topology.context.submit("STANDALONE", topo.graph)
Example 15
    def test_MQTTSink_schemas_bad(self):
        topo = Topology()
        pyObjStream = topo.source(['Hello', 'World!'])
        self.assertRaises(TypeError, pyObjStream.for_each, MQTTSink(server_uri='tcp://server:1833', topic='t1'))

        xmlStream = pyObjStream.map(schema=CommonSchema.XML)
        self.assertRaises(TypeError, xmlStream.for_each, MQTTSink(server_uri='tcp://server:1833', topic='t1'))
Example 16
def main():
    """
    Sample Hello World topology application. This Python application builds a
    simple topology that prints Hello World to standard output.

    The application implements the typical pattern
    of code that declares a topology followed by
    submission of the topology to a Streams context.
    
    This demonstrates the mechanics of declaring a topology and executing it.
            
    Example:
        python3 hello_world.py
    Output:
        Hello
        World!
    """
    
    # Create the container for the topology that will hold the streams of tuples.
    topo = Topology("hello_world")
    
    # Declare a source stream (hw) with string tuples containing two tuples,
    # "Hello" and "World!".
    hw = topo.source(hello_world_functions.source_tuples)
    
    # Sink hw by printing each of its tuples to standard output
    hw.print()
    
    # At this point the topology is declared with a single
    # stream that is printed to standard output
    
    # Now execute the topology by submitting to a standalone context.
    streamsx.topology.context.submit("STANDALONE", topo.graph)
Example 17
    def test_dir_scan(self):
        topo = Topology()
        script_dir = os.path.dirname(os.path.realpath(__file__))
        sample_file = os.path.join(script_dir, 'data.csv')
        topo.add_file_dependency(sample_file,
                                 'etc')  # add sample file to etc dir in bundle
        fn = os.path.join('etc',
                          'data.csv')  # file name relative to application dir
        dir = streamsx.spl.op.Expression.expression('getApplicationDir()+"' +
                                                    '/etc"')
        scanned = topo.source(
            files.DirectoryScan(directory=dir, pattern=r'.*\.csv$'))
        r = scanned.map(
            files.CSVFilesReader(file_name='filename'),
            schema=StreamSchema('tuple<rstring a, int32 b, rstring filename>'))
        r.print()

        #result = streamsx.topology.context.submit("TOOLKIT", topo.graph) # creates tk* directory
        #print('(TOOLKIT):' + str(result))
        #assert(result.return_code == 0)
        result = streamsx.topology.context.submit(
            "BUNDLE", topo.graph)  # creates sab file
        assert (result.return_code == 0)
        os.remove(result.bundlePath)
        os.remove(result.jobConfigPath)
Example 18
    def test_MQTTSink_schemas(self):
        topo = Topology()
        pyObjStream = topo.source(['Hello', 'World!'])
        
        jsonStream = pyObjStream.as_json()
        # for_each() calls our populate()
        s = MQTTSink(server_uri='tcp://server:1833', topic='t1', data_attribute_name='ignored')
        jsonStream.for_each(s)
        self.assertEqual(s._op.params['dataAttributeName'], 'jsonString')

        stringStream = pyObjStream.as_string()
        s = MQTTSink(server_uri='tcp://server:1833', topic='t1', data_attribute_name='ignored')
        stringStream.for_each(s)
        self.assertEqual(s._op.params['dataAttributeName'], 'string')

        binStream = pyObjStream.map(func=lambda s: bytes(s, 'utf-8'), schema=CommonSchema.Binary)
        s = MQTTSink(server_uri='tcp://server:1833', topic='t1', data_attribute_name='ignored')
        binStream.for_each(s)
        self.assertEqual(s._op.params['dataAttributeName'], 'binary')
        
        userMsgStream = pyObjStream.map(func=lambda s: {'data':s, 'topic_name':'t1'}, schema=MqttDataTuple)
        s = MQTTSink(server_uri='tcp://server:1833', topic='t1')
        userMsgStream.for_each(s)
        self.assertNotIn('dataAttributeName', s._op.params)

        s = MQTTSink(server_uri='tcp://server:1833', topic='t1', data_attribute_name='data')
        userMsgStream.for_each(s)
        self.assertEqual(s._op.params['dataAttributeName'], 'data')
        
        splMsgStream = pyObjStream.map(func=lambda s: {'m':s, 'k':s}, schema='tuple<rstring m, int64 k>')
        s = MQTTSink(server_uri='tcp://server:1833', topic='t1', data_attribute_name='m')
        splMsgStream.for_each(s)
        self.assertEqual(s._op.params['dataAttributeName'], 'm')
Example 19
    def test_score_with_feed_on_second_input_port(self):
        print('\n---------' + str(self))
        name = 'test_score_with_feed_on_second_input_port'
        topo = Topology(name)
        streamsx.spl.toolkit.add_toolkit(topo, self.pmml_toolkit_home)

        credentials = self._get_credentials()
        models = pmml.model_feed(topo,
                                 connection_configuration=credentials,
                                 model_name="sample_pmml",
                                 polling_period=datetime.timedelta(minutes=5))
        # sample with a single model predictor field
        s = topo.source(['first tuple', 'second tuple']).as_string()
        out_schema = StreamSchema('tuple<rstring string, rstring result>')
        res = pmml.score(
            s,
            schema=out_schema,
            model_input_attribute_mapping='p=string',
            model_stream=models,
            raw_result_attribute_name='result',
            initial_model_provisioning_timeout=datetime.timedelta(minutes=1))
        res.print()

        if (("TestDistributed" in str(self))
                or ("TestStreamingAnalytics" in str(self))):
            self._launch(topo)
        else:
            # build only
            self._build_only(name, topo)
def main():
    """
    The 'Estimator' model accepts a tuple with these elements: (type, X, y), where:
       'type':  't' (for training), 'd' (for data), '' (empty string, same as 'd')
       'X':     is the data
       'y':     is the actual class of the data (only used to train the model)
    """
    training_size = 100
    num_centers = 2
    num_features = 2

    t = Topology("Estimator_Sample")
    trainingStream = t.source(
        sklearn_sources.Blobs(iterations=training_size,
                              isTraining=True,
                              centers=num_centers,
                              n_features=num_features))
    dataStream = t.source(
        sklearn_sources.Blobs(centers=num_centers, n_features=num_features))
    combinedStreams = trainingStream.union({dataStream})
    predictionStream = combinedStreams.transform(
        Estimator(training_size, KNeighborsClassifier()))
    predictionStream.sink(print)

    streamsx.topology.context.submit("STANDALONE", t.graph)
    def test_endpoint_source(self):
        topo = Topology("test_endpoint_source")

        service_documentation={'title': 'streamsx-sample-endpoint-sources', 'description': '2 sources'}

        documentation = dict()
        documentation['summary'] = 'Test endpoint source'
        documentation['tags'] = ['Input', 'STREAMS']
        documentation['description'] = 'CPD job endpoint injects some data'
        doc_attr = dict()
        descr = {'x': {'description': 'IDENTIFIER'}}
        doc_attr.update(descr)
        descr = {'n': {'description': 'NUMBER'}}
        doc_attr.update(descr)
        documentation['attributeDescriptions'] = doc_attr

        schema = 'tuple<rstring x, int64 n>'
        s = topo.source(EndpointSource(schema=schema, buffer_size=20000, service_documentation=service_documentation, endpoint_documentation=documentation), name='cpd_endpoint_src')
        s.print()

        documentation['summary'] = 'Test endpoint source JSON'
        s = topo.source(EndpointSource(schema=CommonSchema.Json, service_documentation=service_documentation, endpoint_documentation=documentation), name='cpd_endpoint_src_json')
        s.print()

        tester = Tester(topo)
        tester.run_for(10)
        tester.test(self.test_ctxtype, self.test_config)
Example 25
    def test_compile_MQTTSource(self):
        print('\n---------' + str(self))
        name = 'test_MQTTSource'
        topo = Topology(name)
        streamsx.spl.toolkit.add_toolkit(topo, self.mqtt_toolkit_home)
        src = MQTTSource(server_uri='tcp://server:1833', topics=['topic1', 'topic2'], schema=MqttDataTuple)
        # simply add all parameters; let's see if it compiles
        src.qos = [1, 2]
        src.message_queue_size = 122
        src.client_id = "client-IDsrc"
        src.reconnection_bound = 25
        src.trusted_certs = [TRUSTED_CERT_PEM, CLIENT_CA_CERT_PEM]
        src.client_cert = CLIENT_CERT_PEM
        src.client_private_key = PRIVATE_KEY_PEM
        src.ssl_protocol = 'TLSv1.1'
        src.vm_arg = ["-Xmx13G"]
        src.ssl_debug = True
        src.app_config_name = "abbconf2"
        src.command_timeout_millis = 30000
        src.keep_alive_seconds = 65
        src.password = "******"
        src.username = "******"
        src.app_config_name = "mqtt_app_cfg"

        source_stream = topo.source(src, name='MqttStream')
        source_stream.print()
        # build only
        self._build_only(name, topo)
Example 26
    def test_source(self):
        topo = Topology()

        s = topo.source(s_none)
        self.assertEqual(CommonSchema.Python, s.oport.schema)

        s = topo.source(s_int)
        self.assertEqual(CommonSchema.Python, s.oport.schema)

        s = topo.source(s_str)
        self.assertEqual(CommonSchema.String, s.oport.schema)

        s = topo.source(s_any)
        self.assertEqual(CommonSchema.Python, s.oport.schema)

        s = topo.source(s_sensor)
        self.assertEqual(_normalize(SensorReading), s.oport.schema)

        s = topo.source(s_str_it)
        self.assertEqual(CommonSchema.String, s.oport.schema)

        s = topo.source(s_p)
        self.assertEqual(CommonSchema.Python, s.oport.schema)

        s = topo.source(s_s)
        self.assertEqual(CommonSchema.Python, s.oport.schema)
Example 27
def main():
    """
    Sample Hello World topology application. This Python application builds a
    simple topology that prints Hello World to standard output.

    The application implements the typical pattern
    of code that declares a topology followed by
    submission of the topology to a Streams context.
    
    This demonstrates the mechanics of declaring a topology and executing it.
            
    Example:
        python3 hello_world.py
    Output:
        Hello
        World!
    """

    # Create the container for the topology that will hold the streams of tuples.
    topo = Topology("hello_world")

    # Declare a source stream (hw) with string tuples containing two tuples,
    # "Hello" and "World!".
    hw = topo.source(["Hello", "World!"])

    # Sink hw by printing each of its tuples to standard output
    hw.print()

    # At this point the topology is declared with a single
    # stream that is printed to standard output

    # Now execute the topology by submitting to a standalone context.
    streamsx.topology.context.submit("STANDALONE", topo)
Example 28
def main():
    """
    Sample filtering echo topology application. This Python application builds a
    simple topology that echoes its command line arguments to standard output.

    This demonstrates use of Python functional logic to filter the tuples.
    A user-defined function implements the filtering logic, in this
    case echoing only tuples that start with the letter `d`.

    Args:
        a list of values
        
    Example:
        python3 filter_echo.py cat dog mouse door
    Output:
        dog
        door
    """
    
    topo = Topology("filter_echo")
    source = topo.source(filter_echo_functions.SysArgv(sys.argv[1:]))
    
    # Declare a stream that will execute functional logic
    # against tuples on the echo stream.
    # For each tuple that will appear on echo, the below
    # `starts_with_d` method will be called.  If it returns
    # True then the tuple will appear on the filtered
    # stream, otherwise the tuple is discarded.
    filtered = source.filter(filter_echo_functions.starts_with_d)
    
    filtered.print()
    
    streamsx.topology.context.submit("STANDALONE", topo.graph)
Example 29
    def test_scikit_learn(self):
        """Verify basic scikit-learn tutorial code works as a stream."""
        digits = datasets.load_digits()
        clf = svm.SVC(gamma=0.001, C=100.)
        clf.fit(digits.data[:-10], digits.target[:-10])

        expected = []
        for i in digits.data[-10:]:
            d = clf.predict(i.reshape(1, -1))
            expected.append(d[0])

        topo = Topology()

        topo.add_pip_package('scikit-learn')
        topo.exclude_packages.add('sklearn')

        images = topo.source(digits.data[-10:], name='Images')
        images_digits = images.map(
            lambda image: clf.predict(image.reshape(1, -1))[0],
            name='Predict Digit')

        tester = Tester(topo)
        tester.contents(images_digits, expected)
        tester.tuple_count(images_digits, 10)
        tester.test(self.test_ctxtype, self.test_config)
Example 31
    def test_maintain_hints(self):
        topo = Topology()
        s = topo.source(s_str)
        s.map(m_str)
        self.assertRaises(TypeError, s.map, m_sensor)

        d = s.autonomous()
        d.map(m_str)
        self.assertRaises(TypeError, d.map, m_sensor)

        d = s.low_latency()
        d.map(m_str)
        self.assertRaises(TypeError, d.map, m_sensor)

        d = d.end_low_latency()
        d.map(m_str)
        self.assertRaises(TypeError, d.map, m_sensor)

        p = s.parallel(width=3)
        t = p.map(m_str).as_string()
        self.assertRaises(TypeError, p.map, m_sensor)

        e = t.end_parallel()
        e.map(m_str)
        self.assertRaises(TypeError, e.map, m_sensor)
Example 32
    def test_image_name_image_tag(self):
        topo = Topology("test_image_name_image_tag")
        heartbeat = topo.source(lambda: itertools.count())
        heartbeat.print()

        image_name = 'py-tst'
        image_tag = 'v1.0'
        cfg = {ConfigParams.SSL_VERIFY: False}
        jc = JobConfig()
        jc.raw_overlay = {
            'edgeConfig': {
                'imageName': image_name,
                'imageTag': image_tag,
                'pipPackages': ['pandas', 'numpy'],
                'rpms': ['atlas-devel']
            }
        }
        jc.add(cfg)
        try:
            submission_result = submit(ContextTypes.EDGE, topo.graph, cfg)
            print(str(submission_result))
            self.assertTrue(submission_result is not None)
            self.assertTrue(self._is_not_blank(submission_result.image))
            self.assertTrue(self._is_not_blank(submission_result.imageDigest))
            self.assertTrue(image_name in submission_result.image)
            self.assertTrue(image_tag in submission_result.image)
        except RuntimeError as e:
            print(str(e))
            self.skipTest("Skip test, CPD does not support EDGE.")
    def test_endpoint_sink(self):
        topo = Topology("test_endpoint_sink")
        stream1 = topo.source(lambda : itertools.count()).as_string()

        endpoint_documentation = dict()
        endpoint_documentation['summary'] = 'Sample endpoint sink'
        endpoint_documentation['tags'] = ['Output']
        endpoint_documentation['description'] = 'Streams job endpoint emits some data with random numbers'

        doc_attr = dict()
        descr = {'string': {'description': 'number incremented by one'}}
        doc_attr.update(descr)
        endpoint_documentation['attributeDescriptions'] = doc_attr

        service_documentation={'title': 'streamsx-sample-endpoint-sink', 'description': 'NUMBER GENERATOR', 'version': '0.1.0', 'externalDocsUrl': 'https://mycompany.com/numgen/doc', 'externalDocsDescription': 'Number generator documentation'}

        tags = dict()
        tag1 = {'Output': {'description': 'Output tag description', 'externalDocs': {'url': 'https://mycompany.com/numgen/input/doc', 'description': 'Output tag external doc description'}}}
        tags.update(tag1)
        service_documentation['tags'] = tags

        stream1.for_each(EndpointSink(buffer_size=50000, endpoint_documentation=endpoint_documentation, service_documentation=service_documentation), name='cpd_endpoint_sink')

        tester = Tester(topo)
        tester.tuple_count(stream1, 10, exact=False)
        tester.run_for(10)
        tester.test(self.test_ctxtype, self.test_config)
def main():
    """
    Sample transform application.  This Python application builds a topology that
    * transforms a stream of string tuples from a source operator to a stream of integer tuples 
    * uses `transform` to perform addition on the integer tuples
    * prints the stream to stdout
    * submits the topology in standalone mode (compiles and executes it as a standalone application)
    
    Example:
        > python3 transform_sample.py
    Output:
        342
        474
        9342
    """
    
    # create the container for the topology that will hold the streams
    topo = Topology("transform_sample")
    
    # declare a source stream (`source`) that contains string tuples
    source = topo.source(transform_sample_functions.int_strings_transform)
    
    # transform the stream of string tuples (`source`) to a stream of integer tuples (`i1`)
    i1 = source.transform(transform_sample_functions.string_to_int)
    
    # adds 17 to each integer tuple 
    i2 = i1.transform(transform_sample_functions.AddNum(17))
    
    # terminate the stream by printing each tuple to stdout
    i2.print()
    
    # execute the application in standalone mode
    streamsx.topology.context.submit("STANDALONE", topo.graph)
Example 35
    def test_to_avro_params(self):
        topo = Topology()
        s = topo.source(JsonData('a', 1)).as_json()
        avro.json_to_avro(s, avro_test_schema_file(), embed_avro_schema=True, tuples_per_message=1000)
        avro.json_to_avro(s, avro_test_schema_file(), embed_avro_schema=True, bytes_per_message=1024)
        avro.json_to_avro(s, avro_test_schema_file(), embed_avro_schema=True, time_per_message=datetime.timedelta(seconds=5))
        avro.json_to_avro(s, avro_test_schema_file(), embed_avro_schema=True, time_per_message=5)
        avro.json_to_avro(s, avro_test_schema_file(), embed_avro_schema=True, time_per_message=15.0)

    def test_no_python_schema(self):
        topo = Topology('test_no_python_schema')
        # EndpointSource does not support Python schema, expect TypeError
        self.assertRaises(TypeError, EndpointSource, schema=CommonSchema.Python)
        # EndpointSink does not support Python schema, expect TypeError
        stream1 = topo.source(lambda: itertools.count())
        with self.assertRaises(TypeError):
            stream1.for_each(EndpointSink())
Example 38
    def test_sequence(self):
        topo = Topology()
        s = topo.source(U.Sequence(iterations=122))

        tester = Tester(topo)
        tester.tuple_check(s, lambda x: 'seq' in x and 'ts' in x)
        tester.tuple_count(s, 122)
        tester.test(self.test_ctxtype, self.test_config)
Example 39
    def test_source_argcount(self):
        topo = Topology()
        topo.source(a_0)
        topo.source(A_0())
        self.assertRaises(TypeError, topo.source, a_1)
        self.assertRaises(TypeError, topo.source, A_1())
        topo.source(ao_1)
        topo.source(AO_1())
Example 40
    def test_creds(self):
        creds_file = os.environ['EVENTSTREAMS_CREDENTIALS']
        with open(creds_file) as data_file:
            credentials = json.load(data_file)
        topo = Topology()
        stream = topo.source(['Hello', 'World']).as_json()
        evstr.publish(stream, 'Topic', credentials=credentials)
        evstr.publish(stream, 'Topic', credentials='eventstreams')
    def test_get_job(self):
        topo = Topology("job_in_result_test")
        topo.source(["foo"])

        tester = Tester(topo)
        self.tester = tester

        tester.local_check = self._correct_job_ids
        tester.test(self.test_ctxtype, self.test_config)
Example 42
def main():
    ref_signal = signal.hann(10)

    t = Topology("Convolve_Sample")
    readings = t.source(signal_generator.Readings(100)).transform(TumblingWindow(20))
    convolveStream = readings.transform(signal_functions.Convolve(ref_signal))
    convolveStream.sink(print)

    streamsx.topology.context.submit("STANDALONE", t.graph)
Example 43
def main():
    filter_order = 4
    cutoffFreq = 100
    sampleRate = 1000

    t = Topology("LowpassFilter_Sample")
    readings = t.source(signal_generator.Readings(50000)).transform(TumblingWindow(2000))
    filterStream = readings.transform(butterworth.Lowpass(filter_order, cutoffFreq, sampleRate))
    filterStream.sink(print)

    streamsx.topology.context.submit("STANDALONE", t.graph)
Example 44
def main():
    """
    Sample continuous (streaming) grep topology application. This Python application builds a
    simple topology that periodically polls a directory for files, reads each file and
    output lines that contain the search term.
    Thus as each file is added to the directory, the application will read
    it and output matching lines.
    
    Args:
        directory (string): a directory that contains files to process
        search_string (string): a search term
        
    Example:
        * Create a subdirectory "dir"
        * Create file1.txt in subdirectory "dir" with the following contents:
            file1 line1
            file1 line2
            file1 line3
        * Create file2.txt in subdirectory "dir" with the following contents:
            file2 line1
            file2 line2
            file2 line3
        * python3 grep.py dir line2
        
    Output:
        file1 line2
        file2 line2
    """
    
    if len(sys.argv) != 3:
        print("Usage: python3 grep.py <directory> <search_string>")
        return
    directory = sys.argv[1]
    term = sys.argv[2]
    topo = Topology("grep")
    
    # Declare a stream that will contain the contents of the files.
    # For each input file, DirectoryWatcher opens the file and reads its contents 
    # as a text file, producing a tuple for each line of the file. The tuple contains
    # the contents of the line, as a string.
    lines = topo.source(util_functions.DirectoryWatcher(directory))
    
    # Filter out non-matching lines. FilterLine is a callable class 
    # that will be executed for each tuple on lines, that is each line
    # read from a file.  Only lines that contain the string `term` will
    # be included in the output stream.
    matching = lines.filter(grep_functions.FilterLine(term))
    
    # print the matching lines to standard out
    matching.print()
    
    # execute the topology
    streamsx.topology.context.submit("STANDALONE", topo)
    def test_get_job(self):
        topo = Topology("job_in_result_test")
        topo.source(["foo"])

        sc = rest.StreamsConnection(username=self.username, password=self.password)
        sc.session.verify = False
        config = {ConfigParams.STREAMS_CONNECTION : sc}

        tester = Tester(topo)
        self.tester = tester

        tester.local_check = self._correct_job_ids
        tester.test(self.test_ctxtype, config)
    def test_always_fetch_logs(self):
        topo = Topology("always_fetch_logs")
        s = topo.source(["foo"])

        tester = Tester(topo)
        tester.contents(s, ["foo"])

        tester.test(self.test_ctxtype, self.test_config, always_collect_logs=True)

        # Check if logs were downloaded
        logs = tester.result['application_logs']
        exists = os.path.isfile(logs)

        self.assertTrue(exists, "Application logs were not downloaded on test success")

        if exists:
            os.remove(logs)
    def run(self, context="DISTRIBUTED"):
        ## Create topology
        topo = Topology("HealthcareDemo")

        ## Ingest, preprocess and aggregate patient data
        patientData = topo.subscribe("ingest-physionet", schema.CommonSchema.Json) \
                          .map(functions.identity) \
                          .filter(healthcare_functions.PatientFilter(self.patient_id)) \
                          .transform(healthcare_functions.GenTimestamp(self.sample_rate)) \
                          .transform(SlidingWindow(length=self.sample_rate, trigger=self.sample_rate-1)) \
                          .transform(healthcare_functions.aggregate)

        ## Calculate RPeak and RR delta
        rpeak_data_stream = patientmonitoring_functions.streaming_rpeak(patientData, self.sample_rate, data_label='ECG Lead II')

        ## Create a view of the data
        self.view_data = rpeak_data_stream.view()

        ## Compile Python Streams application and submit job
        streamsx.topology.context.submit(context, topo.graph, username=self.username, password=self.password)
    def test_always_fetch_logs(self):
        topo = Topology("always_fetch_logs")
        s = topo.source(["foo"])

        tester = Tester(topo)
        tester.contents(s, ["foo"])

        self.tester = tester
        tester.local_check = self._can_retrieve_logs
        tester.test(self.test_ctxtype, self.test_config, always_collect_logs=True)

        if self.can_retrieve_logs:
            # streams version is >= 4.2.4. Fetching logs is supported.
            # Check if logs were downloaded
            logs = tester.result['application_logs']
            exists = os.path.isfile(logs)

            self.assertTrue(exists, "Application logs were not downloaded on test success")
            
            if exists:
                os.remove(logs)                            
Example 50
def main():
    """
    Plays Fizz Buzz (https://en.wikipedia.org/wiki/Fizz_buzz)
    
    Example:
        python3 fizz_buzz.py
    Output:
        1
        2
        Fizz!
        4
        Buzz!
        Fizz!
        7
        8
        Fizz!
        Buzz!
        11
        Fizz!
        13
        14
        FizzBuzz!
        ...

    """
    topo = Topology("fizz_buzz")
    
    # Declare a stream of int values
    counting = topo.source(fizz_buzz_functions.int_tuples)
    
    # Print the tuples to standard output
    play_fizz_buzz(counting).print()
    
    # At this point the streaming topology (streaming) is
    # declared, but no data is flowing. The topology
    # must be submitted to a context to be executed.
    
    # execute the topology by submitting to a standalone context
    streamsx.topology.context.submit("STANDALONE", topo.graph)
    def test_get_job(self):
        topo = Topology("job_in_result_test")
        topo.source(["foo"])

        tester = Tester(topo)
        self.tester = tester

        tester.local_check = self._correct_job_ids
        tester.test(self.test_ctxtype, self.test_config)

        sr = tester.submission_result
        self.assertIn('submitMetrics', sr)
        m = sr['submitMetrics']
        self.assertIn('buildArchiveSize', m)
        self.assertIn('buildArchiveUploadTime_ms', m)
        self.assertIn('totalBuildTime_ms', m)
        self.assertIn('jobSubmissionTime_ms', m)

        self.assertTrue(m['buildArchiveSize'] > 0)
        self.assertTrue(m['buildArchiveUploadTime_ms'] > 0)
        self.assertTrue(m['totalBuildTime_ms'] > 0)
        self.assertTrue(m['jobSubmissionTime_ms'] > 0)
    def test_class(self):
        topo = Topology()
        ct = CallTopo()

        s = ct.ecruos(topo)
        self._csl_stream(s, 'source', 'ecruos', cls='CallTopo')

        s = ct.retlif(s)
        self._csl_stream(s, 'filter', 'retlif', cls='CallTopo')

        s = ct.pam(s)
        self._csl_stream(s, 'map', 'pam', cls='CallTopo')

        s = ct.pam_talf(s)
        self._csl_stream(s, 'flat_map', 'pam_talf', cls='CallTopo')
        
        s = ct.gnirts_sa(s)
        self._csl_stream(s, 'as_string', 'gnirts_sa', cls='CallTopo')

        s = ct.nosj_sa(s)
        self._csl_stream(s, 'as_json', 'nosj_sa', cls='CallTopo')

        st = ct.ebircsbus(topo)
        self._csl_stream(st, 'subscribe', 'ebircsbus', cls='CallTopo')

        e = ct.hcae_rof(s)
        self._csl_sink(e, 'for_each', 'hcae_rof', cls='CallTopo')

        e = ct.hsilbup(s)
        self._csl_sink(e, 'publish', 'hsilbup', cls='CallTopo')

        # test with implict schema change
        e = ct.hsilbup(topo.source([]), schema=CommonSchema.Json)
        self._csl_sink(e, 'publish', 'hsilbup', cls='CallTopo')

        e = ct.tnirp(s)
        self._csl_sink(e, 'print', 'tnirp', cls='CallTopo')
def main():
    """
    Sample temperature sensor topology application.  This Python application builds a 
    simple topology that prints an infinite stream of random numbers to standard
    output.

    The application implements the typical pattern
    of code that declares a topology followed by
    submission of the topology to a Streams context.
               
    Example:
        python3 temperature_sensor.py
    Output:
        ...
        0.3235259780332219
        1.7694181431337437
        0.27741668353194443
        -0.18827948813268522
        0.9576092897071428
        -0.8918033752738117
        -1.4946580133821907
        ...
        (Ctrl-C to exit)
    """
    
    # Create the container for the topology that will hold the streams of tuples.
    topo = Topology("temperature_sensor")
    
    # Declare an infinite stream of random numbers
    source = topo.source(temperature_sensor_functions.readings)
    
    # Sink the stream by printing each of its tuples to standard output
    source.print()
    
    # Now execute the topology by submitting to a standalone context.
    streamsx.topology.context.submit("STANDALONE", topo.graph)
def main():
    """
    Sample continuous (streaming) regular expression grep topology application.
    This Python application builds a simple topology that periodically polls a 
    directory for files, reads each file and output lines that match a regular
    expression.
    The matching is done on a stream parallelized into 5 parallel channels.
    Tuples are routed to parallel channels such that an even distribution is
    maintained.
    
    Args:
        directory (string): a directory that contains files to process
        search_pattern (string): a search pattern
        
    Example:
        * In addition to including the `com.ibm.streamsx.topology/opt/python/packages`
          directory in the PYTHONPATH environment variable, also include the
          `samples/python/topology/simple` directory.
        * Create a subdirectory "dir"
        * Create file1.txt in subdirectory "dir" with the following contents:
            file1 line1
            file1 line2
            file1 line3
        * Create file2.txt in subdirectory "dir" with the following contents:
            file2 line1
            file2 line2
            file2 line3
        * python3 parallel_regex_grep.py dir line[1-2]
        
    Example Output (intermixed):
        file2 line1
        file2 line2
        file1 line1
        file1 line2
        
        LineCounter@139676451944432 has sent ...
        LineCounter@139676451944432 has sent 6 lines to be filtered.                   <== The source operator produced a total of 6 tuples
        
        1. FilterLine@139676451362072 has received 1 lines on this parallel channel.   <== 5 filter operators are created, one for each parallel channel.
        2. FilterLine@139676441656064 has received 1 lines on this parallel channel.       4 operators processed 1 tuple each.
        3. FilterLine@139676441211568 has received 1 lines on this parallel channel.       1 operator processed 2 tuples.
        4. FilterLine@139676441211848 has received 1 lines on this parallel channel.
        5. FilterLine@139676441655728 has received ...                                  
           FilterLine@139676441655728 has received 2 lines on this parallel channel.
           
    """
    if len(sys.argv) != 3:
        print("Usage: python3 parallel_regex_grep.py <directory> <search_pattern>")
        return
    directory = sys.argv[1]
    pattern = sys.argv[2]
    
    # Define the topology
    topo = Topology("parallel_regex_grep")
    
    # Declare a stream with tuples that are string objects
    # All files in a directory are read, resulting in lines of text
    # Each line is a tuple in the stream
    lines = topo.source(util_functions.DirectoryWatcher(directory))
    
    # Count the total number of lines before they are split between
    # different parallel channels.
    lines_counter = lines.transform(parallel_regex_grep_functions.LineCounter())

    # Parallelize the Stream.
    # Since there are 5 channels of the stream, the approximate number of
    # lines sent to each channel should be numSentStrings/5. This can be
    # verified by comparing the outputs of the lines_counter stream to that
    # of the parallel channels.
    lines_parallel = lines_counter.parallel(5)
    
    # Filter for the matched string, and print the number strings that have
    # been tested. This is happening in parallel.
    filtered_parallel = lines_parallel.filter(parallel_regex_grep_functions.FilterLine(pattern))
    
    # Join the results of each parallel filter into one stream,
    # merging the parallel streams back into one stream.
    filtered_condensed = filtered_parallel.end_parallel()
    
    # Print the combined results
    filtered_condensed.print()
    
    # Execute the topology
    streamsx.topology.context.submit("STANDALONE", topo.graph)
Example 56
def main():
    """
    Introduction to streaming with scikit-learn.

    Adapts the scikit-learn basic tutorial to
    a streaming environment.

    In a streaming environment events arrive continually
    and as individual items. In this case the digit prediction
    example is adapted to predict a digit as each image arrives.

    The training of the prediction model occurs locally using
    the example digits dataset, while the runtime prediction
    of images occurs in the IBM Cloud using the Streaming
    Analytics service.

    The original scikit-learn tutorial is at:
    http://scikit-learn.org/stable/tutorial/basic/tutorial.html 
    """
    # Load the data and train the model.
    digits = datasets.load_digits()
    clf = svm.SVC(gamma=0.001, C=100.)
    clf.fit(digits.data[:-10], digits.target[:-10])

    # Start the streaming application definition
    topo = Topology(namespace='ScikitLearn', name='Images')

    # For use on the service we need to require scikit-learn
    topo.add_pip_package('scikit-learn')
    topo.exclude_packages.add('sklearn')

    # Create a stream of images by cycling through the last
    # ten images (which were excluded from the training)
    # Each tuple on the stream represents a single image.
    images = topo.source(itertools.cycle(digits.data[-10:]), name='Images')

    # Predict the digit from the image using the trained model.
    # The map method declares a stream (images_digits) that is
    # the result of applying a function to each tuple on its
    # input stream (images) 
    #
    # In this case the function is a lambda that predicts the
    # digit for an image using the model clf. Each return
    # from the lambda becomes a tuple on images_digits,
    # in this case a dictionary containing the image and the prediction.
    #
    # Note that the lambda function captures the model (clf)
    # and it will be pickled (using dill) to allow it to
    # be used on the service (which runs in IBM Cloud).
    # 
    images_digits = images.map(lambda image : {'image':image, 'digit':clf.predict(image.reshape(1,-1))[0]}, name='Predict Digit')

    images_digits.for_each(lambda x : None, name='Noop')

    # Note at this point topo represents the declaration of the
    # streaming application that predicts digits from images.
    # It must be submitted to an execution context, in this case
    # an instance of Streaming Analytics service running on IBM Cloud.

    sr = streamsx.topology.context.submit('STREAMING_ANALYTICS_SERVICE', topo)
    print(sr)
Example 57
    def test_keep_schema_string(self):
        topo = Topology()
        s = topo.source([]).as_string()
        self._check_kept(s)
Example 58
    def test_keep_schema_json(self):
        topo = Topology()
        s = topo.source([]).as_json()
        self._check_kept(s)
def main():
    """
    This is a variant of images.py that loads the model from a file.

    Here the Streams application is declared using a model
    contained in a file. This is a typical pattern where
    the model is created off-line and saved to a file.
    Subsequently applications load the file to perform predictions.

    Comments are mainly focused on the model loading, see
    images.py for details on other statements.

    http://scikit-learn.org/stable/modules/model_persistence.html
    """
    # Load the data and train the model.
    digits = datasets.load_digits()
    clf = svm.SVC(gamma=0.001, C=100.)
    clf.fit(digits.data[:-10], digits.target[:-10])

    # Persist the model as a file
    joblib.dump(clf, 'digitmodel.pkl')

    # Just to ensure we are not referencing the local
    # instance of the model, we will load the model at
    # runtime from the file.
    clf = None

    topo = Topology(namespace='ScikitLearn', name='ImagesModelFile')

    topo.add_pip_package('scikit-learn')
    topo.exclude_packages.add('sklearn')

    images = topo.source(itertools.cycle(digits.data[-10:]), name='Images')

    # Add the model to the topology. This will take a copy
    # of the file and make it available when the job
    # is running. The returned path is relative to the
    # job's application directory. See DigitPredictor() for
    # how it is used.
    model_path = topo.add_file_dependency('digitmodel.pkl', 'etc')

    # Predict the digit from the image using the trained model.
    # The map method declares a stream (images_digits) that is
    # the result of applying a function to each tuple on its
    # input stream (images) 
    #
    # At runtime we need to load the model from the file so instead
    # of a stateless lambda function we use an instance a class.
    # This class (DigitPredictor) has the model path as its state
    # and will load the model from the file when the job is executing
    # in the IBM Cloud.
    images_digits = images.map(DigitPredictor(model_path), name='Predict Digit')

    images_digits.for_each(lambda x : None, name='Noop')

    # Note at this point topo represents the declaration of the
    # streaming application that predicts digits from images.
    # It must be submitted to an execution context, in this case
    # an instance of Streaming Analytics service running on IBM Cloud.

    sr = streamsx.topology.context.submit('STREAMING_ANALYTICS_SERVICE', topo)
    print(sr)

    # Clean up, the running job has its own copy of the model file
    os.remove('digitmodel.pkl')
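
DigitPredictor is referenced but not shown; a sketch that keeps the model path as its state and lazily loads the model at runtime. streamsx.ec.get_application_directory() is assumed here as the way to resolve the path relative to the job's application directory:

import os
import joblib
import streamsx.ec

class DigitPredictor:
    """Callable whose state is the model path; the model itself is
    loaded from the file on first use in the executing job."""
    def __init__(self, model_path):
        self.model_path = model_path
        self.clf = None

    def __call__(self, image):
        if self.clf is None:
            # resolve the relative path against the application directory
            path = os.path.join(streamsx.ec.get_application_directory(),
                                self.model_path)
            self.clf = joblib.load(path)
        return {'image': image, 'digit': self.clf.predict(image.reshape(1, -1))[0]}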
Example 60
    def test_keep_schema_schema(self):
        topo = Topology()
        s = topo.source([]).map(lambda x: x, schema='tuple<rstring a, int32 b>')
        self._check_kept(s)