def get_dataset_bounds(self, dataset_id=""):
        """@brief Get the bounding coordinates of the dataset using a couch map/reduce query
        @param dataset_id
        @result bounds is a dictionary containing spatial and temporal bounds of the dataset in standard units

        @param dataset_id    str
        @retval bounds    Unknown
        """
        dataset = self.read_dataset(dataset_id=dataset_id)
        key = dataset.primary_view_key  # stream_id
        ar = gevent.event.AsyncResult()

        def ar_timeout(db):
            opts = {"start_key": [key, 0], "end_key": [key, 2]}
            try:
                results = db.query_view("datasets/bounds", opts=opts)[0]["value"]
            except IndexError:
                # Means there are no results
                results = {}
            ar.set(results)

        db = self.container.datastore_manager.get_datastore(dataset.datastore_name)
        g = Greenlet(ar_timeout, db)
        g.start()
        bounds = ar.get(timeout=5)

        return bounds
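The same wait-with-timeout shape can be shown on its own: a worker greenlet performs a blocking call and hands the result back through an AsyncResult, while the caller waits for at most a few seconds. A minimal sketch, where slow_query() is a hypothetical stand-in for the datastore call:

import gevent
import gevent.event
from gevent.greenlet import Greenlet

def fetch_with_timeout(timeout=5):
    ar = gevent.event.AsyncResult()

    def slow_query():
        # Hypothetical stand-in for db.query_view(); pretend the query takes a moment.
        gevent.sleep(1)
        return {'time_min': 0, 'time_max': 2}

    def worker():
        try:
            ar.set(slow_query())
        except Exception as e:
            ar.set_exception(e)  # surface failures to the waiting caller

    Greenlet(worker).start()
    # Blocks the calling greenlet for at most `timeout` seconds, then raises gevent.Timeout.
    return ar.get(timeout=timeout)

print(fetch_with_timeout())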
 def run(self, blog):
     '''
     Initiate the thread to query, organize and publish the data
     '''
     production = Greenlet(self._grab,blog=blog,callback=lambda : self._on_done())
     production.start()
     self.greenlet_queue.append(production)
Example #3
def qoorate_determine_relevency(application, item):
    """schedule an indexing using concurrency"""
    logging.info("qoorate_determine_relevency, start: %s" % item)
    g = Greenlet(qoorate_generate_relevency, item)
    logging.info("qoorate_generate_relevency, greenlet, start(): %s" % item)
    g.start()
    logging.info("qoorate_generate_relevency, end: %s" % item)
Example #4
    def get_dataset_bounds(self, dataset_id=''):
        """@brief Get the bounding coordinates of the dataset using a couch map/reduce query
        @param dataset_id
        @result bounds is a dictionary containing spatial and temporal bounds of the dataset in standard units

        @param dataset_id    str
        @retval bounds    Unknown
        """
        dataset = self.read_dataset(dataset_id=dataset_id)
        key = dataset.primary_view_key  # stream_id
        ar = gevent.event.AsyncResult()

        def ar_timeout(db):
            opts = {'start_key': [key, 0], 'end_key': [key, 2]}
            try:
                results = db.query_view("datasets/bounds",
                                        opts=opts)[0]['value']
            except IndexError:
                # Means there are no results
                results = {}
            ar.set(results)

        db = self.container.datastore_manager.get_datastore(
            dataset.datastore_name)
        g = Greenlet(ar_timeout, db)
        g.start()
        bounds = ar.get(timeout=5)

        return bounds
Example #5
 def launch_benchmark(transform_number=1, primer=1, message_length=4):
     import gevent
     from gevent.greenlet import Greenlet
     from pyon.util.containers import DotDict
     from pyon.net.transport import NameTrio
     from pyon.net.endpoint import Publisher
     import uuid
     num = transform_number
     msg_len = message_length
     transforms = list()
     pids = 1
     TransformBenchTesting.message_length = message_length
     cc = Container.instance
     pub = Publisher(to_name=NameTrio(get_sys_name(),
                                      str(uuid.uuid4())[0:6]))
     for i in xrange(num):
         tbt = cc.proc_manager._create_service_instance(
             str(pids), 'tbt', 'prototype.transforms.linear',
             'TransformInPlace',
             DotDict({
                 'process': {
                     'name': 'tbt%d' % pids,
                     'transform_id': pids
                 }
             }))
         tbt.init()
         tbt.start()
         gevent.sleep(0.2)
         for i in xrange(primer):
             pub.publish(list(xrange(msg_len)))
         g = Greenlet(tbt.perf)
         g.start()
         transforms.append(tbt)
         pids += 1
Example #6
    def execute_replay(self):
        '''
        @brief Spawns a greenlet to take care of the query and work
        '''
        if not hasattr(self, 'output'):
            raise Inconsistent(
                'The replay process requires an output stream publisher named output. Invalid configuration!'
            )

        datastore_name = self.datastore_name
        key_id = self.key_id

        view_name = self.view_name

        opts = {
            'start_key': [key_id, 0],
            'end_key': [key_id, 2],
            'include_docs': True
        }

        g = Greenlet(self._query,
                     datastore_name=datastore_name,
                     view_name=view_name,
                     opts=opts,
                     callback=lambda results: self._publish_query(results))
        g.start()
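The _query worker itself is not shown in this example. A hypothetical sketch of the shape it would need for the callback keyword above to work, reusing the datastore_manager and query_view calls seen elsewhere in these examples (fetch the rows for the replay window, then hand them to the callback):

    def _query(self, datastore_name=None, view_name=None, opts=None, callback=None):
        # Hypothetical sketch: run the couch view query and pass the rows on.
        db = self.container.datastore_manager.get_datastore(datastore_name)
        results = db.query_view(view_name, opts=opts)
        if callback is not None:
            callback(results)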
Example #7
 def launch_benchmark(transform_number=1, primer=1,message_length=4):
     import gevent
     from gevent.greenlet import Greenlet
     from pyon.util.containers import DotDict
     from pyon.net.transport import NameTrio
     from pyon.net.endpoint import Publisher
     import uuid
     num = transform_number
     msg_len = message_length
     transforms = list()
     pids = 1
     TransformBenchTesting.message_length = message_length
     cc = Container.instance
     pub = Publisher(to_name=NameTrio(get_sys_name(),str(uuid.uuid4())[0:6]))
     for i in xrange(num):
         tbt=cc.proc_manager._create_service_instance(str(pids), 'tbt', 'prototype.transforms.linear', 'TransformInPlace', DotDict({'process':{'name':'tbt%d' % pids, 'transform_id':pids}}))
         tbt.init()
         tbt.start()
         gevent.sleep(0.2)
         for i in xrange(primer):
             pub.publish(list(xrange(msg_len)))
         g = Greenlet(tbt.perf)
         g.start()
         transforms.append(tbt)
         pids += 1
Example #8
    def test_dispatcher(self):
        options = {
            'capabilities': {
                # NoneType because we're going to pass a None to the dispatcher.
                'NoneType': {
                    'enabled': True,
                    'server': '127.0.0.1',
                    'timing': {
                        'active_range': '00:00 - 23:59',
                        'sleep_interval': '1',
                        'activation_probability': '1'
                    },
                    'username': '******',
                    'password': '******',
                    'port': 8080
                },

            }
        }

        dispatcher = BeeDispatcher(options, None, '127.0.0.1')

        dispatcher.max_sessions = 1
        dispatcher.bee = Mock()
        dispatcher_greenlet = Greenlet(dispatcher.start)
        dispatcher_greenlet.start()
        time.sleep(1)
        dispatcher_greenlet.kill()
        dispatcher.bee.do_session.assert_called()
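The test above relies on the blocking time.sleep(1) actually yielding to the dispatcher greenlet, which it only does when gevent's monkey patching is in effect for the test run. The same start/sleep/kill/assert pattern with an explicit gevent.sleep, using a hypothetical Worker class in place of the real dispatcher, could look like this:

import gevent
from gevent.greenlet import Greenlet
from unittest.mock import Mock

class Worker(object):
    """Hypothetical stand-in for a dispatcher whose start() loops forever."""
    def __init__(self, collaborator):
        self.collaborator = collaborator

    def start(self):
        while True:
            self.collaborator.do_session()
            gevent.sleep(0.1)  # yield so other greenlets (the test) can run

def test_worker_loop():
    worker = Worker(Mock())
    g = Greenlet(worker.start)
    g.start()
    gevent.sleep(0.5)  # let the loop run a few iterations
    g.kill()           # stop the run-forever greenlet
    worker.collaborator.do_session.assert_called()

test_worker_loop()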
 def run(self, blog):
     '''
     Initiate the thread to query, organize and publish the data
     '''
     production = Greenlet(self._grab,
                           blog=blog,
                           callback=lambda: self._on_done())
     production.start()
     self.greenlet_queue.append(production)
    def on_start(self):
        super(ExampleDataProducer, self).on_start()

        stream_id = self.CFG.process.out_stream_id

        g = Greenlet(self._trigger_func, stream_id)
        log.debug('Starting publisher thread for simple ctd data.')
        g.start()
        log.warn('Publisher Greenlet started in "%s"' % self.__class__.__name__)
        self.greenlet_queue = []
        self.greenlet_queue.append(g)
    def on_start(self):
        super(ExampleDataProducer, self).on_start()

        stream_id = self.CFG.process.out_stream_id

        g = Greenlet(self._trigger_func, stream_id)
        log.debug('Starting publisher thread for simple ctd data.')
        g.start()
        log.info('Publisher Greenlet started in "%s"' %
                 self.__class__.__name__)
        self.greenlet_queue = []
        self.greenlet_queue.append(g)

        self.finished = gevent.event.Event()
Example #12
    def add_connection(self, username, ws):
        """
        Adds ws to the key username.
        """

        subscriber = self.redis.pubsub()
        subscriber.subscribe(username)
        g_listener = Greenlet(self._listen_to_channel, subscriber, ws)
        g_listener.start()
        ws.greenlet_listener = g_listener

        if username in self.subscriptions:
            self.subscriptions[username].append(ws)
        else:
            self.subscriptions[username] = [ws]
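The _listen_to_channel helper is not part of this example. A hypothetical sketch of what it might look like, assuming redis-py's blocking pubsub.listen() generator and a websocket object with a send() method:

    def _listen_to_channel(self, subscriber, ws):
        # Hypothetical helper: forward every published message on the subscribed
        # channel to the websocket until the listener greenlet is killed.
        for message in subscriber.listen():
            # redis-py yields dicts; only 'message' entries carry a real payload
            # (the initial 'subscribe' confirmation is skipped).
            if message.get('type') == 'message':
                ws.send(message['data'])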
Example #13
    def launch_benchmark(transform_number=1, primer=1, message_length=4):
        import gevent
        from gevent.greenlet import Greenlet
        from pyon.util.containers import DotDict
        from pyon.net.transport import NameTrio
        from pyon.net.endpoint import Publisher
        import numpy
        from pyon.ion.granule.record_dictionary import RecordDictionaryTool
        from pyon.ion.granule.taxonomy import TaxyTool
        from pyon.ion.granule.granule import build_granule

        tt = TaxyTool()
        tt.add_taxonomy_set('a')

        import uuid
        num = transform_number
        msg_len = message_length
        transforms = list()
        pids = 1
        TransformBenchTesting.message_length = message_length
        cc = Container.instance
        pub = Publisher(to_name=NameTrio(get_sys_name(),
                                         str(uuid.uuid4())[0:6]))
        for i in xrange(num):
            tbt = cc.proc_manager._create_service_instance(
                str(pids), 'tbt', 'prototype.transforms.linear',
                'TransformInPlaceNewGranule',
                DotDict({
                    'process': {
                        'name': 'tbt%d' % pids,
                        'transform_id': pids
                    }
                }))
            tbt.init()
            tbt.start()
            gevent.sleep(0.2)
            for i in xrange(primer):
                rd = RecordDictionaryTool(tt, message_length)
                rd['a'] = numpy.arange(message_length)
                gran = build_granule(data_producer_id='dp_id',
                                     taxonomy=tt,
                                     record_dictionary=rd)
                pub.publish(gran)

            g = Greenlet(tbt.perf)
            g.start()
            transforms.append(tbt)
            pids += 1
Example #14
class TransformExampleProducer(StreamProcess):
    """
    Used as a data producer in examples.
    It publishes input for the following examples as {'num':<int>} where <int> is an integer.
    A message is published every 4 seconds, and the published value is incremented by 1 each time.
    id_p = cc.spawn_process('myproducer', 'ion.processes.data.transforms.transform_example', 'TransformExampleProducer', {'process':{'type':'stream_process','publish_streams':{'out_stream':'forced'}},'stream_producer':{'interval':4000}})
    cc.proc_manager.procs['%s.%s' %(cc.id,id_p)].start()
    """

    def on_init(self):
        log.debug("StreamProducer init. Self.id=%s" % self.id)


    def start(self):

        log.debug("StreamProducer start")
        # Threads become efficient Greenlets with gevent
        streams = self.CFG.get('process',{}).get('publish_streams',None)
        if streams:
            self.output_streams = list(k for k in streams)
        else:
            self.output_streams = None

        self.producer_proc = Greenlet(self._trigger_func)
        self.producer_proc.start()


    def process(self, packet):
        pass

    def on_quit(self):
        log.debug("TransformExampleProducer quit")
        self.producer_proc.kill()

    def _trigger_func(self):
        interval = self.CFG.get('stream_producer').get('interval')
        stream_route = self.CFG.get('stream_producer').get('stream_route')
        if self.output_streams:
            pub = getattr(self,self.output_streams[0],None)
        else:
            pub = None
        num = 1
        while True:
            msg = dict(num=str(num))
            pub.publish(msg)
            log.debug("Message %s published", num)
            num += 1
            time.sleep(interval/1000.0)
Example #15
    def test_dispatcher(self):
        options = {
            'enabled': True,
            'server': '127.0.0.1',
            'active_range': '00:00 - 23:59',
            'sleep_interval': '1',
            'activation_probability': '1',
            'username': '******',
            'password': '******',
            'port': 8080}

        dispatcher = BaitDispatcher(Mock(), options)

        dispatcher_greenlet = Greenlet(dispatcher.start)
        dispatcher_greenlet.start()
        gevent.sleep(2)
        dispatcher_greenlet.kill()
class TransformExampleProducer(StreamProcess):
    """
    Used as a data producer in examples.
    It publishes input for the following examples as {'num':<int>} where <int> is an integer.
    A message is published every 4 seconds, and the published value is incremented by 1 each time.
    id_p = cc.spawn_process('myproducer', 'ion.processes.data.transforms.transform_example', 'TransformExampleProducer', {'process':{'type':'stream_process','publish_streams':{'out_stream':'forced'}},'stream_producer':{'interval':4000}})
    cc.proc_manager.procs['%s.%s' %(cc.id,id_p)].start()
    """
    def on_init(self):
        log.debug("StreamProducer init. Self.id=%s" % self.id)

    def start(self):

        log.debug("StreamProducer start")
        # Threads become efficient Greenlets with gevent
        streams = self.CFG.get('process', {}).get('publish_streams', None)
        if streams:
            self.output_streams = list(k for k in streams)
        else:
            self.output_streams = None

        self.producer_proc = Greenlet(self._trigger_func)
        self.producer_proc.start()

    def process(self, packet):
        pass

    def on_quit(self):
        log.debug("TransformExampleProducer quit")
        self.producer_proc.kill()

    def _trigger_func(self):
        interval = self.CFG.get('stream_producer').get('interval')
        stream_route = self.CFG.get('stream_producer').get('stream_route')
        if self.output_streams:
            pub = getattr(self, self.output_streams[0], None)
        else:
            pub = None
        num = 1
        while True:
            msg = dict(num=str(num))
            pub.publish(msg)
            log.debug("Message %s published", num)
            num += 1
            time.sleep(interval / 1000.0)
Example #17
    def execute_replay(self):
        log.debug('(Replay Agent %s)', self.name)

        # Handle the query
        datastore_name = self.datastore_name
        key_id = self.key_id


        # Build the view query for the replay window
        view_name = self.view_name
        opts = {
            'start_key':[key_id, 0],
            'end_key':[key_id,2],
            'include_docs': True
        }
        g = Greenlet(self._query,datastore_name=datastore_name, view_name=view_name, opts=opts,
            callback=lambda results: self._publish_query(results))
        g.start()
Example #18
    def test_dispatcher(self):
        options =  {
                    'enabled': True,
                    'server': '127.0.0.1',
                    'active_range': '00:00 - 23:59',
                    'sleep_interval': '1',
                    'activation_probability': '1',
                    'username': '******',
                    'password': '******',
                    'port': 8080 }

        dispatcher = BaitDispatcher({}, None, options)

        dispatcher.bait_type = Mock()
        dispatcher_greenlet = Greenlet(dispatcher.start)
        dispatcher_greenlet.start()
        time.sleep(1)
        dispatcher_greenlet.kill()
        dispatcher.bait_type.start.assert_called()
Example #19
    def start(self):
        """
            Starts sending client bees to the configured Honeypot.
        """
        logger.info('Starting client.')

        sessions = {}

        #greenlet to consume and maintain data in sessions list
        self.sessions_consumer = consumer.Consumer(sessions, self.config, self.status)
        gevent.spawn(self.sessions_consumer.start_handling)

        capabilities = []
        for b in clientbase.ClientBase.__subclasses__():
            capability_name = b.__name__.lower()

            if capability_name not in self.config['capabilities']:
                logger.warning(
                    "Not loading {0} bee because it has no option in configuration file.".format(b.__name__))
                continue

            # skip loading if disabled in the configuration file
            if not self.config['capabilities'][capability_name]['enabled']:
                logger.warning(
                    "Not loading {0} bee because it is disabled in the configuration file.".format(b.__name__))
                continue

            options = self.config['capabilities'][capability_name]
            bee = b(sessions, options)
            capabilities.append(bee)
            self.status['enabled_bees'].append(capability_name)
            logger.debug('Adding {0} as a capability'.format(bee.__class__.__name__))

        self.dispatcher_greenlets = []
        for bee in capabilities:
            dispatcher = BeeDispatcher(self.config, bee, self.my_ip)
            self.dispatchers[bee.__class__.__name__] = dispatcher
            current_greenlet = Greenlet(dispatcher.start)
            self.dispatcher_greenlets.append(current_greenlet)
            current_greenlet.start()

        drop_privileges()
        gevent.joinall(self.dispatcher_greenlets)
Example #20
    def test_dispatcher(self):
        options = {
            'enabled': True,
            'server': '127.0.0.1',
            'active_range': '00:00 - 23:59',
            'sleep_interval': '1',
            'activation_probability': '1',
            'username': '******',
            'password': '******',
            'port': 8080
        }

        dispatcher = BaitDispatcher({}, None, options)

        dispatcher.bait_type = Mock()
        dispatcher_greenlet = Greenlet(dispatcher.start)
        dispatcher_greenlet.start()
        time.sleep(1)
        dispatcher_greenlet.kill()
        dispatcher.bait_type.start.assert_called()
Example #21
    def test_dispatcher(self):
        options = {
            "enabled": True,
            "server": "127.0.0.1",
            "active_range": "00:00 - 23:59",
            "sleep_interval": "1",
            "activation_probability": "1",
            "username": "******",
            "password": "******",
            "port": 8080,
        }

        dispatcher = BaitDispatcher(None, options)

        dispatcher.bait_type = Mock()
        dispatcher_greenlet = Greenlet(dispatcher.start)
        dispatcher_greenlet.start()
        time.sleep(1)
        dispatcher_greenlet.kill()
        dispatcher.bait_type.start.assert_called()
    def on_start(self):
        '''
        Creates a publisher for each stream_id passed in as publish_streams
        Creates an attribute with the name matching the stream name which corresponds to the publisher
        ex: say we have publish_streams:{'output': my_output_stream_id }
          then the instance has an attribute output which corresponds to the publisher for the stream
          in my_output_stream_id
        '''

        # Get the stream(s)
        stream_id = self.CFG.get('process',{}).get('stream_id','')

        self.greenlet_queue = []

        self._usgs_def = USGS_stream_definition()

        # Stream creation is done in SA, but to make the example self-contained for demonstration, create one here if it is not provided...
        if not stream_id:

            pubsub_cli = PubsubManagementServiceClient(node=self.container.node)
            stream_id = pubsub_cli.create_stream(
                name='Example USGS Data',
                stream_definition=self._usgs_def,
                original=True,
                encoding='ION R2')

        self.stream_publisher_registrar = StreamPublisherRegistrar(process=self,node=self.container.node)
        # Needed to get the originator's stream_id
        self.stream_id = stream_id


        self.publisher = self.stream_publisher_registrar.create_publisher(stream_id=stream_id)


        self.last_time = 0


        g = Greenlet(self._trigger_func, stream_id)
        log.warn('Starting publisher thread for simple usgs data.')
        g.start()
        self.greenlet_queue.append(g)
    def execute_replay(self):
        '''
        @brief Spawns a greenlet to take care of the query and work
        '''
        if not hasattr(self, 'output'):
            raise Inconsistent('The replay process requires an output stream publisher named output. Invalid configuration!')

        datastore_name = self.datastore_name
        key_id = self.key_id

        view_name = self.view_name

        opts = {
            'start_key':[key_id,0],
            'end_key':[key_id,2],
            'include_docs':True
        }

        g = Greenlet(self._query,datastore_name=datastore_name, view_name=view_name, opts=opts,
            callback=lambda results: self._publish_query(results))
        g.start()
    def on_start(self):


        log.warn('Entering On Start!!!')
        # Get the stream(s)
        stream_id = self.CFG.get_safe('process.stream_id',{})

        self.greenlet_queue = []


        # Stream creation is done in SA, but to make the example self-contained for demonstration, create one here if it is not provided...
        if not stream_id:

            pubsub_cli = PubsubManagementServiceClient(node=self.container.node)

            stream_def_id = pubsub_cli.create_stream_definition(name='Producer stream %s' % str(uuid4()),container=self.outgoing_stream_def)


            stream_id = pubsub_cli.create_stream(
                name='Example CTD Data',
                stream_definition_id = stream_def_id,
                original=True,
                encoding='ION R2')

        self.stream_publisher_registrar = StreamPublisherRegistrar(process=self,node=self.container.node)
        # Needed to get the originator's stream_id
        self.stream_id= stream_id


        self.publisher = self.stream_publisher_registrar.create_publisher(stream_id=stream_id)


        self.last_time = 0


        g = Greenlet(self._trigger_func, stream_id)
        log.debug('Starting publisher thread for simple ctd data.')
        g.start()
        log.warn('Publisher Greenlet started in "%s"' % self.__class__.__name__)
        self.greenlet_queue.append(g)
    def on_start(self):

        log.warn('Entering On Start!!!')
        # Get the stream(s)
        stream_id = self.CFG.get_safe('process.stream_id', {})

        self.greenlet_queue = []

        # Stream creation is done in SA, but to make the example self-contained for demonstration, create one here if it is not provided...
        if not stream_id:

            pubsub_cli = PubsubManagementServiceClient(
                node=self.container.node)

            stream_def_id = pubsub_cli.create_stream_definition(
                name='Producer stream %s' % str(uuid4()),
                container=self.outgoing_stream_def)

            stream_id = pubsub_cli.create_stream(
                name='Example CTD Data',
                stream_definition_id=stream_def_id,
                original=True,
                encoding='ION R2')

        self.stream_publisher_registrar = StreamPublisherRegistrar(
            process=self, node=self.container.node)
        # Needed to get the originator's stream_id
        self.stream_id = stream_id

        self.publisher = self.stream_publisher_registrar.create_publisher(
            stream_id=stream_id)

        self.last_time = 0

        g = Greenlet(self._trigger_func, stream_id)
        log.debug('Starting publisher thread for simple ctd data.')
        g.start()
        log.warn('Publisher Greenlet started in "%s"' %
                 self.__class__.__name__)
        self.greenlet_queue.append(g)
Example #27
class StreamProducer(SimpleProcess):
    """
    StreamProducer is not a stream process; a stream process is defined by having an input stream that it processes.
    The StreamProducer instead plays the part of an agent pushing data into the system.

    """


    def on_init(self):
        log.debug("StreamProducer init. Self.id=%s" % self.id)

    def on_start(self):
        log.debug("StreamProducer start")
        self.producer_proc = Greenlet(self._trigger_func)
        self.producer_proc.start()


    def on_quit(self):
        log.debug("StreamProducer quit")
        self.producer_proc.kill()  # stop the producer greenlet started in on_start()
        super(StreamProducer,self).on_quit()

    def _trigger_func(self):
        interval = self.CFG.get('stream_producer').get('interval')
        routing_key = self.CFG.get('stream_producer').get('routing_key')

        # Create scoped exchange name
        XP = '.'.join([bootstrap.get_sys_name(),'science_data'])

        pub = ProcessPublisher(node=self.container.node, name=(XP,routing_key), process=self)
        num = 1
        while True:
            msg = dict(num=str(num))
            pub.publish(msg)
            log.debug("Message %s published", num)
            num += 1
            time.sleep(interval/1000.0)
Example #28
    def launch_benchmark(transform_number=1, primer=1,message_length=4):
        import gevent
        from gevent.greenlet import Greenlet
        from pyon.util.containers import DotDict
        from pyon.net.transport import NameTrio
        from pyon.net.endpoint import Publisher
        import numpy
        from pyon.ion.granule.record_dictionary import RecordDictionaryTool
        from pyon.ion.granule.taxonomy import TaxyTool
        from pyon.ion.granule.granule import build_granule

        tt = TaxyTool()
        tt.add_taxonomy_set('a')

        import uuid
        num = transform_number
        msg_len = message_length
        transforms = list()
        pids = 1
        TransformBenchTesting.message_length = message_length
        cc = Container.instance
        pub = Publisher(to_name=NameTrio(get_sys_name(),str(uuid.uuid4())[0:6]))
        for i in xrange(num):
            tbt=cc.proc_manager._create_service_instance(str(pids), 'tbt', 'prototype.transforms.linear', 'TransformInPlaceNewGranule', DotDict({'process':{'name':'tbt%d' % pids, 'transform_id':pids}}))
            tbt.init()
            tbt.start()
            gevent.sleep(0.2)
            for i in xrange(primer):
                rd = RecordDictionaryTool(tt, message_length)
                rd['a'] = numpy.arange(message_length)
                gran = build_granule(data_producer_id='dp_id',taxonomy=tt, record_dictionary=rd)
                pub.publish(gran)

            g = Greenlet(tbt.perf)
            g.start()
            transforms.append(tbt)
            pids += 1
Example #29
class StreamProducer(SimpleProcess):
    """
    StreamProducer is not a stream process; a stream process is defined by having an input stream that it processes.
    The StreamProducer instead plays the part of an agent pushing data into the system.

    """
    def on_init(self):
        log.debug("StreamProducer init. Self.id=%s" % self.id)

    def on_start(self):
        log.debug("StreamProducer start")
        self.producer_proc = Greenlet(self._trigger_func)
        self.producer_proc.start()

    def on_quit(self):
        log.debug("StreamProducer quit")
        self.producer_proc.kill()  # stop the producer greenlet started in on_start()
        super(StreamProducer, self).on_quit()

    def _trigger_func(self):
        interval = self.CFG.get('stream_producer').get('interval')
        routing_key = self.CFG.get('stream_producer').get('routing_key')

        # Create scoped exchange name
        XP = '.'.join([bootstrap.get_sys_name(), 'science_data'])

        pub = ProcessPublisher(node=self.container.node,
                               name=(XP, routing_key),
                               process=self)
        num = 1
        while True:
            msg = dict(num=str(num))
            pub.publish(msg)
            log.debug("Message %s published", num)
            num += 1
            time.sleep(interval / 1000.0)
class VizTransformProcForMatplotlibGraphs(TransformDataProcess):

    """
    This class is used for instantiating worker processes that subscribe to data streams and convert
    incoming data from CDM format to Matplotlib graphs.

    """
    def on_start(self):
        super(VizTransformProcForMatplotlibGraphs,self).on_start()
        #assert len(self.streams)==1
        self.initDataFlag = True
        self.graph_data = {} # Stores a dictionary of variables : [List of values]

        # Need some clients
        self.rr_cli = ResourceRegistryServiceProcessClient(process = self, node = self.container.node)
        self.pubsub_cli = PubsubManagementServiceClient(node=self.container.node)

        # extract the various parameters passed to the transform process
        self.out_stream_id = self.CFG.get('process').get('publish_streams').get('visualization_service_submit_stream_id')

        # Create a publisher on the output stream
        #stream_route = self.pubsub_cli.register_producer(stream_id=self.out_stream_id)
        out_stream_pub_registrar = StreamPublisherRegistrar(process=self.container, node=self.container.node)
        self.out_stream_pub = out_stream_pub_registrar.create_publisher(stream_id=self.out_stream_id)

        self.data_product_id = self.CFG.get('data_product_id')
        self.stream_def_id = self.CFG.get("stream_def_id")
        self.stream_def = self.rr_cli.read(self.stream_def_id)

        # Start the thread responsible for keeping track of time and generating graphs
        # Mutex for ensuring proper concurrent communications between threads
        self.lock = RLock()
        self.rendering_proc = Greenlet(self.rendering_thread)
        self.rendering_proc.start()




    def process(self, packet):
        log.debug('(%s): Received Viz Data Packet' % self.name )
        #log.debug('(%s):   - Processing: %s' % (self.name,packet))

        # parse the incoming data
        psd = PointSupplementStreamParser(stream_definition=self.stream_def.container, stream_granule=packet)

        # re-arrange incoming data into an easy to parse dictionary
        vardict = {}
        arrLen = None
        for varname in psd.list_field_names():
            vardict[varname] = psd.get_values(varname)
            arrLen = len(vardict[varname])

        if self.initDataFlag:
            # look at the incoming packet and store
            for varname in psd.list_field_names():
                self.lock.acquire()
                self.graph_data[varname] = []
                self.lock.release()

            self.initDataFlag = False

        # If code reached here, the graph data storage has been initialized. Just add values
        # to the list
        with self.lock:
            for varname in psd.list_field_names():
                self.graph_data[varname].extend(vardict[varname])


    def rendering_thread(self):
        from copy import deepcopy
        # Service Client

        # init Matplotlib
        fig = Figure()
        ax = fig.add_subplot(111)
        canvas = FigureCanvas(fig)
        imgInMem = StringIO.StringIO()
        while True:

            # Sleep for a pre-decided interval. Should be specifiable in a YAML file
            gevent.sleep(20)

            # If there's no data, wait
            # The lock is used here to make sure the entire vector is copied start to finish,
            # which assures that the data won't change while it is being snapshotted
            working_set=None
            with self.lock:
                if len(self.graph_data) == 0:
                    continue
                else:
                    working_set = deepcopy(self.graph_data)


            # For the simple case of testing, let's plot all time-variant variables one at a time
            xAxisVar = 'time'
            xAxisFloatData = working_set[xAxisVar]

            for varName, varData in working_set.iteritems():
                if varName == 'time' or varName == 'height' or varName == 'longitude' or varName == 'latitude':
                    continue

                yAxisVar = varName
                yAxisFloatData = working_set[varName]

                # Generate the plot

                ax.plot(xAxisFloatData, yAxisFloatData, 'ro')
                ax.set_xlabel(xAxisVar)
                ax.set_ylabel(yAxisVar)
                ax.set_title(yAxisVar + ' vs ' + xAxisVar)
                ax.set_autoscale_on(False)

                # generate filename for the output image
                fileName = yAxisVar + '_vs_' + xAxisVar + '.png'
                # Save the figure to the in memory file
                canvas.print_figure(imgInMem, format="png")
                imgInMem.seek(0)

                # submit resulting table back using the out stream publisher
                msg = {"viz_product_type": "matplotlib_graphs",
                       "data_product_id": self.data_product_id,
                       "image_obj": imgInMem.getvalue(),
                       "image_name": fileName}
                self.out_stream_pub.publish(msg)

                #clear the canvas for the next image
                ax.clear()
class VizTransformProcForMatplotlibGraphs(TransformDataProcess):
    """
    This class is used for instantiating worker processes that subscribe to data streams and convert
    incoming data from CDM format to Matplotlib graphs.

    """
    def on_start(self):
        super(VizTransformProcForMatplotlibGraphs, self).on_start()
        #assert len(self.streams)==1
        self.initDataFlag = True
        self.graph_data = {
        }  # Stores a dictionary of variables : [List of values]

        # Need some clients
        self.rr_cli = ResourceRegistryServiceProcessClient(
            process=self, node=self.container.node)
        self.pubsub_cli = PubsubManagementServiceClient(
            node=self.container.node)

        # extract the various parameters passed to the transform process
        self.out_stream_id = self.CFG.get('process').get(
            'publish_streams').get('visualization_service_submit_stream_id')

        # Create a publisher on the output stream
        #stream_route = self.pubsub_cli.register_producer(stream_id=self.out_stream_id)
        out_stream_pub_registrar = StreamPublisherRegistrar(
            process=self.container, node=self.container.node)
        self.out_stream_pub = out_stream_pub_registrar.create_publisher(
            stream_id=self.out_stream_id)

        self.data_product_id = self.CFG.get('data_product_id')
        self.stream_def_id = self.CFG.get("stream_def_id")
        self.stream_def = self.rr_cli.read(self.stream_def_id)

        # Start the thread responsible for keeping track of time and generating graphs
        # Mutex for ensuring proper concurrent communications between threads
        self.lock = RLock()
        self.rendering_proc = Greenlet(self.rendering_thread)
        self.rendering_proc.start()

    def process(self, packet):
        log.debug('(%s): Received Viz Data Packet' % self.name)
        #log.debug('(%s):   - Processing: %s' % (self.name,packet))

        # parse the incoming data
        psd = PointSupplementStreamParser(
            stream_definition=self.stream_def.container, stream_granule=packet)

        # re-arrange incoming data into an easy to parse dictionary
        vardict = {}
        arrLen = None
        for varname in psd.list_field_names():
            vardict[varname] = psd.get_values(varname)
            arrLen = len(vardict[varname])

        if self.initDataFlag:
            # look at the incoming packet and store
            for varname in psd.list_field_names():
                self.lock.acquire()
                self.graph_data[varname] = []
                self.lock.release()

            self.initDataFlag = False

        # If code reached here, the graph data storage has been initialized. Just add values
        # to the list
        with self.lock:
            for varname in psd.list_field_names():
                self.graph_data[varname].extend(vardict[varname])

    def rendering_thread(self):
        from copy import deepcopy
        # Service Client

        # init Matplotlib
        fig = Figure()
        ax = fig.add_subplot(111)
        canvas = FigureCanvas(fig)
        imgInMem = StringIO.StringIO()
        while True:

            # Sleep for a pre-decided interval. Should be specifiable in a YAML file
            gevent.sleep(20)

            # If there's no data, wait
            # The lock is used here to make sure the entire vector is copied start to finish,
            # which assures that the data won't change while it is being snapshotted
            working_set = None
            with self.lock:
                if len(self.graph_data) == 0:
                    continue
                else:
                    working_set = deepcopy(self.graph_data)

            # For the simple case of testing, let's plot all time-variant variables one at a time
            xAxisVar = 'time'
            xAxisFloatData = working_set[xAxisVar]

            for varName, varData in working_set.iteritems():
                if varName == 'time' or varName == 'height' or varName == 'longitude' or varName == 'latitude':
                    continue

                yAxisVar = varName
                yAxisFloatData = working_set[varName]

                # Generate the plot

                ax.plot(xAxisFloatData, yAxisFloatData, 'ro')
                ax.set_xlabel(xAxisVar)
                ax.set_ylabel(yAxisVar)
                ax.set_title(yAxisVar + ' vs ' + xAxisVar)
                ax.set_autoscale_on(False)

                # generate filename for the output image
                fileName = yAxisVar + '_vs_' + xAxisVar + '.png'
                # Save the figure to the in memory file
                canvas.print_figure(imgInMem, format="png")
                imgInMem.seek(0)

                # submit resulting table back using the out stream publisher
                msg = {
                    "viz_product_type": "matplotlib_graphs",
                    "data_product_id": self.data_product_id,
                    "image_obj": imgInMem.getvalue(),
                    "image_name": fileName
                }
                self.out_stream_pub.publish(msg)

                #clear the canvas for the next image
                ax.clear()
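The rendering_thread above follows a common gevent shape: a long-lived greenlet that wakes on an interval, snapshots shared state under a lock, and then works on the copy so producers are never blocked for long. A stripped-down sketch of that pattern (the names below are illustrative, not taken from the class above):

import gevent
from gevent.greenlet import Greenlet
from gevent.lock import RLock
from copy import deepcopy

shared_data = {}  # producers add lists of values here
lock = RLock()

def periodic_worker(interval=5):
    while True:
        gevent.sleep(interval)  # yield to other greenlets between passes
        with lock:
            if not shared_data:
                continue  # nothing to render yet
            working_set = deepcopy(shared_data)  # snapshot, then release the lock
        # render/publish from working_set outside the lock
        print('rendering %d series' % len(working_set))

renderer = Greenlet(periodic_worker)
renderer.start()

# Let it run briefly for demonstration, feeding it one series.
with lock:
    shared_data['temperature'] = [1, 2, 3]
gevent.sleep(6)
renderer.kill()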
Example #32
 def activate_workers(self, workers: Greenlet) -> None:
     workers.start()
Example #33
# Let's see them in action.

# In[ ]:


# Creating a simple function and wrapping it in a greenlet
def myfunction(arg1, arg2, **kwargs):
    print(arg1, arg2, kwargs)
    return 100


# In[ ]:

g = Greenlet(myfunction, 'One', 'Two',
             now='Buckle my shoe')  # create a Greenlet instance via the constructor
g.start()  # schedules the greenlet; it runs once the current greenlet yields to the hub; returns None
g.join()  # wait for it to finish
print('Finished')
print('Greenlet.value', g.value)  # stores the function's return value

# In[ ]:

# The other way is gevent.spawn(), which creates a Greenlet instance and calls start() in one step.
# It is a widely used shorthand.
jobs = [
    gevent.spawn(myfunction, '1', '2', now='Buckle my shoe')
    for i in range(0, 5)
]
gevent.joinall(
    jobs, timeout=3
)  # returns the subset of greenlets that finished within 3 seconds; the jobs list itself is not modified
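
# In[ ]:

# Each greenlet exposes ready(), successful(), value and exception, so after the
# timed joinall above we can check which jobs actually finished. A quick sketch
# reusing the jobs list from the previous cell:
for g in jobs:
    if g.ready() and g.successful():
        print('finished, value =', g.value)
    elif g.ready():
        print('raised:', g.exception)
    else:
        print('still running when joinall timed out')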