def get_dataset_bounds(self, dataset_id=""): """@brief Get the bounding coordinates of the dataset using a couch map/reduce query @param dataset_id @result bounds is a dictionary containing spatial and temporal bounds of the dataset in standard units @param dataset_id str @retval bounds Unknown """ dataset = self.read_dataset(dataset_id=dataset_id) key = dataset.primary_view_key # stream_id ar = gevent.event.AsyncResult() def ar_timeout(db): opts = {"start_key": [key, 0], "end_key": [key, 2]} try: results = db.query_view("datasets/bounds", opts=opts)[0]["value"] except IndexError: # Means there are no results results = {} ar.set(results) db = self.container.datastore_manager.get_datastore(dataset.datastore_name) g = Greenlet(ar_timeout, db) g.start() bounds = ar.get(timeout=5) return bounds
def run(self, blog):
    ''' Initiate the thread to query, organize and publish the data '''
    production = Greenlet(self._grab, blog=blog, callback=lambda: self._on_done())
    production.start()
    self.greenlet_queue.append(production)
def qoorate_determine_relevency(application, item):
    """schedule an indexing using concurrency"""
    logging.info("qoorate_determine_relevency, start: %s" % item)
    g = Greenlet(qoorate_generate_relevency, item)
    logging.info("qoorate_generate_relevency, greenlet, start(): %s" % item)
    g.start()
    logging.info("qoorate_generate_relevency, end: %s" % item)
def launch_benchmark(transform_number=1, primer=1, message_length=4):
    import gevent
    from gevent.greenlet import Greenlet
    from pyon.util.containers import DotDict
    from pyon.net.transport import NameTrio
    from pyon.net.endpoint import Publisher
    import uuid

    num = transform_number
    msg_len = message_length
    transforms = list()
    pids = 1
    TransformBenchTesting.message_length = message_length
    cc = Container.instance
    pub = Publisher(to_name=NameTrio(get_sys_name(), str(uuid.uuid4())[0:6]))
    for i in xrange(num):
        tbt = cc.proc_manager._create_service_instance(
            str(pids), 'tbt', 'prototype.transforms.linear', 'TransformInPlace',
            DotDict({'process': {'name': 'tbt%d' % pids, 'transform_id': pids}}))
        tbt.init()
        tbt.start()
        gevent.sleep(0.2)
        for i in xrange(primer):
            pub.publish(list(xrange(msg_len)))
        g = Greenlet(tbt.perf)
        g.start()
        transforms.append(tbt)
        pids += 1
def execute_replay(self):
    '''
    @brief Spawns a greenlet to take care of the query and work
    '''
    if not hasattr(self, 'output'):
        raise Inconsistent('The replay process requires an output stream publisher named output. Invalid configuration!')

    datastore_name = self.datastore_name
    key_id = self.key_id
    view_name = self.view_name
    opts = {
        'start_key': [key_id, 0],
        'end_key': [key_id, 2],
        'include_docs': True
    }
    g = Greenlet(self._query, datastore_name=datastore_name, view_name=view_name,
                 opts=opts, callback=lambda results: self._publish_query(results))
    g.start()
def test_dispatcher(self):
    options = {
        'capabilities': {
            # NoneType because we're going to pass a None to the dispatcher.
            'NoneType': {
                'enabled': True,
                'server': '127.0.0.1',
                'timing': {
                    'active_range': '00:00 - 23:59',
                    'sleep_interval': '1',
                    'activation_probability': '1'
                },
                'username': '******',
                'password': '******',
                'port': 8080
            },
        }
    }
    dispatcher = BeeDispatcher(options, None, '127.0.0.1')
    dispatcher.max_sessions = 1
    dispatcher.bee = Mock()
    dispatcher_greenlet = Greenlet(dispatcher.start)
    dispatcher_greenlet.start()
    time.sleep(1)
    dispatcher_greenlet.kill()
    dispatcher.bee.do_session.assert_called()
def on_start(self):
    super(ExampleDataProducer, self).on_start()
    stream_id = self.CFG.process.out_stream_id
    g = Greenlet(self._trigger_func, stream_id)
    log.debug('Starting publisher thread for simple ctd data.')
    g.start()
    log.warn('Publisher Greenlet started in "%s"' % self.__class__.__name__)
    self.greenlet_queue = []
    self.greenlet_queue.append(g)
def on_start(self):
    super(ExampleDataProducer, self).on_start()
    stream_id = self.CFG.process.out_stream_id
    g = Greenlet(self._trigger_func, stream_id)
    log.debug('Starting publisher thread for simple ctd data.')
    g.start()
    log.info('Publisher Greenlet started in "%s"' % self.__class__.__name__)
    self.greenlet_queue = []
    self.greenlet_queue.append(g)
    self.finished = gevent.event.Event()
def add_connection(self, username, ws):
    """
    Adds ws to the key username.
    """
    subscriber = self.redis.pubsub()
    subscriber.subscribe(username)
    g_listener = Greenlet(self._listen_to_channel, subscriber, ws)
    g_listener.start()
    ws.greenlet_listener = g_listener
    if username in self.subscriptions:
        self.subscriptions[username].append(ws)
    else:
        self.subscriptions[username] = [ws]
def launch_benchmark(transform_number=1, primer=1, message_length=4):
    import gevent
    from gevent.greenlet import Greenlet
    from pyon.util.containers import DotDict
    from pyon.net.transport import NameTrio
    from pyon.net.endpoint import Publisher
    import numpy
    from pyon.ion.granule.record_dictionary import RecordDictionaryTool
    from pyon.ion.granule.taxonomy import TaxyTool
    from pyon.ion.granule.granule import build_granule

    tt = TaxyTool()
    tt.add_taxonomy_set('a')

    import uuid
    num = transform_number
    msg_len = message_length
    transforms = list()
    pids = 1
    TransformBenchTesting.message_length = message_length
    cc = Container.instance
    pub = Publisher(to_name=NameTrio(get_sys_name(), str(uuid.uuid4())[0:6]))
    for i in xrange(num):
        tbt = cc.proc_manager._create_service_instance(
            str(pids), 'tbt', 'prototype.transforms.linear', 'TransformInPlaceNewGranule',
            DotDict({'process': {'name': 'tbt%d' % pids, 'transform_id': pids}}))
        tbt.init()
        tbt.start()
        gevent.sleep(0.2)
        for i in xrange(primer):
            rd = RecordDictionaryTool(tt, message_length)
            rd['a'] = numpy.arange(message_length)
            gran = build_granule(data_producer_id='dp_id', taxonomy=tt, record_dictionary=rd)
            pub.publish(gran)
        g = Greenlet(tbt.perf)
        g.start()
        transforms.append(tbt)
        pids += 1
class TransformExampleProducer(StreamProcess): """ Used as a data producer in examples. It publishes input for the following examples as {'num':<int>} where <int> is the integer. The production is published every 4 seconds and the published data is incremented by 1 id_p = cc.spawn_process('myproducer', 'ion.processes.data.transforms.transform_example', 'TransformExampleProducer', {'process':{'type':'stream_process','publish_streams':{'out_stream':'forced'}},'stream_producer':{'interval':4000}}) cc.proc_manager.procs['%s.%s' %(cc.id,id_p)].start() """ def on_init(self): log.debug("StreamProducer init. Self.id=%s" % self.id) def start(self): log.debug("StreamProducer start") # Threads become efficent Greenlets with gevent streams = self.CFG.get('process',{}).get('publish_streams',None) if streams: self.output_streams = list(k for k in streams) else: self.output_streams = None self.producer_proc = Greenlet(self._trigger_func) self.producer_proc.start() def process(self, packet): pass def on_quit(self): log.debug("TransformExampleProducer quit") self.producer_proc.kill() def _trigger_func(self): interval = self.CFG.get('stream_producer').get('interval') stream_route = self.CFG.get('stream_producer').get('stream_route') if self.output_streams: pub = getattr(self,self.output_streams[0],None) else: pub = None num = 1 while True: msg = dict(num=str(num)) pub.publish(msg) log.debug("Message %s published", num) num += 1 time.sleep(interval/1000.0)
def test_dispatcher(self):
    options = {
        'enabled': True,
        'server': '127.0.0.1',
        'active_range': '00:00 - 23:59',
        'sleep_interval': '1',
        'activation_probability': '1',
        'username': '******',
        'password': '******',
        'port': 8080
    }
    dispatcher = BaitDispatcher(Mock(), options)
    dispatcher_greenlet = Greenlet(dispatcher.start)
    dispatcher_greenlet.start()
    gevent.sleep(2)
    dispatcher_greenlet.kill()
def execute_replay(self):
    log.debug('(Replay Agent %s)', self.name)

    # Handle the query
    datastore_name = self.datastore_name
    key_id = self.key_id

    # Got the post ID, pull the post and the comments
    view_name = self.view_name
    opts = {
        'start_key': [key_id, 0],
        'end_key': [key_id, 2],
        'include_docs': True
    }
    g = Greenlet(self._query, datastore_name=datastore_name, view_name=view_name,
                 opts=opts, callback=lambda results: self._publish_query(results))
    g.start()
def test_dispatcher(self):
    options = {
        'enabled': True,
        'server': '127.0.0.1',
        'active_range': '00:00 - 23:59',
        'sleep_interval': '1',
        'activation_probability': '1',
        'username': '******',
        'password': '******',
        'port': 8080
    }
    dispatcher = BaitDispatcher({}, None, options)
    dispatcher.bait_type = Mock()
    dispatcher_greenlet = Greenlet(dispatcher.start)
    dispatcher_greenlet.start()
    time.sleep(1)
    dispatcher_greenlet.kill()
    dispatcher.bait_type.start.assert_called()
def start(self): """ Starts sending client bees to the configured Honeypot. """ logger.info('Starting client.') sessions = {} #greenlet to consume and maintain data in sessions list self.sessions_consumer = consumer.Consumer(sessions, self.config, self.status) gevent.spawn(self.sessions_consumer.start_handling) capabilities = [] for b in clientbase.ClientBase.__subclasses__(): capability_name = b.__name__.lower() if capability_name not in self.config['capabilities']: logger.warning( "Not loading {0} bee because it has no option in configuration file.".format(b.__name__)) continue #skip loading if disabled if not self.config['capabilities'][capability_name]['enabled']: logger.warning( "Not loading {0} bee because it is disabled in the configuration file.".format(b.__name__)) continue options = self.config['capabilities'][capability_name] bee = b(sessions, options) capabilities.append(bee) self.status['enabled_bees'].append(capability_name) logger.debug('Adding {0} as a capability'.format(bee.__class__.__name__)) self.dispatcher_greenlets = [] for bee in capabilities: dispatcher = BeeDispatcher(self.config, bee, self.my_ip) self.dispatchers[bee.__class__.__name__] = dispatcher current_greenlet = Greenlet(dispatcher.start) self.dispatcher_greenlets.append(current_greenlet) current_greenlet.start() drop_privileges() gevent.joinall(self.dispatcher_greenlets)
def test_dispatcher(self): options = { "enabled": True, "server": "127.0.0.1", "active_range": "00:00 - 23:59", "sleep_interval": "1", "activation_probability": "1", "username": "******", "password": "******", "port": 8080, } dispatcher = BaitDispatcher(None, options) dispatcher.bait_type = Mock() dispatcher_greenlet = Greenlet(dispatcher.start) dispatcher_greenlet.start() time.sleep(1) dispatcher_greenlet.kill() dispatcher.bait_type.start.assert_called()
def on_start(self):
    '''
    Creates a publisher for each stream_id passed in as publish_streams.
    Creates an attribute with the name matching the stream name, which corresponds to the publisher.
    For example, given publish_streams: {'output': my_output_stream_id}, the instance gets an
    attribute 'output' which is the publisher for the stream in my_output_stream_id.
    '''
    # Get the stream(s)
    stream_id = self.CFG.get('process', {}).get('stream_id', '')

    self.greenlet_queue = []

    self._usgs_def = USGS_stream_definition()

    # Stream creation is done in SA, but to make the example go for demonstration create one here if it is not provided...
    if not stream_id:
        pubsub_cli = PubsubManagementServiceClient(node=self.container.node)
        stream_id = pubsub_cli.create_stream(
            name='Example USGS Data',
            stream_definition=self._usgs_def,
            original=True,
            encoding='ION R2')

    self.stream_publisher_registrar = StreamPublisherRegistrar(process=self, node=self.container.node)
    # Needed to get the originator's stream_id
    self.stream_id = stream_id

    self.publisher = self.stream_publisher_registrar.create_publisher(stream_id=stream_id)

    self.last_time = 0

    g = Greenlet(self._trigger_func, stream_id)
    log.warn('Starting publisher thread for simple usgs data.')
    g.start()
    self.greenlet_queue.append(g)
def on_start(self):
    log.warn('Entering On Start!!!')
    # Get the stream(s)
    stream_id = self.CFG.get_safe('process.stream_id', {})

    self.greenlet_queue = []

    # Stream creation is done in SA, but to make the example go for demonstration create one here if it is not provided...
    if not stream_id:
        pubsub_cli = PubsubManagementServiceClient(node=self.container.node)
        stream_def_id = pubsub_cli.create_stream_definition(
            name='Producer stream %s' % str(uuid4()),
            container=self.outgoing_stream_def)
        stream_id = pubsub_cli.create_stream(
            name='Example CTD Data',
            stream_definition_id=stream_def_id,
            original=True,
            encoding='ION R2')

    self.stream_publisher_registrar = StreamPublisherRegistrar(process=self, node=self.container.node)
    # Needed to get the originator's stream_id
    self.stream_id = stream_id

    self.publisher = self.stream_publisher_registrar.create_publisher(stream_id=stream_id)

    self.last_time = 0

    g = Greenlet(self._trigger_func, stream_id)
    log.debug('Starting publisher thread for simple ctd data.')
    g.start()
    log.warn('Publisher Greenlet started in "%s"' % self.__class__.__name__)
    self.greenlet_queue.append(g)
class StreamProducer(SimpleProcess):
    """
    StreamProducer is not a stream process. A stream process is defined by having an input
    stream which is processed. The StreamProducer takes the part of an agent pushing data
    into the system.
    """

    def on_init(self):
        log.debug("StreamProducer init. Self.id=%s" % self.id)

    def on_start(self):
        log.debug("StreamProducer start")
        self.producer_proc = Greenlet(self._trigger_func)
        self.producer_proc.start()

    def on_quit(self):
        log.debug("StreamProducer quit")
        self.producer_proc.kill()
        super(StreamProducer, self).on_quit()

    def _trigger_func(self):
        interval = self.CFG.get('stream_producer').get('interval')
        routing_key = self.CFG.get('stream_producer').get('routing_key')

        # Create scoped exchange name
        XP = '.'.join([bootstrap.get_sys_name(), 'science_data'])

        pub = ProcessPublisher(node=self.container.node, name=(XP, routing_key), process=self)
        num = 1
        while True:
            msg = dict(num=str(num))
            pub.publish(msg)
            log.debug("Message %s published", num)
            num += 1
            time.sleep(interval / 1000.0)
class VizTransformProcForMatplotlibGraphs(TransformDataProcess):
    """
    This class is used for instantiating worker processes that have subscriptions to data streams
    and convert incoming data from CDM format to Matplotlib graphs.
    """

    def on_start(self):
        super(VizTransformProcForMatplotlibGraphs, self).on_start()
        #assert len(self.streams)==1

        self.initDataFlag = True
        self.graph_data = {}  # Stores a dictionary of variables : [List of values]

        # Need some clients
        self.rr_cli = ResourceRegistryServiceProcessClient(process=self, node=self.container.node)
        self.pubsub_cli = PubsubManagementServiceClient(node=self.container.node)

        # extract the various parameters passed to the transform process
        self.out_stream_id = self.CFG.get('process').get('publish_streams').get('visualization_service_submit_stream_id')

        # Create a publisher on the output stream
        #stream_route = self.pubsub_cli.register_producer(stream_id=self.out_stream_id)
        out_stream_pub_registrar = StreamPublisherRegistrar(process=self.container, node=self.container.node)
        self.out_stream_pub = out_stream_pub_registrar.create_publisher(stream_id=self.out_stream_id)

        self.data_product_id = self.CFG.get('data_product_id')
        self.stream_def_id = self.CFG.get("stream_def_id")
        self.stream_def = self.rr_cli.read(self.stream_def_id)

        # Start the thread responsible for keeping track of time and generating graphs
        # Mutex for ensuring proper concurrent communications between threads
        self.lock = RLock()
        self.rendering_proc = Greenlet(self.rendering_thread)
        self.rendering_proc.start()

    def process(self, packet):
        log.debug('(%s): Received Viz Data Packet' % self.name)
        #log.debug('(%s): - Processing: %s' % (self.name,packet))

        # parse the incoming data
        psd = PointSupplementStreamParser(stream_definition=self.stream_def.container, stream_granule=packet)

        # re-arrange incoming data into an easy to parse dictionary
        vardict = {}
        arrLen = None
        for varname in psd.list_field_names():
            vardict[varname] = psd.get_values(varname)
            arrLen = len(vardict[varname])

        if self.initDataFlag:
            # look at the incoming packet and store
            for varname in psd.list_field_names():
                self.lock.acquire()
                self.graph_data[varname] = []
                self.lock.release()
            self.initDataFlag = False

        # If code reached here, the graph data storage has been initialized. Just add values to the list
        with self.lock:
            for varname in psd.list_field_names():
                self.graph_data[varname].extend(vardict[varname])

    def rendering_thread(self):
        from copy import deepcopy

        # init Matplotlib
        fig = Figure()
        ax = fig.add_subplot(111)
        canvas = FigureCanvas(fig)
        imgInMem = StringIO.StringIO()

        while True:
            # Sleep for a pre-decided interval. Should be specifiable in a YAML file
            gevent.sleep(20)

            # If there's no data, wait.
            # Lock is used here to make sure the entire vector exists start to finish
            working_set = None
            with self.lock:
                if len(self.graph_data) == 0:
                    continue
                else:
                    working_set = deepcopy(self.graph_data)

            # For the simple case of testing, lets plot all time variant variables one at a time
            xAxisVar = 'time'
            xAxisFloatData = working_set[xAxisVar]

            for varName, varData in working_set.iteritems():
                if varName == 'time' or varName == 'height' or varName == 'longitude' or varName == 'latitude':
                    continue

                yAxisVar = varName
                yAxisFloatData = working_set[varName]

                # Generate the plot
                ax.plot(xAxisFloatData, yAxisFloatData, 'ro')
                ax.set_xlabel(xAxisVar)
                ax.set_ylabel(yAxisVar)
                ax.set_title(yAxisVar + ' vs ' + xAxisVar)
                ax.set_autoscale_on(False)

                # generate filename for the output image
                fileName = yAxisVar + '_vs_' + xAxisVar + '.png'
                # Save the figure to the in memory file
                canvas.print_figure(imgInMem, format="png")
                imgInMem.seek(0)

                # submit resulting table back using the out stream publisher
                msg = {"viz_product_type": "matplotlib_graphs",
                       "data_product_id": self.data_product_id,
                       "image_obj": imgInMem.getvalue(),
                       "image_name": fileName}
                self.out_stream_pub.publish(msg)

                # clear the canvas for the next image
                ax.clear()
def activate_workers(self, workers: Greenlet) -> None: workers.start()
# Let's see them in action.

# In[ ]:

# Create a simple function and wrap it in a greenlet
def myfunction(arg1, arg2, **kwargs):
    print(arg1, arg2, kwargs)
    return 100


# In[ ]:

g = Greenlet(myfunction, 'One', 'Two', now='Buckle my shoe')  # create a Greenlet instance using the constructor
g.start()  # then call start(), which schedules it to run; start() always returns None
g.join()
print('Finished')
print('Greenlet.value', g.value)  # .value stores the function's return value


# In[ ]:

# The other way is to use spawn(), which creates an instance and calls start() for you.
# It is a shorthand and widely used.
jobs = [gevent.spawn(myfunction, '1', '2', now='Buckle my shoe') for i in range(0, 5)]
gevent.joinall(jobs, timeout=3)  # returns the subset of jobs that finished within 3 seconds
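# In[ ]:

# A hedged sketch, not taken from the snippets above: many of them hand a completion callback
# to the worker explicitly (callback=lambda results: ...). gevent can also attach one from the
# outside with Greenlet.link(), which invokes the callback with the finished greenlet whether it
# succeeded or failed (link_value()/link_exception() narrow it to one of the two cases).
# The names on_done and worker here are illustrative only.
def on_done(greenlet):
    # greenlet.value holds the return value when the greenlet completed successfully
    print('worker finished with', greenlet.value)

worker = Greenlet(myfunction, 'Three', 'Four', now='Shut the door')
worker.link(on_done)  # register the callback; it may be attached before or after start()
worker.start()
worker.join()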