Example 1
    def _get_param_dicts_from_streams(self):

        pubsub = PubsubManagementServiceProcessClient(process=self)

        self.stream_def_cond = pubsub.read_stream_definition(
            stream_id=self.cond_stream)
        self.stream_def_pres = pubsub.read_stream_definition(
            stream_id=self.pres_stream)
        self.stream_def_temp = pubsub.read_stream_definition(
            stream_id=self.temp_stream)

        self.params = {}
        self.params['conductivity'] = self.stream_def_cond._id
        self.params['pressure'] = self.stream_def_pres._id
        self.params['temperature'] = self.stream_def_temp._id
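
The three read_stream_definition calls differ only in the stream id, so the same lookup can be collapsed into a loop. A minimal sketch of that pattern, assuming only a pubsub client whose read_stream_definition(stream_id=...) returns an object with an _id attribute (build_stream_def_params is an illustrative helper, not part of the source):

def build_stream_def_params(pubsub, streams):
    # streams: mapping such as {'conductivity': cond_stream_id, ...}
    # One read_stream_definition call per named stream, collecting each
    # stream definition id under its parameter name.
    return dict((name, pubsub.read_stream_definition(stream_id=sid)._id)
                for name, sid in streams.items())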
Example 2
    def on_start(self):
        '''
        Starts the process
        '''
        log.info('Replay Process Started')
        super(ReplayProcess,self).on_start()
        dsm_cli = DatasetManagementServiceProcessClient(process=self)
        pubsub  = PubsubManagementServiceProcessClient(process=self)

        self.dataset_id      = self.CFG.get_safe('process.dataset_id', None)
        self.delivery_format = self.CFG.get_safe('process.delivery_format',{})
        self.start_time      = self.CFG.get_safe('process.query.start_time', None)
        self.end_time        = self.CFG.get_safe('process.query.end_time', None)
        self.stride_time     = self.CFG.get_safe('process.query.stride_time', None)
        self.parameters      = self.CFG.get_safe('process.query.parameters',None)
        self.publish_limit   = self.CFG.get_safe('process.query.publish_limit', 10)
        self.tdoa            = self.CFG.get_safe('process.query.tdoa',None)
        self.stream_id       = self.CFG.get_safe('process.publish_streams.output', '')
        self.stream_def      = pubsub.read_stream_definition(stream_id=self.stream_id)
        self.stream_def_id   = self.stream_def._id
        self.replay_thread   = None

        self.publishing.clear()
        self.play.set()
        self.end.clear()

        if self.dataset_id is None:
            raise BadRequest('dataset_id not specified')

        self.dataset = dsm_cli.read_dataset(self.dataset_id)
        self.pubsub = PubsubManagementServiceProcessClient(process=self)
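
The CFG.get_safe calls above read dotted paths out of the nested process configuration, falling back to a default when any segment is missing. A hypothetical minimal equivalent (the real pyon implementation may differ) illustrates the semantics:

def get_safe(cfg, dotted_key, default=None):
    # Walk nested dicts along the dotted path; return the default as soon
    # as a segment is missing or a non-dict is indexed.
    node = cfg
    for part in dotted_key.split('.'):
        try:
            node = node[part]
        except (KeyError, TypeError):
            return default
    return node

cfg = {'process': {'query': {'publish_limit': 10}}}
assert get_safe(cfg, 'process.query.publish_limit') == 10
assert get_safe(cfg, 'process.query.start_time') is None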
Example 3
    def on_start(self):
        self.pdict = None
        stream_id = self.CFG.get_safe('process.stream_id')
        pubsub_cli = PubsubManagementServiceProcessClient(process=self)
        self.stream_def = pubsub_cli.read_stream_definition(
            stream_id=stream_id)
        super(BetterDataProducer, self).on_start()
Example 4
    def on_start(self):
        '''
        Starts the process
        '''
        log.info('Replay Process Started')
        super(ReplayProcess,self).on_start()
        dsm_cli = DatasetManagementServiceProcessClient(process=self)
        pubsub  = PubsubManagementServiceProcessClient(process=self)

        self.dataset_id      = self.CFG.get_safe('process.dataset_id', None)
        self.delivery_format = self.CFG.get_safe('process.delivery_format',{})
        self.start_time      = self.CFG.get_safe('process.query.start_time', None)
        self.end_time        = self.CFG.get_safe('process.query.end_time', None)
        self.stride_time     = self.CFG.get_safe('process.query.stride_time', None)
        self.parameters      = self.CFG.get_safe('process.query.parameters',None)
        self.publish_limit   = self.CFG.get_safe('process.query.publish_limit', 10)
        self.tdoa            = self.CFG.get_safe('process.query.tdoa',None)
        self.stream_id       = self.CFG.get_safe('process.publish_streams.output', '')
        self.stream_def      = pubsub.read_stream_definition(stream_id=self.stream_id)
        self.stream_def_id   = self.stream_def._id

        self.publishing.clear()
        self.play.set()
        self.end.clear()

        if self.dataset_id is None:
            raise BadRequest('dataset_id not specified')

        self.dataset = dsm_cli.read_dataset(self.dataset_id)
        self.pubsub = PubsubManagementServiceProcessClient(process=self)
Example 5
    def on_start(self):
        super(SalinityTransform, self).on_start()

        self.sal_stream = self.CFG.process.publish_streams.values()[0]

        # Read the parameter dict from the stream def of the stream
        pubsub = PubsubManagementServiceProcessClient(process=self)
        self.stream_definition = pubsub.read_stream_definition(stream_id=self.sal_stream)
Example 6
    def on_start(self):
        super(EventTriggeredTransform_A, self).on_start()

        self.awake = False

        self.cond_stream = self.CFG.process.publish_streams.values()[0]

        # Read the parameter dict from the stream def of the stream
        pubsub = PubsubManagementServiceProcessClient(process=self)
        self.stream_definition = pubsub.read_stream_definition(stream_id=self.cond_stream)
Example 7
    def on_start(self):
        super(EventTriggeredTransform_A, self).on_start()

        self.awake = False

        self.cond_stream = self.CFG.process.publish_streams.values()[0]

        # Read the parameter dict from the stream def of the stream
        pubsub = PubsubManagementServiceProcessClient(process=self)
        self.stream_definition = pubsub.read_stream_definition(
            stream_id=self.cond_stream)
Example 8
    def on_start(self):
        super(CTDL1ConductivityTransform, self).on_start()

        if not self.CFG.process.publish_streams.has_key('conductivity'):
            raise BadRequest("For CTD transforms, please send the stream_id using "
                             "a special keyword (ex: conductivity)")
        self.cond_stream = self.CFG.process.publish_streams.conductivity

        # Read the parameter dict from the stream def of the stream
        pubsub = PubsubManagementServiceProcessClient(process=self)
        self.stream_definition = pubsub.read_stream_definition(stream_id=self.cond_stream)
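
Each of the CTD transforms above repeats the same guard: check publish_streams for a required keyword and raise BadRequest otherwise. A sketch of the shared pattern (require_stream is an illustrative helper, and BadRequest here is a stand-in for the framework's exception):

class BadRequest(Exception):
    # Stand-in for the framework exception used in the examples.
    pass

def require_stream(publish_streams, keyword):
    # Insist the launch configuration supplied the stream id under the
    # expected special keyword, mirroring the transforms' checks.
    if keyword not in publish_streams:
        raise BadRequest("please send the stream_id using a special "
                         "keyword (ex: %s)" % keyword)
    return publish_streams[keyword]

cond_stream = require_stream({'conductivity': 'stream_1'}, 'conductivity')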
Example 9
    def on_start(self):
        super(DensityTransform, self).on_start()

        if not self.CFG.process.publish_streams.has_key('density'):
            raise BadRequest("For CTD transforms, please send the stream_id "
                                 "using a special keyword (ex: density)")
        self.dens_stream = self.CFG.process.publish_streams.density

        # Read the parameter dict from the stream def of the stream
        pubsub = PubsubManagementServiceProcessClient(process=self)
        self.stream_definition = pubsub.read_stream_definition(stream_id=self.dens_stream)
Example 10
    def on_start(self):
        super(CTDL1TemperatureTransform, self).on_start()

        if not self.CFG.process.publish_streams.has_key('temperature'):
            raise BadRequest("For CTD transforms, please send the stream_id using a "
                                 "special keyword (ex: temperature)")

        self.temp_stream = self.CFG.process.publish_streams.temperature

        # Read the parameter dict from the stream def of the stream
        pubsub = PubsubManagementServiceProcessClient(process=self)
        self.stream_definition = pubsub.read_stream_definition(stream_id=self.temp_stream)
Example 11
class VizTransformGoogleDT(TransformDataProcess):

    """
    This class is used for converting incoming data from CDM format to JSON style Google DataTables

    Note: One behaviour that this class is expected to achieve specifically is to determine if it's supposed
        to work as a realtime transform (exists indefinitely and maintains a sliding window of data) or as
        a replay transform (one-shot).

        [2] This transform behaves as an instantaneous forwarder. There is no waiting for the entire stream
            to create the complete datatable. As the granules come in, they are translated to the datatable
            'components'. Components, because we are not creating the actual datatable in this code. That's the job
            of the viz service to put all the components of a datatable together in JSON format before sending it
            to the client

        [3] The time stamp in the incoming stream can't be converted to the datetime object here because
            the Raw stream definition only expects regular primitives (strings, floats, ints etc)

    Usage: https://gist.github.com/3834918

    """

    output_bindings = ['google_dt_components']


    def __init__(self):
        super(VizTransformGoogleDT, self).__init__()

    def on_start(self):
        self.pubsub_management = PubsubManagementServiceProcessClient(process=self)

        self.stream_info  = self.CFG.get_safe('process.publish_streams', {})
        self.stream_names = self.stream_info.keys()
        self.stream_ids   = self.stream_info.values()
        if not self.stream_names:
            raise BadRequest('Google DT Transform has no output streams.')

        super(VizTransformGoogleDT,self).on_start()



    def recv_packet(self, packet, in_stream_route, in_stream_id):
        log.info('Received packet')
        outgoing = VizTransformGoogleDTAlgorithm.execute(packet, params=self.get_stream_definition())
        for stream_name in self.stream_names:
            publisher = getattr(self, stream_name)
            publisher.publish(outgoing)


    def get_stream_definition(self):
        stream_id = self.stream_ids[0]
        self.stream_def = self.pubsub_management.read_stream_definition(stream_id=stream_id)
        return self.stream_def._id
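
recv_packet publishes by resolving each output stream name to an attribute that holds a publisher. A toy harness (all names illustrative) showing that getattr-based contract:

class StubPublisher(object):
    def __init__(self):
        self.published = []

    def publish(self, msg):
        self.published.append(msg)

class StubTransform(object):
    pass

transform = StubTransform()
transform.stream_names = ['google_dt_components']
transform.google_dt_components = StubPublisher()

# Same dispatch as recv_packet: look up the publisher attribute named
# after each stream and publish the outgoing message on it.
for stream_name in transform.stream_names:
    getattr(transform, stream_name).publish({'viz_product_type': 'google_dt'})
assert transform.google_dt_components.published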
Example 12
    def on_start(self):
        super(PresfL1Transform, self).on_start()

        if not self.CFG.process.publish_streams.has_key('seafloor_pressure'):
            raise BadRequest("For the PresfL1Transform, please send the stream_id using "
                             "a special keyword (ex: seafloor_pressure)")

        self.pres_stream = self.CFG.process.publish_streams.seafloor_pressure

        # Read the parameter dict from the stream def of the stream
        pubsub = PubsubManagementServiceProcessClient(process=self)
        self.stream_definition = pubsub.read_stream_definition(stream_id=self.pres_stream)
Example 13
    def on_start(self):
        super(CTDL1PressureTransform, self).on_start()

        if not self.CFG.process.publish_streams.has_key('pressure'):
            raise BadRequest("For CTD transforms, please send the stream_id using "
                                 "a special keyword (ex: pressure)")

        self.pres_stream = self.CFG.process.publish_streams.pressure

        # Read the parameter dict from the stream def of the stream
        pubsub = PubsubManagementServiceProcessClient(process=self)
        self.stream_definition = pubsub.read_stream_definition(stream_id=self.pres_stream)
Example 14
    def on_start(self):
        super(CTDBP_L0_all, self).on_start()

        self.L0_stream_id = self.CFG.process.publish_streams.values()[0]

        log.debug("the output stream: %s", self.L0_stream_id)

        pubsub = PubsubManagementServiceProcessClient(process=self)
        self.stream_def_L0 = pubsub.read_stream_definition(stream_id=self.L0_stream_id)

        self.params = {'L0_stream' : self.stream_def_L0._id }

        log.debug("the params: %s", self.params)
Example 15
    def on_start(self):
        super(CTDBP_L1_Transform, self).on_start()

        self.L1_stream_id = self.CFG.process.publish_streams.values()[0]

        # Read the parameter dict from the stream def of the stream
        pubsub = PubsubManagementServiceProcessClient(process=self)
        stream_def = pubsub.read_stream_definition(stream_id=self.L1_stream_id)
        self.stream_definition_id = stream_def._id

        self.temp_calibration_coeffs = self.CFG.process.calibration_coeffs['temp_calibration_coeffs']
        self.pres_calibration_coeffs = self.CFG.process.calibration_coeffs['pres_calibration_coeffs']
        self.cond_calibration_coeffs = self.CFG.process.calibration_coeffs['cond_calibration_coeffs']
Example 16
    def on_start(self):

        pubsub = PubsubManagementServiceProcessClient(process=self)
        if self.CFG.process.publish_streams.has_key("salinity"):
            self.sal_stream = self.CFG.process.publish_streams.salinity
        elif self.CFG.process.publish_streams.has_key("output"):
            self.sal_stream = self.CFG.process.publish_streams.output
            self.salinity = self.output
        self.CFG.process.stream_id = self.sal_stream
        self.stream_id = self.sal_stream
        self.stream_def = pubsub.read_stream_definition(stream_id=self.sal_stream)

        super(SalinityDoubler, self).on_start()
Example 17
class VizTransformGoogleDT(TransformDataProcess):
    """
    This class is used for converting incoming data from CDM format to JSON style Google DataTables

    Note: One behaviour that this class is expected to achieve specifically is to determine if it's supposed
        to work as a realtime transform (exists indefinitely and maintains a sliding window of data) or as
        a replay transform (one-shot).

        [2] This transform behaves as an instantaneous forwarder. There is no waiting for the entire stream
            to create the complete datatable. As the granules come in, they are translated to the datatable
            'components'. Components, because we are not creating the actual datatable in this code. That's the job
            of the viz service to put all the components of a datatable together in JSON format before sending it
            to the client

        [3] The time stamp in the incoming stream can't be converted to the datetime object here because
            the Raw stream definition only expects regular primitives (strings, floats, ints etc)

    Usage: https://gist.github.com/3834918

    """

    output_bindings = ['google_dt_components']

    def __init__(self):
        super(VizTransformGoogleDT, self).__init__()

    def on_start(self):
        self.pubsub_management = PubsubManagementServiceProcessClient(
            process=self)

        self.stream_info = self.CFG.get_safe('process.publish_streams', {})
        self.stream_names = self.stream_info.keys()
        self.stream_ids = self.stream_info.values()
        if not self.stream_names:
            raise BadRequest('Google DT Transform has no output streams.')

        super(VizTransformGoogleDT, self).on_start()

    def recv_packet(self, packet, in_stream_route, in_stream_id):
        log.info('Received packet')
        outgoing = VizTransformGoogleDTAlgorithm.execute(
            packet, params=self.get_stream_definition())
        for stream_name in self.stream_names:
            publisher = getattr(self, stream_name)
            publisher.publish(outgoing)

    def get_stream_definition(self):
        stream_id = self.stream_ids[0]
        self.stream_def = self.pubsub_management.read_stream_definition(
            stream_id=stream_id)
        return self.stream_def._id
Example 18
    def on_start(self):
        super(PresfL0Splitter, self).on_start()

        if not self.CFG.process.publish_streams.has_key('absolute_pressure'):
            raise BadRequest(
                "For the PresfL0Splitter, please send the stream_id using "
                "a special keyword (ex: absolute_pressure)")

        self.pres_stream = self.CFG.process.publish_streams.absolute_pressure

        # Read the parameter dict from the stream def of the stream
        pubsub = PubsubManagementServiceProcessClient(process=self)
        self.stream_definition = pubsub.read_stream_definition(
            stream_id=self.pres_stream)
Example 19
    def on_start(self):
        super(CTDBP_L0_all, self).on_start()

        self.L0_stream_id = self.CFG.process.publish_streams.values()[0]

        log.debug("the output stream: %s", self.L0_stream_id)

        pubsub = PubsubManagementServiceProcessClient(process=self)
        self.stream_def_L0 = pubsub.read_stream_definition(
            stream_id=self.L0_stream_id)

        self.params = {'L0_stream': self.stream_def_L0._id}

        log.debug("the params: %s", self.params)
Example 20
    def on_start(self):
        super(EventTriggeredTransform_B, self).on_start()

        self.awake = False

        if not self.CFG.process.publish_streams.has_key('output'):
            raise BadRequest("For event triggered transform, please send the stream_id "
                             "using the special keyword, output")

        self.output = self.CFG.process.publish_streams.output

        # Read the parameter dict from the stream def of the stream
        pubsub = PubsubManagementServiceProcessClient(process=self)
        self.stream_definition = pubsub.read_stream_definition(stream_id=self.output)
Example 21
    def on_start(self):

        pubsub = PubsubManagementServiceProcessClient(process=self)
        if self.CFG.process.publish_streams.has_key('salinity'):
            self.sal_stream = self.CFG.process.publish_streams.salinity
        elif self.CFG.process.publish_streams.has_key('output'):
            self.sal_stream = self.CFG.process.publish_streams.output
            self.salinity = self.output
        self.CFG.process.stream_id = self.sal_stream
        self.stream_id = self.sal_stream
        self.stream_def = pubsub.read_stream_definition(
            stream_id=self.sal_stream)

        super(SalinityDoubler, self).on_start()
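
SalinityDoubler accepts either the specific 'salinity' keyword or the generic 'output' binding. The same fallback as a small standalone sketch (pick_stream is hypothetical):

def pick_stream(publish_streams):
    # Prefer the specific keyword, then fall back to the generic binding.
    for key in ('salinity', 'output'):
        if key in publish_streams:
            return publish_streams[key]
    raise KeyError('no salinity or output stream configured')

assert pick_stream({'output': 'stream_9'}) == 'stream_9'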
Example 22
    def on_start(self):
        super(SimpleCtdPublisher,self).on_start()
        pubsub_cli = PubsubManagementServiceProcessClient(process=self)
        self.stream_id = self.CFG.get_safe('process.stream_id',{})
        self.interval  = self.CFG.get_safe('process.interval', 1.0)
        #self.last_time = self.CFG.get_safe('process.last_time', 0)

        self.stream_def = pubsub_cli.read_stream_definition(stream_id=self.stream_id)
        self.pdict      = self.stream_def.parameter_dictionary


        self.finished = gevent.event.Event()
        self.greenlet = gevent.spawn(self.publish_loop)
        self._stats['publish_count'] = 0
        log.info('SimpleCTDPublisher started, publishing to %s', self.publisher.stream_route.__dict__)
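
SimpleCtdPublisher spawns a greenlet for publish_loop and signals shutdown through a gevent Event. A minimal sketch of that lifecycle, assuming only gevent (the publish callable is a placeholder):

import gevent
import gevent.event

def run_publish_loop(publish, interval, finished):
    # Publish on a fixed interval until the finished event is set; waiting
    # on the event doubles as an interruptible sleep.
    while not finished.is_set():
        publish()
        finished.wait(interval)

finished = gevent.event.Event()
worker = gevent.spawn(run_publish_loop, lambda: None, 0.01, finished)
gevent.sleep(0.05)
finished.set()
worker.join()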
Example 23
    def on_start(self):
        super(CTDBP_L0_all, self).on_start()

        if not self.CFG.process.publish_streams.has_key('L0_stream'):
            raise BadRequest("For CTD transforms, please send the stream_id for the L0_stream using "
                             "a special keyword (L0_stream)")
        self.L0_stream_id = self.CFG.process.publish_streams.L0_stream

        log.debug("the output stream: %s", self.L0_stream_id)

        pubsub = PubsubManagementServiceProcessClient(process=self)
        self.stream_def_L0 = pubsub.read_stream_definition(stream_id=self.L0_stream_id)

        self.params = {'L0_stream' : self.stream_def_L0._id }

        log.debug("the params: %s", self.params)
Example 24
    def on_start(self):
        super(CTDBP_L0_all, self).on_start()

        if not self.CFG.process.publish_streams.has_key('L0_stream'):
            raise BadRequest("For CTD transforms, please send the stream_id for the L0_stream using "
                             "a special keyword (L0_stream)")
        self.L0_stream_id = self.CFG.process.publish_streams.L0_stream

        log.debug("the output stream: %s", self.L0_stream_id)

        pubsub = PubsubManagementServiceProcessClient(process=self)
        self.stream_def_L0 = pubsub.read_stream_definition(stream_id=self.L0_stream_id)

        self.params = {'L0_stream' : self.stream_def_L0._id }

        log.debug("the params: %s", self.params)
Example 25
    def on_start(self):
        super(SimpleCtdPublisher, self).on_start()
        pubsub_cli = PubsubManagementServiceProcessClient(process=self)
        self.stream_id = self.CFG.get_safe('process.stream_id', {})
        self.interval = self.CFG.get_safe('process.interval', 1.0)
        #self.last_time = self.CFG.get_safe('process.last_time', 0)

        self.stream_def = pubsub_cli.read_stream_definition(
            stream_id=self.stream_id)
        self.pdict = self.stream_def.parameter_dictionary

        self.finished = gevent.event.Event()
        self.greenlet = gevent.spawn(self.publish_loop)
        self._stats['publish_count'] = 0
        log.info('SimpleCTDPublisher started, publishing to %s',
                 self.publisher.stream_route.__dict__)
Example 26
    def on_start(self):
        super(EventTriggeredTransform_B, self).on_start()

        self.awake = False

        if not self.CFG.process.publish_streams.has_key('output'):
            raise BadRequest(
                "For event triggered transform, please send the stream_id "
                "using the special keyword, output")

        self.output = self.CFG.process.publish_streams.output

        # Read the parameter dict from the stream def of the stream
        pubsub = PubsubManagementServiceProcessClient(process=self)
        self.stream_definition = pubsub.read_stream_definition(
            stream_id=self.output)
Example 27
    def on_start(self):
        super(CTDBP_L1_Transform, self).on_start()

        self.L1_stream_id = self.CFG.process.publish_streams.values()[0]

        # Read the parameter dict from the stream def of the stream
        pubsub = PubsubManagementServiceProcessClient(process=self)
        stream_def = pubsub.read_stream_definition(stream_id=self.L1_stream_id)
        self.stream_definition_id = stream_def._id

        self.temp_calibration_coeffs = self.CFG.process.calibration_coeffs[
            'temp_calibration_coeffs']
        self.pres_calibration_coeffs = self.CFG.process.calibration_coeffs[
            'pres_calibration_coeffs']
        self.cond_calibration_coeffs = self.CFG.process.calibration_coeffs[
            'cond_calibration_coeffs']
Example 28
    def on_start(self):
        super(CTDBP_DensityTransform, self).on_start()

        self.dens_stream_id = self.CFG.process.publish_streams.values()[0]

        lat = self.CFG.get_safe('process.lat',None)
        if lat is None:
            raise BadRequest('Latitude is required to determine density')
        lon = self.CFG.get_safe('process.lon',None)
        if lon is None:
            raise BadRequest('Longitude is required to determine density')

        # Read the parameter dict from the stream def of the stream
        pubsub = PubsubManagementServiceProcessClient(process=self)
        self.stream_definition = pubsub.read_stream_definition(stream_id=self.dens_stream_id)

        self.params = {'stream_def' : self.stream_definition._id, 'lat': lat, 'lon' : lon}
Example 29
    def on_start(self):
        super(CTDBP_L1_Transform, self).on_start()

        #  Validate the CFG used to launch the transform has all the required fields
        if not self.CFG.process.publish_streams.has_key('L1_stream'):
            raise BadRequest("For CTDBP transforms, please send the stream_id for the L1_stream using "
                     "a special keyword (L1_stream)")

        self.L1_stream_id = self.CFG.process.publish_streams.L1_stream

        # Read the parameter dict from the stream def of the stream
        pubsub = PubsubManagementServiceProcessClient(process=self)
        stream_def = pubsub.read_stream_definition(stream_id=self.L1_stream_id)
        self.stream_definition_id = stream_def._id

        self.temp_calibration_coeffs = self.CFG.process.calibration_coeffs['temp_calibration_coeffs']
        self.pres_calibration_coeffs = self.CFG.process.calibration_coeffs['pres_calibration_coeffs']
        self.cond_calibration_coeffs = self.CFG.process.calibration_coeffs['cond_calibration_coeffs']
Example 30
    def on_start(self):
        super(CTDBP_L1_Transform, self).on_start()

        #  Validate the CFG used to launch the transform has all the required fields
        if not self.CFG.process.publish_streams.has_key('L1_stream'):
            raise BadRequest("For CTDBP transforms, please send the stream_id for the L1_stream using "
                     "a special keyword (L1_stream)")

        self.L1_stream_id = self.CFG.process.publish_streams.L1_stream

        calibration_coeffs= self.CFG.process.calibration_coeffs

        # Read the parameter dict from the stream def of the stream
        pubsub = PubsubManagementServiceProcessClient(process=self)
        self.stream_definition = pubsub.read_stream_definition(stream_id=self.L1_stream_id)

        self.params = {}
        self.params['stream_def_id'] = self.stream_definition._id
        self.params['calibration_coeffs'] = calibration_coeffs
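
The calibration_coeffs block these L1 transforms read from CFG.process is expected to carry one entry per sensor. An illustrative shape (the coefficient values are made up):

config = {
    'process': {
        'calibration_coeffs': {
            'temp_calibration_coeffs': [1.0, 0.0],
            'pres_calibration_coeffs': [1.0, 0.0],
            'cond_calibration_coeffs': [1.0, 0.0],
        }
    }
}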
Example 31
    def on_start(self):
        super(CTDBP_DensityTransform, self).on_start()

        if not self.CFG.process.publish_streams.has_key('density'):
            raise BadRequest("For CTD transforms, please send the stream_id using a special keyword (ex: density)")
        self.dens_stream_id = self.CFG.process.publish_streams.density

        lat = self.CFG.get_safe('process.lat',None)
        if lat is None:
            raise BadRequest('Latitude is required to determine density')
        lon = self.CFG.get_safe('process.lon',None)
        if lon is None:
            raise BadRequest('Longitude is required to determine density')

        # Read the parameter dict from the stream def of the stream
        pubsub = PubsubManagementServiceProcessClient(process=self)
        self.stream_definition = pubsub.read_stream_definition(stream_id=self.dens_stream_id)

        self.params = {'stream_def' : self.stream_definition._id, 'lat': lat, 'lon' : lon}
Example 32
    def on_start(self):
        super(CTDBP_DensityTransform, self).on_start()

        if not self.CFG.process.publish_streams.has_key('density'):
            raise BadRequest("For CTD transforms, please send the stream_id "
                             "using a special keyword (ex: density)")
        self.dens_stream_id = self.CFG.process.publish_streams.density

        lat = self.CFG.get_safe('process.lat',None)
        if lat is None:
            raise BadRequest('Latitude is required to determine density')
        lon = self.CFG.get_safe('process.lon',None)
        if lon is None:
            raise BadRequest('Longitude is required to determine density')


        # Read the parameter dict from the stream def of the stream
        pubsub = PubsubManagementServiceProcessClient(process=self)
        self.stream_definition = pubsub.read_stream_definition(stream_id=self.dens_stream_id)
        
        self.params = {'stream_def' : self.stream_definition._id, 'lat': lat, 'lon' : lon}
Example 33
class TransformPrime(TransformDataProcess):
    binding=['output']
    '''
    Transforms which have an incoming stream and an outgoing stream.

    Parameters:
      process.stream_id      Outgoing stream identifier.
      process.exchange_point Route's exchange point.
      process.routing_key    Route's routing key.
      process.queue_name     Name of the queue to listen on.
      process.routes         streams,actor for each route {(stream_input_id, stream_output_id):actor} 
    Either the stream_id or both the exchange_point and routing_key need to be provided.
    '''    
    def on_start(self):
        TransformDataProcess.on_start(self)
        self.pubsub_management = PubsubManagementServiceProcessClient(process=self)

    @memoize_lru(100)
    def read_stream_def(self,stream_id):
        return self.pubsub_management.read_stream_definition(stream_id=stream_id)

    
    def recv_packet(self, msg, stream_route, stream_id):
        process_routes = self.CFG.get_safe('process.routes', {})
        for streams,actor in process_routes.iteritems():
            stream_in_id, stream_out_id = streams
            if stream_id == stream_in_id:
                if actor is None:
                    rdt_out = self._execute_transform(msg, streams)
                else:
                    rdt_out = self._execute_actor(msg, actor, streams)
                self.publish(rdt_out.to_granule(), stream_out_id)

    def publish(self, msg, stream_out_id):
        publisher = getattr(self, stream_out_id)
        publisher.publish(msg)
   
    def _execute_actor(self, msg, actor, streams):
        stream_in_id,stream_out_id = streams
        stream_def_out = self.read_stream_def(stream_out_id)
        #do the stuff with the actor
        rdt_out = RecordDictionaryTool(stream_definition_id=stream_def_out._id)
        return rdt_out

    def _execute_transform(self, msg, streams):
        stream_in_id,stream_out_id = streams
        stream_def_in = self.read_stream_def(stream_in_id)
        stream_def_out = self.read_stream_def(stream_out_id)
        
        incoming_pdict_dump = stream_def_in.parameter_dictionary
        outgoing_pdict_dump = stream_def_out.parameter_dictionary
        
        incoming_pdict = ParameterDictionary.load(incoming_pdict_dump)
        outgoing_pdict = ParameterDictionary.load(outgoing_pdict_dump)
        
        merged_pdict = ParameterDictionary()
        for k,v in incoming_pdict.iteritems():
            ordinal, v = v
            if k not in merged_pdict:
                merged_pdict.add_context(v)
        for k,v in outgoing_pdict.iteritems():
            ordinal, v = v
            if k not in merged_pdict:
                merged_pdict.add_context(v)
        rdt_temp = RecordDictionaryTool(param_dictionary=merged_pdict)
        rdt_in = RecordDictionaryTool.load_from_granule(msg)
        for field in rdt_temp.fields:
            if not isinstance(rdt_temp._pdict.get_context(field).param_type, ParameterFunctionType):
                try:
                    rdt_temp[field] = rdt_in[field]
                except KeyError:
                    pass
        
        for field in rdt_temp.fields:
            if isinstance(rdt_temp._pdict.get_context(field).param_type, ParameterFunctionType):
                rdt_temp[field] = rdt_temp[field]

        
        rdt_out = RecordDictionaryTool(stream_definition_id=stream_def_out._id)

        for field in rdt_out.fields:
            rdt_out[field] = rdt_temp[field]
        
        return rdt_out 
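
The process.routes mapping described in the docstring keys each (input stream, output stream) pair to an actor, with None selecting the built-in _execute_transform path. An illustrative configuration (the ids are placeholders):

config = {
    'process': {
        'routes': {
            # None routes through _execute_transform; a later variant of
            # this class loads actors given as {'module': ..., 'class': ...}.
            ('stream_in_id', 'stream_out_id'): None,
        }
    }
}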
Example 34
    def on_start(self):
        self.pdict = None
        stream_id = self.CFG.get_safe('process.stream_id')
        pubsub_cli = PubsubManagementServiceProcessClient(process=self)
        self.stream_def = pubsub_cli.read_stream_definition(stream_id=stream_id)
        super(CCDataProducer, self).on_start()
Example 35
class TransformPrime(TransformDataProcess):
    binding=['output']
    '''
    Transforms which have an incoming stream and an outgoing stream.

    Parameters:
      process.stream_id      Outgoing stream identifier.
      process.exchange_point Route's exchange point.
      process.routing_key    Route's routing key.
      process.queue_name     Name of the queue to listen on.
      process.routes         streams,actor for each route {(stream_input_id, stream_output_id):actor} 
    Either the stream_id or both the exchange_point and routing_key need to be provided.
    '''    
    def on_start(self):
        TransformDataProcess.on_start(self)
        self.pubsub_management = PubsubManagementServiceProcessClient(process=self)
    
    @memoize_lru(maxsize=100)
    def _read_stream_def(self, stream_id):
        return self.pubsub_management.read_stream_definition(stream_id=stream_id)
    
    def recv_packet(self, msg, stream_route, stream_id):
        process_routes = self.CFG.get_safe('process.routes', {})
        for streams,actor in process_routes.iteritems():
            stream_in_id, stream_out_id = streams
            if stream_id == stream_in_id:
                if actor is None:
                    rdt_out = self._execute_transform(msg, streams)
                else:
                    rdt_out = self._execute_actor(msg, actor, streams)
                self.publish(rdt_out.to_granule(), stream_out_id)

    def publish(self, msg, stream_out_id):
        publisher = getattr(self, stream_out_id)
        publisher.publish(msg)
   
    def _execute_actor(self, msg, actor, streams):
        stream_in_id,stream_out_id = streams
        stream_def_out = self._read_stream_def(stream_out_id)
        #do the stuff with the actor
        rdt_out = RecordDictionaryTool(stream_definition_id=stream_def_out._id)
        return rdt_out

    def _execute_transform(self, msg, streams):
        stream_in_id,stream_out_id = streams
        stream_def_in = self._read_stream_def(stream_in_id)
        stream_def_out = self._read_stream_def(stream_out_id)
        
        incoming_pdict_dump = stream_def_in.parameter_dictionary
        outgoing_pdict_dump = stream_def_out.parameter_dictionary
        
        incoming_pdict = ParameterDictionary.load(incoming_pdict_dump)
        outgoing_pdict = ParameterDictionary.load(outgoing_pdict_dump)
        
        merged_pdict = dict([(k,v) for k,v in incoming_pdict.iteritems()] + [(k,v) for k,v in outgoing_pdict.iteritems()])
        rdt_in = RecordDictionaryTool.load_from_granule(msg)
        rdt_out = RecordDictionaryTool(stream_definition_id=stream_def_out._id)
        #modify the shape of the rdt out since we are using _rd then _shp will never get set
        rdt_out._shp = rdt_in._shp
        if rdt_out._available_fields is None: rdt_out._available_fields = []
        if rdt_in._available_fields is None: rdt_in._available_fields = []
        for key,pctup in merged_pdict.iteritems():
            n,pc = pctup
            #if function then a transform is applied to calculate values
            if isinstance(pc.param_type, ParameterFunctionType):
                #apply transform
                pv = get_value_class(pc.param_type, rdt_in.domain)
                #recursive function applies values
                def pval_callback(name, slice_):
                    result = None
                    #search for input data...first level input
                    if name in rdt_in._available_fields:
                        result = rdt_in[name]
                    else:
                        #not first level data so continue to evaluate
                        n,pc = merged_pdict[name]
                        pv = get_value_class(pc.param_type, rdt_in.domain)
                        pv._pval_callback = pval_callback
                        result = pv[:]
                    return result
                #set the evaluation callback so it can find values in the input stream
                pv._pval_callback = pval_callback
                if key in rdt_out._available_fields:
                    #rdt to and from granule wraps result in a paramval so no need
                    #paramval = rdt_out.get_paramval(pc.param_type, rdt_in.domain, pv[:])
                    #paramval._pval_callback = pval_callback
                    #rdt_out._rd[key] = paramval
                    rdt_out._rd[key] = pv[:]
            else:
                #field exists in both the in and the out stream so pass it along to the output stream
                if key in rdt_in._available_fields and key in rdt_out._available_fields:
                    #pass through
                    rdt_out._rd[key] = rdt_in._rd[key][:]
        return rdt_out 
Example 36
class TransformPrime(TransformDataProcess):
    binding=['output']
    '''
    Transforms which have an incoming stream and an outgoing stream.

    Parameters:
      process.stream_id      Outgoing stream identifier.
      process.exchange_point Route's exchange point.
      process.routing_key    Route's routing key.
      process.queue_name     Name of the queue to listen on.
      process.routes         streams,actor for each route {(stream_input_id, stream_output_id):actor} 
    Either the stream_id or both the exchange_point and routing_key need to be provided.
    '''    
    def on_start(self):
        TransformDataProcess.on_start(self)
        self.pubsub_management = PubsubManagementServiceProcessClient(process=self)

    @memoize_lru(100)
    def read_stream_def(self,stream_id):
        return self.pubsub_management.read_stream_definition(stream_id=stream_id)

    
    def recv_packet(self, msg, stream_route, stream_id):
        process_routes = self.CFG.get_safe('process.routes', {})
        for stream_in_id,routes in process_routes.iteritems():
            if stream_id == stream_in_id:
                for stream_out_id, actor in routes.iteritems():
                    if actor is None:
                        rdt_out = self._execute_transform(msg, (stream_in_id, stream_out_id))
                        self.publish(rdt_out.to_granule(), stream_out_id)
                    else:
                        outgoing = self._execute_actor(msg, actor, (stream_in_id, stream_out_id))
                        self.publish(outgoing, stream_out_id)

    def publish(self, msg, stream_out_id):
        publisher = getattr(self, stream_out_id)
        publisher.publish(msg)

    def _load_actor(self, actor):
        '''
        Returns callable execute method if it exists, otherwise it raises a BadRequest
        '''
        try:
            module = __import__(actor['module'], fromlist=[''])
        except ImportError:
            log.exception('Actor could not be loaded')
            raise
        try:
            cls = getattr(module, actor['class'])
        except AttributeError:
            log.exception('Module %s does not have class %s', repr(module), actor['class'])
            raise
        try:
            execute = getattr(cls,'execute')
        except AttributeError:
            log.exception('Actor class does not contain execute method')
            raise
        return execute

   
    def _execute_actor(self, msg, actor, streams):
        stream_in_id,stream_out_id = streams
        stream_def_out = self.read_stream_def(stream_out_id)
        params = self.CFG.get_safe('process.params', {})
        config = self.CFG.get_safe('process')
        #do the stuff with the actor
        params['stream_def'] = stream_def_out._id
        executor = self._load_actor(actor)
        try:
            rdt_out = executor(msg, None, config, params, None)
        except:
            log.exception('Error running actor for %s', self.id)
            raise
        return rdt_out

    def _execute_transform(self, msg, streams):
        stream_in_id,stream_out_id = streams
        stream_def_in = self.read_stream_def(stream_in_id)
        stream_def_out = self.read_stream_def(stream_out_id)
        
        incoming_pdict_dump = stream_def_in.parameter_dictionary
        outgoing_pdict_dump = stream_def_out.parameter_dictionary
        
        incoming_pdict = ParameterDictionary.load(incoming_pdict_dump)
        outgoing_pdict = ParameterDictionary.load(outgoing_pdict_dump)
        
        merged_pdict = ParameterDictionary()
        for k,v in incoming_pdict.iteritems():
            ordinal, v = v
            if k not in merged_pdict:
                merged_pdict.add_context(v)
        for k,v in outgoing_pdict.iteritems():
            ordinal, v = v
            if k not in merged_pdict:
                merged_pdict.add_context(v)
        rdt_temp = RecordDictionaryTool(param_dictionary=merged_pdict)
        rdt_in = RecordDictionaryTool.load_from_granule(msg)
        for field in rdt_temp.fields:
            if not isinstance(rdt_temp._pdict.get_context(field).param_type, ParameterFunctionType):
                try:
                    rdt_temp[field] = rdt_in[field]
                except KeyError:
                    pass
        
        for field in rdt_temp.fields:
            if isinstance(rdt_temp._pdict.get_context(field).param_type, ParameterFunctionType):
                rdt_temp[field] = rdt_temp[field]

        
        rdt_out = RecordDictionaryTool(stream_definition_id=stream_def_out._id)

        for field in rdt_out.fields:
            rdt_out[field] = rdt_temp[field]
        
        return rdt_out 
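
_load_actor resolves the actor descriptor with __import__ and getattr. A standalone demonstration of the same mechanics against a stdlib module (the descriptor contents are illustrative):

descriptor = {'module': 'json', 'class': 'JSONEncoder'}

# Import the module by dotted name; fromlist=[''] makes __import__ return
# the leaf module rather than the top-level package.
module = __import__(descriptor['module'], fromlist=[''])
cls = getattr(module, descriptor['class'])
method = getattr(cls, 'encode')  # analogous to fetching 'execute'
assert callable(method)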
Example 37
class VizTransformMatplotlibGraphs(TransformStreamPublisher, TransformEventListener, TransformStreamListener):

    """
    This class is used for instantiating worker processes that have subscriptions to data streams and convert
    incoming data from CDM format to Matplotlib graphs

    """
    output_bindings = ['graph_image_param_dict']
    event_timer_interval = None


    def on_start(self):
        #print ">>>>>>>>>>>>>>>>>>>>>> MPL CFG = ", self.CFG

        self.pubsub_management = PubsubManagementServiceProcessClient(process=self)
        self.ssclient = SchedulerServiceProcessClient(process=self)
        self.rrclient = ResourceRegistryServiceProcessClient(process=self)
        self.data_retriever_client = DataRetrieverServiceProcessClient(process=self)
        self.dsm_client = DatasetManagementServiceProcessClient(process=self)
        self.pubsub_client = PubsubManagementServiceProcessClient(process=self)

        self.stream_info  = self.CFG.get_safe('process.publish_streams',{})
        self.stream_names = self.stream_info.keys()
        self.stream_ids   = self.stream_info.values()

        if not self.stream_names:
            raise BadRequest('MPL Transform has no output streams.')

        graph_time_periods= self.CFG.get_safe('graph_time_periods')

        # If this is meant to be an event driven process, schedule an event to be generated every few minutes/hours
        self.event_timer_interval = self.CFG.get_safe('graph_gen_interval')
        if self.event_timer_interval:
            event_origin = "Interval_Timer_Matplotlib"
            sub = EventSubscriber(event_type="ResourceEvent", callback=self.interval_timer_callback, origin=event_origin)
            sub.start()

            self.interval_timer_id = self.ssclient.create_interval_timer(start_time="now" , interval=self._str_to_secs(self.event_timer_interval),
                event_origin=event_origin, event_subtype="")

        super(VizTransformMatplotlibGraphs,self).on_start()

    # when the transform is used as a data process
    def recv_packet(self, packet, in_stream_route, in_stream_id):
        #Check to see if the class instance was set up as an event triggered transform. If yes, skip the packet
        if self.event_timer_interval:
            return

        log.info('Received packet')
        mpl_data_granule = VizTransformMatplotlibGraphsAlgorithm.execute(packet, params=self.get_stream_definition())
        for stream_name in self.stream_names:
            publisher = getattr(self, stream_name)
            publisher.publish(mpl_data_granule)

    def get_stream_definition(self):
        stream_id = self.stream_ids[0]
        stream_def = self.pubsub_management.read_stream_definition(stream_id=stream_id)
        return stream_def._id

    def process_event(self, msg, headers):

        return

    def interval_timer_callback(self, *args, **kwargs):
        #Find out the input data product to this process
        in_dp_id = self.CFG.get_safe('in_dp_id')

        print " >>>>>>>>>>>>>> IN DP ID from cfg : ", in_dp_id

        # get the dataset_id associated with the data_product. Need it to do the data retrieval
        ds_ids,_ = self.rrclient.find_objects(in_dp_id, PRED.hasDataset, RT.Dataset, True)
        if ds_ids is None or not ds_ids:
            return None

        # retrieve data for the specified time interval. Set up the query from the passed config first
        query = {}

        param_list_str = self.CFG.get_safe('parameters')
        if param_list_str:
            query['parameters'] = param_list_str.split(', ')
            # append time if not present in list of parameters
            if not 'time' in query['parameters']:
                query['parameters'].append('time')


        query['start_time'] = query['end_time'] = ntplib.system_to_ntp_time(time.time()) # Now
        query['stride_time'] = 1
        if self.CFG.get_safe('graph_time_period'):
            query['start_time'] = query['end_time'] - self._str_to_secs(self.CFG.get_safe('graph_time_period'))

        #print " >>>>>>>>>>>>>> QUERY = ", query

        #retrieved_granule = self.data_retriever_client.retrieve(ds_ids[0],{'start_time':start_time,'end_time':end_time})
        retrieved_granule = self.data_retriever_client.retrieve(ds_ids[0], query=query)

        # add extra parameters to query passed in config that are not needed by data retrieval
        if self.CFG.get_safe('resolution'):
            query['resolution'] = self.CFG.get_safe('resolution')

        # send the granule through the Algorithm code to get the matplotlib graphs
        mpl_pdict_id = self.dsm_client.read_parameter_dictionary_by_name('graph_image_param_dict',id_only=True)

        mpl_stream_def = self.pubsub_client.create_stream_definition('mpl', parameter_dictionary_id=mpl_pdict_id)
        fileName = self.CFG.get_safe('graph_time_period')
        mpl_data_granule = VizTransformMatplotlibGraphsAlgorithm.execute(retrieved_granule, config=query, params=mpl_stream_def, fileName=fileName)

        if mpl_data_granule is None:
            return None

        # publish on all specified output streams
        for stream_name in self.stream_names:
            publisher = getattr(self, stream_name)
            publisher.publish(mpl_data_granule)

        return

    def _str_to_secs(self, time_period):
        # this method converts commonly used time periods to their actual seconds counterparts
        #separate alpha and numeric parts of the time period
        time_n = time_period.lower().rstrip('abcdefghijklmnopqrstuvwxyz ')
        time_a = time_period.lower().lstrip('0123456789. ')

        # determine if user specified, secs, mins, hours, days, weeks, months, years
        factor = None
        if time_a == 'sec' or time_a == "secs" or time_a == 'second' or time_a == "seconds":
            factor = 1
        if time_a == "min" or time_a == "mins" or time_a == "minute" or time_a == "minutes":
            factor = 60
        if time_a == "hr" or time_a == "hrs" or time_a == "hour" or time_a == "hours":
            factor = 60 * 60
        if time_a == "day" or time_a == "days":
            factor = 60 * 60 * 24
        if time_a == "wk" or time_a == "wks" or time_a == "week" or time_a == "weeks":
            factor = 60 * 60 * 24 * 7
        if time_a == "mon" or time_a == "mons" or time_a == "month" or time_a == "months":
            factor = 60 * 60 * 24 * 30
        if time_a == "yr" or time_a == "yrs" or time_a == "year" or time_a == "years":
            factor = 60 * 60 * 24 * 365

        time_period_secs = float(time_n) * factor
        return time_period_secs


    def on_quit(self):

        #Cancel the timer
        if hasattr(self, 'interval_timer_id'):
            self.ssclient.cancel_timer(self.interval_timer_id)

        super(VizTransformMatplotlibGraphs,self).on_quit()
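
The if-ladder in _str_to_secs maps unit names to multipliers, so it can equally be written as a lookup table. A sketch preserving the same unit assumptions (30-day months, 365-day years):

_UNIT_SECONDS = {}
for names, secs in [
        (('sec', 'secs', 'second', 'seconds'), 1),
        (('min', 'mins', 'minute', 'minutes'), 60),
        (('hr', 'hrs', 'hour', 'hours'), 3600),
        (('day', 'days'), 86400),
        (('wk', 'wks', 'week', 'weeks'), 604800),
        (('mon', 'mons', 'month', 'months'), 2592000),  # 30 days
        (('yr', 'yrs', 'year', 'years'), 31536000)]:    # 365 days
    for name in names:
        _UNIT_SECONDS[name] = secs

def str_to_secs(time_period):
    # Same split as the original: numeric prefix, alphabetic unit suffix.
    number = time_period.lower().rstrip('abcdefghijklmnopqrstuvwxyz ')
    unit = time_period.lower().lstrip('0123456789. ')
    return float(number) * _UNIT_SECONDS[unit]

assert str_to_secs('2 hrs') == 7200.0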
Example 38
class VizTransformMatplotlibGraphs(TransformStreamPublisher,
                                   TransformEventListener,
                                   TransformStreamListener):
    """
    This class is used for instantiating worker processes that have subscriptions to data streams and convert
    incoming data from CDM format to Matplotlib graphs

    """
    output_bindings = ['graph_image_param_dict']
    event_timer_interval = None

    def on_start(self):
        #print ">>>>>>>>>>>>>>>>>>>>>> MPL CFG = ", self.CFG

        self.pubsub_management = PubsubManagementServiceProcessClient(
            process=self)
        self.ssclient = SchedulerServiceProcessClient(process=self)
        self.rrclient = ResourceRegistryServiceProcessClient(process=self)
        self.data_retriever_client = DataRetrieverServiceProcessClient(
            process=self)
        self.dsm_client = DatasetManagementServiceProcessClient(process=self)
        self.pubsub_client = PubsubManagementServiceProcessClient(process=self)

        self.stream_info = self.CFG.get_safe('process.publish_streams', {})
        self.stream_names = self.stream_info.keys()
        self.stream_ids = self.stream_info.values()

        if not self.stream_names:
            raise BadRequest('MPL Transform has no output streams.')

        graph_time_periods = self.CFG.get_safe('graph_time_periods')

        # If this is meant to be an event driven process, schedule an event to be generated every few minutes/hours
        self.event_timer_interval = self.CFG.get_safe('graph_gen_interval')
        if self.event_timer_interval:
            event_origin = "Interval_Timer_Matplotlib"
            sub = EventSubscriber(event_type="ResourceEvent",
                                  callback=self.interval_timer_callback,
                                  origin=event_origin)
            sub.start()

            self.interval_timer_id = self.ssclient.create_interval_timer(
                start_time="now",
                interval=self._str_to_secs(self.event_timer_interval),
                event_origin=event_origin,
                event_subtype="")

        super(VizTransformMatplotlibGraphs, self).on_start()

    # when the transform is used as a data process
    def recv_packet(self, packet, in_stream_route, in_stream_id):
        #Check to see if the class instance was set up as an event triggered transform. If yes, skip the packet
        if self.event_timer_interval:
            return

        log.info('Received packet')
        mpl_data_granule = VizTransformMatplotlibGraphsAlgorithm.execute(
            packet, params=self.get_stream_definition())
        for stream_name in self.stream_names:
            publisher = getattr(self, stream_name)
            publisher.publish(mpl_data_granule)

    def get_stream_definition(self):
        stream_id = self.stream_ids[0]
        stream_def = self.pubsub_management.read_stream_definition(
            stream_id=stream_id)
        return stream_def._id

    def process_event(self, msg, headers):

        return

    def interval_timer_callback(self, *args, **kwargs):
        #Find out the input data product to this process
        in_dp_id = self.CFG.get_safe('in_dp_id')

        print " >>>>>>>>>>>>>> IN DP ID from cfg : ", in_dp_id

        # get the dataset_id associated with the data_product. Need it to do the data retrieval
        ds_ids, _ = self.rrclient.find_objects(in_dp_id, PRED.hasDataset,
                                               RT.Dataset, True)
        if ds_ids is None or not ds_ids:
            return None

        # retrieve data for the specified time interval. Set up the query from the passed config first
        query = {}

        param_list_str = self.CFG.get_safe('parameters')
        if param_list_str:
            query['parameters'] = param_list_str.split(', ')
            # append time if not present in list of parameters
            if not 'time' in query['parameters']:
                query['parameters'].append('time')

        query['start_time'] = query['end_time'] = ntplib.system_to_ntp_time(
            time.time())  # Now
        query['stride_time'] = 1
        if self.CFG.get_safe('graph_time_period'):
            query['start_time'] = query['end_time'] - self._str_to_secs(
                self.CFG.get_safe('graph_time_period'))

        #print " >>>>>>>>>>>>>> QUERY = ", query

        #retrieved_granule = self.data_retriever_client.retrieve(ds_ids[0],{'start_time':start_time,'end_time':end_time})
        retrieved_granule = self.data_retriever_client.retrieve(ds_ids[0],
                                                                query=query)

        # add extra parameters to query passed in config that are not needed by data retrieval
        if self.CFG.get_safe('resolution'):
            query['resolution'] = self.CFG.get_safe('resolution')

        # send the granule through the Algorithm code to get the matplotlib graphs
        mpl_pdict_id = self.dsm_client.read_parameter_dictionary_by_name(
            'graph_image_param_dict', id_only=True)

        mpl_stream_def = self.pubsub_client.create_stream_definition(
            'mpl', parameter_dictionary_id=mpl_pdict_id)
        fileName = self.CFG.get_safe('graph_time_period')
        mpl_data_granule = VizTransformMatplotlibGraphsAlgorithm.execute(
            retrieved_granule,
            config=query,
            params=mpl_stream_def,
            fileName=fileName)

        if mpl_data_granule is None:
            return None

        # publish on all specified output streams
        for stream_name in self.stream_names:
            publisher = getattr(self, stream_name)
            publisher.publish(mpl_data_granule)

        return

    def _str_to_secs(self, time_period):
        # this method converts commonly used time periods to their actual seconds counterparts
        #separate alpha and numeric parts of the time period
        time_n = time_period.lower().rstrip('abcdefghijklmnopqrstuvwxyz ')
        time_a = time_period.lower().lstrip('0123456789. ')

        # determine if user specified, secs, mins, hours, days, weeks, months, years
        factor = None
        if time_a == 'sec' or time_a == "secs" or time_a == 'second' or time_a == "seconds":
            factor = 1
        if time_a == "min" or time_a == "mins" or time_a == "minute" or time_a == "minutes":
            factor = 60
        if time_a == "hr" or time_a == "hrs" or time_a == "hour" or time_a == "hours":
            factor = 60 * 60
        if time_a == "day" or time_a == "days":
            factor = 60 * 60 * 24
        if time_a == "wk" or time_a == "wks" or time_a == "week" or time_a == "weeks":
            factor = 60 * 60 * 24 * 7
        if time_a == "mon" or time_a == "mons" or time_a == "month" or time_a == "months":
            factor = 60 * 60 * 24 * 30
        if time_a == "yr" or time_a == "yrs" or time_a == "year" or time_a == "years":
            factor = 60 * 60 * 24 * 365

        time_period_secs = float(time_n) * factor
        return time_period_secs

    def on_quit(self):

        #Cancel the timer
        if hasattr(self, 'interval_timer_id'):
            self.ssclient.cancel_timer(self.interval_timer_id)

        super(VizTransformMatplotlibGraphs, self).on_quit()
Example 39
class TransformPrime(TransformDataProcess):
    binding=['output']
    '''
    Transforms which have an incoming stream and an outgoing stream.

    Parameters:
      process.stream_id      Outgoing stream identifier.
      process.exchange_point Route's exchange point.
      process.routing_key    Route's routing key.
      process.queue_name     Name of the queue to listen on.
      process.routes         streams,actor for each route {(stream_input_id, stream_output_id):actor} 
    Either the stream_id or both the exchange_point and routing_key need to be provided.
    '''    
    def on_start(self):
        TransformDataProcess.on_start(self)
        self.pubsub_management = PubsubManagementServiceProcessClient(process=self)
        self.stored_values = StoredValueManager(self.container)
        self.input_data_product_ids = self.CFG.get_safe('process.input_products', [])
        self.output_data_product_ids = self.CFG.get_safe('process.output_products', [])
        self.lookup_docs = self.CFG.get_safe('process.lookup_docs',[])
        self.new_lookups = Queue()
        self.lookup_monitor = EventSubscriber(event_type=OT.ExternalReferencesUpdatedEvent,callback=self._add_lookups, auto_delete=True)
        self.lookup_monitor.start()

    def on_quit(self):
        self.lookup_monitor.stop()
        TransformDataProcess.on_quit(self)

    def _add_lookups(self, event, *args, **kwargs):
        if event.origin in self.input_data_product_ids + self.output_data_product_ids:
            if isinstance(event.reference_keys, list):
                self.new_lookups.put(event.reference_keys)


    @memoize_lru(100)
    def read_stream_def(self,stream_id):
        return self.pubsub_management.read_stream_definition(stream_id=stream_id)

    
    def recv_packet(self, msg, stream_route, stream_id):
        process_routes = self.CFG.get_safe('process.routes', {})
        for stream_in_id,routes in process_routes.iteritems():
            if stream_id == stream_in_id:
                for stream_out_id, actor in routes.iteritems():
                    if actor is None:
                        rdt_out = self._execute_transform(msg, (stream_in_id, stream_out_id))
                        self.publish(rdt_out.to_granule(), stream_out_id)
                    else:
                        outgoing = self._execute_actor(msg, actor, (stream_in_id, stream_out_id))
                        self.publish(outgoing, stream_out_id)

    def publish(self, msg, stream_out_id):
        publisher = getattr(self, stream_out_id)
        publisher.publish(msg)

    def _load_actor(self, actor):
        '''
        Returns the actor class's callable execute method; logs and re-raises
        if the module, class, or method cannot be resolved.
        '''
        try:
            module = __import__(actor['module'], fromlist=[''])
        except ImportError:
            log.exception('Actor could not be loaded')
            raise
        try:
            cls = getattr(module, actor['class'])
        except AttributeError:
            log.exception('Module %s does not have class %s', repr(module), actor['class'])
            raise
        try:
            execute = getattr(cls, 'execute')
        except AttributeError:
            log.exception('Actor class does not contain execute method')
            raise
        return execute
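
    # Hypothetical usage sketch (module and class names are illustrative only;
    # the call signature mirrors the one used in _execute_actor below):
    #   execute = self._load_actor({'module': 'mypkg.actors', 'class': 'MyActor'})
    #   rdt_out = execute(msg, None, config, params, None)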

   
    def _execute_actor(self, msg, actor, streams):
        stream_in_id, stream_out_id = streams
        stream_def_out = self.read_stream_def(stream_out_id)
        params = self.CFG.get_safe('process.params', {})
        config = self.CFG.get_safe('process')
        # Hand the outgoing stream definition id to the actor via params
        params['stream_def'] = stream_def_out._id
        executor = self._load_actor(actor)
        try:
            rdt_out = executor(msg, None, config, params, None)
        except Exception:
            log.exception('Error running actor for %s', self.id)
            raise
        return rdt_out

    def _merge_pdicts(self, pdict1, pdict2):
        incoming_pdict = ParameterDictionary.load(pdict1)
        outgoing_pdict = ParameterDictionary.load(pdict2)

        merged_pdict = ParameterDictionary()
        # Each dictionary value is an (ordinal, context) pair; only the
        # context is needed, and contexts already merged from the incoming
        # dictionary are not overridden by the outgoing one
        for k, v in incoming_pdict.iteritems():
            ordinal, v = v
            if k not in merged_pdict:
                merged_pdict.add_context(v)
        for k, v in outgoing_pdict.iteritems():
            ordinal, v = v
            if k not in merged_pdict:
                merged_pdict.add_context(v)
        return merged_pdict

    def _merge_rdt(self, stream_def_in, stream_def_out):
        incoming_pdict_dump = stream_def_in.parameter_dictionary
        outgoing_pdict_dump = stream_def_out.parameter_dictionary

        merged_pdict = self._merge_pdicts(incoming_pdict_dump, outgoing_pdict_dump)
        rdt_temp = RecordDictionaryTool(param_dictionary=merged_pdict)
        return rdt_temp


    def _get_lookup_value(self, lookup_value):
        # Fold any newly announced document keys in ahead of the existing ones
        if not self.new_lookups.empty():
            new_values = self.new_lookups.get()
            self.lookup_docs = new_values + self.lookup_docs

        lookup_value_document_keys = self.lookup_docs
        for key in lookup_value_document_keys:
            try:
                document = self.stored_values.read_value(key)
                if lookup_value in document:
                    return document[lookup_value]
            except NotFound:
                log.warning('Specified lookup document does not exist')

        return None
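
    # Sketch of the lookup flow under an assumed document layout (the document
    # key and field name below are illustrative only):
    #   self.stored_values.read_value('calibration_doc') -> {'offset': 1.25}
    #   self._get_lookup_value('offset')                 -> 1.25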

    def _execute_transform(self, msg, streams):
        stream_in_id, stream_out_id = streams
        stream_def_in = self.read_stream_def(stream_in_id)
        stream_def_out = self.read_stream_def(stream_out_id)

        rdt_temp = self._merge_rdt(stream_def_in, stream_def_out)

        # Copy every non-function field available from the incoming granule
        rdt_in = RecordDictionaryTool.load_from_granule(msg)
        for field in rdt_temp.fields:
            if not isinstance(rdt_temp._pdict.get_context(field).param_type, ParameterFunctionType):
                try:
                    rdt_temp[field] = rdt_in[field]
                except KeyError:
                    pass

        # Resolve lookup-value fields from the stored-value documents
        rdt_temp.fetch_lookup_values()
        for lookup_field in rdt_temp.lookup_values():
            stored_value = self._get_lookup_value(rdt_temp.context(lookup_field).lookup_value)
            if stored_value is not None:
                rdt_temp[lookup_field] = stored_value

        # Reading a parameter-function field triggers its evaluation;
        # assigning the result back materializes the computed values
        for field in rdt_temp.fields:
            if isinstance(rdt_temp._pdict.get_context(field).param_type, ParameterFunctionType):
                rdt_temp[field] = rdt_temp[field]

        # Project the merged record onto the outgoing stream definition
        rdt_out = RecordDictionaryTool(stream_definition_id=stream_def_out._id)
        for field in rdt_out.fields:
            rdt_out[field] = rdt_temp[field]

        return rdt_out
Example no. 41
class VizTransformMatplotlibGraphs(TransformDataProcess, TransformEventListener):
    """
    Worker process that subscribes to data streams and converts incoming
    data from CDM format into Matplotlib graphs.
    """
    output_bindings = ['graph_image_param_dict']

    def on_start(self):

        self.pubsub_management = PubsubManagementServiceProcessClient(process=self)
        self.ssclient = SchedulerServiceProcessClient(node=self.container.node, process=self)
        self.rrclient = ResourceRegistryServiceProcessClient(node=self.container.node, process=self)

        self.stream_info  = self.CFG.get_safe('process.publish_streams',{})
        self.stream_names = self.stream_info.keys()
        self.stream_ids   = self.stream_info.values()

        if not self.stream_names:
            raise BadRequest('MPL Transform has no output streams.')

        graph_time_periods = self.CFG.get_safe('graph_time_periods')

        # If this is meant to be an event driven process, schedule an event to be generated every few minutes/hours
        event_timer_interval = self.CFG.get_safe('event_timer_interval')
        if event_timer_interval:
            event_origin = "Interval_Timer_Matplotlib"
            sub = EventSubscriber(event_type="ResourceEvent", callback=self.interval_timer_callback, origin=event_origin)
            sub.start()

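            # Note: 'interval' is assumed to be in seconds, e.g. an
            # event_timer_interval of 60 would fire a timer event every minute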
            self.interval_timer_id = self.ssclient.create_interval_timer(start_time="now", interval=event_timer_interval,
                event_origin=event_origin, event_subtype="")

        super(VizTransformMatplotlibGraphs, self).on_start()

    def recv_packet(self, packet, in_stream_route, in_stream_id):
        log.info('Received packet')
        outgoing = VizTransformMatplotlibGraphsAlgorithm.execute(packet, params=self.get_stream_definition())
        for stream_name in self.stream_names:
            publisher = getattr(self, stream_name)
            publisher.publish(outgoing)

    def get_stream_definition(self):
        # Note: returns the stream definition *id* of the first output stream,
        # which is what the algorithm expects in its params
        stream_id = self.stream_ids[0]
        stream_def = self.pubsub_management.read_stream_definition(stream_id=stream_id)
        return stream_def._id

    def process_event(self, msg, headers):
        # Event-driven graph regeneration is not implemented yet
        return

    def interval_timer_callback(self, *args, **kwargs):
        # Placeholder: find the input data product for this process and
        # retrieve data for every case of the output graph
        return

    def on_quit(self):
        # Cancel the interval timer if one was scheduled
        if hasattr(self, 'interval_timer_id'):
            self.ssclient.cancel_timer(self.interval_timer_id)

        super(VizTransformMatplotlibGraphs, self).on_quit()