def _get_data(cls, config):
        new_flst = get_safe(config, 'constraints.new_files', [])
        hdr_cnt = get_safe(config, 'header_count', SlocumParser.DEFAULT_HEADER_SIZE)
        for f in new_flst:
            try:
                parser = SlocumParser(f[0], hdr_cnt)
                #CBM: Not in use yet...
    #            ext_dset_res = get_safe(config, 'external_dataset_res', None)
    #            t_vname = ext_dset_res.dataset_description.parameters['temporal_dimension']
    #            x_vname = ext_dset_res.dataset_description.parameters['zonal_dimension']
    #            y_vname = ext_dset_res.dataset_description.parameters['meridional_dimension']
    #            z_vname = ext_dset_res.dataset_description.parameters['vertical_dimension']
    #            var_lst = ext_dset_res.dataset_description.parameters['variables']

                max_rec = get_safe(config, 'max_records', 1)
                dprod_id = get_safe(config, 'data_producer_id', 'unknown data producer')
                #tx_yml = get_safe(config, 'taxonomy')
                #ttool = TaxyTool.load(tx_yml) #CBM: Assertion inside RDT.__setitem__ requires same instance of TaxyTool
                pdict = ParameterDictionary.load(get_safe(config, 'param_dictionary'))

                # Iterate based on the number of data records, not the number of sensors
                first_sensor = next(iter(parser.sensor_map))
                cnt = calculate_iteration_count(len(parser.data_map[first_sensor]), max_rec)
                for x in xrange(cnt):
                    #rdt = RecordDictionaryTool(taxonomy=ttool)
                    rdt = RecordDictionaryTool(param_dictionary=pdict)

                    for name in parser.sensor_map:
                        d = parser.data_map[name][x*max_rec:(x+1)*max_rec]
                        rdt[name]=d

                    #g = build_granule(data_producer_id=dprod_id, taxonomy=ttool, record_dictionary=rdt)
                    g = build_granule(data_producer_id=dprod_id, record_dictionary=rdt, param_dictionary=pdict)
                    yield g
            except SlocumParseException as spe:
                # TODO: Decide what to do here, raise an exception or carry on
                log.error('Error parsing data file \'{0}\': {1}'.format(f[0], spe))
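    # A hedged usage sketch (caller names hypothetical): the data handler would normally
    # drive this generator and publish each yielded granule, e.g.
    #   for granule in cls._get_data(config):
    #       publisher.publish(granule)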
    def _get_new_ctd_packet(self, stream_id, length):

        rdt = RecordDictionaryTool(taxonomy=tx)

        #Explicitly make these numpy arrays...
        c = numpy.array([random.uniform(0.0,75.0)  for i in xrange(length)]) 
        t = numpy.array([random.uniform(-1.7, 21.0) for i in xrange(length)]) 
        p = numpy.array([random.lognormvariate(1,2) for i in xrange(length)]) 
        lat = numpy.array([random.uniform(-90.0, 90.0) for i in xrange(length)]) 
        lon = numpy.array([random.uniform(0.0, 360.0) for i in xrange(length)]) 
        h = numpy.array([random.uniform(0.0, 360.0) for i in xrange(length)]) 
        tvar = numpy.array([self.last_time + i for i in xrange(1,length+1)]) 
        self.last_time = max(tvar)

        log.warn('Got time: %s' % str(tvar))
        log.warn('Got t: %s' % str(t))

        rdt['time'] = tvar
        rdt['lat'] = lat
        rdt['lon'] = lon
        rdt['height'] = h
        rdt['temp'] = t
        rdt['cond'] = c
        rdt['pres'] = p

#        rdt['coordinates'] = rdt0
#        rdt['data'] = rdt1

        g = build_granule(data_producer_id=stream_id, taxonomy=tx, record_dictionary=rdt)

        return g
    def execute(self, granule):
        """Processes incoming data!!!!
        """

        rdt = RecordDictionaryTool.load_from_granule(granule)
        #todo: use only flat dicts for now, may change later...
#        rdt0 = rdt['coordinates']
#        rdt1 = rdt['data']

        pressure = get_safe(rdt, 'pres') #psd.get_values('conductivity')

        longitude = get_safe(rdt, 'lon') # psd.get_values('longitude')
        latitude = get_safe(rdt, 'lat')  #psd.get_values('latitude')
        time = get_safe(rdt, 'time') # psd.get_values('time')
        height = get_safe(rdt, 'height') # psd.get_values('time')

        log.warn('Got pressure: %s' % str(pressure))


        # L1
        # 1) The algorithm input is the L0 pressure data product (p_hex) and, in the case of the SBE 37IM, the pressure range (P_rng) from metadata.
        # 2) Convert the hexadecimal string to a decimal string
        # 3) For the SBE 37IM only, convert the pressure range (P_rng) from psia to dbar SBE 37IM
        #    Convert P_rng (input from metadata) from psia to dbar
        # 4) Perform scaling operation
        #    SBE 37IM
        #    L1 pressure data product (in dbar):
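        # Illustrative sketch only (not wired into the code below; the names and
        # constants are assumptions, not taken from this module):
        #   p_rng_dbar = (p_rng_psia - 14.7) * 0.689476   # step 3: psia -> dbar (1 psi ~= 0.689476 dbar)
        #   p_dbar = p_dec * p_rng_dbar / (0.85 * 65536.0) - 0.05 * p_rng_dbar  # step 4: scaling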


        # Use the constructor to put data into a granule
        psc = PointSupplementConstructor(point_definition=self.outgoing_stream_def, stream_id=self.streams['output'])
        ### Assumes the config argument for output streams is known and there is only one 'output'.
        ### the stream id is part of the metadata which must go in each stream granule - this is awkward to do at the
        ### application level like this!

        scaled_pressure = pressure

        for i in xrange(len(pressure)):
            #todo: get pressure range from metadata (if present) and include in calc
            scaled_pressure[i] = ( pressure[i])

        root_rdt = RecordDictionaryTool(taxonomy=self.tx)

        #todo: use only flat dicts for now, may change later...
#        data_rdt = RecordDictionaryTool(taxonomy=self.tx)
#        coord_rdt = RecordDictionaryTool(taxonomy=self.tx)

        root_rdt['pres'] = scaled_pressure
        root_rdt['time'] = time
        root_rdt['lat'] = latitude
        root_rdt['lon'] = longitude
        root_rdt['height'] = height

#        root_rdt['coordinates'] = coord_rdt
#        root_rdt['data'] = data_rdt

        return build_granule(data_producer_id='ctd_L1_pressure', taxonomy=self.tx, record_dictionary=root_rdt)

        # return psc.close_stream_granule()  # superseded by the build_granule call above
    def test_combine_granule(self):
        tt = TaxyTool()
        tt.add_taxonomy_set('a')

        rdt = RecordDictionaryTool(tt)
        rdt['a'] = np.array([1,2,3])

        granule1 = build_granule('test',tt,rdt)

        rdt = RecordDictionaryTool(tt)
        rdt['a'] = np.array([4,5,6])
        
        granule2 = build_granule('test',tt,rdt)

        granule3 = combine_granules(granule1,granule2)

        rdt = RecordDictionaryTool.load_from_granule(granule3)

        self.assertTrue(np.allclose(rdt['a'],np.array([1,2,3,4,5,6])))
    def test_build_granule_and_load_from_granule_with_taxonomy(self):

        #Define a taxonomy and add sets. add_taxonomy_set takes one or more names and assigns them to one handle
        tx = TaxyTool()
        tx.add_taxonomy_set('temp', 'long_temp_name')
        tx.add_taxonomy_set('cond', 'long_cond_name')
        tx.add_taxonomy_set('pres', 'long_pres_name')
        tx.add_taxonomy_set('rdt')
        # map is {<local name>: <granule name or path>}

        #Use RecordDictionaryTool to create a record dictionary. Send in the taxonomy so the Tool knows what to expect
        rdt = RecordDictionaryTool(taxonomy=tx)

        #Create some arrays and fill them with random values
        temp_array = np.random.standard_normal(100)
        cond_array = np.random.standard_normal(100)
        pres_array = np.random.standard_normal(100)

        #Use the RecordDictionaryTool to add the values. This also would work if you used long_temp_name, etc.
        rdt['temp'] = temp_array
        rdt['cond'] = cond_array
        rdt['pres'] = pres_array

        #You can also add in another RecordDictionaryTool, providing the taxonomies are the same.
        rdt2 = RecordDictionaryTool(taxonomy=tx)
        rdt2['temp'] = temp_array
        rdt['rdt'] = rdt2


        g = build_granule(data_producer_id='john', taxonomy=tx, record_dictionary=rdt)

        l_tx = TaxyTool.load_from_granule(g)

        l_rd = RecordDictionaryTool.load_from_granule(g)

        # Make sure we got back the same Taxonomy Object
        self.assertEquals(l_tx._t, tx._t)
        self.assertEquals(l_tx.get_handles('temp'), tx.get_handles('temp'))
        self.assertEquals(l_tx.get_handles('testing_2'), tx.get_handles('testing_2'))


        # Now test the record dictionary object
        self.assertEquals(l_rd._rd, rdt._rd)
        self.assertEquals(l_rd._tx._t, rdt._tx._t)


        for k, v in l_rd.iteritems():
            self.assertIn(k, rdt)

            if isinstance(v, np.ndarray):
                self.assertTrue( (v == rdt[k]).all())

            else:
                self.assertEquals(v._rd, rdt[k]._rd)
    def execute(self, granule):
        """Processes incoming data!!!!
        """

        rdt = RecordDictionaryTool.load_from_granule(granule)
        #todo: use only flat dicts for now, may change later...
#        rdt0 = rdt['coordinates']
#        rdt1 = rdt['data']

        temperature = get_safe(rdt, 'temp')
        conductivity = get_safe(rdt, 'cond')
        pressure = get_safe(rdt, 'pres')

        longitude = get_safe(rdt, 'lon')
        latitude = get_safe(rdt, 'lat')
        time = get_safe(rdt, 'time')
        height = get_safe(rdt, 'height')


        log.warn('Got conductivity: %s' % str(conductivity))
        log.warn('Got pressure: %s' % str(pressure))
        log.warn('Got temperature: %s' % str(temperature))


        sp = SP_from_cndr(r=conductivity/cte.C3515, t=temperature, p=pressure)

        sa = SA_from_SP(sp, pressure, longitude, latitude)

        density = rho(sa, temperature, pressure)
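        # The three calls above form the standard TEOS-10 chain: conductivity ratio ->
        # practical salinity (SP_from_cndr), practical -> absolute salinity (SA_from_SP),
        # then in-situ density from absolute salinity, temperature and pressure (rho).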

        log.warn('Got density: %s' % str(density))

        # Use the constructor to put data into a granule
        #psc = PointSupplementConstructor(point_definition=self.outgoing_stream_def, stream_id=self.streams['output'])
        ### Assumes the config argument for output streams is known and there is only one 'output'.
        ### the stream id is part of the metadata which must go in each stream granule - this is awkward to do at the
        ### application level like this!

        root_rdt = RecordDictionaryTool(param_dictionary=self.dens)
        #todo: use only flat dicts for now, may change later...
#        data_rdt = RecordDictionaryTool(taxonomy=self.tx)
#        coord_rdt = RecordDictionaryTool(taxonomy=self.tx)

        root_rdt['density'] = density
        root_rdt['time'] = time
        root_rdt['lat'] = latitude
        root_rdt['lon'] = longitude
        root_rdt['height'] = height

#        root_rdt['coordinates'] = coord_rdt
#        root_rdt['data'] = data_rdt

        return build_granule(data_producer_id='ctd_L2_density', param_dictionary=self.dens, record_dictionary=root_rdt)
    def _trigger_func(self, stream_id):
        log.debug("SimpleCtdDataProducer:_trigger_func ")


        rdt = RecordDictionaryTool(taxonomy=tx)
#        rdt0 = RecordDictionaryTool(taxonomy=tx)
#        rdt1 = RecordDictionaryTool(taxonomy=tx)


        #@todo - add lots of comments in here
        while True:

            length = 10

            #Explicitly make these numpy arrays...
            c = numpy.array([random.uniform(0.0,75.0)  for i in xrange(length)])

            t = numpy.array([random.uniform(-1.7, 21.0) for i in xrange(length)])

            p = numpy.array([random.lognormvariate(1,2) for i in xrange(length)])

            lat = numpy.array([random.uniform(-90.0, 90.0) for i in xrange(length)])

            lon = numpy.array([random.uniform(0.0, 360.0) for i in xrange(length)])

            h = numpy.array([random.uniform(0.0, 360.0) for i in xrange(length)])

            tvar = numpy.array([self.last_time + i for i in xrange(1,length+1)])

            self.last_time = max(tvar)

            rdt['time'] = tvar
            rdt['lat'] = lat
            rdt['lon'] = lon
            rdt['height'] = h
            rdt['temp'] = t
            rdt['cond'] = c
            rdt['pres'] = p


            #todo: use only flat dicts for now, may change later...
#            rdt['coordinates'] = rdt0
#            rdt['data'] = rdt1

            log.debug("SimpleCtdDataProducer: logging published Record Dictionary:\n %s", rdt.pretty_print())

            g = build_granule(data_producer_id=stream_id, taxonomy=tx, record_dictionary=rdt)

            log.debug('SimpleCtdDataProducer: Sending %d values!' % length)
            self.publisher.publish(g)

            time.sleep(2.0)
    def execute(self, granule):
        """
        Example process to double the salinity value
        """
        # Use the PointSupplementStreamParser to pull data from a granule
        #psd = PointSupplementStreamParser(stream_definition=self.incoming_stream_def, stream_granule=packet)
        rdt = RecordDictionaryTool.load_from_granule(granule)

        salinity = get_safe(rdt, 'salinity')

        longitude = get_safe(rdt, 'lon')
        latitude = get_safe(rdt, 'lat')
        time = get_safe(rdt, 'time')
        height = get_safe(rdt, 'height')
#        #  pull data from a granule
#        psd = PointSupplementStreamParser(stream_definition=self.incoming_stream_def, stream_granule=granule)
#
#        longitude = psd.get_values('longitude')
#        latitude = psd.get_values('latitude')
#        height = psd.get_values('height')
#        time = psd.get_values('time')

#        salinity = psd.get_values('salinity')

        salinity *= 2.0

        print ('Doubled salinity: %s' % str(salinity))


        # Use the constructor to put data into a granule
#        psc = PointSupplementConstructor(point_definition=self.outgoing_stream_def, stream_id=self.streams['output'])
#
#        for i in xrange(len(salinity)):
#            point_id = psc.add_point(time=time[i],location=(longitude[i],latitude[i],height[i]))
#            psc.add_scalar_point_coverage(point_id=point_id, coverage_id='salinity', value=salinity[i])
#
#        return psc.close_stream_granule()
        root_rdt = RecordDictionaryTool(taxonomy=self.tx)

        #data_rdt = RecordDictionaryTool(taxonomy=self.tx)
        #coord_rdt = RecordDictionaryTool(taxonomy=self.tx)

        root_rdt['salinity'] = salinity
        root_rdt['time'] = time
        root_rdt['lat'] = latitude
        root_rdt['lon'] = longitude
        root_rdt['height'] = height

        #root_rdt['coordinates'] = coord_rdt
        #root_rdt['data'] = data_rdt

        return build_granule(data_producer_id='ctd_L2_salinity', taxonomy=self.tx, record_dictionary=root_rdt)
    def execute(self, granule):
        """Processes incoming data!!!!
        """

        rdt = RecordDictionaryTool.load_from_granule(granule)
        #todo: use only flat dicts for now, may change later...
#        rdt0 = rdt['coordinates']
#        rdt1 = rdt['data']

        temperature = get_safe(rdt, 'temp')

        longitude = get_safe(rdt, 'lon')
        latitude = get_safe(rdt, 'lat')
        time = get_safe(rdt, 'time')
        height = get_safe(rdt, 'height')

        log.warn('Got temperature: %s' % str(temperature))


        # The L1 temperature data product algorithm takes the L0 temperature data product and converts it into Celsius.
        # Once the hexadecimal string is converted to decimal, only scaling (dividing by a factor and adding an offset) is
        # required to produce the correct decimal representation of the data in Celsius.
        # The scaling function differs by CTD make/model as described below.
        #    SBE 37IM, Output Format 0
        #    1) Standard conversion from 5-character hex string (Thex) to decimal (tdec)
        #    2) Scaling: T [C] = (tdec / 10,000) - 10
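        # Worked example of the two steps above (hex value is hypothetical):
        #   tdec = int('3A98A', 16)       # 1) 5-character hex -> 240010
        #   t_c = (tdec / 10000.0) - 10   # 2) scaling -> ~14.0 degrees Celsius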

        root_rdt = RecordDictionaryTool(param_dictionary=self.temp)

        #todo: use only flat dicts for now, may change later...
#        data_rdt = RecordDictionaryTool(taxonomy=self.tx)
#        coord_rdt = RecordDictionaryTool(taxonomy=self.tx)

        scaled_temperature = temperature

        for i in xrange(len(temperature)):
            scaled_temperature[i] = ( temperature[i] / 10000.0) - 10

        root_rdt['temp'] = scaled_temperature
        root_rdt['time'] = time
        root_rdt['lat'] = latitude
        root_rdt['lon'] = longitude
        root_rdt['height'] = height

        #todo: use only flat dicts for now, may change later...
#        root_rdt['coordinates'] = coord_rdt
#        root_rdt['data'] = data_rdt

        return build_granule(data_producer_id='ctd_L1_temperature', param_dictionary=self.temp, record_dictionary=root_rdt)
    def execute(self, granule):
        """Processes incoming data!!!!
        """

        rdt = RecordDictionaryTool.load_from_granule(granule)
        #todo: use only flat dicts for now, may change later...
#        rdt0 = rdt['coordinates']
#        rdt1 = rdt['data']

        temperature = get_safe(rdt, 'temp')
        conductivity = get_safe(rdt, 'cond')
        pressure = get_safe(rdt, 'pres')

        longitude = get_safe(rdt, 'lon')
        latitude = get_safe(rdt, 'lat')
        time = get_safe(rdt, 'time')
        height = get_safe(rdt, 'height')

        log.warn('Got conductivity: %s' % str(conductivity))
        log.warn('Got pressure: %s' % str(pressure))
        log.warn('Got temperature: %s' % str(temperature))

        salinity = SP_from_cndr(r=conductivity/cte.C3515, t=temperature, p=pressure)

        log.warn('Got salinity: %s' % str(salinity))


        root_rdt = RecordDictionaryTool(param_dictionary=self.sal)
        #todo: use only flat dicts for now, may change later...
#        data_rdt = RecordDictionaryTool(taxonomy=self.tx)
#        coord_rdt = RecordDictionaryTool(taxonomy=self.tx)

        root_rdt['salinity'] = salinity
        root_rdt['time'] = time
        root_rdt['lat'] = latitude
        root_rdt['lon'] = longitude
        root_rdt['height'] = height

#        root_rdt['coordinates'] = coord_rdt
#        root_rdt['data'] = data_rdt

        return build_granule(data_producer_id='ctd_L2_salinity', param_dictionary=self.sal, record_dictionary=root_rdt)
    def _trigger_func(self, stream_id):

        sine_ampl = 2.0 # Amplitude in both directions
        samples = 60
        sine_curr_deg = 0 # varies from 0 - 360

        startTime = time.time()
        count = samples #something other than zero

        while True:
            count = time.time() - startTime
            sine_curr_deg = (count % samples) * 360 / samples

            c = numpy.array( [sine_ampl * math.sin(math.radians(sine_curr_deg))] )
            t = numpy.array( [sine_ampl * 2 * math.sin(math.radians(sine_curr_deg + 45))] )
            p = numpy.array( [sine_ampl * 4 * math.sin(math.radians(sine_curr_deg + 60))] )

            lat = lon = numpy.array([0.0])
            tvar = numpy.array([time.time()])

#            ctd_packet = ctd_stream_packet(stream_id=stream_id,
#                c=c, t=t, p = p, lat = lat, lon = lon, time=tvar)
            rdt = RecordDictionaryTool(taxonomy=tx)

            h = numpy.array([random.uniform(0.0, 360.0)])

            rdt['time'] = tvar
            rdt['lat'] = lat
            rdt['lon'] = lon
            rdt['height'] = h
            rdt['temp'] = t
            rdt['cond'] = c
            rdt['pres'] = p

            g = build_granule(data_producer_id=stream_id, taxonomy=tx, record_dictionary=rdt)

            log.info('SinusoidalCtdPublisher sending 1 record!')
            self.publisher.publish(g)

            time.sleep(1.0)
    def _build_granule_settings(self, param_dictionary=None, field_name='', value=None, time=None, latitude=None, longitude=None, height=None):

        root_rdt = RecordDictionaryTool(param_dictionary=param_dictionary)

        #data_rdt = RecordDictionaryTool(taxonomy=taxonomy)

        root_rdt[field_name] = value

        #coor_rdt = RecordDictionaryTool(taxonomy=taxonomy)

        root_rdt['time'] = time
        root_rdt['lat'] = latitude
        root_rdt['lon'] = longitude
        root_rdt['height'] = height

        #todo: use only flat dicts for now, may change later...
#        root_rdt['coordinates'] = coor_rdt
#        root_rdt['data'] = data_rdt

        log.debug("ctd_L0_all:_build_granule_settings: logging published Record Dictionary:\n %s", str(root_rdt.pretty_print()))

        return build_granule(data_producer_id='ctd_L0', param_dictionary=param_dictionary, record_dictionary=root_rdt)
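    # A hedged usage sketch (variable and attribute names hypothetical): the L0 transform
    # might split an incoming granule into per-parameter granules like so:
    #   g_cond = self._build_granule_settings(param_dictionary=self.cond_pdict,
    #                                         field_name='cond', value=conductivity,
    #                                         time=time, latitude=latitude,
    #                                         longitude=longitude, height=height)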
    def _get_data(cls, config):
        """
        Retrieves config['constraints']['count'] number of random samples of length config['constraints']['array_len']
        @param config Dict of configuration parameters - must contain ['constraints']['count'] and ['constraints']['count']
        """
        array_len = get_safe(config, 'constraints.array_len',1)

        max_rec = get_safe(config, 'max_records', 1)
        dprod_id = get_safe(config, 'data_producer_id')
        #tx_yml = get_safe(config, 'taxonomy')
        #ttool = TaxyTool.load(tx_yml)
        pdict = ParameterDictionary.load(get_safe(config, 'param_dictionary'))

        arr = npr.random_sample(array_len)
        log.debug('Array to send using max_rec={0}: {1}'.format(max_rec, arr))
        cnt = calculate_iteration_count(arr.size, max_rec)
        for x in xrange(cnt):
            rdt = RecordDictionaryTool(param_dictionary=pdict)
            d = arr[x*max_rec:(x+1)*max_rec]
            rdt['dummy'] = d
            g = build_granule(data_producer_id=dprod_id, record_dictionary=rdt, param_dictionary=pdict)
            yield g
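    # calculate_iteration_count is assumed to be a simple ceil-division helper; a minimal
    # sketch consistent with how it is used above:
    #   def calculate_iteration_count(total_records, max_records):
    #       count = total_records / max_records
    #       if total_records % max_records:
    #           count += 1
    #       return count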
    def _get_data(cls, config):
        """
        A generator that retrieves config['constraints']['count'] number of sequential Fibonacci numbers
        @param config Dict of configuration parameters - must contain ['constraints']['count']
        """
        cnt = get_safe(config,'constraints.count',1)

        max_rec = get_safe(config, 'max_records', 1)
        dprod_id = get_safe(config, 'data_producer_id')
        #tx_yml = get_safe(config, 'taxonomy')
        #ttool = TaxyTool.load(tx_yml)
        pdict = ParameterDictionary.load(get_safe(config, 'param_dictionary'))

        def fibGenerator():
            """
            A Fibonacci sequence generator
            """
            count = 0
            ret = []
            a, b = 1, 1
            while 1:
                count += 1
                ret.append(a)
                if count == max_rec:
                    yield np.array(ret)
                    ret=[]
                    count = 0

                a, b = b, a + b

        gen=fibGenerator()
        cnt = calculate_iteration_count(cnt, max_rec)
        for i in xrange(cnt):
            rdt = RecordDictionaryTool(param_dictionary=pdict)
            d = gen.next()
            rdt['data'] = d
            g = build_granule(data_producer_id=dprod_id, record_dictionary=rdt, param_dictionary=pdict)
            yield g
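    # For example, with max_records=4 the generator above yields granules whose 'data'
    # arrays are [1, 1, 2, 3], [5, 8, 13, 21], ... (one array per granule).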
    def execute(self, granule):
        """Processes incoming data!!!!
        """
        rdt = RecordDictionaryTool.load_from_granule(granule)
        #todo: use only flat dicts for now, may change later...
#        rdt0 = rdt['coordinates']
#        rdt1 = rdt['data']

        conductivity = get_safe(rdt, 'cond') #psd.get_values('conductivity')

        longitude = get_safe(rdt, 'lon') # psd.get_values('longitude')
        latitude = get_safe(rdt, 'lat')  #psd.get_values('latitude')
        time = get_safe(rdt, 'time') # psd.get_values('time')
        height = get_safe(rdt, 'height') # psd.get_values('time')

        log.warn('CTDL1ConductivityTransform: Got conductivity: %s' % str(conductivity))

        root_rdt = RecordDictionaryTool(param_dictionary=self.cond)

        #todo: use only flat dicts for now, may change later...
#        data_rdt = RecordDictionaryTool(taxonomy=self.tx)
#        coord_rdt = RecordDictionaryTool(taxonomy=self.tx)

        scaled_conductivity = conductivity

        for i in xrange(len(conductivity)):
            scaled_conductivity[i] = (conductivity[i] / 100000.0) - 0.5

        root_rdt['cond'] = scaled_conductivity
        root_rdt['time'] = time
        root_rdt['lat'] = latitude
        root_rdt['lon'] = longitude
        root_rdt['height'] = height

#        root_rdt['coordinates'] = coord_rdt
#        root_rdt['data'] = data_rdt

        return build_granule(data_producer_id='ctd_L1_conductivity', param_dictionary=self.cond, record_dictionary=root_rdt)
    def test_build_granule_and_load_from_granule(self):
        pdict = ParameterDictionary()

        t_ctxt = ParameterContext('time', param_type=QuantityType(value_encoding=np.dtype('int64')))
        t_ctxt.reference_frame = AxisTypeEnum.TIME
        t_ctxt.uom = 'seconds since 01-01-1970'
        pdict.add_context(t_ctxt)

        lat_ctxt = ParameterContext('lat', param_type=QuantityType(value_encoding=np.dtype('float32')))
        lat_ctxt.reference_frame = AxisTypeEnum.LAT
        lat_ctxt.uom = 'degree_north'
        pdict.add_context(lat_ctxt)

        lon_ctxt = ParameterContext('lon', param_type=QuantityType(value_encoding=np.dtype('float32')))
        lon_ctxt.reference_frame = AxisTypeEnum.LON
        lon_ctxt.uom = 'degree_east'
        pdict.add_context(lon_ctxt)

        temp_ctxt = ParameterContext('temp', param_type=QuantityType(value_encoding=np.dtype('float32')))
        temp_ctxt.uom = 'degree_Celsius'
        pdict.add_context(temp_ctxt)

        cond_ctxt = ParameterContext('conductivity', param_type=QuantityType(value_encoding=np.dtype('float32')))
        cond_ctxt.uom = 'unknown'
        pdict.add_context(cond_ctxt)

        pres_ctxt = ParameterContext('pres', param_type=QuantityType(value_encoding=np.dtype('float32')))
        pres_ctxt.uom = 'unknown'
        pdict.add_context(pres_ctxt)

        rdt = RecordDictionaryTool(param_dictionary=pdict)

        #Create some arrays and fill them with random values
        temp_array = np.random.standard_normal(100)
        cond_array = np.random.standard_normal(100)
        pres_array = np.random.standard_normal(100)
        time_array = np.random.standard_normal(100)
        lat_array = np.random.standard_normal(100)
        lon_array = np.random.standard_normal(100)

        #Use the RecordDictionaryTool to add the values. This also would work if you used long_temp_name, etc.
        rdt['temp'] = temp_array
        rdt['conductivity'] = cond_array
        rdt['pres'] = pres_array
        rdt['time'] = time_array
        rdt['lat'] = lat_array
        rdt['lon'] = lon_array

        g = build_granule(data_producer_id='john', record_dictionary=rdt, param_dictionary=pdict)

        l_pd = ParameterDictionary.load(g.param_dictionary)

        #l_tx = TaxyTool.load_from_granule(g)

        l_rd = RecordDictionaryTool.load_from_granule(g)

        # Make sure we got back the same ParameterDictionary
        #self.assertEquals(l_pd, pdict)
        self.assertEquals(l_pd.ord_from_key('temp'), pdict.ord_from_key('temp'))
        self.assertEquals(l_pd.ord_from_key('conductivity'), pdict.ord_from_key('conductivity'))


        # Now test the record dictionary object
        self.assertEquals(l_rd._rd, rdt._rd)
        #self.assertEquals(l_rd._param_dict, rdt._param_dict)


        for k, v in l_rd.iteritems():
            self.assertIn(k, rdt)

            if isinstance(v, np.ndarray):
                self.assertTrue( (v == rdt[k]).all())

            else:
                self.assertEquals(v._rd, rdt[k]._rd)
    def _get_data(cls, config):
        """
        Retrieves config['constraints']['count'] number of random samples of length config['constraints']['array_len']
        @param config Dict of configuration parameters - must contain ['constraints']['count'] and ['constraints']['count']
        """
        ext_dset_res = get_safe(config, 'external_dataset_res', None)

        # Get the Dataset object from the config (should have been instantiated in _init_acquisition_cycle)
        ds=get_safe(config, 'dataset_object')
        if ext_dset_res and ds:
            t_vname = ext_dset_res.dataset_description.parameters['temporal_dimension']
            x_vname = ext_dset_res.dataset_description.parameters['zonal_dimension']
            y_vname = ext_dset_res.dataset_description.parameters['meridional_dimension']
            z_vname = ext_dset_res.dataset_description.parameters['vertical_dimension']
            var_lst = ext_dset_res.dataset_description.parameters['variables']

            t_slice = get_safe(config, 'constraints.temporal_slice', (slice(0,1)))
            #TODO: Using 'eval' here is BAD - need to find a less sketchy way to pass constraints
            if isinstance(t_slice,str):
                t_slice=eval(t_slice)
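            # A possible safer alternative (sketch only, config key hypothetical): pass the
            # slice as a (start, stop) tuple instead of a string and build it explicitly:
            #   start, stop = get_safe(config, 'constraints.temporal_slice_tuple', (0, 1))
            #   t_slice = slice(start, stop)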

            lon = ds.variables[x_vname][:]
            lat = ds.variables[y_vname][:]
            z = ds.variables[z_vname][:]

            t_arr = ds.variables[t_vname][t_slice]
            data_arrays = {}
            for varn in var_lst:
                data_arrays[varn] = ds.variables[varn][t_slice]

            max_rec = get_safe(config, 'max_records', 1)
            dprod_id = get_safe(config, 'data_producer_id', 'unknown data producer')
            #tx_yml = get_safe(config, 'taxonomy')
            #ttool = TaxyTool.load(tx_yml) #CBM: Assertion inside RDT.__setitem__ requires same instance of TaxyTool
            pdict = ParameterDictionary.load(get_safe(config, 'param_dictionary'))

            cnt = calculate_iteration_count(t_arr.size, max_rec)
            for x in xrange(cnt):
                ta = t_arr[x*max_rec:(x+1)*max_rec]

                # Make a 'master' RecDict
                #rdt = RecordDictionaryTool(taxonomy=ttool)
                rdt = RecordDictionaryTool(param_dictionary=pdict)
                # Make a 'coordinate' RecDict
                #rdt_c = RecordDictionaryTool(taxonomy=ttool)
                #rdt_c = RecordDictionaryTool(param_dictionary=pdict)
                # Make a 'data' RecDict
                #rdt_d = RecordDictionaryTool(taxonomy=ttool)
                #rdt_d = RecordDictionaryTool(param_dictionary=pdict)

                # Assign values to the coordinate RecDict
                rdt[x_vname] = lon
                rdt[y_vname] = lat
                rdt[z_vname] = z

                # Assign values to the data RecDict
                rdt[t_vname] = ta
                for key, arr in data_arrays.iteritems():
                    d = arr[x*max_rec:(x+1)*max_rec]
                    rdt[key] = d

                # Add the coordinate and data RecDicts to the master RecDict
                #rdt['coords'] = rdt_c
                #rdt['data'] = rdt_d

                # Build and return a granule
                # CBM: ttool must be passed
                #g = build_granule(data_producer_id=dprod_id, taxonomy=ttool, record_dictionary=rdt)
                g = build_granule(data_producer_id=dprod_id, record_dictionary=rdt, param_dictionary=pdict)
                yield g

            ds.close()
    def build_packet(self, *args, **kwargs):
        """
        Build and return a granule of data.
        @param taxonomy the taxonomy of the granule
        @param data dictionary containing sample data.
        @return granule suitable for publishing
        """
        taxonomy_str = kwargs.get('taxonomy')
        data = kwargs.get('data')
        data_producer_id = kwargs.get('data_producer_id')

        if not data_producer_id:
            raise PacketFactoryException("data_producer_id parameter missing")

        if not taxonomy_str:
            raise PacketFactoryException("taxonomy parameter missing")

        if not data:
            raise PacketFactoryException("data parameter missing")

        taxonomy = self._get_taxy_tool(taxonomy_str)

        # the nick_names in the taxonomy:
        nick_names = self._get_nick_names_from_taxonomy(taxonomy)

        #
        # TODO in general, how are groups (and the individual values
        # belonging to the groups) to be determined?
        #

        # in this version, expect 'data' and 'coordinates' to be included in
        # the taxonomy -- TODO the idea would be to be more general here?

        ##############################################################
        # NOTE for the moment, using the flat data record dict 'rdt'
        ##############################################################

#        if not 'data' in nick_names:
#            raise PacketFactoryException("expected name 'data' in taxonomy")
#        if not 'coordinates' in nick_names:
#            raise PacketFactoryException("expected name 'coordinates' in taxonomy")


        rdt = RecordDictionaryTool(taxonomy=taxonomy)
#        data_rdt = RecordDictionaryTool(taxonomy=taxonomy)
#        coordinates_rdt = RecordDictionaryTool(taxonomy=taxonomy)
#
#        rdt['data'] = data_rdt
#        rdt['coordinates'] = coordinates_rdt

#        def is_coordinate(nick_name):
#            # just an ad hoc check to determine which group the nick_names
#            # belong to
#            return nick_name in ['lat', 'lon', 'time', 'height']


        # now, assign the values to the corresp record dicts:
        for name, value in data.iteritems():
            handle = -1
            log.info("packetfactory: name: %s" % str(name))
            if name in nick_names:
                handle = taxonomy.get_handle(name)
                log.info("packetfactory: handle: %s" % str(handle))
            else:
                handles = taxonomy.get_handles(name)
                log.info("packetfactory: handles: %s" % str(handles))
                if len(handles) == 1:
                    handle = handles.pop()
                elif len(handles) > 1:
                    # TODO proper handling of this case
                    log.warn("Multiple handles found for '%s': %s" % (name %
                                                                 handles))

            if handle >= 0:
                # ok, the nick_name has been found, either directly as a
                # nick_name or via an alias; set value (using nick_name):
                nick_name = taxonomy.get_nick_name(handle)

                assert isinstance(value, list)
                val = numpy.array(value)

                # NOTE for the moment, using the flat data record dict 'rdt':
                rdt[nick_name] = val
#                if is_coordinate(nick_name):
#                    coordinates_rdt[nick_name] = val
#                else:
#                    data_rdt[nick_name] = val

            else:
                # name not found.
                # In the current tests this is happening with 'stream_id'
                log.warning("No handle found for '%s'" % name)

        log.debug("dictionary created: %s" % rdt.pretty_print())

        return build_granule(data_producer_id=data_producer_id, taxonomy=taxonomy, record_dictionary=rdt)
    def render_graphs(self, graph_data):

        # init Matplotlib
        fig = Figure(figsize=(8, 4), dpi=200, frameon=True)
        ax = fig.add_subplot(111)
        canvas = FigureCanvas(fig)
        imgInMem = StringIO.StringIO()

        # If there's no data, wait
        # For the simple case of testing, lets plot all time variant variables one at a time
        xAxisVar = "time"
        xAxisFloatData = graph_data[xAxisVar]
        rdt = RecordDictionaryTool(taxonomy=tx)

        # Prepare the set of y axis variables that will be plotted. This needs to be smarter and passed as
        # config variable to the transform
        yAxisVars = []
        for varName, varData in graph_data.iteritems():
            if varName == "time" or varName == "height" or varName == "longitude" or varName == "latitude":
                continue
            yAxisVars.append(varName)

        idx = 0
        for varName in yAxisVars:
            yAxisFloatData = graph_data[varName]

            # Generate the plot
            ax.plot(xAxisFloatData, yAxisFloatData, self.line_style(idx), label=varName)
            idx += 1

        yAxisLabel = ""
        # generate a filename for the output image
        for varName in yAxisVars:
            if yAxisLabel:
                yAxisLabel = yAxisLabel + "-" + varName
            else:
                yAxisLabel = varName

        fileName = yAxisLabel + "_vs_" + xAxisVar + ".png"

        ax.set_xlabel(xAxisVar)
        ax.set_ylabel(yAxisLabel)
        ax.set_title(yAxisLabel + " vs " + xAxisVar)
        ax.set_autoscale_on(False)
        ax.legend(loc="upper left")

        # Save the figure to the in memory file
        canvas.print_figure(imgInMem, format="png")
        imgInMem.seek(0)

        # submit resulting table back using the out stream publisher
        msg = {
            "viz_product_type": "matplotlib_graphs",
            "image_obj": imgInMem.getvalue(),
            "image_name": fileName,
            "content_type": "image/png",
        }

        rdt["matplotlib_graphs"] = numpy.array([msg])
        # Generate a list of the graph objects generated
        return build_granule(data_producer_id="matplotlib_graphs_transform", taxonomy=tx, record_dictionary=rdt)
    def build_packet(self, *args, **kwargs):
        """
        Build and return a granule of data.
        @param taxonomy the taxonomy of the granule
        @param data dictionary containing sample data.
        @return granule suitable for publishing
        """
        taxonomy_str = kwargs.get('taxonomy')
        data = kwargs.get('data')
        data_producer_id = kwargs.get('data_producer_id')

        if not data_producer_id:
            raise PacketFactoryException("data_producer_id parameter missing")

        if not taxonomy_str:
            raise PacketFactoryException("taxonomy parameter missing")

        if not data:
            raise PacketFactoryException("data parameter missing")

        taxonomy = self._get_taxy_tool(taxonomy_str)

        # the nick_names in the taxonomy:
        nick_names = self._get_nick_names_from_taxonomy(taxonomy)

        #
        # TODO in general, how are groups (and the individual values
        # belonging to the groups) to be determined?
        #

        # in this version, expect 'data' and 'coordinates' to be included in
        # the taxonomy -- TODO the idea would be to be more general here?

        ##############################################################
        # NOTE for the moment, using the flat data record dict 'rdt'
        ##############################################################

        #        if not 'data' in nick_names:
        #            raise PacketFactoryException("expected name 'data' in taxonomy")
        #        if not 'coordinates' in nick_names:
        #            raise PacketFactoryException("expected name 'coordinates' in taxonomy")

        rdt = RecordDictionaryTool(taxonomy=taxonomy)
        #        data_rdt = RecordDictionaryTool(taxonomy=taxonomy)
        #        coordinates_rdt = RecordDictionaryTool(taxonomy=taxonomy)
        #
        #        rdt['data'] = data_rdt
        #        rdt['coordinates'] = coordinates_rdt

        #        def is_coordinate(nick_name):
        #            # just an ad hoc check to determine which group the nick_names
        #            # belong to
        #            return nick_name in ['lat', 'lon', 'time', 'height']

        # now, assign the values to the corresp record dicts:
        for name, value in data.iteritems():
            handle = -1
            log.info("packetfactory: name: %s" % str(name))
            if name in nick_names:
                handle = taxonomy.get_handle(name)
                log.info("packetfactory: handle: %s" % str(handle))
            else:
                handles = taxonomy.get_handles(name)
                log.info("packetfactory: handles: %s" % str(handles))
                if len(handles) == 1:
                    handle = handles.pop()
                elif len(handles) > 1:
                    # TODO proper handling of this case
                    log.warn("Multiple handles found for '%s': %s" %
                             (name % handles))

            if handle >= 0:
                # ok, the nick_name has been found, either directly as a
                # nick_name or via an alias; set value (using nick_name):
                nick_name = taxonomy.get_nick_name(handle)

                assert isinstance(value, list)
                val = numpy.array(value)

                # NOTE for the moment, using the flat data record dict 'rdt':
                rdt[nick_name] = val


#                if is_coordinate(nick_name):
#                    coordinates_rdt[nick_name] = val
#                else:
#                    data_rdt[nick_name] = val

            else:
                # name not found.
                # In the current tests this is happening with 'stream_id'
                log.warning("No handle found for '%s'" % name)

        log.debug("dictionary created: %s" % rdt.pretty_print())

        return build_granule(data_producer_id=data_producer_id,
                             taxonomy=taxonomy,
                             record_dictionary=rdt)
    def execute(self, granule):

        log.debug('(Google DT transform): Received Viz Data Packet' )

        #init stuff
        varTuple = []
        dataDescription = []
        dataTableContent = []

        rdt = RecordDictionaryTool.load_from_granule(granule)

        vardict = {}
        vardict['time'] = get_safe(rdt, 'time')
        vardict['conductivity'] = get_safe(rdt, 'cond')
        vardict['pressure'] = get_safe(rdt, 'pres')
        vardict['temperature'] = get_safe(rdt, 'temp')

        vardict['longitude'] = get_safe(rdt, 'lon')
        vardict['latitude'] = get_safe(rdt, 'lat')
        vardict['height'] = get_safe(rdt, 'height')
        arrLen = len(vardict['time'])  # Figure out how many values are present in the granule

        # init the dataTable
        # create data description from the variables in the message
        dataDescription = [('time', 'float', 'time')]

        # split the data string to extract variable names
        for varname in  vardict.keys():   #psd.list_field_names():
            if varname == 'time':
                continue

            dataDescription.append((varname, 'number', varname))

        # Add the records to the datatable
        for i in xrange(arrLen):
            varTuple = []

            for varname,_,_ in dataDescription:

                if vardict[varname] is None or len(vardict[varname]) == 0:
                    val = 0.0
                else:
                    val = float(vardict[varname][i])

                varTuple.append(val)

            # Append the tuples to the data table
            if len(varTuple) > 0:
                dataTableContent.append(varTuple)

        # submit the partial datatable to the viz service
        out_rdt = RecordDictionaryTool(taxonomy=tx)

        # submit resulting table back using the out stream publisher. The data_product_id is the input dp_id
        # responsible for the incoming data
        msg = {"viz_product_type": "google_dt",
               "data_description": dataDescription,
               "data_content": dataTableContent}

        out_rdt['google_dt_components'] = numpy.array([msg])

        log.debug('Google DT transform: Sending a granule')
        out_granule = build_granule(data_producer_id='google_dt_transform', taxonomy=tx, record_dictionary=out_rdt)

        return out_granule