Example #1
    def test_granule(self):
        
        pdict_id = self.dataset_management.read_parameter_dictionary_by_name('ctd_parsed_param_dict', id_only=True)
        stream_def_id = self.pubsub_management.create_stream_definition('ctd', parameter_dictionary_id=pdict_id, stream_configuration={'reference_designator':"GA03FLMA-RI001-13-CTDMOG999"})
        pdict = DatasetManagementService.get_parameter_dictionary_by_name('ctd_parsed_param_dict')
        self.addCleanup(self.pubsub_management.delete_stream_definition,stream_def_id)

        stream_id, route = self.pubsub_management.create_stream('ctd_stream', 'xp1', stream_definition_id=stream_def_id)
        self.addCleanup(self.pubsub_management.delete_stream,stream_id)
        publisher = StandaloneStreamPublisher(stream_id, route)

        subscriber = StandaloneStreamSubscriber('sub', self.verify_incoming)
        subscriber.start()
        self.addCleanup(subscriber.stop)

        subscription_id = self.pubsub_management.create_subscription('sub', stream_ids=[stream_id])
        self.pubsub_management.activate_subscription(subscription_id)


        rdt = RecordDictionaryTool(stream_definition_id=stream_def_id)
        rdt['time'] = np.arange(10)
        rdt['temp'] = np.random.randn(10) * 10 + 30
        rdt['pressure'] = [20] * 10

        self.assertEquals(set(pdict.keys()), set(rdt.fields))
        self.assertEquals(pdict.temporal_parameter_name, rdt.temporal_parameter)

        self.assertEquals(rdt._stream_config['reference_designator'],"GA03FLMA-RI001-13-CTDMOG999")

        self.rdt = rdt
        self.data_producer_id = 'data_producer'
        self.provider_metadata_update = {1:1}

        publisher.publish(rdt.to_granule(data_producer_id='data_producer', provider_metadata_update={1:1}))

        self.assertTrue(self.event.wait(10))
        
        self.pubsub_management.deactivate_subscription(subscription_id)
        self.pubsub_management.delete_subscription(subscription_id)
        
        rdt = RecordDictionaryTool(stream_definition_id=stream_def_id)
        rdt['time'] = np.array([None,None,None])
        self.assertTrue(rdt['time'] is None)
        
        rdt['time'] = np.array([None, 1, 2])
        self.assertEquals(rdt['time'][0], rdt.fill_value('time'))


        stream_def_obj = self.pubsub_management.read_stream_definition(stream_def_id)
        rdt = RecordDictionaryTool(stream_definition=stream_def_obj)
        rdt['time'] = np.arange(20)
        rdt['temp'] = np.arange(20)


        granule = rdt.to_granule()
        rdt = RecordDictionaryTool.load_from_granule(granule)
        np.testing.assert_array_equal(rdt['time'], np.arange(20))
        np.testing.assert_array_equal(rdt['temp'], np.arange(20))
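The test above exercises the core RecordDictionaryTool round trip: values written to an RDT survive to_granule() and load_from_granule() intact. A minimal sketch of just that round trip, assuming the coi-services import path (an assumption; adjust to your tree) and a stream_def_id already in hand:

import numpy as np
# Assumed import path for RecordDictionaryTool in coi-services
from ion.services.dm.utility.granule import RecordDictionaryTool

rdt = RecordDictionaryTool(stream_definition_id=stream_def_id)  # stream_def_id assumed to exist
rdt['time'] = np.arange(10)
granule = rdt.to_granule()                              # serialize into a transportable granule
rdt2 = RecordDictionaryTool.load_from_granule(granule)  # rebuild an RDT from the granule
np.testing.assert_array_equal(rdt2['time'], np.arange(10))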
Example #2
 def get_last_values(cls, dataset_id, number_of_points):
     coverage = DatasetManagementService._get_coverage(dataset_id,mode='r')
     if coverage.num_timesteps < number_of_points:
         if coverage.num_timesteps == 0:
             rdt = RecordDictionaryTool(param_dictionary=coverage.parameter_dictionary)
             return rdt.to_granule()
         number_of_points = coverage.num_timesteps
     rdt = cls._coverage_to_granule(coverage,tdoa=slice(-number_of_points,None))
     coverage.close(timeout=5)
     
     return rdt.to_granule()
Example #3
    def get_last_values(cls, dataset_id, number_of_points):
        coverage = DatasetManagementService._get_coverage(dataset_id, mode='r')
        if coverage.num_timesteps < number_of_points:
            if coverage.num_timesteps == 0:
                rdt = RecordDictionaryTool(
                    param_dictionary=coverage.parameter_dictionary)
                return rdt.to_granule()
            number_of_points = coverage.num_timesteps
        rdt = cls._coverage_to_granule(coverage,
                                       tdoa=slice(-number_of_points, None))
        coverage.close(timeout=5)

        return rdt.to_granule()
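Examples #2 and #3 show the same get_last_values: the point count is clamped to the coverage size, and the tail of the data is read with a negative slice. The slice arithmetic in isolation (plain numpy, nothing else assumed):

import numpy as np

data = np.arange(100)
number_of_points = 10
tail = data[slice(-number_of_points, None)]  # identical to data[-number_of_points:]
assert (tail == np.arange(90, 100)).all()    # the last 10 values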
Example #4
    def test_serialize_compatability(self):
        ph = ParameterHelper(self.dataset_management, self.addCleanup)
        pdict_id = ph.create_extended_parsed()

        stream_def_id = self.pubsub_management.create_stream_definition('ctd extended', parameter_dictionary_id=pdict_id)
        self.addCleanup(self.pubsub_management.delete_stream_definition, stream_def_id)

        stream_id, route = self.pubsub_management.create_stream('ctd1', 'xp1', stream_definition_id=stream_def_id)
        self.addCleanup(self.pubsub_management.delete_stream, stream_id)

        sub_id = self.pubsub_management.create_subscription('sub1', stream_ids=[stream_id])
        self.addCleanup(self.pubsub_management.delete_subscription, sub_id)
        self.pubsub_management.activate_subscription(sub_id)
        self.addCleanup(self.pubsub_management.deactivate_subscription, sub_id)

        verified = Event()
        def verifier(msg, route, stream_id):
            for k,v in msg.record_dictionary.iteritems():
                if v is not None:
                    self.assertIsInstance(v, np.ndarray)
            rdt = RecordDictionaryTool.load_from_granule(msg)
            for k,v in rdt.iteritems():
                self.assertIsInstance(rdt[k], np.ndarray)
                self.assertIsInstance(v, np.ndarray)
            verified.set()

        subscriber = StandaloneStreamSubscriber('sub1', callback=verifier)
        subscriber.start()
        self.addCleanup(subscriber.stop)

        publisher = StandaloneStreamPublisher(stream_id,route)
        rdt = RecordDictionaryTool(stream_definition_id=stream_def_id)
        ph.fill_rdt(rdt,10)
        publisher.publish(rdt.to_granule())
        self.assertTrue(verified.wait(60))
Example #5
 def execute_retrieve(self):
     """
     execute_retrieve Executes a retrieval and returns the result 
     as a value in lieu of publishing it on a stream
     """
     coverage = None
     try:
         coverage = DatasetManagementService._get_coverage(self.dataset_id, mode="r")
         if coverage.num_timesteps == 0:
             log.info("Reading from an empty coverage")
             rdt = RecordDictionaryTool(param_dictionary=coverage.parameter_dictionary)
         else:
             rdt = self._coverage_to_granule(
                 coverage=coverage,
                 start_time=self.start_time,
                 end_time=self.end_time,
                 stride_time=self.stride_time,
                 parameters=self.parameters,
                 tdoa=self.tdoa,
             )
     except Exception:
         log.exception("Problems reading from the coverage")
         raise BadRequest("Problems reading from the coverage")
     finally:
         if coverage is not None:
             coverage.close(timeout=5)
     return rdt.to_granule()
Example #6
 def retrieve_oob(cls, dataset_id='', query=None, delivery_format=''):
     query = query or {}
     coverage = None
     try:
         coverage = cls._get_coverage(dataset_id)
         if coverage is None:
             raise BadRequest('no such coverage')
         if coverage.num_timesteps == 0:
             log.info('Reading from an empty coverage')
             rdt = RecordDictionaryTool(
                 param_dictionary=coverage.parameter_dictionary)
         else:
             rdt = ReplayProcess._coverage_to_granule(
                 coverage=coverage,
                 start_time=query.get('start_time', None),
                 end_time=query.get('end_time', None),
                 stride_time=query.get('stride_time', None),
                 parameters=query.get('parameters', None),
                 stream_def_id=delivery_format,
                 tdoa=query.get('tdoa', None))
     except Exception:
         cls._eject_cache(dataset_id)
         log.exception('Problems reading from the coverage')
         raise BadRequest('Problems reading from the coverage')
     return rdt.to_granule()
Example #7
    def test_execute_advanced_transform(self):
        # Runs a transform across L0-L2 with stream definitions including available fields
        streams = self.setup_advanced_transform()
        in_stream_id, in_stream_def_id = streams[0]
        out_stream_id, out_stream_defs_id = streams[1]

        validation_event = Event()
        def validator(msg, route, stream_id):
            rdt = RecordDictionaryTool.load_from_granule(msg)
            if not np.allclose(rdt['rho'], np.array([1001.0055034])):
                return
            validation_event.set()

        self.setup_validator(validator)

        in_route = self.pubsub_management.read_stream_route(in_stream_id)
        publisher = StandaloneStreamPublisher(in_stream_id, in_route)

        outbound_rdt = RecordDictionaryTool(stream_definition_id=in_stream_def_id)
        outbound_rdt['time'] = [0]
        outbound_rdt['TEMPWAT_L0'] = [280000]
        outbound_rdt['CONDWAT_L0'] = [100000]
        outbound_rdt['PRESWAT_L0'] = [2789]

        outbound_rdt['lat'] = [45]
        outbound_rdt['lon'] = [-71]

        outbound_granule = outbound_rdt.to_granule()

        publisher.publish(outbound_granule)

        self.assertTrue(validation_event.wait(2))
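The validator/Event idiom above recurs throughout these tests: the subscriber callback sets an event only when the expected values arrive, so the final wait(timeout) doubles as the assertion. A minimal sketch of the pattern, assuming the Event is gevent's (these tests appear to run under gevent) and RecordDictionaryTool imported as in the surrounding examples:

from gevent.event import Event  # assumption: the Event used here is gevent's
import numpy as np

validation_event = Event()

def validator(msg, route, stream_id):
    rdt = RecordDictionaryTool.load_from_granule(msg)
    if np.allclose(rdt['rho'], np.array([1001.0055034])):
        validation_event.set()  # signal only on the expected output

# ... register validator as the subscriber callback and publish a granule ...
assert validation_event.wait(2)  # False on timeout, so the test fails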
Example #8
 def retrieve_oob(cls, dataset_id='', query=None, delivery_format=''):
     query = query or {}
     coverage = None
     try:
         coverage = cls._get_coverage(dataset_id)
         if coverage is None:
             raise BadRequest('no such coverage')
         if isinstance(coverage, SimplexCoverage) and coverage.is_empty():
             log.info('Reading from an empty coverage')
             rdt = RecordDictionaryTool(param_dictionary=coverage.parameter_dictionary)
         else:
             args = {
                 'start_time'     : query.get('start_time', None),
                 'end_time'       : query.get('end_time', None),
                 'stride_time'    : query.get('stride_time', None),
                 'parameters'     : query.get('parameters', None),
                 'stream_def_id'  : delivery_format,
                 'tdoa'           : query.get('tdoa', None),
                 'sort_parameter' : query.get('sort_parameter', None)
             }
             rdt = ReplayProcess._cov2granule(coverage=coverage, **args)
     except Exception as e:
         cls._eject_cache(dataset_id)
         data_products, _ = Container.instance.resource_registry.find_subjects(object=dataset_id, predicate=PRED.hasDataset, subject_type=RT.DataProduct)
         for data_product in data_products:
             log.error("Data Product %s (%s) had issues reading from the coverage model\nretrieve_oob(dataset_id='%s', query=%s, delivery_format=%s)", data_product.name, data_product._id, dataset_id, query, delivery_format)
         log.error("Problems reading from the coverage", exc_info=True)
         raise BadRequest('Problems reading from the coverage')
     return rdt.to_granule()
Example #9
    def publish_to_data_product(self, data_product_id):
        stream_ids, _ = self.resource_registry.find_objects(subject=data_product_id, predicate=PRED.hasStream, id_only=True)
        self.assertTrue(len(stream_ids))
        stream_id = stream_ids.pop()
        route = self.pubsub_management.read_stream_route(stream_id)
        stream_definition = self.pubsub_management.read_stream_definition(stream_id=stream_id)
        stream_def_id = stream_definition._id
        publisher = StandaloneStreamPublisher(stream_id, route)
        rdt = RecordDictionaryTool(stream_definition_id=stream_def_id)
        now = time.time()
        ntp_now = now + 2208988800 # Do not use in production, this is a loose translation

        rdt['internal_timestamp'] = [ntp_now]
        rdt['temp'] = [300000]
        rdt['preferred_timestamp'] = ['driver_timestamp']
        rdt['time'] = [ntp_now]
        rdt['port_timestamp'] = [ntp_now]
        rdt['quality_flag'] = [None]
        rdt['lat'] = [45]
        rdt['conductivity'] = [4341400]
        rdt['driver_timestamp'] = [ntp_now]
        rdt['lon'] = [-71]
        rdt['pressure'] = [256.8]

        granule = rdt.to_granule()
        publisher.publish(granule)
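The 2208988800 offset above is the number of seconds between the NTP epoch (1900-01-01) and the Unix epoch (1970-01-01); adding it converts a Unix timestamp to NTP seconds. It ignores leap seconds, which is why the inline comment calls it a loose translation:

import time

NTP_UNIX_DELTA = 2208988800             # seconds from 1900-01-01 to 1970-01-01
ntp_now = time.time() + NTP_UNIX_DELTA  # Unix time expressed as NTP seconds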
Example #10
    def test_execute_advanced_transform(self):
        # Runs a transform across L0-L2 with stream definitions including available fields
        streams = self.setup_advanced_transform()
        in_stream_id, in_stream_def_id = streams[0]
        out_stream_id, out_stream_defs_id = streams[1]

        validation_event = Event()

        def validator(msg, route, stream_id):
            rdt = RecordDictionaryTool.load_from_granule(msg)
            if not np.allclose(rdt['rho'], np.array([1001.0055034])):
                return
            validation_event.set()

        self.setup_validator(validator)

        in_route = self.pubsub_management.read_stream_route(in_stream_id)
        publisher = StandaloneStreamPublisher(in_stream_id, in_route)

        outbound_rdt = RecordDictionaryTool(
            stream_definition_id=in_stream_def_id)
        outbound_rdt['time'] = [0]
        outbound_rdt['TEMPWAT_L0'] = [280000]
        outbound_rdt['CONDWAT_L0'] = [100000]
        outbound_rdt['PRESWAT_L0'] = [2789]

        outbound_rdt['lat'] = [45]
        outbound_rdt['lon'] = [-71]

        outbound_granule = outbound_rdt.to_granule()

        publisher.publish(outbound_granule)

        self.assertTrue(validation_event.wait(2))
Example #11
 def execute_retrieve(self):
     '''
     execute_retrieve Executes a retrieval and returns the result 
     as a value in lieu of publishing it on a stream
     '''
     coverage = None
     try:
         coverage = DatasetManagementService._get_coverage(self.dataset_id,
                                                           mode='r')
         if coverage.num_timesteps == 0:
             log.info('Reading from an empty coverage')
             rdt = RecordDictionaryTool(
                 param_dictionary=coverage.parameter_dictionary)
         else:
             rdt = self._coverage_to_granule(coverage=coverage,
                                             start_time=self.start_time,
                                             end_time=self.end_time,
                                             stride_time=self.stride_time,
                                             parameters=self.parameters,
                                             tdoa=self.tdoa)
     except Exception:
         log.exception('Problems reading from the coverage')
         raise BadRequest('Problems reading from the coverage')
     finally:
         if coverage is not None:
             coverage.close(timeout=5)
     return rdt.to_granule()
Example #12
    def test_execute_transform(self):
        available_fields_in = ['time', 'lat', 'lon', 'TEMPWAT_L0', 'CONDWAT_L0', 'PRESWAT_L0']
        available_fields_out = ['time', 'lat', 'lon', 'TEMPWAT_L0', 'CONDWAT_L0', 'PRESWAT_L0', 'TEMPWAT_L1','CONDWAT_L1','PRESWAT_L1','PRACSAL', 'DENSITY']
        exchange_pt1 = 'xp1'
        exchange_pt2 = 'xp2'
        stream_id_in,stream_id_out,stream_route_in,stream_route_out,stream_def_in_id,stream_def_out_id = self._setup_streams(exchange_pt1, exchange_pt2, available_fields_in, available_fields_out)

        rdt_in = RecordDictionaryTool(stream_definition_id=stream_def_in_id)
        dt = 20
        rdt_in['time'] = np.arange(dt)
        rdt_in['lat'] = [40.992469] * dt
        rdt_in['lon'] = [-71.727069] * dt
        rdt_in['TEMPWAT_L0'] = self._get_param_vals('TEMPWAT_L0', slice(None), (dt,))
        rdt_in['CONDWAT_L0'] = self._get_param_vals('CONDWAT_L0', slice(None), (dt,))
        rdt_in['PRESWAT_L0'] = self._get_param_vals('PRESWAT_L0', slice(None), (dt,))
        
        msg = rdt_in.to_granule()
        #pid = self.container.spawn_process('transform_stream','ion.processes.data.transforms.transform_prime','TransformPrime',{'process':{'routes':{(stream_id_in, stream_id_out):None},'stream_id':stream_id_out}})
        config = {'process':{'routes':{(stream_id_in, stream_id_out):None},'queue_name':exchange_pt1, 'publish_streams':{str(stream_id_out):stream_id_out}, 'process_type':'stream_process'}}
        pid = self.container.spawn_process('transform_stream','ion.processes.data.transforms.transform_prime','TransformPrime',config)
        rdt_out = self.container.proc_manager.procs[pid]._execute_transform(msg, (stream_id_in,stream_id_out))
        # Reload from a granule so the result values are wrapped in parameter value objects
        rdt_out = RecordDictionaryTool.load_from_granule(rdt_out.to_granule())
        for k,v in rdt_out.iteritems():
            self.assertEqual(len(v), dt)        
        
        self._validate_transforms(rdt_in, rdt_out)
        self.container.proc_manager.terminate_process(pid)
Example #13
    def test_derived_data_product(self):
        pdict_id = self.dataset_management.read_parameter_dictionary_by_name('ctd_parsed_param_dict', id_only=True)
        ctd_stream_def_id = self.pubsubcli.create_stream_definition(name='ctd parsed', parameter_dictionary_id=pdict_id)
        self.addCleanup(self.pubsubcli.delete_stream_definition, ctd_stream_def_id)

        tdom, sdom = time_series_domain()

        dp = DataProduct(name='Instrument DP', temporal_domain=tdom.dump(), spatial_domain=sdom.dump())
        dp_id = self.dpsc_cli.create_data_product(dp, stream_definition_id=ctd_stream_def_id)
        self.addCleanup(self.dpsc_cli.force_delete_data_product, dp_id)

        self.dpsc_cli.activate_data_product_persistence(dp_id)
        self.addCleanup(self.dpsc_cli.suspend_data_product_persistence, dp_id)


        dataset_ids, _ = self.rrclient.find_objects(subject=dp_id, predicate=PRED.hasDataset, id_only=True)
        if not dataset_ids:
            raise NotFound("Data Product %s dataset does not exist" % str(dp_id))
        dataset_id = dataset_ids[0]
        
        # Make the derived data product
        simple_stream_def_id = self.pubsubcli.create_stream_definition(name='TEMPWAT stream def', parameter_dictionary_id=pdict_id, available_fields=['time','temp'])
        tempwat_dp = DataProduct(name='TEMPWAT')
        tempwat_dp_id = self.dpsc_cli.create_data_product(tempwat_dp, stream_definition_id=simple_stream_def_id, parent_data_product_id=dp_id)
        self.addCleanup(self.dpsc_cli.delete_data_product, tempwat_dp_id)
        self.dpsc_cli.activate_data_product_persistence(tempwat_dp_id)
        self.addCleanup(self.dpsc_cli.suspend_data_product_persistence, tempwat_dp_id)
        # Check that the streams associated with the data product are persisted
        stream_ids, _ =  self.rrclient.find_objects(dp_id,PRED.hasStream,RT.Stream,True)
        for stream_id in stream_ids:
            self.assertTrue(self.ingestclient.is_persisted(stream_id))

        stream_id = stream_ids[0]
        route = self.pubsubcli.read_stream_route(stream_id=stream_id)

        rdt = RecordDictionaryTool(stream_definition_id=ctd_stream_def_id)
        rdt['time'] = np.arange(20)
        rdt['temp'] = np.arange(20)
        rdt['pressure'] = np.arange(20)

        publisher = StandaloneStreamPublisher(stream_id,route)
        
        dataset_modified = Event()
        def cb(*args, **kwargs):
            dataset_modified.set()
        es = EventSubscriber(event_type=OT.DatasetModified, callback=cb, origin=dataset_id, auto_delete=True)
        es.start()
        self.addCleanup(es.stop)

        publisher.publish(rdt.to_granule())

        self.assertTrue(dataset_modified.wait(30))

        tempwat_dataset_ids, _ = self.rrclient.find_objects(tempwat_dp_id, PRED.hasDataset, id_only=True)
        tempwat_dataset_id = tempwat_dataset_ids[0]
        granule = self.data_retriever.retrieve(tempwat_dataset_id, delivery_format=simple_stream_def_id)
        rdt = RecordDictionaryTool.load_from_granule(granule)
        np.testing.assert_array_equal(rdt['time'], np.arange(20))
        self.assertEquals(set(rdt.fields), set(['time','temp']))
Example #14
    def get_granule(self, time=None, pd=None):
        lat,lon,_ = self.get_location(time)
        value = self.get_value(time)

        pkg = RecordDictionaryTool(pd)
        pkg['salinity'] = array([value]*self.message_size)
        pkg['lat'] = array([lat]*self.message_size)
        pkg['lon'] = array([lon]*self.message_size)
        granule = pkg.to_granule()
        return granule
Example #15
    def test_array_visualization(self):
        data_product_id, stream_def_id = self.make_array_data_product()

        # Make a granule with an array type, give it a few values
        # Send it to google_dt transform, verify output

        rdt = RecordDictionaryTool(stream_definition_id=stream_def_id)
        rdt['time'] = np.arange(2208988800, 2208988810)
        rdt['temp_sample'] = np.arange(10*4).reshape(10,4)
        rdt['cond_sample'] = np.arange(10*4).reshape(10,4)

        granule = rdt.to_granule()
        dataset_monitor = DatasetMonitor(self.RR2.find_dataset_id_of_data_product_using_has_dataset(data_product_id))
        self.addCleanup(dataset_monitor.stop)
        self.ph.publish_rdt_to_data_product(data_product_id, rdt)
        dataset_monitor.event.wait(10)

        gdt_pdict_id = self.dataset_management.read_parameter_dictionary_by_name('google_dt',id_only=True)
        gdt_stream_def = self.create_stream_definition('gdt', parameter_dictionary_id=gdt_pdict_id)

        gdt_data_granule = VizTransformGoogleDTAlgorithm.execute(granule, params=gdt_stream_def)

        rdt = RecordDictionaryTool.load_from_granule(gdt_data_granule)
        testval = {'data_content': [
            [0.0 , 0.0  , 1.0  , 2.0  , 3.0  , 0.0  , 2.0  , 4.0  , 6.0  , 0.0  , 1.0  , 2.0  , 3.0]   ,
            [1.0 , 4.0  , 5.0  , 6.0  , 7.0  , 8.0  , 10.0 , 12.0 , 14.0 , 4.0  , 5.0  , 6.0  , 7.0]   ,
            [2.0 , 8.0  , 9.0  , 10.0 , 11.0 , 16.0 , 18.0 , 20.0 , 22.0 , 8.0  , 9.0  , 10.0 , 11.0]  ,
            [3.0 , 12.0 , 13.0 , 14.0 , 15.0 , 24.0 , 26.0 , 28.0 , 30.0 , 12.0 , 13.0 , 14.0 , 15.0]  ,
            [4.0 , 16.0 , 17.0 , 18.0 , 19.0 , 32.0 , 34.0 , 36.0 , 38.0 , 16.0 , 17.0 , 18.0 , 19.0]  ,
            [5.0 , 20.0 , 21.0 , 22.0 , 23.0 , 40.0 , 42.0 , 44.0 , 46.0 , 20.0 , 21.0 , 22.0 , 23.0]  ,
            [6.0 , 24.0 , 25.0 , 26.0 , 27.0 , 48.0 , 50.0 , 52.0 , 54.0 , 24.0 , 25.0 , 26.0 , 27.0]  ,
            [7.0 , 28.0 , 29.0 , 30.0 , 31.0 , 56.0 , 58.0 , 60.0 , 62.0 , 28.0 , 29.0 , 30.0 , 31.0]  ,
            [8.0 , 32.0 , 33.0 , 34.0 , 35.0 , 64.0 , 66.0 , 68.0 , 70.0 , 32.0 , 33.0 , 34.0 , 35.0]  ,
            [9.0 , 36.0 , 37.0 , 38.0 , 39.0 , 72.0 , 74.0 , 76.0 , 78.0 , 36.0 , 37.0 , 38.0 , 39.0]] ,
                 'data_description': [('time', 'number', 'time'),
              ('temp_sample[0]', 'number', 'temp_sample[0]', {'precision': '5'}),
              ('temp_sample[1]', 'number', 'temp_sample[1]', {'precision': '5'}),
              ('temp_sample[2]', 'number', 'temp_sample[2]', {'precision': '5'}),
              ('temp_sample[3]', 'number', 'temp_sample[3]', {'precision': '5'}),
              ('temp_offset[0]', 'number', 'temp_offset[0]', {'precision': '5'}),
              ('temp_offset[1]', 'number', 'temp_offset[1]', {'precision': '5'}),
              ('temp_offset[2]', 'number', 'temp_offset[2]', {'precision': '5'}),
              ('temp_offset[3]', 'number', 'temp_offset[3]', {'precision': '5'}),
              ('cond_sample[0]', 'number', 'cond_sample[0]', {'precision': '5'}),
              ('cond_sample[1]', 'number', 'cond_sample[1]', {'precision': '5'}),
              ('cond_sample[2]', 'number', 'cond_sample[2]', {'precision': '5'}),
              ('cond_sample[3]', 'number', 'cond_sample[3]', {'precision': '5'})],
             'viz_product_type': 'google_dt'}
        self.assertEquals(rdt['google_dt_components'][0], testval)
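For readers unfamiliar with the google_dt payload: data_description lists one (id, type, label[, options]) tuple per column, in Google DataTable style, and each data_content row carries the time value followed by one entry per array element, here temp_sample[0..3], temp_offset[0..3], and cond_sample[0..3]. The temp_offset columns were never written by the test, so their values come from the parameter dictionary's derivation (apparently temp_sample scaled by 2, judging from the expected rows).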
Example #16
    def write_to_data_product(self, data_product_id):

        dataset_ids, _ = self.resource_registry.find_objects(data_product_id,
                                                             'hasDataset',
                                                             id_only=True)
        dataset_id = dataset_ids.pop()

        stream_ids, _ = self.resource_registry.find_objects(data_product_id,
                                                            'hasStream',
                                                            id_only=True)
        stream_id = stream_ids.pop()
        stream_def_ids, _ = self.resource_registry.find_objects(
            stream_id, 'hasStreamDefinition', id_only=True)
        stream_def_id = stream_def_ids.pop()

        route = self.pubsub_management.read_stream_route(stream_id)

        rdt = RecordDictionaryTool(stream_definition_id=stream_def_id)

        time_param = rdt._pdict.temporal_parameter_name
        if time_param is None:
            print '%s has no temporal parameter' % self.resource_registry.read(
                data_product_id).name
            return
        rdt[time_param] = np.arange(40)

        for field in rdt.fields:
            if field == rdt._pdict.temporal_parameter_name:
                continue
            rdt[field] = self.fill_values(
                rdt._pdict.get_context(field).param_type, 40)

        publisher = StandaloneStreamPublisher(stream_id, route)
        publisher.publish(rdt.to_granule())

        self.wait_until_we_have_enough_granules(dataset_id, 40)

        granule = self.data_retriever.retrieve(dataset_id)
        rdt_out = RecordDictionaryTool.load_from_granule(granule)

        bad = []

        for field in rdt.fields:
            if not np.array_equal(rdt[field], rdt_out[field]):
                print '%s' % field
                print '%s != %s' % (rdt[field], rdt_out[field])
                bad.append(field)

        return bad
Example #17
    def test_array_visualization(self):
        data_product_id, stream_def_id = self.make_array_data_product()

        # Make a granule with an array type, give it a few values
        # Send it to google_dt transform, verify output

        rdt = RecordDictionaryTool(stream_definition_id=stream_def_id)
        rdt['time'] = np.arange(2208988800, 2208988810)
        rdt['temp_sample'] = np.arange(10*4).reshape(10,4)
        rdt['cond_sample'] = np.arange(10*4).reshape(10,4)

        granule = rdt.to_granule()
        dataset_monitor = DatasetMonitor(self.RR2.find_dataset_id_of_data_product_using_has_dataset(data_product_id))
        self.addCleanup(dataset_monitor.stop)
        self.ph.publish_rdt_to_data_product(data_product_id, rdt)
        dataset_monitor.event.wait(10)

        gdt_pdict_id = self.dataset_management.read_parameter_dictionary_by_name('google_dt',id_only=True)
        gdt_stream_def = self.create_stream_definition('gdt', parameter_dictionary_id=gdt_pdict_id)

        gdt_data_granule = VizTransformGoogleDTAlgorithm.execute(granule, params=gdt_stream_def)

        rdt = RecordDictionaryTool.load_from_granule(gdt_data_granule)
        testval = {'data_content': [
            [0.0 , 0.0  , 1.0  , 2.0  , 3.0  , 0.0  , 2.0  , 4.0  , 6.0  , 0.0  , 1.0  , 2.0  , 3.0]   ,
            [1.0 , 4.0  , 5.0  , 6.0  , 7.0  , 8.0  , 10.0 , 12.0 , 14.0 , 4.0  , 5.0  , 6.0  , 7.0]   ,
            [2.0 , 8.0  , 9.0  , 10.0 , 11.0 , 16.0 , 18.0 , 20.0 , 22.0 , 8.0  , 9.0  , 10.0 , 11.0]  ,
            [3.0 , 12.0 , 13.0 , 14.0 , 15.0 , 24.0 , 26.0 , 28.0 , 30.0 , 12.0 , 13.0 , 14.0 , 15.0]  ,
            [4.0 , 16.0 , 17.0 , 18.0 , 19.0 , 32.0 , 34.0 , 36.0 , 38.0 , 16.0 , 17.0 , 18.0 , 19.0]  ,
            [5.0 , 20.0 , 21.0 , 22.0 , 23.0 , 40.0 , 42.0 , 44.0 , 46.0 , 20.0 , 21.0 , 22.0 , 23.0]  ,
            [6.0 , 24.0 , 25.0 , 26.0 , 27.0 , 48.0 , 50.0 , 52.0 , 54.0 , 24.0 , 25.0 , 26.0 , 27.0]  ,
            [7.0 , 28.0 , 29.0 , 30.0 , 31.0 , 56.0 , 58.0 , 60.0 , 62.0 , 28.0 , 29.0 , 30.0 , 31.0]  ,
            [8.0 , 32.0 , 33.0 , 34.0 , 35.0 , 64.0 , 66.0 , 68.0 , 70.0 , 32.0 , 33.0 , 34.0 , 35.0]  ,
            [9.0 , 36.0 , 37.0 , 38.0 , 39.0 , 72.0 , 74.0 , 76.0 , 78.0 , 36.0 , 37.0 , 38.0 , 39.0]] ,
                 'data_description': [('time', 'number', 'time'),
              ('temp_sample[0]', 'number', 'temp_sample[0]', {'precision': '5'}),
              ('temp_sample[1]', 'number', 'temp_sample[1]', {'precision': '5'}),
              ('temp_sample[2]', 'number', 'temp_sample[2]', {'precision': '5'}),
              ('temp_sample[3]', 'number', 'temp_sample[3]', {'precision': '5'}),
              ('temp_offset[0]', 'number', 'temp_offset[0]', {'precision': '5'}),
              ('temp_offset[1]', 'number', 'temp_offset[1]', {'precision': '5'}),
              ('temp_offset[2]', 'number', 'temp_offset[2]', {'precision': '5'}),
              ('temp_offset[3]', 'number', 'temp_offset[3]', {'precision': '5'}),
              ('cond_sample[0]', 'number', 'cond_sample[0]', {'precision': '5'}),
              ('cond_sample[1]', 'number', 'cond_sample[1]', {'precision': '5'}),
              ('cond_sample[2]', 'number', 'cond_sample[2]', {'precision': '5'}),
              ('cond_sample[3]', 'number', 'cond_sample[3]', {'precision': '5'})],
             'viz_product_type': 'google_dt'}
        self.assertEquals(rdt['google_dt_components'][0], testval)
Example #18
    def test_transform_prime_no_available_fields(self):
        available_fields_in = []
        available_fields_out = []
        exchange_pt1 = 'xp1'
        exchange_pt2 = 'xp2'
        stream_id_in,stream_id_out,stream_route_in,stream_route_out,stream_def_in_id,stream_def_out_id = self._setup_streams(exchange_pt1, exchange_pt2, available_fields_in, available_fields_out)
        
        #launch transform
        config = {'process':{'routes':{(stream_id_in, stream_id_out):None},'queue_name':exchange_pt1, 'publish_streams':{str(stream_id_out):stream_id_out}, 'process_type':'stream_process'}}
        pid = self.container.spawn_process('transform_stream','ion.processes.data.transforms.transform_prime','TransformPrime',config)
        
        #create publisher
        publisher = StandaloneStreamPublisher(stream_id_in, stream_route_in)
        self.container.proc_manager.procs[pid].subscriber.xn.bind(stream_route_in.routing_key, publisher.xp)

        #data
        rdt_in = RecordDictionaryTool(stream_definition_id=stream_def_in_id)
        dt = 20
        rdt_in['time'] = np.arange(dt)
        rdt_in['lat'] = [40.992469] * dt
        rdt_in['lon'] = [-71.727069] * dt
        rdt_in['TEMPWAT_L0'] = self._get_param_vals('TEMPWAT_L0', slice(None), (dt,))
        rdt_in['CONDWAT_L0'] = self._get_param_vals('CONDWAT_L0', slice(None), (dt,))
        rdt_in['PRESWAT_L0'] = self._get_param_vals('PRESWAT_L0', slice(None), (dt,))
        msg = rdt_in.to_granule()
        #publish granule to transform and have transform publish it to subscriber
        
        #validate transformed data
        e = gevent.event.Event()
        def cb(msg, sr, sid):
            self.assertEqual(sid, stream_id_out)
            rdt_out = RecordDictionaryTool.load_from_granule(msg)
            self.assertEquals(set([k for k,v in rdt_out.iteritems()]), set(available_fields_out))
            for k,v in rdt_out.iteritems():
                self.assertEquals(rdt_out[k], None)
            e.set()

        sub = StandaloneStreamSubscriber('stream_subscriber', cb)
        sub.xn.bind(stream_route_out.routing_key, getattr(self.container.proc_manager.procs[pid], stream_id_out).xp)
        self.addCleanup(sub.stop)
        sub.start()
        
        #publish msg to transform
        publisher.publish(msg)
        
        #wait to receive msg
        self.assertTrue(e.wait(4))
Example #19
 def retrieve_oob(cls, dataset_id='', query=None, delivery_format=''):
     query = query or {}
     coverage = None
     try:
         coverage = cls._get_coverage(dataset_id)
         if coverage is None:
             raise BadRequest('no such coverage')
         if coverage.num_timesteps == 0:
             log.info('Reading from an empty coverage')
             rdt = RecordDictionaryTool(param_dictionary=coverage.parameter_dictionary)
         else:
             rdt = ReplayProcess._coverage_to_granule(coverage=coverage, start_time=query.get('start_time', None), end_time=query.get('end_time',None), stride_time=query.get('stride_time',None), parameters=query.get('parameters',None), stream_def_id=delivery_format, tdoa=query.get('tdoa',None))
     except Exception:
         cls._eject_cache(dataset_id)
         log.exception('Problems reading from the coverage')
         raise BadRequest('Problems reading from the coverage')
     return rdt.to_granule()
Example #20
    def test_filter(self):
        pdict_id = self.dataset_management.read_parameter_dictionary_by_name('ctd_parsed_param_dict', id_only=True)
        filtered_stream_def_id = self.pubsub_management.create_stream_definition('filtered', parameter_dictionary_id=pdict_id, available_fields=['time', 'temp'])
        self.addCleanup(self.pubsub_management.delete_stream_definition, filtered_stream_def_id)
        rdt = RecordDictionaryTool(stream_definition_id=filtered_stream_def_id)
        self.assertEquals(rdt._available_fields,['time','temp'])
        rdt['time'] = np.arange(20)
        rdt['temp'] = np.arange(20)
        with self.assertRaises(KeyError):
            rdt['pressure'] = np.arange(20)

        granule = rdt.to_granule()
        rdt2 = RecordDictionaryTool.load_from_granule(granule)
        self.assertEquals(rdt._available_fields, rdt2._available_fields)
        self.assertEquals(rdt.fields, rdt2.fields)
        for k,v in rdt.iteritems():
            self.assertTrue(np.array_equal(rdt[k], rdt2[k]))
Example #21
    def rdt_to_granule(self, context, value_array, comp_val=None):

        pdict = ParameterDictionary()
        pdict.add_context(context)

        rdt = RecordDictionaryTool(param_dictionary=pdict)
        rdt["test"] = value_array

        granule = rdt.to_granule()
        rdt2 = RecordDictionaryTool.load_from_granule(granule)

        testval = comp_val if comp_val is not None else value_array
        actual = rdt2["test"]

        if isinstance(testval, basestring):
            self.assertEquals(testval, actual)
        else:
            np.testing.assert_array_equal(testval, actual)
Example #22
    def rdt_to_granule(self, context, value_array, comp_val=None):

        pdict = ParameterDictionary()
        pdict.add_context(context)

        rdt = RecordDictionaryTool(param_dictionary=pdict)
        rdt['test'] = value_array

        granule = rdt.to_granule()
        rdt2 = RecordDictionaryTool.load_from_granule(granule)

        testval = comp_val if comp_val is not None else value_array
        actual = rdt2['test']

        if isinstance(testval, basestring):
            self.assertEquals(testval, actual)
        else:
            np.testing.assert_array_equal(testval, actual)
Example #23
    def write_to_data_product(self,data_product_id):

        dataset_ids, _ = self.resource_registry.find_objects(data_product_id, 'hasDataset', id_only=True)
        dataset_id = dataset_ids.pop()

        stream_ids , _ = self.resource_registry.find_objects(data_product_id, 'hasStream', id_only=True)
        stream_id = stream_ids.pop()
        stream_def_ids, _ = self.resource_registry.find_objects(stream_id, 'hasStreamDefinition', id_only=True)
        stream_def_id = stream_def_ids.pop()

        route = self.pubsub_management.read_stream_route(stream_id)

        rdt = RecordDictionaryTool(stream_definition_id=stream_def_id)

        time_param = rdt._pdict.temporal_parameter_name
        if time_param is None:
            print '%s has no temporal parameter' % self.resource_registry.read(data_product_id).name 
            return
        rdt[time_param] = np.arange(40)


        for field in rdt.fields:
            if field == rdt._pdict.temporal_parameter_name:
                continue
            rdt[field] = self.fill_values(rdt._pdict.get_context(field).param_type,40)

        publisher = StandaloneStreamPublisher(stream_id, route)
        publisher.publish(rdt.to_granule())

        self.wait_until_we_have_enough_granules(dataset_id,40)


        granule = self.data_retriever.retrieve(dataset_id)
        rdt_out = RecordDictionaryTool.load_from_granule(granule)

        bad = []

        for field in rdt.fields:
            if not np.array_equal(rdt[field], rdt_out[field]):
                print '%s' % field
                print '%s != %s' % (rdt[field], rdt_out[field])
                bad.append(field)

        return bad
Example #24
 def execute_retrieve(self):
     '''
     execute_retrieve Executes a retrieval and returns the result 
     as a value in lieu of publishing it on a stream
     '''
     coverage = None
     try:
         coverage = DatasetManagementService._get_coverage(self.dataset_id, mode='r')
         if coverage.num_timesteps == 0:
             log.info('Reading from an empty coverage')
             rdt = RecordDictionaryTool(param_dictionary=coverage.parameter_dictionary)
         else: 
             rdt = self._coverage_to_granule(coverage=coverage,start_time=self.start_time, end_time=self.end_time, stride_time=self.stride_time, parameters=self.parameters,tdoa=self.tdoa)
     except Exception:
         log.exception('Problems reading from the coverage')
         raise BadRequest('Problems reading from the coverage')
     finally:
         if coverage is not None:
             coverage.close(timeout=5)
     return rdt.to_granule()
Example #25
 def retrieve_oob(cls, dataset_id='', query=None, delivery_format=''):
     query = query or {}
     coverage = None
     try:
         coverage = cls._get_coverage(dataset_id)
         if coverage is None:
             raise BadRequest('no such coverage')
         if coverage.num_timesteps == 0:
             log.info('Reading from an empty coverage')
             rdt = RecordDictionaryTool(param_dictionary=coverage.parameter_dictionary)
         else:
             rdt = ReplayProcess._cov2granule(coverage=coverage, start_time=query.get('start_time', None), end_time=query.get('end_time',None), stride_time=query.get('stride_time',None), parameters=query.get('parameters',None), stream_def_id=delivery_format, tdoa=query.get('tdoa',None))
     except Exception:
         cls._eject_cache(dataset_id)
         data_products, _ = Container.instance.resource_registry.find_subjects(object=dataset_id, predicate=PRED.hasDataset, subject_type=RT.DataProduct)
         for data_product in data_products:
             log.exception("Data Product %s (%s) had issues reading from the coverage model\nretrieve_oob(dataset_id='%s', query=%s, delivery_format=%s)", data_product.name, data_product._id, dataset_id, query, delivery_format)
         raise BadRequest('Problems reading from the coverage')
     return rdt.to_granule()
Example #26
    def rdt_to_granule(self, context, value_array, comp_val=None):
        time = ParameterContext(name='time', param_type=QuantityType(value_encoding=np.float64))
        
        pdict = ParameterDictionary()
        pdict.add_context(time, is_temporal=True)
        pdict.add_context(context)

        rdt = RecordDictionaryTool(param_dictionary=pdict)
        rdt['time'] = np.arange(len(value_array))
        rdt['test'] = value_array

        granule = rdt.to_granule()
        rdt2 = RecordDictionaryTool.load_from_granule(granule)

        testval = comp_val if comp_val is not None else value_array
        actual = rdt2['test']

        if isinstance(testval, basestring):
            self.assertEquals(testval, actual)
        else:
            np.testing.assert_array_equal(testval, actual)
Example #27
    def rdt_to_granule(self, context, value_array, comp_val=None):
        time = ParameterContext(
            name='time', param_type=QuantityType(value_encoding=np.float64))

        pdict = ParameterDictionary()
        pdict.add_context(time, is_temporal=True)
        pdict.add_context(context)

        rdt = RecordDictionaryTool(param_dictionary=pdict)
        rdt['time'] = np.arange(len(value_array))
        rdt['test'] = value_array

        granule = rdt.to_granule()
        rdt2 = RecordDictionaryTool.load_from_granule(granule)

        testval = comp_val if comp_val is not None else value_array
        actual = rdt2['test']

        if isinstance(testval, basestring):
            self.assertEquals(testval, actual)
        else:
            np.testing.assert_array_equal(testval, actual)
Example #28
    def test_execute_transform(self):
        streams = self.setup_transform()
        in_stream_id, in_stream_def_id = streams[0]
        out_stream_id, out_stream_def_id = streams[1]

        validation_event = Event()

        def validator(msg, route, stream_id):
            rdt = RecordDictionaryTool.load_from_granule(msg)
            if not np.allclose(rdt['TEMPWAT_L1'], np.array([18.])):
                return
            if not np.allclose(rdt['CONDWAT_L1'], np.array([0.5])):
                return
            if not np.allclose(rdt['PRESWAT_L1'], np.array([0.04536611])):
                return
            validation_event.set()

        self.setup_validator(validator)

        in_route = self.pubsub_management.read_stream_route(in_stream_id)
        publisher = StandaloneStreamPublisher(in_stream_id, in_route)

        outbound_rdt = RecordDictionaryTool(
            stream_definition_id=in_stream_def_id)
        outbound_rdt['time'] = [0]
        outbound_rdt['TEMPWAT_L0'] = [280000]
        outbound_rdt['CONDWAT_L0'] = [100000]
        outbound_rdt['PRESWAT_L0'] = [2789]

        outbound_rdt['lat'] = [45]
        outbound_rdt['lon'] = [-71]

        outbound_granule = outbound_rdt.to_granule()

        publisher.publish(outbound_granule)

        self.assertTrue(validation_event.wait(2))
Example #29
    def test_execute_transform(self):
        streams = self.setup_transform()
        in_stream_id, in_stream_def_id = streams[0]
        out_stream_id, out_stream_def_id = streams[1]


        validation_event = Event()
        def validator(msg, route, stream_id):
            rdt = RecordDictionaryTool.load_from_granule(msg)
            if not np.allclose(rdt['TEMPWAT_L1'], np.array([18.])):
                return
            if not np.allclose(rdt['CONDWAT_L1'], np.array([0.5])):
                return
            if not np.allclose(rdt['PRESWAT_L1'], np.array([0.04536611])):
                return
            validation_event.set()

        self.setup_validator(validator)

        in_route = self.pubsub_management.read_stream_route(in_stream_id)
        publisher = StandaloneStreamPublisher(in_stream_id, in_route)

        outbound_rdt = RecordDictionaryTool(stream_definition_id=in_stream_def_id)
        outbound_rdt['time'] = [0]
        outbound_rdt['TEMPWAT_L0'] = [280000]
        outbound_rdt['CONDWAT_L0'] = [100000]
        outbound_rdt['PRESWAT_L0'] = [2789]

        outbound_rdt['lat'] = [45]
        outbound_rdt['lon'] = [-71]

        outbound_granule = outbound_rdt.to_granule()

        publisher.publish(outbound_granule)

        self.assertTrue(validation_event.wait(2))
Example #30
 def execute_retrieve(self):
     '''
     execute_retrieve Executes a retrieval and returns the result 
     as a value in lieu of publishing it on a stream
     '''
     coverage = None
     try:
         coverage = DatasetManagementService._get_coverage(self.dataset_id, mode='r')
         if coverage.is_empty():
             log.info('Reading from an empty coverage')
             rdt = RecordDictionaryTool(param_dictionary=coverage.parameter_dictionary)
         else: 
             rdt = ReplayProcess._cov2granule(coverage=coverage, 
                     start_time=self.start_time, 
                     end_time=self.end_time,
                     stride_time=self.stride_time, 
                     parameters=self.parameters, 
                     stream_def_id=self.delivery_format, 
                     tdoa=self.tdoa)
     except Exception:
         log.exception('Problems reading from the coverage')
         raise BadRequest('Problems reading from the coverage')
     finally:
         if coverage is not None:
             coverage.close(timeout=5)
     return rdt.to_granule()
Example #31
    def test_granule(self):
        
        pdict_id = self.dataset_management.read_parameter_dictionary_by_name('ctd_parsed_param_dict', id_only=True)
        stream_def_id = self.pubsub_management.create_stream_definition('ctd', parameter_dictionary_id=pdict_id)
        pdict = DatasetManagementService.get_parameter_dictionary_by_name('ctd_parsed_param_dict')

        stream_id, route = self.pubsub_management.create_stream('ctd_stream', 'xp1', stream_definition_id=stream_def_id)
        self.xps.append('xp1')
        publisher = StandaloneStreamPublisher(stream_id, route)

        subscriber = StandaloneStreamSubscriber('sub', self.verify_incoming)
        subscriber.start()

        subscription_id = self.pubsub_management.create_subscription('sub', stream_ids=[stream_id])
        self.xns.append('sub')
        self.pubsub_management.activate_subscription(subscription_id)


        rdt = RecordDictionaryTool(stream_definition_id=stream_def_id)
        rdt['time'] = np.arange(10)
        rdt['temp'] = np.random.randn(10) * 10 + 30
        rdt['pressure'] = [20] * 10

        self.assertEquals(set(pdict.keys()), set(rdt.fields))
        self.assertEquals(pdict.temporal_parameter_name, rdt.temporal_parameter)

        self.rdt = rdt
        self.data_producer_id = 'data_producer'
        self.provider_metadata_update = {1:1}

        publisher.publish(rdt.to_granule(data_producer_id='data_producer', provider_metadata_update={1:1}))

        self.assertTrue(self.event.wait(10))
        
        self.pubsub_management.deactivate_subscription(subscription_id)
        self.pubsub_management.delete_subscription(subscription_id)
Example #32
 def retrieve_oob(cls, dataset_id='', query=None, delivery_format=''):
     query = query or {}
     coverage = None
     try:
         coverage = cls._get_coverage(dataset_id)
         if coverage is None:
             raise BadRequest('no such coverage')
         if isinstance(coverage, SimplexCoverage) and coverage.is_empty():
             log.info('Reading from an empty coverage')
             rdt = RecordDictionaryTool(
                 param_dictionary=coverage.parameter_dictionary)
         else:
             args = {
                 'start_time': query.get('start_time', None),
                 'end_time': query.get('end_time', None),
                 'stride_time': query.get('stride_time', None),
                 'parameters': query.get('parameters', None),
                 'stream_def_id': delivery_format,
                 'tdoa': query.get('tdoa', None),
                 'sort_parameter': query.get('sort_parameter', None)
             }
             rdt = ReplayProcess._cov2granule(coverage=coverage, **args)
     except Exception as e:
         cls._eject_cache(dataset_id)
         data_products, _ = Container.instance.resource_registry.find_subjects(
             object=dataset_id,
             predicate=PRED.hasDataset,
             subject_type=RT.DataProduct)
         for data_product in data_products:
             log.error(
                 "Data Product %s (%s) had issues reading from the coverage model\nretrieve_oob(dataset_id='%s', query=%s, delivery_format=%s)",
                 data_product.name, data_product._id, dataset_id, query,
                 delivery_format)
         log.error("Problems reading from the coverage", exc_info=True)
         raise BadRequest('Problems reading from the coverage')
     return rdt.to_granule()
Example #33
class CoverageCraft(object):
    '''
    AKA the BlackBox
    PFM courtesy of Tommy Vandesteene
    '''
    def __init__(self,coverage=None, granule=None):
        if coverage is None:
            self.coverage = self.create_coverage()
            self.rdt = RecordDictionaryTool(param_dictionary=self.coverage.parameter_dictionary)
        else:
            self.coverage = coverage
            if granule is not None:
                self.sync_with_granule(granule)
            else:
                self.rdt = RecordDictionaryTool(param_dictionary=self.coverage.parameter_dictionary)
        self.pdict = self.coverage.parameter_dictionary


    def sync_rdt_with_granule(self, granule):
        rdt = RecordDictionaryTool.load_from_granule(granule)
        self.rdt = rdt
        return rdt

    def sync_with_granule(self, granule=None):
        if granule is not None:
            self.sync_rdt_with_granule(granule)
        if self.rdt is None:
            log.error('Failed to add granule, no granule assigned.')
            return
        start_index = self.coverage.num_timesteps 
        elements = self.rdt._shp[0]
        if not elements: return
        self.coverage.insert_timesteps(elements)

        for k,v in self.rdt.iteritems():
            log.info("key: %s" , k)
            log.info("value: %s" , v)
            slice_ = slice(start_index,None)
            log.info("slice: %s",  slice_)
            self.coverage.set_parameter_values(param_name=k,tdoa=slice_, value=v)


    def sync_rdt_with_coverage(self, coverage=None, tdoa=None, start_time=None, end_time=None, stride_time=None, parameters=None):
        '''
        Builds a granule based on the coverage
        '''
        if coverage is None:
            coverage = self.coverage

        slice_ = slice(None) # Defaults to all values
        if tdoa is not None and isinstance(tdoa,slice):
            slice_ = tdoa

        elif stride_time is not None:
            validate_is_instance(start_time, Number, 'start_time must be a number for striding.')
            validate_is_instance(end_time, Number, 'end_time must be a number for striding.')
            validate_is_instance(stride_time, Number, 'stride_time must be a number for striding.')
            ugly_range = np.arange(start_time, end_time, stride_time)
            idx_values = [self.get_relative_time(coverage,i) for i in ugly_range]
            slice_ = [idx_values]

        elif not (start_time is None and end_time is None):
            time_var = coverage._temporal_param_name
            uom = coverage.get_parameter_context(time_var).uom
            if start_time is not None:
                start_units = self.ts_to_units(uom,start_time)
                log.info('Units: %s', start_units)
                start_idx = self.get_relative_time(coverage,start_units)
                log.info('Start Index: %s', start_idx)
                start_time = start_idx
            if end_time is not None:
                end_units   = self.ts_to_units(uom,end_time)
                log.info('End units: %s', end_units)
                end_idx   = self.get_relative_time(coverage,end_units)
                log.info('End index: %s',  end_idx)
                end_time = end_idx
            slice_ = slice(start_time,end_time,stride_time)
            log.info('Slice: %s', slice_)

        if parameters is not None:
            pdict = ParameterDictionary()
            params = set(coverage.list_parameters()).intersection(parameters)
            for param in params:
                pdict.add_context(coverage.get_parameter_context(param))
            rdt = RecordDictionaryTool(param_dictionary=pdict)
            self.pdict = pdict
        else:
            rdt = RecordDictionaryTool(param_dictionary=coverage.parameter_dictionary)
        
        fields = coverage.list_parameters()
        if parameters is not None:
            fields = set(fields).intersection(parameters)

        for d in fields:
            rdt[d] = coverage.get_parameter_values(d,tdoa=slice_)
        self.rdt = rdt # Sync

    def to_granule(self):
        return self.rdt.to_granule()

    @classmethod
    def create_coverage(cls):
        pdict = cls.create_parameters()
        sdom, tdom = cls.create_domains()
    
        scov = SimplexCoverage('sample grid coverage_model', pdict, tdom, sdom)

        return scov

    @classmethod
    def create_domains(cls):
        '''
        WARNING: This method is a wrapper intended only for tests, it should not be used in production code.
        It probably will not align to most datasets.
        '''
        tcrs = CRS([AxisTypeEnum.TIME])
        scrs = CRS([AxisTypeEnum.LON, AxisTypeEnum.LAT, AxisTypeEnum.HEIGHT])

        tdom = GridDomain(GridShape('temporal', [0]), tcrs, MutabilityEnum.EXTENSIBLE)
        sdom = GridDomain(GridShape('spatial', [0]), scrs, MutabilityEnum.IMMUTABLE) # Dimensionality is excluded for now
        return sdom, tdom

    @classmethod
    def create_parameters(cls):
        '''
        WARNING: This method is a wrapper intended only for tests, it should not be used in production code.
        It probably will not align to most datasets.
        '''
        pdict = ParameterDictionary()
        t_ctxt = ParameterContext('time', param_type=QuantityType(value_encoding=np.int64))
        t_ctxt.axis = AxisTypeEnum.TIME
        t_ctxt.uom = 'seconds since 1970-01-01'
        t_ctxt.fill_value = 0x0
        pdict.add_context(t_ctxt)

        lat_ctxt = ParameterContext('lat', param_type=QuantityType(value_encoding=np.float32))
        lat_ctxt.axis = AxisTypeEnum.LAT
        lat_ctxt.uom = 'degree_north'
        lat_ctxt.fill_value = 0e0
        pdict.add_context(lat_ctxt)

        lon_ctxt = ParameterContext('lon', param_type=QuantityType(value_encoding=np.float32))
        lon_ctxt.axis = AxisTypeEnum.LON
        lon_ctxt.uom = 'degree_east'
        lon_ctxt.fill_value = 0e0
        pdict.add_context(lon_ctxt)

        temp_ctxt = ParameterContext('temp', param_type=QuantityType(value_encoding=np.float32))
        temp_ctxt.uom = 'degree_Celsius'
        temp_ctxt.fill_value = 0e0
        pdict.add_context(temp_ctxt)

        cond_ctxt = ParameterContext('conductivity', param_type=QuantityType(value_encoding=np.float32))
        cond_ctxt.uom = 'unknown'
        cond_ctxt.fill_value = 0e0
        pdict.add_context(cond_ctxt)

        data_ctxt = ParameterContext('data', param_type=QuantityType(value_encoding=np.int8))
        data_ctxt.uom = 'byte'
        data_ctxt.fill_value = 0x0
        pdict.add_context(data_ctxt)

        pres_ctxt = ParameterContext('pressure', param_type=QuantityType(value_encoding=np.float32))
        pres_ctxt.uom = 'Pascal'
        pres_ctxt.fill_value = 0x0
        pdict.add_context(pres_ctxt)

        sal_ctxt = ParameterContext('salinity', param_type=QuantityType(value_encoding=np.float32))
        sal_ctxt.uom = 'PSU'
        sal_ctxt.fill_value = 0x0
        pdict.add_context(sal_ctxt)

        dens_ctxt = ParameterContext('density', param_type=QuantityType(value_encoding=np.float32))
        dens_ctxt.uom = 'unknown'
        dens_ctxt.fill_value = 0x0
        pdict.add_context(dens_ctxt)

        return pdict
        
    @classmethod
    def get_relative_time(cls, coverage, time):
        '''
        Determines the relative time in the coverage model based on a given time
        The time must match the coverage's time units
        '''
        time_name = coverage._temporal_param_name
        pc = coverage.get_parameter_context(time_name)
        units = pc.uom
        if 'iso' in units:
            return None # Not sure how to implement this....  How do you compare iso strings effectively?
        values = coverage.get_parameter_values(time_name)
        return cls.find_nearest(values,time)
       
    @classmethod
    def find_nearest(cls, arr, val):
        '''
        Finds the index of the element in a numpy array nearest to the given value
        '''
        idx = np.abs(arr-val).argmin()
        return idx



    @staticmethod
    def ts_to_units(units, val):
        '''
        Converts a unix timestamp into various formats
        Example:
        ts = time.time()
        CoverageCraft.ts_to_units('days since 2000-01-01', ts)
        '''
        if 'iso' in units:
            return time.strftime('%Y-%m-%dT%H:%M:%S', time.gmtime(val))
        elif 'since' in units:
            t = netCDF4.netcdftime.utime(units)
            return t.date2num(datetime.datetime.utcfromtimestamp(val))
        else:
            return val


    @staticmethod
    def units_to_ts(units, val):
        '''
        Converts known time formats into a unix timestamp
        Example:
        ts = CoverageCraft.units_to_ts('days since 2000-01-01', 1200)
        '''
        if 'since' in units:
            t = netCDF4.netcdftime.utime(units)
            dtg = t.num2date(val)
            return time.mktime(dtg.timetuple())
        elif 'iso' in units:
            t = dateutil.parser.parse(val)
            return time.mktime(t.timetuple())
        else:
            return val
        


    def build_coverage(self):
        pass
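A hedged usage sketch for CoverageCraft, relying only on the methods defined above and on the test-only sample coverage from create_coverage():

import numpy as np

craft = CoverageCraft()                 # builds the sample coverage and an empty RDT
craft.rdt['time'] = np.arange(10)
craft.rdt['temp'] = np.ones(10, dtype=np.float32) * 20.0
craft.sync_with_granule()               # inserts the RDT's values into the coverage
craft.sync_rdt_with_coverage()          # reads the coverage back into a fresh RDT
granule = craft.to_granule()            # and out again as a granule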
Example #34
    def test_lookup_values(self):
        ph = ParameterHelper(self.dataset_management, self.addCleanup)
        pdict_id = ph.create_lookups()
        stream_def_id = self.pubsubcli.create_stream_definition(
            'lookup', parameter_dictionary_id=pdict_id)
        self.addCleanup(self.pubsubcli.delete_stream_definition, stream_def_id)

        data_product = DataProduct(name='lookup data product')
        tdom, sdom = time_series_domain()
        data_product.temporal_domain = tdom.dump()
        data_product.spatial_domain = sdom.dump()

        data_product_id = self.dpsc_cli.create_data_product(
            data_product, stream_definition_id=stream_def_id)
        self.addCleanup(self.dpsc_cli.delete_data_product, data_product_id)
        data_producer = DataProducer(name='producer')
        data_producer.producer_context = DataProcessProducerContext()
        data_producer.producer_context.configuration['qc_keys'] = [
            'offset_document'
        ]
        data_producer_id, _ = self.rrclient.create(data_producer)
        self.addCleanup(self.rrclient.delete, data_producer_id)
        assoc, _ = self.rrclient.create_association(
            subject=data_product_id,
            object=data_producer_id,
            predicate=PRED.hasDataProducer)
        self.addCleanup(self.rrclient.delete_association, assoc)

        document_keys = self.damsclient.list_qc_references(data_product_id)

        self.assertEquals(document_keys, ['offset_document'])
        svm = StoredValueManager(self.container)
        svm.stored_value_cas('offset_document', {'offset_a': 2.0})
        self.dpsc_cli.activate_data_product_persistence(data_product_id)
        dataset_ids, _ = self.rrclient.find_objects(subject=data_product_id,
                                                    predicate=PRED.hasDataset,
                                                    id_only=True)
        dataset_id = dataset_ids[0]

        dataset_monitor = DatasetMonitor(dataset_id)
        self.addCleanup(dataset_monitor.stop)

        rdt = RecordDictionaryTool(stream_definition_id=stream_def_id)
        rdt['time'] = [0]
        rdt['temp'] = [20.]
        granule = rdt.to_granule()

        stream_ids, _ = self.rrclient.find_objects(subject=data_product_id,
                                                   predicate=PRED.hasStream,
                                                   id_only=True)
        stream_id = stream_ids[0]
        route = self.pubsubcli.read_stream_route(stream_id=stream_id)

        publisher = StandaloneStreamPublisher(stream_id, route)
        publisher.publish(granule)

        self.assertTrue(dataset_monitor.event.wait(10))

        granule = self.data_retriever.retrieve(dataset_id)
        rdt2 = RecordDictionaryTool.load_from_granule(granule)
        np.testing.assert_array_equal(rdt['temp'], rdt2['temp'])
        np.testing.assert_array_almost_equal(rdt2['calibrated'],
                                             np.array([22.0]))

        svm.stored_value_cas('updated_document', {'offset_a': 3.0})
        dataset_monitor = DatasetMonitor(dataset_id)
        self.addCleanup(dataset_monitor.stop)
        ep = EventPublisher(event_type=OT.ExternalReferencesUpdatedEvent)
        ep.publish_event(origin=data_product_id,
                         reference_keys=['updated_document'])

        rdt = RecordDictionaryTool(stream_definition_id=stream_def_id)
        rdt['time'] = [1]
        rdt['temp'] = [20.]
        granule = rdt.to_granule()
        gevent.sleep(2)  # Yield so that the event goes through
        publisher.publish(granule)
        self.assertTrue(dataset_monitor.event.wait(10))

        granule = self.data_retriever.retrieve(dataset_id)
        rdt2 = RecordDictionaryTool.load_from_granule(granule)
        np.testing.assert_array_equal(rdt2['temp'], np.array([20., 20.]))
        np.testing.assert_array_almost_equal(rdt2['calibrated'],
                                             np.array([22.0, 23.0]))
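
        # The expected values above follow from a simple additive calibration:
        # the lookup document supplies offset_a, and the 'calibrated' parameter
        # (defined by ph.create_lookups(), not shown here) presumably evaluates
        # to temp + offset_a:
        #     first granule:           20.0 + 2.0 -> 22.0
        #     after updated_document:  20.0 + 3.0 -> 23.0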
Example #35
    def test_derived_data_product(self):
        pdict_id = self.dataset_management.read_parameter_dictionary_by_name(
            'ctd_parsed_param_dict', id_only=True)
        ctd_stream_def_id = self.pubsubcli.create_stream_definition(
            name='ctd parsed', parameter_dictionary_id=pdict_id)
        self.addCleanup(self.pubsubcli.delete_stream_definition,
                        ctd_stream_def_id)

        dp = DataProduct(name='Instrument DP')
        dp_id = self.dpsc_cli.create_data_product(
            dp, stream_definition_id=ctd_stream_def_id)
        self.addCleanup(self.dpsc_cli.force_delete_data_product, dp_id)

        self.dpsc_cli.activate_data_product_persistence(dp_id)
        self.addCleanup(self.dpsc_cli.suspend_data_product_persistence, dp_id)

        dataset_ids, _ = self.rrclient.find_objects(subject=dp_id,
                                                    predicate=PRED.hasDataset,
                                                    id_only=True)
        if not dataset_ids:
            raise NotFound("Data Product %s dataset  does not exist" %
                           str(dp_id))
        dataset_id = dataset_ids[0]

        # Make the derived data product
        simple_stream_def_id = self.pubsubcli.create_stream_definition(
            name='TEMPWAT stream def',
            parameter_dictionary_id=pdict_id,
            available_fields=['time', 'temp'])
        tempwat_dp = DataProduct(name='TEMPWAT',
                                 category=DataProductTypeEnum.DERIVED)
        tempwat_dp_id = self.dpsc_cli.create_data_product(
            tempwat_dp,
            stream_definition_id=simple_stream_def_id,
            parent_data_product_id=dp_id)
        self.addCleanup(self.dpsc_cli.delete_data_product, tempwat_dp_id)
        # Check that the streams associated with the data product are persisted
        stream_ids, _ = self.rrclient.find_objects(dp_id, PRED.hasStream,
                                                   RT.Stream, True)
        for stream_id in stream_ids:
            self.assertTrue(self.ingestclient.is_persisted(stream_id))

        stream_id = stream_ids[0]
        route = self.pubsubcli.read_stream_route(stream_id=stream_id)

        rdt = RecordDictionaryTool(stream_definition_id=ctd_stream_def_id)
        rdt['time'] = np.arange(20)
        rdt['temp'] = np.arange(20)
        rdt['pressure'] = np.arange(20)

        publisher = StandaloneStreamPublisher(stream_id, route)

        dataset_modified = Event()

        def cb(*args, **kwargs):
            dataset_modified.set()

        es = EventSubscriber(event_type=OT.DatasetModified,
                             callback=cb,
                             origin=dataset_id,
                             auto_delete=True)
        es.start()
        self.addCleanup(es.stop)

        publisher.publish(rdt.to_granule())

        self.assertTrue(dataset_modified.wait(30))

        tempwat_dataset_ids, _ = self.rrclient.find_objects(tempwat_dp_id,
                                                            PRED.hasDataset,
                                                            id_only=True)
        tempwat_dataset_id = tempwat_dataset_ids[0]
        granule = self.data_retriever.retrieve(
            tempwat_dataset_id, delivery_format=simple_stream_def_id)
        rdt = RecordDictionaryTool.load_from_granule(granule)
        np.testing.assert_array_equal(rdt['time'], np.arange(20))
        self.assertEquals(set(rdt.fields), set(['time', 'temp']))
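
        # A note on the retrieval above (hedged): the derived TEMPWAT product
        # is presumably backed by the parent product's data, and passing the
        # derived stream definition as delivery_format projects the stored
        # record onto its available_fields, which is why only 'time' and
        # 'temp' come back.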
Example #36
class CoverageCraft(object):
    '''
    AKA the BlackBox
    PFM courtesy of Tommy Vandesteene
    '''
    def __init__(self,coverage=None, granule=None):
        if coverage is None:
            self.coverage = self.create_coverage()
            self.rdt = RecordDictionaryTool(param_dictionary=self.coverage.parameter_dictionary)
        else:
            self.coverage = coverage
            if granule is not None:
                self.sync_with_granule(granule)
            else:
                self.rdt = RecordDictionaryTool(param_dictionary=self.coverage.parameter_dictionary)
        self.pdict = self.coverage.parameter_dictionary


    def sync_rdt_with_granule(self, granule):
        rdt = RecordDictionaryTool.load_from_granule(granule)
        self.rdt = rdt
        return rdt

    def sync_with_granule(self, granule=None):
        if granule is not None:
            self.sync_rdt_with_granule(granule)
        if self.rdt is None:
            log.error('Failed to add granule, no granule assigned.')
            return
        start_index = self.coverage.num_timesteps 
        elements = self.rdt._shp[0]
        if not elements: return
        self.coverage.insert_timesteps(elements)

        for k,v in self.rdt.iteritems():
            log.info("key: %s" , k)
            log.info("value: %s" , v)
            slice_ = slice(start_index,None)
            log.info("slice: %s",  slice_)
            self.coverage.set_parameter_values(param_name=k,tdoa=slice_, value=v)
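
    # A hedged usage sketch of the append path above: insert_timesteps() grows
    # the coverage's time axis by the number of records in the granule
    # (rdt._shp[0]), then each parameter is written into the tail slice
    # [start_index:].
    #
    #     craft = CoverageCraft(coverage=cov)   # cov: an existing coverage
    #     craft.sync_with_granule(granule)      # appends the granule's rows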


    def sync_rdt_with_coverage(self, coverage=None, tdoa=None, start_time=None, end_time=None, stride_time=None, parameters=None):
        '''
        Builds a granule based on the coverage
        '''
        if coverage is None:
            coverage = self.coverage

        slice_ = slice(None) # Defaults to all values
        if tdoa is not None and isinstance(tdoa,slice):
            slice_ = tdoa

        elif stride_time is not None:
            validate_is_instance(start_time, Number, 'start_time must be a number for striding.')
            validate_is_instance(end_time, Number, 'end_time must be a number for striding.')
            validate_is_instance(stride_time, Number, 'stride_time must be a number for striding.')
            ugly_range = np.arange(start_time, end_time, stride_time)
            idx_values = [TimeUtils.get_relative_time(coverage,i) for i in ugly_range]
            slice_ = [idx_values]

        elif not (start_time is None and end_time is None):
            time_var = coverage._temporal_param_name
            uom = coverage.get_parameter_context(time_var).uom
            if start_time is not None:
                start_units = TimeUtils.ts_to_units(uom,start_time)
                log.info('Units: %s', start_units)
                start_idx = TimeUtils.get_relative_time(coverage,start_units)
                log.info('Start Index: %s', start_idx)
                start_time = start_idx
            if end_time is not None:
                end_units   = TimeUtils.ts_to_units(uom,end_time)
                log.info('End units: %s', end_units)
                end_idx   = TimeUtils.get_relative_time(coverage,end_units)
                log.info('End index: %s',  end_idx)
                end_time = end_idx
            slice_ = slice(start_time,end_time,stride_time)
            log.info('Slice: %s', slice_)

        if parameters is not None:
            pdict = ParameterDictionary()
            params = set(coverage.list_parameters()).intersection(parameters)
            for param in params:
                pdict.add_context(coverage.get_parameter_context(param))
            rdt = RecordDictionaryTool(param_dictionary=pdict)
            self.pdict = pdict
        else:
            rdt = RecordDictionaryTool(param_dictionary=coverage.parameter_dictionary)
        
        fields = coverage.list_parameters()
        if parameters is not None:
            fields = set(fields).intersection(parameters)

        for d in fields:
            rdt[d] = coverage.get_parameter_values(d,tdoa=slice_)
        self.rdt = rdt # Sync

    def to_granule(self):
        return self.rdt.to_granule()
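
    # A hedged usage sketch of the windowed read above: ts_to_units() maps the
    # unix timestamps into the coverage's time units, get_relative_time() maps
    # those onto timestep indices, and the resulting slice is loaded into
    # self.rdt before being packaged by to_granule().
    #
    #     craft = CoverageCraft(coverage=cov)
    #     craft.sync_rdt_with_coverage(start_time=t0, end_time=t1,
    #                                  parameters=['temp'])
    #     granule = craft.to_granule()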

    @classmethod
    def create_coverage(cls):
        pdict = cls.create_parameters()
        sdom, tdom = cls.create_domains()
    
        scov = SimplexCoverage('sample grid coverage_model', pdict, tdom, sdom)

        return scov

    @classmethod
    def create_domains(cls):
        '''
        WARNING: This method is a wrapper intended only for tests, it should not be used in production code.
        It probably will not align to most datasets.
        '''
        tcrs = CRS([AxisTypeEnum.TIME])
        scrs = CRS([AxisTypeEnum.LON, AxisTypeEnum.LAT, AxisTypeEnum.HEIGHT])

        tdom = GridDomain(GridShape('temporal', [0]), tcrs, MutabilityEnum.EXTENSIBLE)
        sdom = GridDomain(GridShape('spatial', [0]), scrs, MutabilityEnum.IMMUTABLE) # Dimensionality is excluded for now
        return sdom, tdom

    @classmethod
    def create_parameters(cls):
        '''
        WARNING: This method is a wrapper intended only for tests, it should not be used in production code.
        It probably will not align to most datasets.
        '''
        pdict = ParameterDictionary()
        t_ctxt = ParameterContext('time', param_type=QuantityType(value_encoding=np.int64))
        t_ctxt.axis = AxisTypeEnum.TIME
        t_ctxt.uom = 'seconds since 1970-01-01'
        t_ctxt.fill_value = 0x0
        pdict.add_context(t_ctxt)

        lat_ctxt = ParameterContext('lat', param_type=QuantityType(value_encoding=np.float32))
        lat_ctxt.axis = AxisTypeEnum.LAT
        lat_ctxt.uom = 'degree_north'
        lat_ctxt.fill_value = 0e0
        pdict.add_context(lat_ctxt)

        lon_ctxt = ParameterContext('lon', param_type=QuantityType(value_encoding=np.float32))
        lon_ctxt.axis = AxisTypeEnum.LON
        lon_ctxt.uom = 'degree_east'
        lon_ctxt.fill_value = 0e0
        pdict.add_context(lon_ctxt)

        temp_ctxt = ParameterContext('temp', param_type=QuantityType(value_encoding=np.float32))
        temp_ctxt.uom = 'degree_Celsius'
        temp_ctxt.fill_value = 0e0
        pdict.add_context(temp_ctxt)

        cond_ctxt = ParameterContext('conductivity', param_type=QuantityType(value_encoding=np.float32))
        cond_ctxt.uom = 'unknown'
        cond_ctxt.fill_value = 0e0
        pdict.add_context(cond_ctxt)

        data_ctxt = ParameterContext('data', param_type=QuantityType(value_encoding=np.int8))
        data_ctxt.uom = 'byte'
        data_ctxt.fill_value = 0x0
        pdict.add_context(data_ctxt)

        pres_ctxt = ParameterContext('pressure', param_type=QuantityType(value_encoding=np.float32))
        pres_ctxt.uom = 'Pascal'
        pres_ctxt.fill_value = 0x0
        pdict.add_context(pres_ctxt)

        sal_ctxt = ParameterContext('salinity', param_type=QuantityType(value_encoding=np.float32))
        sal_ctxt.uom = 'PSU'
        sal_ctxt.fill_value = 0x0
        pdict.add_context(sal_ctxt)

        dens_ctxt = ParameterContext('density', param_type=QuantityType(value_encoding=np.float32))
        dens_ctxt.uom = 'unknown'
        dens_ctxt.fill_value = 0x0
        pdict.add_context(dens_ctxt)

        return pdict
        
    def build_coverage(self):
        pass

Example #37
    def test_lookup_values(self):
        ph = ParameterHelper(self.dataset_management, self.addCleanup)
        pdict_id = ph.create_lookups()
        stream_def_id = self.pubsubcli.create_stream_definition('lookup', parameter_dictionary_id=pdict_id)
        self.addCleanup(self.pubsubcli.delete_stream_definition, stream_def_id)

        data_product = DataProduct(name='lookup data product')
        tdom, sdom = time_series_domain()
        data_product.temporal_domain = tdom.dump()
        data_product.spatial_domain = sdom.dump()

        data_product_id = self.dpsc_cli.create_data_product(data_product, stream_definition_id=stream_def_id)
        self.addCleanup(self.dpsc_cli.delete_data_product, data_product_id)
        data_producer = DataProducer(name='producer')
        data_producer.producer_context = DataProcessProducerContext()
        data_producer.producer_context.configuration['qc_keys'] = ['offset_document']
        data_producer_id, _ = self.rrclient.create(data_producer)
        self.addCleanup(self.rrclient.delete, data_producer_id)
        assoc,_ = self.rrclient.create_association(subject=data_product_id, object=data_producer_id, predicate=PRED.hasDataProducer)
        self.addCleanup(self.rrclient.delete_association, assoc)

        document_keys = self.damsclient.list_qc_references(data_product_id)
            
        self.assertEquals(document_keys, ['offset_document'])
        svm = StoredValueManager(self.container)
        svm.stored_value_cas('offset_document', {'offset_a':2.0})
        self.dpsc_cli.activate_data_product_persistence(data_product_id)
        dataset_ids, _ = self.rrclient.find_objects(subject=data_product_id, predicate=PRED.hasDataset, id_only=True)
        dataset_id = dataset_ids[0]

        dataset_monitor = DatasetMonitor(dataset_id)
        self.addCleanup(dataset_monitor.stop)

        rdt = RecordDictionaryTool(stream_definition_id=stream_def_id)
        rdt['time'] = [0]
        rdt['temp'] = [20.]
        granule = rdt.to_granule()

        stream_ids, _ = self.rrclient.find_objects(subject=data_product_id, predicate=PRED.hasStream, id_only=True)
        stream_id = stream_ids[0]
        route = self.pubsubcli.read_stream_route(stream_id=stream_id)

        publisher = StandaloneStreamPublisher(stream_id, route)
        publisher.publish(granule)

        self.assertTrue(dataset_monitor.event.wait(10))

        granule = self.data_retriever.retrieve(dataset_id)
        rdt2 = RecordDictionaryTool.load_from_granule(granule)
        np.testing.assert_array_equal(rdt['temp'], rdt2['temp'])
        np.testing.assert_array_almost_equal(rdt2['calibrated'], np.array([22.0]))


        svm.stored_value_cas('updated_document', {'offset_a':3.0})
        dataset_monitor = DatasetMonitor(dataset_id)
        self.addCleanup(dataset_monitor.stop)
        ep = EventPublisher(event_type=OT.ExternalReferencesUpdatedEvent)
        ep.publish_event(origin=data_product_id, reference_keys=['updated_document'])

        rdt = RecordDictionaryTool(stream_definition_id=stream_def_id)
        rdt['time'] = [1]
        rdt['temp'] = [20.]
        granule = rdt.to_granule()
        gevent.sleep(2) # Yield so that the event goes through
        publisher.publish(granule)
        self.assertTrue(dataset_monitor.event.wait(10))

        granule = self.data_retriever.retrieve(dataset_id)
        rdt2 = RecordDictionaryTool.load_from_granule(granule)
        np.testing.assert_array_equal(rdt2['temp'],np.array([20.,20.]))
        np.testing.assert_array_almost_equal(rdt2['calibrated'], np.array([22.0,23.0]))
Example #38
class CoverageCraft(object):
    '''
    AKA the BlackBox
    PFM courtesy of Tommy Vandesteene
    '''
    def __init__(self, coverage=None, granule=None):
        if coverage is None:
            self.coverage = self.create_coverage()
            self.rdt = RecordDictionaryTool(
                param_dictionary=self.coverage.parameter_dictionary)
        else:
            self.coverage = coverage
            if granule is not None:
                self.sync_with_granule(granule)
            else:
                self.rdt = RecordDictionaryTool(
                    param_dictionary=self.coverage.parameter_dictionary)
        self.pdict = self.coverage.parameter_dictionary

    def sync_rdt_with_granule(self, granule):
        rdt = RecordDictionaryTool.load_from_granule(granule)
        self.rdt = rdt
        return rdt

    def sync_with_granule(self, granule=None):
        if granule is not None:
            self.sync_rdt_with_granule(granule)
        if self.rdt is None:
            log.error('Failed to add granule, no granule assigned.')
            return
        start_index = self.coverage.num_timesteps
        elements = self.rdt._shp[0]
        if not elements: return
        self.coverage.insert_timesteps(elements)

        for k, v in self.rdt.iteritems():
            log.info("key: %s", k)
            log.info("value: %s", v)
            slice_ = slice(start_index, None)
            log.info("slice: %s", slice_)
            self.coverage.set_parameter_values(param_name=k,
                                               tdoa=slice_,
                                               value=v)

    def sync_rdt_with_coverage(self,
                               coverage=None,
                               tdoa=None,
                               start_time=None,
                               end_time=None,
                               stride_time=None,
                               parameters=None):
        '''
        Builds a granule based on the coverage
        '''
        if coverage is None:
            coverage = self.coverage

        slice_ = slice(None)  # Defaults to all values
        if tdoa is not None and isinstance(tdoa, slice):
            slice_ = tdoa

        elif stride_time is not None:
            validate_is_instance(start_time, Number,
                                 'start_time must be a number for striding.')
            validate_is_instance(end_time, Number,
                                 'end_time must be a number for striding.')
            validate_is_instance(stride_time, Number,
                                 'stride_time must be a number for striding.')
            ugly_range = np.arange(start_time, end_time, stride_time)
            idx_values = [
                TimeUtils.get_relative_time(coverage, i) for i in ugly_range
            ]
            slice_ = [idx_values]

        elif not (start_time is None and end_time is None):
            time_var = coverage._temporal_param_name
            uom = coverage.get_parameter_context(time_var).uom
            if start_time is not None:
                start_units = TimeUtils.ts_to_units(uom, start_time)
                log.info('Units: %s', start_units)
                start_idx = TimeUtils.get_relative_time(coverage, start_units)
                log.info('Start Index: %s', start_idx)
                start_time = start_idx
            if end_time is not None:
                end_units = TimeUtils.ts_to_units(uom, end_time)
                log.info('End units: %s', end_units)
                end_idx = TimeUtils.get_relative_time(coverage, end_units)
                log.info('End index: %s', end_idx)
                end_time = end_idx
            slice_ = slice(start_time, end_time, stride_time)
            log.info('Slice: %s', slice_)

        if parameters is not None:
            pdict = ParameterDictionary()
            params = set(coverage.list_parameters()).intersection(parameters)
            for param in params:
                pdict.add_context(coverage.get_parameter_context(param))
            rdt = RecordDictionaryTool(param_dictionary=pdict)
            self.pdict = pdict
        else:
            rdt = RecordDictionaryTool(
                param_dictionary=coverage.parameter_dictionary)

        fields = coverage.list_parameters()
        if parameters is not None:
            fields = set(fields).intersection(parameters)

        for d in fields:
            rdt[d] = coverage.get_parameter_values(d, tdoa=slice_)
        self.rdt = rdt  # Sync

    def to_granule(self):
        return self.rdt.to_granule()

    @classmethod
    def create_coverage(cls):
        pdict = cls.create_parameters()
        sdom, tdom = cls.create_domains()

        scov = SimplexCoverage('sample grid coverage_model', pdict, tdom, sdom)

        return scov

    @classmethod
    def create_domains(cls):
        '''
        WARNING: This method is a wrapper intended only for tests, it should not be used in production code.
        It probably will not align to most datasets.
        '''
        tcrs = CRS([AxisTypeEnum.TIME])
        scrs = CRS([AxisTypeEnum.LON, AxisTypeEnum.LAT, AxisTypeEnum.HEIGHT])

        tdom = GridDomain(GridShape('temporal', [0]), tcrs,
                          MutabilityEnum.EXTENSIBLE)
        sdom = GridDomain(
            GridShape('spatial', [0]), scrs,
            MutabilityEnum.IMMUTABLE)  # Dimensionality is excluded for now
        return sdom, tdom

    @classmethod
    def create_parameters(cls):
        '''
        WARNING: This method is a wrapper intended only for tests, it should not be used in production code.
        It probably will not align to most datasets.
        '''
        pdict = ParameterDictionary()
        t_ctxt = ParameterContext(
            'time', param_type=QuantityType(value_encoding=np.int64))
        t_ctxt.axis = AxisTypeEnum.TIME
        t_ctxt.uom = 'seconds since 1970-01-01'
        t_ctxt.fill_value = 0x0
        pdict.add_context(t_ctxt)

        lat_ctxt = ParameterContext(
            'lat', param_type=QuantityType(value_encoding=np.float32))
        lat_ctxt.axis = AxisTypeEnum.LAT
        lat_ctxt.uom = 'degree_north'
        lat_ctxt.fill_value = 0e0
        pdict.add_context(lat_ctxt)

        lon_ctxt = ParameterContext(
            'lon', param_type=QuantityType(value_encoding=np.float32))
        lon_ctxt.axis = AxisTypeEnum.LON
        lon_ctxt.uom = 'degree_east'
        lon_ctxt.fill_value = 0e0
        pdict.add_context(lon_ctxt)

        temp_ctxt = ParameterContext(
            'temp', param_type=QuantityType(value_encoding=np.float32))
        temp_ctxt.uom = 'degree_Celsius'
        temp_ctxt.fill_value = 0e0
        pdict.add_context(temp_ctxt)

        cond_ctxt = ParameterContext(
            'conductivity', param_type=QuantityType(value_encoding=np.float32))
        cond_ctxt.uom = 'unknown'
        cond_ctxt.fill_value = 0e0
        pdict.add_context(cond_ctxt)

        data_ctxt = ParameterContext(
            'data', param_type=QuantityType(value_encoding=np.int8))
        data_ctxt.uom = 'byte'
        data_ctxt.fill_value = 0x0
        pdict.add_context(data_ctxt)

        pres_ctxt = ParameterContext(
            'pressure', param_type=QuantityType(value_encoding=np.float32))
        pres_ctxt.uom = 'Pascal'
        pres_ctxt.fill_value = 0x0
        pdict.add_context(pres_ctxt)

        sal_ctxt = ParameterContext(
            'salinity', param_type=QuantityType(value_encoding=np.float32))
        sal_ctxt.uom = 'PSU'
        sal_ctxt.fill_value = 0x0
        pdict.add_context(sal_ctxt)

        dens_ctxt = ParameterContext(
            'density', param_type=QuantityType(value_encoding=np.float32))
        dens_ctxt.uom = 'unknown'
        dens_ctxt.fill_value = 0x0
        pdict.add_context(dens_ctxt)

        return pdict

    def build_coverage(self):
        pass

Example #39
    def test_activate_suspend_data_product(self):

        #------------------------------------------------------------------------------------------------
        # create a stream definition for the data from the ctd simulator
        #------------------------------------------------------------------------------------------------
        pdict_id = self.dataset_management.read_parameter_dictionary_by_name('ctd_parsed_param_dict', id_only=True)
        ctd_stream_def_id = self.pubsubcli.create_stream_definition(name='Simulated CTD data', parameter_dictionary_id=pdict_id)
        log.debug("Created stream def id %s" % ctd_stream_def_id)

        #------------------------------------------------------------------------------------------------
        # test creating a new data product w/o a stream definition
        #------------------------------------------------------------------------------------------------
        # Construct temporal and spatial Coordinate Reference System objects

        dp_obj = IonObject(RT.DataProduct,
            name='DP1',
            description='some new dp')

        log.debug("Created an IonObject for a data product: %s" % dp_obj)

        #------------------------------------------------------------------------------------------------
        # Create a set of ParameterContext objects to define the parameters in the coverage, add each to the ParameterDictionary
        #------------------------------------------------------------------------------------------------

        dp_id = self.dpsc_cli.create_data_product(data_product= dp_obj,
            stream_definition_id=ctd_stream_def_id)

        #------------------------------------------------------------------------------------------------
        # Subscribe to persist events
        #------------------------------------------------------------------------------------------------
        queue = gevent.queue.Queue()

        def info_event_received(message, headers):
            queue.put(message)

        es = EventSubscriber(event_type=OT.InformationContentStatusEvent, callback=info_event_received, origin=dp_id, auto_delete=True)
        es.start()
        self.addCleanup(es.stop)


        #------------------------------------------------------------------------------------------------
        # test activate and suspend data product persistence
        #------------------------------------------------------------------------------------------------
        self.dpsc_cli.activate_data_product_persistence(dp_id)
        
        dp_obj = self.dpsc_cli.read_data_product(dp_id)
        self.assertIsNotNone(dp_obj)

        dataset_ids, _ = self.rrclient.find_objects(subject=dp_id, predicate=PRED.hasDataset, id_only=True)
        if not dataset_ids:
            raise NotFound("Data Product %s dataset  does not exist" % str(dp_id))
        dataset_id = dataset_ids[0]


        # Check that the streams associated with the data product are persisted
        stream_ids, _ =  self.rrclient.find_objects(dp_id,PRED.hasStream,RT.Stream,True)
        for stream_id in stream_ids:
            self.assertTrue(self.ingestclient.is_persisted(stream_id))

        stream_id = stream_ids[0]
        route = self.pubsubcli.read_stream_route(stream_id=stream_id)

        rdt = RecordDictionaryTool(stream_definition_id=ctd_stream_def_id)
        rdt['time'] = np.arange(20)
        rdt['temp'] = np.arange(20)

        publisher = StandaloneStreamPublisher(stream_id,route)
        
        dataset_modified = Event()
        def cb(*args, **kwargs):
            dataset_modified.set()
        es = EventSubscriber(event_type=OT.DatasetModified, callback=cb, origin=dataset_id, auto_delete=True)
        es.start()
        self.addCleanup(es.stop)

        publisher.publish(rdt.to_granule())

        self.assertTrue(dataset_modified.wait(30))

        #--------------------------------------------------------------------------------
        # Now get the data in one chunk using an RPC Call to start_retrieve
        #--------------------------------------------------------------------------------

        replay_data = self.data_retriever.retrieve(dataset_ids[0])
        self.assertIsInstance(replay_data, Granule)

        log.debug("The data retriever was able to replay the dataset that was attached to the data product "
                  "we wanted to be persisted. Therefore the data product was indeed persisted with "
                  "otherwise we could not have retrieved its dataset using the data retriever. Therefore "
                  "this demonstration shows that L4-CI-SA-RQ-267 is satisfied: 'Data product management shall persist data products'")

        data_product_object = self.rrclient.read(dp_id)
        self.assertEquals(data_product_object.name,'DP1')
        self.assertEquals(data_product_object.description,'some new dp')

        log.debug("Towards L4-CI-SA-RQ-308: 'Data product management shall persist data product metadata'. "
                  " Attributes in create for the data product obj, name= '%s', description='%s', match those of object from the "
                  "resource registry, name='%s', desc='%s'" % (dp_obj.name, dp_obj.description,data_product_object.name,
                                                           data_product_object.description))

        #------------------------------------------------------------------------------------------------
        # test suspend data product persistence
        #------------------------------------------------------------------------------------------------
        self.dpsc_cli.suspend_data_product_persistence(dp_id)


        dataset_modified.clear()

        rdt['time'] = np.arange(20,40)

        publisher.publish(rdt.to_granule())
        self.assertFalse(dataset_modified.wait(2))

        self.dpsc_cli.activate_data_product_persistence(dp_id)
        dataset_modified.clear()

        publisher.publish(rdt.to_granule())
        self.assertTrue(dataset_modified.wait(30))

        granule = self.data_retriever.retrieve(dataset_id)
        rdt = RecordDictionaryTool.load_from_granule(granule)
        np.testing.assert_array_almost_equal(rdt['time'], np.arange(40))


        dataset_ids, _ = self.rrclient.find_objects(dp_id, PRED.hasDataset, id_only=True)
        self.assertEquals(len(dataset_ids), 1)

        self.dpsc_cli.suspend_data_product_persistence(dp_id)
        self.dpsc_cli.force_delete_data_product(dp_id)
        # now try to get the deleted dp object

        with self.assertRaises(NotFound):
            dp_obj = self.rrclient.read(dp_id)


        info_event_counter = 0
        runtime = 0
        starttime = time.time()
        caught_events = []

        # Check that the four InformationContentStatusEvents were received
        while info_event_counter < 4 and runtime < 60:
            a = queue.get(timeout=60)
            caught_events.append(a)
            info_event_counter += 1
            runtime = time.time() - starttime

        self.assertEquals(info_event_counter, 4)
Example #40
    def test_granule(self):

        pdict_id = self.dataset_management.read_parameter_dictionary_by_name(
            'ctd_parsed_param_dict', id_only=True)
        stream_def_id = self.pubsub_management.create_stream_definition(
            'ctd', parameter_dictionary_id=pdict_id)
        pdict = DatasetManagementService.get_parameter_dictionary_by_name(
            'ctd_parsed_param_dict')
        self.addCleanup(self.pubsub_management.delete_stream_definition,
                        stream_def_id)

        stream_id, route = self.pubsub_management.create_stream(
            'ctd_stream', 'xp1', stream_definition_id=stream_def_id)
        self.addCleanup(self.pubsub_management.delete_stream, stream_id)
        self.xps.append('xp1')
        publisher = StandaloneStreamPublisher(stream_id, route)

        subscriber = StandaloneStreamSubscriber('sub', self.verify_incoming)
        subscriber.start()

        subscription_id = self.pubsub_management.create_subscription(
            'sub', stream_ids=[stream_id])
        self.xns.append('sub')
        self.pubsub_management.activate_subscription(subscription_id)

        rdt = RecordDictionaryTool(stream_definition_id=stream_def_id)
        rdt['time'] = np.arange(10)
        rdt['temp'] = np.random.randn(10) * 10 + 30
        rdt['pressure'] = [20] * 10

        self.assertEquals(set(pdict.keys()), set(rdt.fields))
        self.assertEquals(pdict.temporal_parameter_name,
                          rdt.temporal_parameter)

        self.rdt = rdt
        self.data_producer_id = 'data_producer'
        self.provider_metadata_update = {1: 1}

        publisher.publish(
            rdt.to_granule(data_producer_id='data_producer',
                           provider_metadata_update={1: 1}))

        self.assertTrue(self.event.wait(10))

        self.pubsub_management.deactivate_subscription(subscription_id)
        self.pubsub_management.delete_subscription(subscription_id)

        filtered_stream_def_id = self.pubsub_management.create_stream_definition(
            'filtered',
            parameter_dictionary_id=pdict_id,
            available_fields=['time', 'temp'])
        self.addCleanup(self.pubsub_management.delete_stream_definition,
                        filtered_stream_def_id)
        rdt = RecordDictionaryTool(stream_definition_id=filtered_stream_def_id)
        self.assertEquals(rdt._available_fields, ['time', 'temp'])
        rdt['time'] = np.arange(20)
        rdt['temp'] = np.arange(20)
        with self.assertRaises(KeyError):
            rdt['pressure'] = np.arange(20)

        granule = rdt.to_granule()
        rdt2 = RecordDictionaryTool.load_from_granule(granule)
        self.assertEquals(rdt._available_fields, rdt2._available_fields)
        self.assertEquals(rdt.fields, rdt2.fields)
        for k, v in rdt.iteritems():
            self.assertTrue(np.array_equal(rdt[k], rdt2[k]))

        rdt = RecordDictionaryTool(stream_definition_id=stream_def_id)
        rdt['time'] = np.array([None, None, None])
        self.assertTrue(rdt['time'] is None)

        rdt['time'] = np.array([None, 1, 2])
        self.assertEquals(rdt['time'][0], rdt.fill_value('time'))
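
        # Fill-value semantics shown above: assigning an array of all Nones
        # leaves the field unset (it reads back as None), while a partially
        # None array has each None replaced by the parameter's fill value.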
Example #41
    def test_activate_suspend_data_product(self):

        #------------------------------------------------------------------------------------------------
        # create a stream definition for the data from the ctd simulator
        #------------------------------------------------------------------------------------------------
        pdict_id = self.dataset_management.read_parameter_dictionary_by_name(
            'ctd_parsed_param_dict', id_only=True)
        ctd_stream_def_id = self.pubsubcli.create_stream_definition(
            name='Simulated CTD data', parameter_dictionary_id=pdict_id)
        log.debug("Created stream def id %s" % ctd_stream_def_id)

        #------------------------------------------------------------------------------------------------
        # test creating a new data product w/o a stream definition
        #------------------------------------------------------------------------------------------------
        # Construct temporal and spatial Coordinate Reference System objects

        dp_obj = IonObject(RT.DataProduct,
                           name='DP1',
                           description='some new dp')

        log.debug("Created an IonObject for a data product: %s" % dp_obj)

        #------------------------------------------------------------------------------------------------
        # Create a set of ParameterContext objects to define the parameters in the coverage, add each to the ParameterDictionary
        #------------------------------------------------------------------------------------------------

        dp_id = self.dpsc_cli.create_data_product(
            data_product=dp_obj, stream_definition_id=ctd_stream_def_id)

        #------------------------------------------------------------------------------------------------
        # Subscribe to persist events
        #------------------------------------------------------------------------------------------------
        queue = gevent.queue.Queue()

        def info_event_received(message, headers):
            queue.put(message)

        es = EventSubscriber(event_type=OT.InformationContentStatusEvent,
                             callback=info_event_received,
                             origin=dp_id,
                             auto_delete=True)
        es.start()
        self.addCleanup(es.stop)

        #------------------------------------------------------------------------------------------------
        # test activate and suspend data product persistence
        #------------------------------------------------------------------------------------------------
        self.dpsc_cli.activate_data_product_persistence(dp_id)

        dp_obj = self.dpsc_cli.read_data_product(dp_id)
        self.assertIsNotNone(dp_obj)

        dataset_ids, _ = self.rrclient.find_objects(subject=dp_id,
                                                    predicate=PRED.hasDataset,
                                                    id_only=True)
        if not dataset_ids:
            raise NotFound("Data Product %s dataset  does not exist" %
                           str(dp_id))
        dataset_id = dataset_ids[0]

        # Check that the streams associated with the data product are persisted
        stream_ids, _ = self.rrclient.find_objects(dp_id, PRED.hasStream,
                                                   RT.Stream, True)
        for stream_id in stream_ids:
            self.assertTrue(self.ingestclient.is_persisted(stream_id))

        stream_id = stream_ids[0]
        route = self.pubsubcli.read_stream_route(stream_id=stream_id)

        rdt = RecordDictionaryTool(stream_definition_id=ctd_stream_def_id)
        rdt['time'] = np.arange(20)
        rdt['temp'] = np.arange(20)

        publisher = StandaloneStreamPublisher(stream_id, route)

        dataset_modified = Event()

        def cb(*args, **kwargs):
            dataset_modified.set()

        es = EventSubscriber(event_type=OT.DatasetModified,
                             callback=cb,
                             origin=dataset_id,
                             auto_delete=True)
        es.start()
        self.addCleanup(es.stop)

        publisher.publish(rdt.to_granule())

        self.assertTrue(dataset_modified.wait(30))

        #--------------------------------------------------------------------------------
        # Now get the data in one chunk using an RPC Call to start_retrieve
        #--------------------------------------------------------------------------------

        replay_data = self.data_retriever.retrieve(dataset_ids[0])
        self.assertIsInstance(replay_data, Granule)

        log.debug(
            "The data retriever was able to replay the dataset that was attached to the data product "
            "we wanted to be persisted. Therefore the data product was indeed persisted; "
            "otherwise we could not have retrieved its dataset using the data retriever. "
            "This demonstrates that L4-CI-SA-RQ-267 is satisfied: 'Data product management shall persist data products'"
        )

        data_product_object = self.rrclient.read(dp_id)
        self.assertEquals(data_product_object.name, 'DP1')
        self.assertEquals(data_product_object.description, 'some new dp')

        log.debug(
            "Towards L4-CI-SA-RQ-308: 'Data product management shall persist data product metadata'. "
            "Attributes in create for the data product obj, name='%s', description='%s', match those of the object from the "
            "resource registry, name='%s', desc='%s'" %
            (dp_obj.name, dp_obj.description, data_product_object.name,
             data_product_object.description))

        #------------------------------------------------------------------------------------------------
        # test suspend data product persistence
        #------------------------------------------------------------------------------------------------
        self.dpsc_cli.suspend_data_product_persistence(dp_id)

        dataset_modified.clear()

        rdt['time'] = np.arange(20, 40)

        publisher.publish(rdt.to_granule())
        self.assertFalse(dataset_modified.wait(2))

        self.dpsc_cli.activate_data_product_persistence(dp_id)
        dataset_modified.clear()

        publisher.publish(rdt.to_granule())
        self.assertTrue(dataset_modified.wait(30))

        granule = self.data_retriever.retrieve(dataset_id)
        rdt = RecordDictionaryTool.load_from_granule(granule)
        np.testing.assert_array_almost_equal(rdt['time'], np.arange(40))

        dataset_ids, _ = self.rrclient.find_objects(dp_id,
                                                    PRED.hasDataset,
                                                    id_only=True)
        self.assertEquals(len(dataset_ids), 1)

        self.dpsc_cli.suspend_data_product_persistence(dp_id)
        self.dpsc_cli.force_delete_data_product(dp_id)
        # now try to get the deleted dp object

        with self.assertRaises(NotFound):
            dp_obj = self.rrclient.read(dp_id)

        info_event_counter = 0
        runtime = 0
        starttime = time.time()
        caught_events = []

        # Check that the four InformationContentStatusEvents were received
        while info_event_counter < 4 and runtime < 60:
            a = queue.get(timeout=60)
            caught_events.append(a)
            info_event_counter += 1
            runtime = time.time() - starttime

        self.assertEquals(info_event_counter, 4)
Example #42
    def test_granule(self):
        
        pdict_id = self.dataset_management.read_parameter_dictionary_by_name('ctd_parsed_param_dict', id_only=True)
        stream_def_id = self.pubsub_management.create_stream_definition('ctd', parameter_dictionary_id=pdict_id)
        pdict = DatasetManagementService.get_parameter_dictionary_by_name('ctd_parsed_param_dict')
        self.addCleanup(self.pubsub_management.delete_stream_definition,stream_def_id)

        stream_id, route = self.pubsub_management.create_stream('ctd_stream', 'xp1', stream_definition_id=stream_def_id)
        self.addCleanup(self.pubsub_management.delete_stream,stream_id)
        self.xps.append('xp1')
        publisher = StandaloneStreamPublisher(stream_id, route)

        subscriber = StandaloneStreamSubscriber('sub', self.verify_incoming)
        subscriber.start()

        subscription_id = self.pubsub_management.create_subscription('sub', stream_ids=[stream_id])
        self.xns.append('sub')
        self.pubsub_management.activate_subscription(subscription_id)


        rdt = RecordDictionaryTool(stream_definition_id=stream_def_id)
        rdt['time'] = np.arange(10)
        rdt['temp'] = np.random.randn(10) * 10 + 30
        rdt['pressure'] = [20] * 10

        self.assertEquals(set(pdict.keys()), set(rdt.fields))
        self.assertEquals(pdict.temporal_parameter_name, rdt.temporal_parameter)

        self.rdt = rdt
        self.data_producer_id = 'data_producer'
        self.provider_metadata_update = {1:1}

        publisher.publish(rdt.to_granule(data_producer_id='data_producer', provider_metadata_update={1:1}))

        self.assertTrue(self.event.wait(10))
        
        self.pubsub_management.deactivate_subscription(subscription_id)
        self.pubsub_management.delete_subscription(subscription_id)
        
        filtered_stream_def_id = self.pubsub_management.create_stream_definition('filtered', parameter_dictionary_id=pdict_id, available_fields=['time', 'temp'])
        self.addCleanup(self.pubsub_management.delete_stream_definition, filtered_stream_def_id)
        rdt = RecordDictionaryTool(stream_definition_id=filtered_stream_def_id)
        self.assertEquals(rdt._available_fields,['time','temp'])
        rdt['time'] = np.arange(20)
        rdt['temp'] = np.arange(20)
        with self.assertRaises(KeyError):
            rdt['pressure'] = np.arange(20)

        granule = rdt.to_granule()
        rdt2 = RecordDictionaryTool.load_from_granule(granule)
        self.assertEquals(rdt._available_fields, rdt2._available_fields)
        self.assertEquals(rdt.fields, rdt2.fields)
        for k,v in rdt.iteritems():
            self.assertTrue(np.array_equal(rdt[k], rdt2[k]))
        
        rdt = RecordDictionaryTool(stream_definition_id=stream_def_id)
        rdt['time'] = np.array([None,None,None])
        self.assertTrue(rdt['time'] is None)
        
        rdt['time'] = np.array([None, 1, 2])
        self.assertEquals(rdt['time'][0], rdt.fill_value('time'))