def test_ingestion_pause(self):
        ctd_stream_id, route, stream_def_id, dataset_id = self.make_simple_dataset()
        ingestion_config_id = self.get_ingestion_config()
        self.start_ingestion(ctd_stream_id, dataset_id)
        self.addCleanup(self.stop_ingestion, ctd_stream_id)

        rdt = RecordDictionaryTool(stream_definition_id=stream_def_id)
        rdt['time'] = np.arange(10)

        publisher = StandaloneStreamPublisher(ctd_stream_id, route)
        monitor = DatasetMonitor(dataset_id)
        self.addCleanup(monitor.stop)
        publisher.publish(rdt.to_granule())
        self.assertTrue(monitor.wait())
        granule = self.data_retriever.retrieve(dataset_id)


        self.ingestion_management.pause_data_stream(ctd_stream_id, ingestion_config_id)

        # While the stream is paused, granules published to it should not be
        # ingested, so the monitor must not fire.
        monitor.event.clear()
        rdt['time'] = np.arange(10,20)
        publisher.publish(rdt.to_granule())
        self.assertFalse(monitor.event.wait(1))

        # Resuming the stream should flush through the granule that was published
        # while ingestion was paused.
        self.ingestion_management.resume_data_stream(ctd_stream_id, ingestion_config_id)

        self.assertTrue(monitor.wait())

        granule = self.data_retriever.retrieve(dataset_id)
        rdt2 = RecordDictionaryTool.load_from_granule(granule)
        np.testing.assert_array_almost_equal(rdt2['time'], np.arange(20))
 def loclrng_checks(self):
     ref = 'GP03FLMA-RI001-06-CTDMOG999'
     self.stream_def_id = self.create_stream_definition(
         'local range check',
         parameter_dictionary_id=self.pdict_id,
         stream_configuration={'reference_designator': ref})
     self.rdt = RecordDictionaryTool(
         stream_definition_id=self.stream_def_id)
     self.rdt['time'] = np.arange(10)
     lon, lat, pressure = [-124.832179, 46.436926, 37.5]
     self.rdt['lon'] = lon
     self.rdt['lat'] = lat
     self.rdt['pressure'] = [37.5] * 10
     self.rdt['temp'] = [30] * 10
     self.rdt.fetch_lookup_values()
     doc = self.svm.read_value('lrt_%s_TEMPWAT' % ref)
     self.assertEquals(doc['dims'], ['lon', 'lat', 'pressure'])
     doc = self.svm.read_value('lrt_%s_PRESWAT' % ref)
     self.assertEquals(doc['dims'], ['temp'])
     np.testing.assert_array_equal(self.rdt['tempwat_loclrng_qc'], [1] * 10)
     self.rdt['pressure'] = [37.5] * 9 + [75.]
     self.rdt['temp'] = [30] * 9 + [18]
     np.testing.assert_array_equal(self.rdt['tempwat_loclrng_qc'],
                                   [1] * 9 + [0])
     self.rdt['temp'] = [15] * 5 + [35] * 5
     self.rdt['pressure'] = [10] * 10
     np.testing.assert_array_equal(self.rdt['preswat_loclrng_qc'],
                                   [1] * 5 + [0] * 5)
 def new_rdt(self, ref='QCTEST'):
     self.stream_def_id = self.create_stream_definition(
         uuid4().hex,
         parameter_dictionary_id=self.pdict_id,
         stream_configuration={'reference_designator': ref})
     self.rdt = RecordDictionaryTool(
         stream_definition_id=self.stream_def_id)
    def test_qc_events(self):
        ph = ParameterHelper(self.dataset_management, self.addCleanup)
        pdict_id = ph.create_qc_pdict()
        stream_def_id = self.pubsub_management.create_stream_definition('qc stream def', parameter_dictionary_id=pdict_id)
        self.addCleanup(self.pubsub_management.delete_stream_definition, stream_def_id)

        stream_id, route = self.pubsub_management.create_stream('qc stream', exchange_point=self.exchange_point_name, stream_definition_id=stream_def_id)
        self.addCleanup(self.pubsub_management.delete_stream, stream_id)

        ingestion_config_id = self.get_ingestion_config()
        dataset_id = self.create_dataset(pdict_id)
        config = DotDict()

        self.ingestion_management.persist_data_stream(stream_id=stream_id, ingestion_configuration_id=ingestion_config_id, dataset_id=dataset_id, config=config)
        self.addCleanup(self.ingestion_management.unpersist_data_stream, stream_id, ingestion_config_id)

        publisher = StandaloneStreamPublisher(stream_id, route)
        rdt = RecordDictionaryTool(stream_definition_id=stream_def_id)
        rdt['time'] = np.arange(10)
        rdt['temp'] = np.arange(10) * 3

        verified = Event()
        def verification(event, *args, **kwargs):
            self.assertEquals(event.qc_parameter, 'temp_qc')
            self.assertEquals(event.temporal_value, 7)
            verified.set()

        es = EventSubscriber(event_type=OT.ParameterQCEvent, origin=dataset_id, callback=verification, auto_delete=True)
        es.start()
        self.addCleanup(es.stop)

        publisher.publish(rdt.to_granule())
        self.assertTrue(verified.wait(10))
    def create_test_granules(self, buffer_data=False):
        """
        Generate test granules from particles.  If buffer_data is True, particles
        are buffered into a single granule until a particle with the new_sequence
        flag set starts a new one.  This method emulates the agent_stream_publisher
        module.
        :return: list of granules generated.
        """
        base_timestamp = 3583861263.0
        connection_index = 0

        particles = []
        particles.append(
            self.get_particle(base_timestamp, 10.5914, 161.06, 4.1870, 2693.0))
        particles.append(
            self.get_particle(base_timestamp + 1, 10.5915, 161.07, 4.1871,
                              2693.1))
        particles.append(
            self.get_particle(base_timestamp + 2, 10.5916, 161.08, 4.1872,
                              2693.2))
        particles.append(
            self.get_particle(base_timestamp + 3, 10.5917, 161.09, 4.1873,
                              2693.3, True))
        particles.append(
            self.get_particle(base_timestamp + 4, 10.5918, 161.10, 4.1874,
                              2693.4))

        data_groups = []
        result_granules = []
        data_groups_index = 0

        for particle in particles:
            # If we need a new connection then start a new group, but only if we have found
            # something in the current group
            if (particle.get('new_sequence', False) or not buffer_data) and \
               (len(data_groups) > 0 and len(data_groups[data_groups_index]) > 0):
                data_groups_index += 1

            if len(data_groups) <= data_groups_index:
                data_groups.append([])

            data_groups[data_groups_index].append(particle)

        log.debug("Granules to create: %s", len(data_groups))

        for data in data_groups:
            connection_id = uuid.uuid4()
            connection_index += 1
            rdt = RecordDictionaryTool(param_dictionary=self.get_param_dict())

            rdt = populate_rdt(rdt, data)

            g = rdt.to_granule(data_producer_id='agent_res_id',
                               connection_id=connection_id.hex,
                               connection_index=str(connection_index))

            result_granules.append(g)

        return result_granules
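    # Usage sketch (hypothetical call against the particles built above): with
    # buffer_data=True the five particles are split only at the new_sequence
    # flag, yielding two granules; with buffer_data=False every particle starts
    # its own group, yielding five granules.
    #
    #   granules = self.create_test_granules(buffer_data=True)   # -> 2 granules
    #   granules = self.create_test_granules(buffer_data=False)  # -> 5 granules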
 def create_granule(self, stream_name, param_dict_name, particle_list):
     pd_id = self.dataset_management.read_parameter_dictionary_by_name(param_dict_name, id_only=True)
     stream_def_id = self.pubsub_client.create_stream_definition(name=stream_name, parameter_dictionary_id=pd_id)        
     stream_def = self.pubsub_client.read_stream_definition(stream_def_id)
     rdt = RecordDictionaryTool(stream_definition=stream_def)
     rdt = populate_rdt(rdt, particle_list)
     log.trace("RDT: %s", str(rdt))
     g = rdt.to_granule(data_producer_id='fake_agent_id')
     return g
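 # Usage sketch (hypothetical arguments): build one granule from a list of parsed
 # particle dicts using a named parameter dictionary.
 #
 #   granule = self.create_granule('parsed', 'ctd_parsed_param_dict', particles)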
    def publish_hifi(self,stream_id,stream_route,offset=0):
        pub = StandaloneStreamPublisher(stream_id, stream_route)

        stream_def = self.pubsub_management.read_stream_definition(stream_id=stream_id)
        stream_def_id = stream_def._id
        rdt = RecordDictionaryTool(stream_definition_id=stream_def_id)
        rdt['time'] = np.arange(10) + (offset * 10)
        rdt['temp'] = np.arange(10) + (offset * 10)
        pub.publish(rdt.to_granule())
    def glblrng_checks(self):
        ref = 'CE01ISSM-MF005-01-CTDBPC999'
        self.stream_def_id = self.create_stream_definition('global range check', parameter_dictionary_id=self.pdict_id, stream_configuration={'reference_designator':ref})
        self.rdt = RecordDictionaryTool(stream_definition_id=self.stream_def_id)

        self.rdt['time'] = np.arange(10)
        self.rdt['temp'] = [-10] * 5 + [4] * 5
        self.rdt.fetch_lookup_values()

        np.testing.assert_array_equal(self.rdt['tempwat_glblrng_qc'], [0]*5 + [1]*5)
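    # QC flag convention exercised by these checks: 1 marks a value that passes
    # the test, 0 marks a flagged value, and -99 appears as the fill value when a
    # check cannot be evaluated (see check_fill_values further down).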
    def publish_loop(self):
        t_i = 0
        while not self.finished.is_set():
            rdt = RecordDictionaryTool(stream_definition_id=self.stream_def._id)
            rdt['time'] = numpy.arange(10) + t_i*10
            rdt['temp'] = numpy.random.random_sample(10)*(30-0)+0

            self.publish(rdt.to_granule())
            gevent.sleep(self.interval)
            t_i += 1
    def spketst_checks(self):
        ref = 'GP02HYPM-SP001-04-CTDPF0999'
        self.stream_def_id = self.create_stream_definition('spike test check', parameter_dictionary_id=self.pdict_id, stream_configuration={'reference_designator':ref})
        self.rdt = RecordDictionaryTool(stream_definition_id=self.stream_def_id)

        self.rdt['time'] = np.arange(20)
        self.rdt['temp'] = [13] * 9 + [100] + [13] * 10
        self.rdt.fetch_lookup_values()

        np.testing.assert_array_equal(self.rdt['tempwat_spketst_qc'], [1]*9 + [0] + [1]*10)
    def stuckvl_checks(self):
        ref = 'GP02HYPM-SP001-04-CTDPF0999'
        self.stream_def_id = self.create_stream_definition('stuck value checks', parameter_dictionary_id=self.pdict_id, stream_configuration={'reference_designator':ref})
        self.rdt = RecordDictionaryTool(stream_definition_id=self.stream_def_id)

        self.rdt['time'] = np.arange(50)
        self.rdt['temp'] = [20] * 30 + range(20)
        self.rdt.fetch_lookup_values()

        np.testing.assert_array_equal(self.rdt['tempwat_stuckvl_qc'], [0]*30 + [1]*20)
    def publish_loop(self):

        # Build one granule of randomly generated CTD-like data per iteration and
        # publish it until the 'finished' event is set.
        while not self.finished.is_set():

            length = 10

            #Explicitly make these numpy arrays...
            c = numpy.array(
                [random.uniform(0.0, 75.0) for i in xrange(length)])
            t = numpy.array(
                [random.uniform(-1.7, 21.0) for i in xrange(length)])
            p = numpy.array(
                [random.lognormvariate(1, 2) for i in xrange(length)])
            lat = numpy.array(
                [random.uniform(-90.0, 90.0) for i in xrange(length)])
            lon = numpy.array(
                [random.uniform(0.0, 360.0) for i in xrange(length)])
            tvar = numpy.array(
                [self.last_time + i for i in xrange(1, length + 1)])
            self.last_time = max(tvar)

            parameter_dictionary = self._create_parameter()
            rdt = RecordDictionaryTool(param_dictionary=parameter_dictionary)

            # This is an example of using groups; it is not a normative statement
            # about how to use groups.

            rdt['temp'] = t
            rdt['conductivity'] = c
            rdt['pressure'] = p

            #add a value sequence of raw bytes - not sure the type below is correct?
            with open('/dev/urandom', 'r') as rand:
                rdt['raw_fixed'] = numpy.array(
                    [rand.read(32) for i in xrange(length)], dtype='a32')

            #add a value sequence of raw bytes - not sure the type below is correct?
            with open('/dev/urandom', 'r') as rand:
                rdt['raw_blob'] = numpy.array(
                    [rand.read(random.randint(1, 40)) for i in xrange(length)],
                    dtype=object)

            rdt['time'] = tvar
            rdt['lat'] = lat
            rdt['lon'] = lon

            g = rdt.to_granule()

            log.info('Sending %d values!' % length)
            if isinstance(g, Granule):
                self.publish(g)

            gevent.sleep(self.interval)
    def _publish_stream_buffer(self, stream_name):
        """
        ['quality_flag', 'preferred_timestamp', 'port_timestamp', 'lon', 'raw', 'internal_timestamp', 'time', 'lat', 'driver_timestamp']
        ['quality_flag', 'preferred_timestamp', 'temp', 'density', 'port_timestamp', 'lon', 'salinity', 'pressure', 'internal_timestamp', 'time', 'lat', 'driver_timestamp', 'conductivity']
        
        {"driver_timestamp": 3564867147.743795, "pkt_format_id": "JSON_Data", "pkt_version": 1, "preferred_timestamp": "driver_timestamp", "quality_flag": "ok", "stream_name": "raw",
        "values": [{"binary": true, "value": "MzIuMzkxOSw5MS4wOTUxMiwgNzg0Ljg1MywgICA2LjE5OTQsIDE1MDUuMTc5LCAxOSBEZWMgMjAxMiwgMDA6NTI6Mjc=", "value_id": "raw"}]}', 'time': 1355878347.744123}
        
        {"driver_timestamp": 3564867147.743795, "pkt_format_id": "JSON_Data", "pkt_version": 1, "preferred_timestamp": "driver_timestamp", "quality_flag": "ok", "stream_name": "parsed",
        "values": [{"value": 32.3919, "value_id": "temp"}, {"value": 91.09512, "value_id": "conductivity"}, {"value": 784.853, "value_id": "pressure"}]}', 'time': 1355878347.744127}
        
        {'quality_flag': [u'ok'], 'preferred_timestamp': [u'driver_timestamp'], 'port_timestamp': [None], 'lon': [None], 'raw': ['-4.9733,16.02390, 539.527,   34.2719, 1506.862, 19 Dec 2012, 01:03:07'],
        'internal_timestamp': [None], 'time': [3564867788.0627117], 'lat': [None], 'driver_timestamp': [3564867788.0627117]}
        
        {'quality_flag': [u'ok'], 'preferred_timestamp': [u'driver_timestamp'], 'temp': [-4.9733], 'density': [None], 'port_timestamp': [None], 'lon': [None], 'salinity': [None], 'pressure': [539.527],
        'internal_timestamp': [None], 'time': [3564867788.0627117], 'lat': [None], 'driver_timestamp': [3564867788.0627117], 'conductivity': [16.0239]}
        """

        try:
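            # Drain everything currently buffered for this stream into a single
            # granule and publish it, tagging the granule with the agent's
            # connection id and the stream's running connection index.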
            buf_len = len(self._stream_buffers[stream_name])
            if buf_len == 0:
                return

            stream_def = self._stream_defs[stream_name]
            if isinstance(stream_def, str):
                rdt = RecordDictionaryTool(stream_definition_id=stream_def)
            else:
                rdt = RecordDictionaryTool(stream_definition=stream_def)
                
            publisher = self._publishers[stream_name]
                
            vals = []
            for x in xrange(buf_len):
                vals.append(self._stream_buffers[stream_name].pop())
    
            rdt = populate_rdt(rdt, vals)
            
            #log.info('Outgoing granule: %s',
                     #['%s: %s'%(k,v) for k,v in rdt.iteritems()])
            #log.info('Outgoing granule preferred timestamp: %s' % rdt['preferred_timestamp'][0])
            #log.info('Outgoing granule destined for stream: %s', stream_name)
            g = rdt.to_granule(data_producer_id=self._agent.resource_id, connection_id=self._connection_ID.hex,
                    connection_index=str(self._connection_index[stream_name]))
            
            publisher.publish(g)
            #log.info('Instrument agent %s published data granule on stream %s.',
                #self._agent._proc_name, stream_name)
            #log.info('Connection id: %s, connection index: %i.',
                     #self._connection_ID.hex, self._connection_index[stream_name])
            self._connection_index[stream_name] += 1
        except:
            log.exception('Instrument agent %s could not publish data on stream %s.',
                self._agent._proc_name, stream_name)
    def publish_hifi(self,stream_id,stream_route,offset=0):
        '''
        Publish deterministic data
        '''

        pub = StandaloneStreamPublisher(stream_id, stream_route)

        stream_def = self.pubsub_management.read_stream_definition(stream_id=stream_id)
        stream_def_id = stream_def._id
        rdt = RecordDictionaryTool(stream_definition_id=stream_def_id)
        rdt['time'] = np.arange(10) + (offset * 10)
        rdt['temp'] = np.arange(10) + (offset * 10)
        pub.publish(rdt.to_granule())
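    # Each call publishes ten deterministic points: offset=k yields time and temp
    # values k*10 .. k*10+9, which the repersist tests compare against
    # np.arange(...) after retrieval.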
    def test_ingestion_gap_analysis(self):
        stream_id, route, stream_def_id, dataset_id = self.make_simple_dataset()
        self.start_ingestion(stream_id, dataset_id)
        self.addCleanup(self.stop_ingestion, stream_id)

        connection1 = uuid4().hex
        connection2 = uuid4().hex

        rdt = RecordDictionaryTool(stream_definition_id=stream_def_id)
        rdt['time'] = [0]
        rdt['temp'] = [0]
        self.publish_and_wait(dataset_id, rdt.to_granule(connection_id=connection1,connection_index='0'))
        rdt['time'] = [1]
        rdt['temp'] = [1]
        self.publish_and_wait(dataset_id, rdt.to_granule(connection_id=connection1,connection_index='1'))
        rdt['time'] = [2]
        rdt['temp'] = [2]
        self.publish_and_wait(dataset_id, rdt.to_granule(connection_id=connection1,connection_index='3')) # Gap, missed message
        rdt['time'] = [3]
        rdt['temp'] = [3]
        self.publish_and_wait(dataset_id, rdt.to_granule(connection_id=connection2,connection_index='3')) # Gap, new connection
        rdt['time'] = [4]
        rdt['temp'] = [4]
        self.publish_and_wait(dataset_id, rdt.to_granule(connection_id=connection2,connection_index='4'))
        rdt['time'] = [5]
        rdt['temp'] = [5]
        self.publish_and_wait(dataset_id, rdt.to_granule(connection_id=connection2,connection_index='5'))

        granule = self.data_retriever.retrieve(dataset_id)
        rdt = RecordDictionaryTool.load_from_granule(granule)
        np.testing.assert_array_equal(rdt['time'], np.arange(6))
        np.testing.assert_array_equal(rdt['temp'], np.arange(6))
        return dataset_id
 def publish_loop(self):
     t_i = 0
     while not self.finished.is_set():
         rdt = RecordDictionaryTool(stream_definition_id=self.stream_def._id)
         rdt['time']         = numpy.arange(10) + t_i*10
         rdt['temp']         = numpy.random.random(10) * 10
         rdt['lat']          = numpy.array([0] * 10)
         rdt['lon']          = numpy.array([0] * 10)
         rdt['conductivity'] = numpy.random.random(10) * 10
         rdt['binary']         = numpy.array(['hi'] * 10, dtype='object')
         
         self.publish(rdt.to_granule())
         gevent.sleep(self.interval)
         t_i += 1
    def publish_loop(self):

        # Build one granule of randomly generated CTD-like data per iteration and
        # publish it until the 'finished' event is set.
        while not self.finished.is_set():

            length = 10

            #Explicitly make these numpy arrays...
            c = numpy.array([random.uniform(0.0,75.0)  for i in xrange(length)])
            t = numpy.array([random.uniform(-1.7, 21.0) for i in xrange(length)])
            p = numpy.array([random.lognormvariate(1,2) for i in xrange(length)])
            lat = numpy.array([random.uniform(-90.0, 90.0) for i in xrange(length)])
            lon = numpy.array([random.uniform(0.0, 360.0) for i in xrange(length)])
            tvar = numpy.array([self.last_time + i for i in xrange(1,length+1)])
            self.last_time = max(tvar)

            parameter_dictionary = self._create_parameter()
            rdt = RecordDictionaryTool(param_dictionary=parameter_dictionary)

            # This is an example of using groups; it is not a normative statement
            # about how to use groups.

            rdt['temp'] = t
            rdt['conductivity'] = c
            rdt['pressure'] = p

            #add a value sequence of raw bytes - not sure the type below is correct?
            with open('/dev/urandom','r') as rand:
                rdt['raw_fixed'] = numpy.array([rand.read(32) for i in xrange(length)], dtype='a32')

            #add a value sequence of raw bytes - not sure the type below is correct?
            with open('/dev/urandom','r') as rand:
                rdt['raw_blob'] = numpy.array([rand.read(random.randint(1,40)) for i in xrange(length)], dtype=object)



            rdt['time'] = tvar
            rdt['lat'] = lat
            rdt['lon'] = lon


            g = rdt.to_granule()

            log.info('Sending %d values!' % length)
            if isinstance(g,Granule):
                self.publish(g)

            gevent.sleep(self.interval)
    def create_test_granules(self, buffer_data=False):
        """
        Generate test granules from particles.  If buffer_data is True, particles
        are buffered into a single granule until a particle with the new_sequence
        flag set starts a new one.  This method emulates the agent_stream_publisher
        module.
        :return: list of granules generated.
        """
        base_timestamp = 3583861263.0
        connection_index = 0

        particles = []
        particles.append(self.get_particle(base_timestamp, 10.5914, 161.06, 4.1870, 2693.0))
        particles.append(self.get_particle(base_timestamp+1, 10.5915, 161.07, 4.1871, 2693.1))
        particles.append(self.get_particle(base_timestamp+2, 10.5916, 161.08, 4.1872, 2693.2))
        particles.append(self.get_particle(base_timestamp+3, 10.5917, 161.09, 4.1873, 2693.3, True))
        particles.append(self.get_particle(base_timestamp+4, 10.5918, 161.10, 4.1874, 2693.4))

        data_groups = []
        result_granules = []
        data_groups_index = 0

        for particle in particles:
            # If we need a new connection then start a new group, but only if we have found
            # something in the current group
            if (particle.get('new_sequence', False) or not buffer_data) and \
               (len(data_groups) > 0 and len(data_groups[data_groups_index]) > 0):
                data_groups_index += 1

            if len(data_groups) <= data_groups_index:
                data_groups.append([])

            data_groups[data_groups_index].append(particle)

        log.debug("Granules to create: %s", len(data_groups))

        for data in data_groups:
            connection_id = uuid.uuid4()
            connection_index += 1
            rdt = RecordDictionaryTool(param_dictionary=self.get_param_dict())

            rdt = populate_rdt(rdt, data)

            g = rdt.to_granule(data_producer_id='agent_res_id', connection_id=connection_id.hex,
                               connection_index=str(connection_index))

            result_granules.append(g)

        return result_granules
    def test_replay_with_parameters(self):
        # --------------------------------------------------------------------------------
        # Create the configurations and the dataset
        # --------------------------------------------------------------------------------
        # Get a precompiled parameter dictionary with basic ctd fields
        pdict_id = self.dataset_management.read_parameter_dictionary_by_name("ctd_parsed_param_dict", id_only=True)
        context_ids = self.dataset_management.read_parameter_contexts(pdict_id, id_only=True)

        # Add a field that supports binary data input.
        bin_context = ParameterContext("binary", param_type=ArrayType())
        context_ids.append(self.dataset_management.create_parameter_context("binary", bin_context.dump()))
        # Add another field that supports dictionary elements.
        rec_context = ParameterContext("records", param_type=RecordType())
        context_ids.append(self.dataset_management.create_parameter_context("records", rec_context.dump()))

        pdict_id = self.dataset_management.create_parameter_dictionary(
            "replay_pdict", parameter_context_ids=context_ids, temporal_context="time"
        )

        stream_def_id = self.pubsub_management.create_stream_definition(
            "replay_stream", parameter_dictionary_id=pdict_id
        )

        stream_id, route = self.pubsub_management.create_stream(
            "replay_with_params", exchange_point=self.exchange_point_name, stream_definition_id=stream_def_id
        )
        config_id = self.get_ingestion_config()
        dataset_id = self.create_dataset(pdict_id)
        self.ingestion_management.persist_data_stream(
            stream_id=stream_id, ingestion_configuration_id=config_id, dataset_id=dataset_id
        )

        # --------------------------------------------------------------------------------
        # Coerce the datastore into existence (beats race condition)
        # --------------------------------------------------------------------------------
        self.get_datastore(dataset_id)

        self.launch_producer(stream_id)

        self.wait_until_we_have_enough_granules(dataset_id, 40)

        query = {
            "start_time": 0 - 2208988800,
            "end_time": 20 - 2208988800,
            "stride_time": 2,
            "parameters": ["time", "temp"],
        }
        retrieved_data = self.data_retriever.retrieve(dataset_id=dataset_id, query=query)

        rdt = RecordDictionaryTool.load_from_granule(retrieved_data)
        comp = np.arange(0, 20, 2) == rdt["time"]
        self.assertTrue(comp.all(), "%s" % rdt.pretty_print())
        self.assertEquals(set(rdt.iterkeys()), set(["time", "temp"]))

        extents = self.dataset_management.dataset_extents(dataset_id=dataset_id, parameters=["time", "temp"])
        self.assertTrue(extents["time"] >= 20)
        self.assertTrue(extents["temp"] >= 20)

        self.streams.append(stream_id)
        self.stop_ingestion(stream_id)
    def _extract_granule_data(self, granules):
        """
        Pull all data out of all granules and return a dict of values
        """
        result = []
        for granule in granules:
            group = []
            log.debug("Granule: %s", granule)
            rdt = RecordDictionaryTool.load_from_granule(granule)

            # Store the data from each record
            for key, value in rdt.iteritems():
                for i in range(0, len(value)):
                    if len(group) <= i:
                        group.append({})
                    group[i][key] = value[i]

                    # Store the connection information for each record
                    if 'connection_index' not in group[i]:
                        group[i]['connection_index'] = granule.connection_index

                    if 'connection_id' not in group[i]:
                        group[i]['connection_id'] = granule.connection_id

            result += group

        log.debug("extracted granules: %s", pprint.pformat(result))

        return result
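    # The result is a flat list of per-record dicts, e.g.
    # [{'time': t0, 'temp': v0, 'connection_id': ..., 'connection_index': ...}, ...],
    # which makes record-by-record comparisons in the tests straightforward.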
    def test_last_granule(self):
        # --------------------------------------------------------------------------------
        # Create the necessary configurations for the test
        # --------------------------------------------------------------------------------
        stream_id = self.pubsub_management.create_stream()
        config_id = self.get_ingestion_config()
        dataset_id = self.create_dataset()
        self.ingestion_management.persist_data_stream(
            stream_id=stream_id, ingestion_configuration_id=config_id, dataset_id=dataset_id
        )
        # --------------------------------------------------------------------------------
        # Create the datastore first,
        # --------------------------------------------------------------------------------
        self.get_datastore(dataset_id)

        self.publish_hifi(stream_id, 0)
        self.publish_hifi(stream_id, 1)

        self.wait_until_we_have_enough_granules(dataset_id, 2)  # I just need two

        replay_granule = self.data_retriever.retrieve_last_granule(dataset_id)

        rdt = RecordDictionaryTool.load_from_granule(replay_granule)

        comp = rdt["time"] == np.arange(10) + 10

        self.assertTrue(comp.all())
    def test_stuck_value_test(self):
        TestQCFunctions.test_stuck_value_test(self)
        self.ph.publish_rdt_to_data_product(self.dp_id, self.rdt)
        self.dataset_monitor.event.wait(10)

        rdt = RecordDictionaryTool.load_from_granule(self.data_retriever.retrieve(self.dataset_id))
        np.testing.assert_array_almost_equal(rdt['tempwat_stuckvl_qc'], [1, 1, 0, 0, 0, 0, 1, 1, 1, 1])
    def check_localrange(self):
        log.info('check_localrange')
        TestQCFunctions.check_localrange(self)
        self.init_check()

        flagged = Event()

        def cb(event, *args, **kwargs):
            times = event.temporal_values
            if not event.qc_parameter == 'tempwat_loclrng_qc':
                return
            np.testing.assert_array_equal(
                times,
                np.array([
                    3580144708.7555027, 3580144709.7555027, 3580144710.7555027,
                    3580144711.7555027, 3580144712.7555027
                ]))
            flagged.set()

        event_subscriber = EventSubscriber(event_type=OT.ParameterQCEvent,
                                           origin=self.dp_id,
                                           callback=cb,
                                           auto_delete=True)
        event_subscriber.start()
        self.addCleanup(event_subscriber.stop)

        self.ph.publish_rdt_to_data_product(self.dp_id, self.rdt)
        self.dataset_monitor.event.wait(10)
        rdt = RecordDictionaryTool.load_from_granule(
            self.data_retriever.retrieve(self.dataset_id))
        np.testing.assert_array_almost_equal(rdt['tempwat_loclrng_qc'],
                                             [1, 1, 1, 1, 1, 0, 0, 0, 0, 0])
        self.assertTrue(flagged.wait(10))
    def _extract_granule_data(self, granules):
        """
        Pull all data out of all granules and return a dict of values
        """
        result = []
        for granule in granules:
            group = []
            log.debug("Granule: %s", granule)
            rdt = RecordDictionaryTool.load_from_granule(granule)

            # Store the data from each record
            for key, value in rdt.iteritems():
                for i in range(0, len(value)):
                    if len(group) <= i:
                        group.append({})
                    group[i][key] = value[i]

                    # Store the connection information for each record
                    if 'connection_index' not in group[i]:
                        group[i]['connection_index'] = granule.connection_index

                    if 'connection_id' not in group[i]:
                        group[i]['connection_id'] = granule.connection_id

            result += group

        log.debug("extracted granules: %s", pprint.pformat(result))

        return result
 def validate_granule_subscription(self, msg, route, stream_id):
     if msg == {}:
         return
     rdt = RecordDictionaryTool.load_from_granule(msg)
     log.info('%s', rdt.pretty_print())
     self.assertIsInstance(msg,Granule,'Message is improperly formatted. (%s)' % type(msg))
     self.event.set()
    def check_global_range(self):
        TestQCFunctions.check_global_range(self)
        self.init_check()

        flagged = Event()

        def cb(event, *args, **kwargs):
            times = event.temporal_values
            self.assertEquals(times, [0.0, 7.0])
            flagged.set()

        event_subscriber = EventSubscriber(event_type=OT.ParameterQCEvent,
                                           origin=self.dp_id,
                                           callback=cb,
                                           auto_delete=True)
        event_subscriber.start()
        self.addCleanup(event_subscriber.stop)

        self.ph.publish_rdt_to_data_product(self.dp_id, self.rdt)
        self.dataset_monitor.event.wait(10)

        rdt = RecordDictionaryTool.load_from_granule(
            self.data_retriever.retrieve(self.dataset_id))
        np.testing.assert_array_almost_equal(rdt['tempwat_glblrng_qc'],
                                             [0, 1, 1, 1, 1, 1, 1, 0])
        self.assertTrue(flagged.wait(10))
    def test_repersist_data(self):
        stream_id, route, stream_def_id, dataset_id = self.make_simple_dataset()
        self.start_ingestion(stream_id, dataset_id)
        self.publish_hifi(stream_id,route,0)
        self.publish_hifi(stream_id,route,1)
        self.wait_until_we_have_enough_granules(dataset_id,20)
        config_id = self.get_ingestion_config()
        self.ingestion_management.unpersist_data_stream(stream_id=stream_id,ingestion_configuration_id=config_id)
        self.ingestion_management.persist_data_stream(stream_id=stream_id,ingestion_configuration_id=config_id,dataset_id=dataset_id)
        self.addCleanup(self.stop_ingestion, stream_id)
        self.publish_hifi(stream_id,route,2)
        self.publish_hifi(stream_id,route,3)
        self.wait_until_we_have_enough_granules(dataset_id,40)
        success = False
        with gevent.timeout.Timeout(5):
            while not success:

                replay_granule = self.data_retriever.retrieve(dataset_id)

                rdt = RecordDictionaryTool.load_from_granule(replay_granule)

                comp = rdt['time'] == np.arange(0,40)
                if not isinstance(comp,bool):
                    success = comp.all()
                gevent.sleep(1)

        self.assertTrue(success)
    def test_replay_with_parameters(self):
        # --------------------------------------------------------------------------------
        # Create the configurations and the dataset
        # --------------------------------------------------------------------------------
        stream_id = self.pubsub_management.create_stream()
        config_id = self.get_ingestion_config()
        dataset_id = self.create_dataset()
        self.ingestion_management.persist_data_stream(
            stream_id=stream_id, ingestion_configuration_id=config_id, dataset_id=dataset_id
        )

        # --------------------------------------------------------------------------------
        # Coerce the datastore into existence (beats race condition)
        # --------------------------------------------------------------------------------
        self.get_datastore(dataset_id)

        self.launch_producer(stream_id)

        self.wait_until_we_have_enough_granules(dataset_id, 4)

        query = {"start_time": 0, "end_time": 20, "parameters": ["time", "temp"]}
        retrieved_data = self.data_retriever.retrieve(dataset_id=dataset_id, query=query)

        rdt = RecordDictionaryTool.load_from_granule(retrieved_data)
        comp = np.arange(20) == rdt["time"]
        self.assertTrue(comp.all(), "%s" % rdt.pretty_print())
        self.assertEquals(set(rdt.iterkeys()), set(["time", "temp"]))
    def test_repersist_data(self):
        pdict_id = self.dataset_management.read_parameter_dictionary_by_name('ctd_parsed_param_dict',id_only=True)
        stream_def_id = self.pubsub_management.create_stream_definition(name='ctd', parameter_dictionary_id=pdict_id)
        stream_id, route = self.pubsub_management.create_stream(name='repersist', exchange_point=self.exchange_point_name, stream_definition_id=stream_def_id)
        config_id = self.get_ingestion_config()
        dataset_id = self.create_dataset(pdict_id)
        self.ingestion_management.persist_data_stream(stream_id=stream_id, ingestion_configuration_id=config_id, dataset_id=dataset_id)
        self.get_datastore(dataset_id)
        self.publish_hifi(stream_id,route,0)
        self.publish_hifi(stream_id,route,1)
        self.wait_until_we_have_enough_granules(dataset_id,2)
        self.ingestion_management.unpersist_data_stream(stream_id=stream_id,ingestion_configuration_id=config_id)
        self.ingestion_management.persist_data_stream(stream_id=stream_id,ingestion_configuration_id=config_id,dataset_id=dataset_id)
        self.publish_hifi(stream_id,route,2)
        self.publish_hifi(stream_id,route,3)
        self.wait_until_we_have_enough_granules(dataset_id,4)
        success = False
        with gevent.timeout.Timeout(5):
            while not success:

                replay_granule = self.data_retriever.retrieve(dataset_id)

                rdt = RecordDictionaryTool.load_from_granule(replay_granule)

                comp = rdt['time'] == np.arange(0,40)
                if not isinstance(comp,bool):
                    success = comp.all()
                gevent.sleep(1)

        self.assertTrue(success)
    def check_fill_values(self):
        log.info('check_fill_values')
        self.new_rdt()
        self.init_check()
        self.rdt['time'] = np.arange(5)
        self.rdt['temp'] = [12] * 5
        self.rdt.fetch_lookup_values()

        np.testing.assert_array_equal(self.rdt['tempwat_glblrng_qc'],
                                      [-99] * 5)
        np.testing.assert_array_equal(self.rdt['tempwat_spketst_qc'],
                                      [-99] * 5)
        np.testing.assert_array_equal(self.rdt['tempwat_stuckvl_qc'],
                                      [-99] * 5)
        np.testing.assert_array_equal(self.rdt['tempwat_trndtst_qc'],
                                      [-99] * 5)
        np.testing.assert_array_equal(self.rdt['tempwat_gradtst_qc'],
                                      [-99] * 5)
        self.ph.publish_rdt_to_data_product(self.dp_id, self.rdt)

        self.dataset_monitor.event.wait(10)
        rdt = RecordDictionaryTool.load_from_granule(
            self.data_retriever.retrieve(self.dataset_id))
        np.testing.assert_array_equal(rdt['tempwat_glblrng_qc'], [-99] * 5)
        np.testing.assert_array_equal(rdt['tempwat_spketst_qc'], [-99] * 5)
        np.testing.assert_array_equal(rdt['tempwat_stuckvl_qc'], [-99] * 5)
        np.testing.assert_array_equal(rdt['tempwat_trndtst_qc'], [-99] * 5)
        np.testing.assert_array_equal(rdt['tempwat_gradtst_qc'], [-99] * 5)
    def test_replay_by_time(self):
        log.info("starting test...")

        # --------------------------------------------------------------------------------
        # Create the necessary configurations for the test
        # --------------------------------------------------------------------------------
        stream_id = self.pubsub_management.create_stream()
        config_id = self.get_ingestion_config()
        dataset_id = self.create_dataset()
        self.ingestion_management.persist_data_stream(
            stream_id=stream_id, ingestion_configuration_id=config_id, dataset_id=dataset_id
        )
        # --------------------------------------------------------------------------------
        # Create the datastore first,
        # --------------------------------------------------------------------------------
        # There is sometimes a race condition between the services and the process
        # around the creation of the datastore and its instance; this ensures the
        # datastore exists before the process even subscribes to data.
        self.get_datastore(dataset_id)

        self.publish_fake_data(stream_id)
        self.wait_until_we_have_enough_granules(dataset_id, 2)  # I just need two

        replay_granule = self.data_retriever.retrieve(dataset_id, {"start_time": 0, "end_time": 6})

        rdt = RecordDictionaryTool.load_from_granule(replay_granule)

        comp = rdt["time"] == np.array([0, 1, 2, 3, 4, 5])

        try:
            log.info("Compared granule: %s", replay_granule.__dict__)
            log.info("Granule tax: %s", replay_granule.taxonomy.__dict__)
        except:
            pass
        self.assertTrue(comp.all())
    def check_gradient(self):
        log.info('check_gradient')
        TestQCFunctions.check_gradient(self)
        self.init_check()
        self.ph.publish_rdt_to_data_product(self.dp_id, self.rdt)
        self.dataset_monitor.event.wait(10)

        rdt = RecordDictionaryTool.load_from_granule(self.data_retriever.retrieve(self.dataset_id))
        np.testing.assert_array_equal(rdt['tempwat_gradtst_qc'], [1, 1, 0, 0, 1])
    def check_trend(self):
        log.info('check_trend')
        TestQCFunctions.check_trend(self)
        self.init_check()
        self.ph.publish_rdt_to_data_product(self.dp_id, self.rdt)
        self.assertTrue(self.dataset_monitor.wait())

        rdt = RecordDictionaryTool.load_from_granule(self.data_retriever.retrieve(self.dataset_id))
        np.testing.assert_array_almost_equal(rdt['tempwat_trndtst_qc'], [1] * 10)
        def verifier():
                replay_granule = self.data_retriever.retrieve_last_granule(dataset_id)

                rdt = RecordDictionaryTool.load_from_granule(replay_granule)

                comp = rdt['time'] == np.arange(10) + 10
                if not isinstance(comp,bool):
                    return comp.all()
                return False
    def get_visualization_image(self, data_product_id='', visualization_parameters=None, callback=''):

        # Error check
        if not data_product_id:
            raise BadRequest("The data_product_id parameter is missing")
        if visualization_parameters == {}:
            visualization_parameters = None

        # Extract the retrieval related parameters. Definitely init all parameters first
        query = None
        if visualization_parameters:
            query = {'parameters':[]}
            # Error check and damage control. Definitely need time
            if 'parameters' in visualization_parameters:
                if not 'time' in visualization_parameters['parameters']:
                    visualization_parameters['parameters'].append('time')
                query['parameters'] = visualization_parameters['parameters']

            if 'stride_time' in visualization_parameters:
                query['stride_time'] = visualization_parameters['stride_time']
            if 'start_time' in visualization_parameters:
                query['start_time'] = visualization_parameters['start_time']
            if 'end_time' in visualization_parameters:
                query['end_time'] = visualization_parameters['end_time']

        # get the dataset_id associated with the data_product. Need it to do the data retrieval
        ds_ids,_ = self.clients.resource_registry.find_objects(data_product_id, PRED.hasDataset, RT.DataSet, True)
        if ds_ids is None or not ds_ids:
            return None

        # Ideally just need the latest granule to figure out the list of images
        #replay_granule = self.clients.data_retriever.retrieve(ds_ids[0],{'start_time':0,'end_time':2})
        retrieved_granule = self.clients.data_retriever.retrieve(ds_ids[0], query=query)

        if retrieved_granule is None:
            return None

        # send the granule through the transform to get the matplotlib graphs
        mpl_pdict_id = self.clients.dataset_management.read_parameter_dictionary_by_name('graph_image_param_dict',id_only=True)
        mpl_stream_def = self.clients.pubsub_management.create_stream_definition('mpl', parameter_dictionary_id=mpl_pdict_id)
        mpl_data_granule = VizTransformMatplotlibGraphsAlgorithm.execute(retrieved_granule, config=visualization_parameters, params=mpl_stream_def)

        if mpl_data_granule is None:
            return None

        mpl_rdt = RecordDictionaryTool.load_from_granule(mpl_data_granule)

        ret_dict = dict()
        ret_dict['content_type'] = (get_safe(mpl_rdt, "content_type"))[0]
        ret_dict['image_name'] = (get_safe(mpl_rdt, "image_name"))[0]
        # Encode as a base64 string, since otherwise message pack complains about the bit stream.
        ret_dict['image_obj'] = base64.encodestring((get_safe(mpl_rdt, "image_obj"))[0])

        if callback == '':
            return ret_dict
        else:
            return callback + "(" + simplejson.dumps(ret_dict) + ")"
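    # The caller receives either the raw dict ('content_type', 'image_name' and a
    # base64-encoded 'image_obj') or, when a callback name is supplied, the same
    # dict wrapped as a JSONP-style response string.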
        def verify_points():
                replay_granule = self.data_retriever.retrieve_last_data_points(dataset_id,5)

                rdt = RecordDictionaryTool.load_from_granule(replay_granule)

                comp = rdt['time'] == np.arange(15,20)
                if not isinstance(comp,bool):
                    return comp.all()
                return False
    def test_retrieve_and_transform(self):

        # Stream definition for the CTD data
        pdict_id             = self.dataset_management.read_parameter_dictionary_by_name('ctd_parsed_param_dict', id_only=True)
        stream_def_id        = self.pubsub_management.create_stream_definition('ctd data', parameter_dictionary_id=pdict_id)
        ctd_stream_id, route = self.pubsub_management.create_stream('ctd stream', 'xp1', stream_definition_id=stream_def_id)


        # Stream definition for the salinity data
        salinity_pdict_id = self.dataset_management.read_parameter_dictionary_by_name('ctd_parsed_param_dict', id_only=True)
        sal_stream_def_id = self.pubsub_management.create_stream_definition('sal data', parameter_dictionary_id=salinity_pdict_id)

        ingest_config_id = self.get_ingestion_config()
        dataset_id = self.create_dataset(pdict_id)
        #--------------------------------------------------------------------------------
        # Again with this ridiculous problem
        #--------------------------------------------------------------------------------
        self.get_datastore(dataset_id)
        self.ingestion_management.persist_data_stream(stream_id=ctd_stream_id, ingestion_configuration_id=ingest_config_id, dataset_id=dataset_id)

        rdt = RecordDictionaryTool(stream_definition_id=stream_def_id)
        rdt['time'] = np.arange(10)
        rdt['temp'] = np.random.randn(10) * 10 + 30
        rdt['conductivity'] = np.random.randn(10) * 2 + 10

        publisher = StandaloneStreamPublisher(ctd_stream_id, route)
        publisher.publish(rdt.to_granule())

        rdt['time'] = np.arange(10,20)

        publisher.publish(rdt.to_granule())


        self.wait_until_we_have_enough_granules(dataset_id, 2)

        granule = self.data_retriever.retrieve(dataset_id, 
                                             None,
                                             None, 
                                             'ion.processes.data.transforms.ctd.ctd_L2_salinity',
                                             'CTDL2SalinityTransformAlgorithm', 
                                             kwargs=dict(params=sal_stream_def_id))
        rdt = RecordDictionaryTool.load_from_granule(granule)
        for i in rdt['salinity']:
            self.assertNotEquals(i,0)
    def test_replay_with_parameters(self):
        #--------------------------------------------------------------------------------
        # Create the configurations and the dataset
        #--------------------------------------------------------------------------------
        # Get a precompiled parameter dictionary with basic ctd fields
        pdict_id = self.dataset_management.read_parameter_dictionary_by_name('ctd_parsed_param_dict',id_only=True)
        context_ids = self.dataset_management.read_parameter_contexts(pdict_id, id_only=True)

        # Add a field that supports binary data input.
        bin_context = ParameterContext('binary',  param_type=ArrayType())
        context_ids.append(self.dataset_management.create_parameter_context('binary', bin_context.dump()))
        # Add another field that supports dictionary elements.
        rec_context = ParameterContext('records', param_type=RecordType())
        context_ids.append(self.dataset_management.create_parameter_context('records', rec_context.dump()))

        pdict_id = self.dataset_management.create_parameter_dictionary('replay_pdict', parameter_context_ids=context_ids, temporal_context='time')
        

        stream_def_id = self.pubsub_management.create_stream_definition('replay_stream', parameter_dictionary_id=pdict_id)
        
        stream_id, route  = self.pubsub_management.create_stream('replay_with_params', exchange_point=self.exchange_point_name, stream_definition_id=stream_def_id)
        config_id  = self.get_ingestion_config()
        dataset_id = self.create_dataset(pdict_id)
        self.ingestion_management.persist_data_stream(stream_id=stream_id, ingestion_configuration_id=config_id, dataset_id=dataset_id)

        dataset_modified = Event()
        def cb(*args, **kwargs):
            dataset_modified.set()
        es = EventSubscriber(event_type=OT.DatasetModified, callback=cb, origin=dataset_id)
        es.start()

        self.addCleanup(es.stop)

        self.publish_fake_data(stream_id, route)

        self.assertTrue(dataset_modified.wait(30))

        query = {
            'start_time': 0 - 2208988800,
            'end_time':   20 - 2208988800,
            'stride_time' : 2,
            'parameters': ['time','temp']
        }
        retrieved_data = self.data_retriever.retrieve(dataset_id=dataset_id,query=query)

        rdt = RecordDictionaryTool.load_from_granule(retrieved_data)
        comp = np.arange(0,20,2) == rdt['time']
        self.assertTrue(comp.all(),'%s' % rdt.pretty_print())
        self.assertEquals(set(rdt.iterkeys()), set(['time','temp']))

        extents = self.dataset_management.dataset_extents(dataset_id=dataset_id, parameters=['time','temp'])
        self.assertTrue(extents['time']>=20)
        self.assertTrue(extents['temp']>=20)

        self.streams.append(stream_id)
        self.stop_ingestion(stream_id)
    def _process_visualization_message(self, messages):

        final_hc_data = []
        final_hc_data_no_numpy = []

        for message in messages:

            if message is None:
                continue

            message_data = message.body

            if isinstance(message_data, Granule):

                rdt = RecordDictionaryTool.load_from_granule(message_data)
                hc_data_arr = get_safe(rdt, 'hc_data')

                # If this granule does not contain Highcharts data, skip it
                if hc_data_arr is None:
                    continue

                hc_data = hc_data_arr[0]

                for series in hc_data:
                    if not series.has_key("name"):
                        continue
                    series_name = series["name"]

                    # find index in final data. If it does not exist, create it
                    final_hc_data_idx = -1
                    idx = 0
                    for _s in final_hc_data:
                        if _s["name"] == series_name:
                            final_hc_data_idx = idx
                            break
                        idx += 1

                    # create an entry in the final hc structure if there's none for this series
                    if final_hc_data_idx == -1:
                        final_hc_data.append({})
                        final_hc_data_idx = len(final_hc_data) - 1
                        final_hc_data[final_hc_data_idx]["name"] = series_name
                        final_hc_data[final_hc_data_idx]["data"] = []

                        if series.has_key("visible"):
                            final_hc_data[final_hc_data_idx][
                                "visible"] = series["visible"]
                        if series.has_key("tooltip"):
                            final_hc_data[final_hc_data_idx][
                                "tooltip"] = series["tooltip"]

                    # Append the series data to the final hc structure
                    final_hc_data[final_hc_data_idx]["data"] += series[
                        "data"].tolist()

        return json.dumps(final_hc_data)
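    # The return value is a JSON string of Highcharts-style series objects, e.g.
    # [{"name": ..., "data": [...], "visible": ...}, ...], merged across every
    # granule in the supplied messages.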
    def test_ingestion_gap_analysis(self):
        stream_id, route, stream_def_id, dataset_id = self.make_simple_dataset(
        )
        self.start_ingestion(stream_id, dataset_id)
        self.addCleanup(self.stop_ingestion, stream_id)

        connection1 = uuid4().hex
        connection2 = uuid4().hex

        rdt = RecordDictionaryTool(stream_definition_id=stream_def_id)
        rdt['time'] = [0]
        rdt['temp'] = [0]
        self.publish_and_wait(
            dataset_id,
            rdt.to_granule(connection_id=connection1, connection_index='0'))
        rdt['time'] = [1]
        rdt['temp'] = [1]
        self.publish_and_wait(
            dataset_id,
            rdt.to_granule(connection_id=connection1, connection_index='1'))
        rdt['time'] = [2]
        rdt['temp'] = [2]
        self.publish_and_wait(dataset_id,
                              rdt.to_granule(
                                  connection_id=connection1,
                                  connection_index='3'))  # Gap, missed message
        rdt['time'] = [3]
        rdt['temp'] = [3]
        self.publish_and_wait(dataset_id,
                              rdt.to_granule(
                                  connection_id=connection2,
                                  connection_index='3'))  # Gap, new connection
        rdt['time'] = [4]
        rdt['temp'] = [4]
        self.publish_and_wait(
            dataset_id,
            rdt.to_granule(connection_id=connection2, connection_index='4'))
        rdt['time'] = [5]
        rdt['temp'] = [5]
        self.publish_and_wait(
            dataset_id,
            rdt.to_granule(connection_id=connection2, connection_index='5'))

        granule = self.data_retriever.retrieve(dataset_id)
        rdt = RecordDictionaryTool.load_from_granule(granule)
        np.testing.assert_array_equal(rdt['time'], np.arange(6))
        np.testing.assert_array_equal(rdt['temp'], np.arange(6))
        return dataset_id
    def glblrng_checks(self):
        ref = 'CE01ISSM-MF005-01-CTDBPC999'
        self.stream_def_id = self.create_stream_definition('global range check', parameter_dictionary_id=self.pdict_id, stream_configuration={'reference_designator':ref})
        self.rdt = RecordDictionaryTool(stream_definition_id=self.stream_def_id)

        self.rdt['time'] = np.arange(10)
        self.rdt['temp'] = [-10] * 5 + [4] * 5
        self.rdt.fetch_lookup_values()

        np.testing.assert_array_equal(self.rdt['tempwat_glblrng_qc'], [0]*5 + [1]*5)
    def spketst_checks(self):
        ref = 'GP02HYPM-SP001-04-CTDPF0999'
        self.stream_def_id = self.create_stream_definition('spike test check', parameter_dictionary_id=self.pdict_id, stream_configuration={'reference_designator':ref})
        self.rdt = RecordDictionaryTool(stream_definition_id=self.stream_def_id)

        self.rdt['time'] = np.arange(20)
        self.rdt['temp'] = [13] * 9 + [100] + [13] * 10
        self.rdt.fetch_lookup_values()

        np.testing.assert_array_equal(self.rdt['tempwat_spketst_qc'], [1]*9 + [0] + [1]*10)
 def validate_granule_subscription(self, msg, route, stream_id):
     '''
     Validation for granule format
     '''
     if msg == {}:
         return
     rdt = RecordDictionaryTool.load_from_granule(msg)
     log.info('%s', rdt.pretty_print())
     self.assertIsInstance(msg,Granule,'Message is improperly formatted. (%s)' % type(msg))
     self.event.set()
    def stuckvl_checks(self):
        ref = 'GP02HYPM-SP001-04-CTDPF0999'
        self.stream_def_id = self.create_stream_definition('stuck value checks', parameter_dictionary_id=self.pdict_id, stream_configuration={'reference_designator':ref})
        self.rdt = RecordDictionaryTool(stream_definition_id=self.stream_def_id)

        self.rdt['time'] = np.arange(50)
        self.rdt['temp'] = [20] * 30 + range(20)
        self.rdt.fetch_lookup_values()

        np.testing.assert_array_equal(self.rdt['tempwat_stuckvl_qc'], [0]*30 + [1]*20)
    def setUp(self):
        DMTestCase.setUp(self)
        self.ph = ParameterHelper(self.dataset_management, self.addCleanup)
        pdict_id = self.ph.create_simple_qc_pdict()

        self.stream_def_id = self.pubsub_management.create_stream_definition('global range', parameter_dictionary_id=pdict_id, stream_configuration={'reference_designator':'QCTEST'})
        self.addCleanup(self.pubsub_management.delete_stream_definition, self.stream_def_id)

        self.rdt = RecordDictionaryTool(stream_definition_id=self.stream_def_id)
        self.svm = StoredValueManager(self.container)
    def check_gradient(self):
        log.info('check_gradient')
        TestQCFunctions.check_gradient(self)
        self.init_check()
        self.ph.publish_rdt_to_data_product(self.dp_id, self.rdt)
        self.dataset_monitor.event.wait(10)

        rdt = RecordDictionaryTool.load_from_granule(
            self.data_retriever.retrieve(self.dataset_id))
        np.testing.assert_array_equal(rdt['tempwat_gradtst_qc'],
                                      [1, 1, 0, 0, 1])
    def assert_granule_time(self, granule, target_time):
        """
        Assert that the granule's time matches the target time
        @param granule The granule to be searched
        @param target_time The time that should match the granule's overall time
        """
        rdt = RecordDictionaryTool.load_from_granule(granule)
        rdt_time = rdt['time'][0]
        log.debug("assert_granule_time granule time: %s", rdt_time)
        log.debug("assert_granule_time target timestamp: %s", target_time)
        self.assertEqual(rdt_time, target_time)
    def test_coverage_transform(self):
        ph = ParameterHelper(self.dataset_management, self.addCleanup)
        pdict_id = ph.create_parsed()
        stream_def_id = self.pubsub_management.create_stream_definition(
            'ctd parsed', parameter_dictionary_id=pdict_id)
        self.addCleanup(self.pubsub_management.delete_stream_definition,
                        stream_def_id)

        stream_id, route = self.pubsub_management.create_stream(
            'example',
            exchange_point=self.exchange_point_name,
            stream_definition_id=stream_def_id)
        self.addCleanup(self.pubsub_management.delete_stream, stream_id)

        ingestion_config_id = self.get_ingestion_config()
        dataset_id = self.create_dataset(pdict_id)

        self.ingestion_management.persist_data_stream(
            stream_id=stream_id,
            ingestion_configuration_id=ingestion_config_id,
            dataset_id=dataset_id)
        self.addCleanup(self.ingestion_management.unpersist_data_stream,
                        stream_id, ingestion_config_id)
        publisher = StandaloneStreamPublisher(stream_id, route)

        rdt = ph.get_rdt(stream_def_id)
        ph.fill_parsed_rdt(rdt)

        dataset_monitor = DatasetMonitor(dataset_id)
        self.addCleanup(dataset_monitor.stop)

        publisher.publish(rdt.to_granule())
        self.assertTrue(dataset_monitor.event.wait(30))

        replay_granule = self.data_retriever.retrieve(dataset_id)
        rdt_out = RecordDictionaryTool.load_from_granule(replay_granule)

        np.testing.assert_array_almost_equal(rdt_out['time'], rdt['time'])
        np.testing.assert_array_almost_equal(rdt_out['temp'], rdt['temp'])

        np.testing.assert_array_almost_equal(rdt_out['conductivity_L1'],
                                             np.array([42.914]))
        np.testing.assert_array_almost_equal(rdt_out['temp_L1'],
                                             np.array([20.]))
        np.testing.assert_array_almost_equal(rdt_out['pressure_L1'],
                                             np.array([3.068]))
        np.testing.assert_array_almost_equal(
            rdt_out['density'], np.array([1021.7144739593881],
                                         dtype='float32'))
        np.testing.assert_array_almost_equal(
            rdt_out['salinity'], np.array([30.935132729668283],
                                          dtype='float32'))
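        # Note (inference, not from the original source): the conductivity_L1,
        # temp_L1, pressure_L1, density and salinity values checked above are
        # presumably derived parameters evaluated by the coverage from the
        # published parsed values, which is what this test exercises.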
    def test_out_of_band_retrieve(self):
        # Set up the environment
        stream_id, route, stream_def_id, dataset_id = self.make_simple_dataset()
        self.start_ingestion(stream_id, dataset_id)
        
        # Fill the dataset
        self.publish_fake_data(stream_id, route)
        self.wait_until_we_have_enough_granules(dataset_id, 40)

        # Retrieve the data
        granule = DataRetrieverService.retrieve_oob(dataset_id)
        rdt = RecordDictionaryTool.load_from_granule(granule)
        self.assertTrue((rdt['time'] == np.arange(40)).all())
    def test_retrieve_and_transform(self):
        # Make a simple dataset and start ingestion, pretty standard stuff.
        ctd_stream_id, route, stream_def_id, dataset_id = self.make_simple_dataset(
        )
        self.start_ingestion(ctd_stream_id, dataset_id)

        # Stream definition for the salinity data
        salinity_pdict_id = self.dataset_management.read_parameter_dictionary_by_name(
            'ctd_parsed_param_dict', id_only=True)
        sal_stream_def_id = self.pubsub_management.create_stream_definition(
            'sal data', parameter_dictionary_id=salinity_pdict_id)

        rdt = RecordDictionaryTool(stream_definition_id=stream_def_id)
        rdt['time'] = np.arange(10)
        rdt['temp'] = np.random.randn(10) * 10 + 30
        rdt['conductivity'] = np.random.randn(10) * 2 + 10
        rdt['pressure'] = np.random.randn(10) * 1 + 12

        publisher = StandaloneStreamPublisher(ctd_stream_id, route)
        publisher.publish(rdt.to_granule())

        rdt['time'] = np.arange(10, 20)

        publisher.publish(rdt.to_granule())

        self.wait_until_we_have_enough_granules(dataset_id, 20)

        granule = self.data_retriever.retrieve(
            dataset_id,
            None,
            None,
            'ion.processes.data.transforms.ctd.ctd_L2_salinity',
            'CTDL2SalinityTransformAlgorithm',
            kwargs=dict(params=sal_stream_def_id))
        rdt = RecordDictionaryTool.load_from_granule(granule)
        for i in rdt['salinity']:
            self.assertNotEquals(i, 0)
        self.streams.append(ctd_stream_id)
        self.stop_ingestion(ctd_stream_id)
    def _construct_streams(self, stream_info):
        decoder = IonObjectDeserializer(obj_registry=get_obj_registry())
        for (stream_name, config) in stream_info.iteritems():
            try:
                if config.has_key('stream_def_dict'):
                    stream_def_dict = config['stream_def_dict']
                    stream_def_dict['type_'] = 'StreamDefinition'
                    stream_def_obj = decoder.deserialize(stream_def_dict)
                    self._stream_defs[stream_name] = stream_def_obj
                    rdt = RecordDictionaryTool(stream_definition=stream_def_obj)
                else:
                    stream_def = config['stream_definition_ref']
                    self._stream_defs[stream_name] = stream_def
                    rdt = RecordDictionaryTool(stream_definition_id=stream_def)
                self._agent.aparam_streams[stream_name] = rdt.fields
                self._agent.aparam_pubrate[stream_name] = 0
            except Exception as e:
                errmsg = 'Instrument agent %s ' % self._agent._proc_name
                errmsg += 'error constructing stream %s. ' % stream_name
                errmsg += str(e)
                log.error(errmsg)

        self._agent.aparam_set_pubrate = self.aparam_set_pubrate
    def wait_until_we_have_enough_granules(self, dataset_id='', data_size=40):
        '''
        Loops until there is a sufficient amount of data in the dataset
        '''
        done = False
        with gevent.Timeout(40):
            while not done:
                extents = self.dataset_management.dataset_extents(dataset_id, 'time')
                granule = self.data_retriever.retrieve_last_data_points(dataset_id, 1)
                rdt = RecordDictionaryTool.load_from_granule(granule)
                fill_value = rdt._pdict.get_context('time').fill_value
                if rdt['time'] and rdt['time'][0] != fill_value and extents >= data_size:
                    done = True
                else:
                    gevent.sleep(0.2)
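    # Typical usage from the tests in this file, e.g. test_out_of_band_retrieve:
    #     self.wait_until_we_have_enough_granules(dataset_id, 40)
    # If the dataset never reaches the requested size, gevent.Timeout(40) raises
    # after 40 seconds and the calling test fails.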
    def assert_raw_granules_ingested(self, count, payload_size):
        #--------------------------------------------------------------------------------
        # Test the slicing capabilities
        #--------------------------------------------------------------------------------
        data_retriever = DataRetrieverServiceClient()

        for i in range(0, count - 1):
            granule = data_retriever.retrieve(dataset_id=self._raw_dataset_id,
                                              query={'tdoa': slice(i, i + 1)})
            rdt = RecordDictionaryTool.load_from_granule(granule)

            log.info("Granule index: %d, time: %s, size: %s", i,
                     rdt['time'][0], len(rdt['raw'][0]))
            self.assertEqual(payload_size, len(rdt['raw'][0]))
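        # Note (inference, not from the original source): the query
        # {'tdoa': slice(i, i + 1)} appears to select a single step along the
        # temporal domain, so the loop walks the raw dataset one record at a
        # time and verifies that each persisted 'raw' payload keeps its
        # original size after ingestion.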