    def test_array_flow_paths(self):
        data_product_id, stream_def_id = self.make_array_data_product()

        dataset_id = self.RR2.find_dataset_id_of_data_product_using_has_dataset(data_product_id)
        dm = DatasetMonitor(dataset_id)
        self.addCleanup(dm.stop)


        # I need to make sure that we can fill the RDT with its values
        # Test for one timestep
        # Test for multiple timesteps
        # Publishes 
        # Ingests correctly
        # Retrieves correctly

        #--------------------------------------------------------------------------------
        # Ensure that the RDT can be filled with ArrayType values
        #--------------------------------------------------------------------------------
        
        rdt = RecordDictionaryTool(stream_definition_id=stream_def_id)
        rdt['time'] = [0]
        rdt['temp_sample'] = [[0,1,2,3,4]]
        np.testing.assert_array_equal(rdt['temp_sample'], np.array([[0,1,2,3,4]]))

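        # Publish the granule and block (up to 10 s) until ingestion signals that
        # the dataset has been modified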
        self.ph.publish_rdt_to_data_product(data_product_id, rdt)
        self.assertTrue(dm.event.wait(10))
        dm.event.clear()

        granule = self.data_retriever.retrieve(dataset_id)
        rdt = RecordDictionaryTool.load_from_granule(granule)
        np.testing.assert_array_equal(rdt['temp_sample'], np.array([[0,1,2,3,4]]))

        #--------------------------------------------------------------------------------
        # Ensure that it deals with multiple values
        #--------------------------------------------------------------------------------

        rdt = RecordDictionaryTool(stream_definition_id=stream_def_id)
        rdt['time'] = [1,2,3]
        rdt['temp_sample'] = [[0,1,2,3,4],[1],[5,5,5,5,5]]
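        # The short middle row is padded out to the array width with the fill value;
        # fill_value() returns nothing for this parameter, so float32 max is used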

        m = rdt.fill_value('temp_sample') or np.finfo(np.float32).max
        np.testing.assert_equal(m,np.finfo(np.float32).max)
        np.testing.assert_array_equal(rdt['temp_sample'], [[0,1,2,3,4],[1,m,m,m,m],[5,5,5,5,5]])
        self.ph.publish_rdt_to_data_product(data_product_id, rdt)
        self.assertTrue(dm.event.wait(10))
        dm.event.clear()


        #--------------------------------------------------------------------------------
        # Retrieve and Verify
        #--------------------------------------------------------------------------------

        retrieved_granule = self.data_retriever.retrieve(dataset_id)
        rdt = RecordDictionaryTool.load_from_granule(retrieved_granule)
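        # Retrieval concatenates both granules: t=0 from the first publish and
        # t=1..3 from the second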
        np.testing.assert_array_equal(rdt['time'], np.array([0,1,2,3]))
        np.testing.assert_array_equal(rdt['temp_sample'], np.array([[0,1,2,3,4],[0,1,2,3,4],[1,m,m,m,m],[5,5,5,5,5]]))
    def test_append_parameter(self):
        # Make a CTDBP Data Product
        data_product_id = self.make_ctd_data_product()
        dataset_id = self.RR2.find_dataset_id_of_data_product_using_has_dataset(
            data_product_id)
        dataset_monitor = DatasetMonitor(dataset_id)
        self.addCleanup(dataset_monitor.stop)

        # Throw some data in it
        rdt = self.ph.rdt_for_data_product(data_product_id)
        rdt['time'] = np.arange(30)
        rdt['temp'] = np.arange(30)
        rdt['pressure'] = np.arange(30)
        self.ph.publish_rdt_to_data_product(data_product_id, rdt)
        self.assertTrue(dataset_monitor.wait())
        dataset_monitor.event.clear()

        # Grab the egg
        egg_url = self.egg_url
        egg_path = TransformWorker.download_egg(egg_url)
        import pkg_resources
        pkg_resources.working_set.add_entry(egg_path)
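        # Registering the egg with pkg_resources makes the 'ion_example' package
        # resolvable so the parameter function below can load add_arrays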
        self.addCleanup(os.remove, egg_path)

        # Make a parameter function
        owner = 'ion_example.add_arrays'
        func = 'add_arrays'
        arglist = ['a', 'b']
        pf = ParameterFunction(name='add_arrays',
                               function_type=PFT.PYTHON,
                               owner=owner,
                               function=func,
                               args=arglist)
        pfunc_id = self.dataset_management.create_parameter_function(pf)
        self.addCleanup(self.dataset_management.delete_parameter_function,
                        pfunc_id)

        # Make a context (instance of the function)
        context = ParameterContext(name='array_sum',
                                   units="1",
                                   fill_value="-9999",
                                   parameter_function_id=pfunc_id,
                                   parameter_type="function",
                                   value_encoding="float32",
                                   display_name="Array Summation",
                                   parameter_function_map={
                                       'a': 'temp',
                                       'b': 'pressure'
                                   })
        #pfunc = DatasetManagementService.get_coverage_function(pf)
        #pfunc.param_map = {'a':'temp', 'b':'pressure'}
        #ctxt = ParameterContext('array_sum', param_type=ParameterFunctionType(pfunc))
        #ctxt_dump = ctxt.dump()
        #ctxt_id = self.dataset_management.create_parameter_context('array_sum', ctxt_dump)
        ctxt_id = self.dataset_management.create_parameter(context)
        self.dataset_management.add_parameter_to_dataset(ctxt_id, dataset_id)

        granule = self.data_retriever.retrieve(dataset_id)
        rdt = RecordDictionaryTool.load_from_granule(granule)
        np.testing.assert_array_equal(rdt['array_sum'], np.arange(0, 60, 2))
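
    # For reference, the parameter function packaged in the egg is tiny; the
    # inspection assertion in test_add_parameter_function later in this listing
    # shows its exact source:
    #
    #     def add_arrays(a, b):
    #         return a+b
    #
    # so 'array_sum' above evaluates temp + pressure = np.arange(0, 60, 2).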
    def test_coefficient_compatibility(self):
        data_product_id = self.create_data_product(
            name='Calibration Coefficient Test Data product',
            stream_def_id=self.stream_def_id)

        self.data_product_management.activate_data_product_persistence(
            data_product_id)
        self.addCleanup(
            self.data_product_management.suspend_data_product_persistence,
            data_product_id)

        rdt = RecordDictionaryTool(stream_definition_id=self.stream_def_id)
        rdt['time'] = np.arange(10)
        rdt['temp'] = [10] * 10
        rdt['cc_coefficient'] = [2] * 10
        dataset_id = self.RR2.find_dataset_id_of_data_product_using_has_dataset(
            data_product_id)
        dataset_monitor = DatasetMonitor(dataset_id)
        self.addCleanup(dataset_monitor.stop)
        self.ph.publish_rdt_to_data_product(data_product_id, rdt)
        self.assertTrue(dataset_monitor.wait())

        rdt2 = RecordDictionaryTool.load_from_granule(
            self.data_retriever.retrieve(dataset_id))
        np.testing.assert_array_equal(rdt2['offset'], [12] * 10)
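
    # Note: 'offset' is a derived parameter supplied by the stream definition (not
    # shown in this listing); with temp = 10 and cc_coefficient = 2 it comes back
    # as 12 for every sample, consistent with offset = temp + cc_coefficient.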
    def test_instrument_simple(self):
        instrument_model_id = self.create_instrument_model()
        instrument_agent_id = self.create_instrument_agent(instrument_model_id)
        instrument_device_id = self.create_instrument_device(instrument_model_id)
        instrument_agent_instance_id = self.create_instrument_agent_instance(instrument_agent_id, instrument_device_id)

        raw_dp_id, parsed_dp_id = self.create_instrument_data_products(instrument_device_id)

        self.start_instrument_agent_instance(instrument_agent_instance_id)

        agent_process_id = self.poll_instrument_agent_instance(instrument_agent_instance_id, instrument_device_id)

        agent_client = ResourceAgentClient(instrument_device_id,
                                              to_name=agent_process_id,
                                              process=FakeProcess())

        self.agent_state_transition(agent_client, ResourceAgentEvent.INITIALIZE, ResourceAgentState.INACTIVE)
        self.agent_state_transition(agent_client, ResourceAgentEvent.GO_ACTIVE, ResourceAgentState.IDLE)
        self.agent_state_transition(agent_client, ResourceAgentEvent.RUN, ResourceAgentState.COMMAND)

        dataset_id = self.RR2.find_dataset_id_of_data_product_using_has_dataset(parsed_dp_id)

        for i in xrange(10):
            monitor = DatasetMonitor(dataset_id=dataset_id)
            agent_client.execute_resource(AgentCommand(command=SBE37ProtocolEvent.ACQUIRE_SAMPLE))
            if not monitor.event.wait(30):
                raise AssertionError('Failed on the %ith granule' % i)
            monitor.stop()

        rdt = RecordDictionaryTool.load_from_granule(self.data_retriever.retrieve(dataset_id))
        self.assertEquals(len(rdt), 10)
    def cb(msg, sr, sid):
        self.assertEqual(sid, stream_id_out)
        rdt_out = RecordDictionaryTool.load_from_granule(msg)
        self.assertEquals(set([k for k,v in rdt_out.iteritems()]), set(available_fields_out))
        for k,v in rdt_out.iteritems():
            self.assertEquals(rdt_out[k], None)
        e.set()
    def test_example_preload(self):
        print 'preloading...'
        self.preload_example1()

        data_product_ids, _ = self.container.resource_registry.find_resources_ext(alt_id='DPROD102', alt_id_ns='PRE')
        data_product_id = data_product_ids[0]
        dataset_id = self.RR2.find_dataset_id_of_data_product_using_has_dataset(data_product_id)


        with DirectCoverageAccess() as dca:
            dca.upload_calibration_coefficients(dataset_id, 'test_data/sbe16coeffs.csv', 'test_data/sbe16coeffs.yml')

        ph = ParameterHelper(self.dataset_management, self.addCleanup)
        rdt = ph.rdt_for_data_product(data_product_id)
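        # 2208988800 s is the offset between the NTP epoch (1900) and the Unix epoch (1970)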
        rdt['time'] = [time.time() + 2208988800]
        rdt['temperature'] = [248471]
        rdt['pressure'] = [528418]
        rdt['conductivity'] = [1673175]
        rdt['thermistor_temperature']=[24303]

        dataset_monitor = DatasetMonitor(dataset_id)
        self.addCleanup(dataset_monitor.stop)
        ph.publish_rdt_to_data_product(data_product_id, rdt)
        dataset_monitor.event.wait(10)
        g = self.data_retriever.retrieve(dataset_id)
        rdt = RecordDictionaryTool.load_from_granule(g)

        breakpoint(locals())
    def verify_incoming(self, m, r, s):
        rdt = RecordDictionaryTool.load_from_granule(m)
        self.assertEquals(rdt, self.rdt)
        self.assertEquals(m.data_producer_id, self.data_producer_id)
        self.assertEquals(m.provider_metadata_update, self.provider_metadata_update)
        self.assertNotEqual(m.creation_timestamp, None)
        self.event.set()
    def test_example2_preload(self):
        print 'preloading...'
        self.preload_example2()

        data_product_ids, _ = self.container.resource_registry.find_resources_ext(alt_id='DPROD104', alt_id_ns='PRE')
        data_product_id = data_product_ids[0]
        dataset_id = self.RR2.find_dataset_id_of_data_product_using_has_dataset(data_product_id)


        with DirectCoverageAccess() as dca:
            dca.upload_calibration_coefficients(dataset_id, 'test_data/vel3d_coeff.csv', 'test_data/vel3d_coeff.yml')

        from ion_functions.data.test.test_vel_functions import TS, VE, VN, VU

        rdt = ParameterHelper.rdt_for_data_product(data_product_id)
        rdt['time'] = [time.time() + 2208988800]
        rdt['velocity_east'] = [VE[0]]
        rdt['velocity_north'] = [VN[0]]
        rdt['velocity_up'] = [VU[0]]

        dataset_monitor = DatasetMonitor(dataset_id)
        self.addCleanup(dataset_monitor.stop)
        ParameterHelper.publish_rdt_to_data_product(data_product_id, rdt)
        dataset_monitor.event.wait(10)
        g = self.data_retriever.retrieve(dataset_id)
        rdt = RecordDictionaryTool.load_from_granule(g)

        breakpoint(locals())
    def process(self, dataset_id, start_time=0, end_time=0):
        if not dataset_id:
            raise BadRequest('No dataset id specified.')
        now = time.time()
        start_time = start_time or (now - (3600*(self.run_interval+1))) # Every N hours with 1 of overlap
        end_time   = end_time or now
        
        qc_params  = [i for i in self.qc_params if i in self.qc_suffixes] or self.qc_suffixes
        
        self.qc_publisher = EventPublisher(event_type=OT.ParameterQCEvent)
        log.debug('Iterating over the data blocks')

        for st,et in self.chop(int(start_time),int(end_time)):
            log.debug('Chopping %s:%s', st, et)
            log.debug("Retrieving data: data_retriever.retrieve('%s', query={'start_time':%s, 'end_time':%s')", dataset_id, st, et)
            granule = self.data_retriever.retrieve(dataset_id, query={'start_time':st, 'end_time':et})
            log.debug('Retrieved Data')
            rdt = RecordDictionaryTool.load_from_granule(granule)
            qc_fields = [i for i in rdt.fields if any([i.endswith(j) for j in qc_params])]
            log.debug('QC Fields: %s', qc_fields)
            for field in qc_fields:
                val = rdt[field]
                if val is None:
                    continue
                if not np.all(val):
                    log.debug('Found QC Alerts')
                    indexes = np.where(val==0)
                    timestamps = rdt[rdt.temporal_parameter][indexes[0]]
                    self.flag_qc_parameter(dataset_id, field, timestamps.tolist(),{})
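
    # A minimal sketch of the chop() helper used above; its actual implementation
    # is not part of this listing, and this assumes it yields contiguous
    # (start, end) windows covering the requested range:
    #
    #     def chop(self, start_time, end_time, window=3600):
    #         for st in range(start_time, end_time, window):
    #             yield st, min(st + window, end_time)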
    def test_lctest_preload(self):
        self.preload_lctest()


        pdict_id = self.dataset_management.read_parameter_dictionary_by_name('sparse_dict', id_only=True)
        stream_def_id = self.create_stream_definition('sparse_example', parameter_dictionary_id=pdict_id)
        data_product_id = self.create_data_product('sparse_example', stream_def_id=stream_def_id)
        self.activate_data_product(data_product_id)

        dataset_id = self.RR2.find_dataset_id_of_data_product_using_has_dataset(data_product_id)


        rdt = ParameterHelper.rdt_for_data_product(data_product_id)
        rdt['time'] = [time.time() + 2208988800]
        rdt['sparse_float'] = [3.14159265358979323]
        rdt['sparse_double'] = [2.7182818284590452353602874713526624977572470936999595]
        rdt['sparse_int'] = [131071] # 6th Mersenne prime
        dataset_monitor = DatasetMonitor(dataset_id)
        self.addCleanup(dataset_monitor.stop)
        ParameterHelper.publish_rdt_to_data_product(data_product_id, rdt)
        dataset_monitor.event.wait(10)

        for i in xrange(10):
            dataset_monitor.event.clear()
            rdt = ParameterHelper.rdt_for_data_product(data_product_id)
            rdt['time'] = [time.time() + 2208988800]
            ParameterHelper.publish_rdt_to_data_product(data_product_id, rdt)
            dataset_monitor.event.wait(10)


        g = self.data_retriever.retrieve(dataset_id)
        rdt = RecordDictionaryTool.load_from_granule(g)
            
        breakpoint(locals())
    def check_tempsf_instrument_data_product(self, reference_designator):
        passing = True
        info_list = []
        passing &= self.check_data_product_reference(reference_designator, info_list)
        if not passing: return passing
        data_product_id, stream_def_id, dataset_id = info_list.pop()

        now = time.time()
        ntp_now = now + 2208988800

        rdt = RecordDictionaryTool(stream_definition_id=stream_def_id)
        rdt['time'] = [ntp_now]
        rdt['temperature'] = [[ 25.3884, 26.9384, 24.3394, 23.3401, 22.9832,
            29.4434, 26.9873, 15.2883, 16.3374, 14.5883, 15.7253, 18.4383,
            15.3488, 17.2993, 10.2111, 11.5993, 10.9345, 9.4444, 9.9876,
            10.9834, 11.0098, 5.3456, 4.2994, 4.3009]]

        dataset_monitor = DatasetMonitor(dataset_id)
        self.addCleanup(dataset_monitor.stop)
        ParameterHelper.publish_rdt_to_data_product(data_product_id, rdt)
        passing &= self.assertTrue(dataset_monitor.event.wait(20))
        if not passing: return passing

        granule = self.data_retriever.retrieve(dataset_id)
        rdt = RecordDictionaryTool.load_from_granule(granule)
        passing &= self.assert_array_almost_equal(rdt['time'], [ntp_now])
        passing &= self.assert_array_almost_equal(rdt['temperature'], [[
            25.3884, 26.9384, 24.3394, 23.3401, 22.9832, 29.4434, 26.9873,
            15.2883, 16.3374, 14.5883, 15.7253, 18.4383, 15.3488, 17.2993,
            10.2111, 11.5993, 10.9345, 9.4444, 9.9876, 10.9834, 11.0098,
            5.3456, 4.2994, 4.3009]])
        return passing
    def test_execute_transform(self):
        available_fields_in = ['time', 'lat', 'lon', 'TEMPWAT_L0', 'CONDWAT_L0', 'PRESWAT_L0']
        available_fields_out = ['time', 'lat', 'lon', 'TEMPWAT_L0', 'CONDWAT_L0', 'PRESWAT_L0', 'TEMPWAT_L1','CONDWAT_L1','PRESWAT_L1','PRACSAL', 'DENSITY']
        exchange_pt1 = 'xp1'
        exchange_pt2 = 'xp2'
        stream_id_in,stream_id_out,stream_route_in,stream_route_out,stream_def_in_id,stream_def_out_id = self._setup_streams(exchange_pt1, exchange_pt2, available_fields_in, available_fields_out)

        rdt_in = RecordDictionaryTool(stream_definition_id=stream_def_in_id)
        dt = 20
        rdt_in['time'] = np.arange(dt)
        rdt_in['lat'] = [40.992469] * dt
        rdt_in['lon'] = [-71.727069] * dt
        rdt_in['TEMPWAT_L0'] = self._get_param_vals('TEMPWAT_L0', slice(None), (dt,))
        rdt_in['CONDWAT_L0'] = self._get_param_vals('CONDWAT_L0', slice(None), (dt,))
        rdt_in['PRESWAT_L0'] = self._get_param_vals('PRESWAT_L0', slice(None), (dt,))
        
        msg = rdt_in.to_granule()
        #pid = self.container.spawn_process('transform_stream','ion.processes.data.transforms.transform_prime','TransformPrime',{'process':{'routes':{(stream_id_in, stream_id_out):None},'stream_id':stream_id_out}})
        config = {'process':{'routes':{(stream_id_in, stream_id_out):None},'queue_name':exchange_pt1, 'publish_streams':{str(stream_id_out):stream_id_out}, 'process_type':'stream_process'}}
        pid = self.container.spawn_process('transform_stream','ion.processes.data.transforms.transform_prime','TransformPrime',config)
        rdt_out = self.container.proc_manager.procs[pid]._execute_transform(msg, (stream_id_in,stream_id_out))
        # Need the reload below to wrap the result values in param value objects
        rdt_out = RecordDictionaryTool.load_from_granule(rdt_out.to_granule())
        for k,v in rdt_out.iteritems():
            self.assertEqual(len(v), dt)        
        
        self._validate_transforms(rdt_in, rdt_out)
        self.container.proc_manager.terminate_process(pid)
    def test_derived_data_product(self):
        pdict_id = self.dataset_management.read_parameter_dictionary_by_name('ctd_parsed_param_dict', id_only=True)
        ctd_stream_def_id = self.pubsubcli.create_stream_definition(name='ctd parsed', parameter_dictionary_id=pdict_id)
        self.addCleanup(self.pubsubcli.delete_stream_definition, ctd_stream_def_id)

        tdom, sdom = time_series_domain()

        dp = DataProduct(name='Instrument DP', temporal_domain=tdom.dump(), spatial_domain=sdom.dump())
        dp_id = self.dpsc_cli.create_data_product(dp, stream_definition_id=ctd_stream_def_id)
        self.addCleanup(self.dpsc_cli.force_delete_data_product, dp_id)

        self.dpsc_cli.activate_data_product_persistence(dp_id)
        self.addCleanup(self.dpsc_cli.suspend_data_product_persistence, dp_id)


        dataset_ids, _ = self.rrclient.find_objects(subject=dp_id, predicate=PRED.hasDataset, id_only=True)
        if not dataset_ids:
            raise NotFound("Data Product %s dataset  does not exist" % str(dp_id))
        dataset_id = dataset_ids[0]
        
        # Make the derived data product
        simple_stream_def_id = self.pubsubcli.create_stream_definition(name='TEMPWAT stream def', parameter_dictionary_id=pdict_id, available_fields=['time','temp'])
        tempwat_dp = DataProduct(name='TEMPWAT')
        tempwat_dp_id = self.dpsc_cli.create_data_product(tempwat_dp, stream_definition_id=simple_stream_def_id, parent_data_product_id=dp_id)
        self.addCleanup(self.dpsc_cli.delete_data_product, tempwat_dp_id)
        self.dpsc_cli.activate_data_product_persistence(tempwat_dp_id)
        self.addCleanup(self.dpsc_cli.suspend_data_product_persistence, tempwat_dp_id)
        # Check that the streams associated with the data product are persisted
        stream_ids, _ =  self.rrclient.find_objects(dp_id,PRED.hasStream,RT.Stream,True)
        for stream_id in stream_ids:
            self.assertTrue(self.ingestclient.is_persisted(stream_id))

        stream_id = stream_ids[0]
        route = self.pubsubcli.read_stream_route(stream_id=stream_id)

        rdt = RecordDictionaryTool(stream_definition_id=ctd_stream_def_id)
        rdt['time'] = np.arange(20)
        rdt['temp'] = np.arange(20)
        rdt['pressure'] = np.arange(20)

        publisher = StandaloneStreamPublisher(stream_id,route)
        
        dataset_modified = Event()
        def cb(*args, **kwargs):
            dataset_modified.set()
        es = EventSubscriber(event_type=OT.DatasetModified, callback=cb, origin=dataset_id, auto_delete=True)
        es.start()
        self.addCleanup(es.stop)

        publisher.publish(rdt.to_granule())

        self.assertTrue(dataset_modified.wait(30))

        tempwat_dataset_ids, _ = self.rrclient.find_objects(tempwat_dp_id, PRED.hasDataset, id_only=True)
        tempwat_dataset_id = tempwat_dataset_ids[0]
        granule = self.data_retriever.retrieve(tempwat_dataset_id, delivery_format=simple_stream_def_id)
        rdt = RecordDictionaryTool.load_from_granule(granule)
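        # The derived product sees the data published to its parent; delivery_format
        # restricts the retrieved granule to the child's available_fields ('time' and 'temp')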
        np.testing.assert_array_equal(rdt['time'], np.arange(20))
        self.assertEquals(set(rdt.fields), set(['time','temp']))
 def verify_incoming(self, m,r,s):
     rdt = RecordDictionaryTool.load_from_granule(m)
     for k,v in rdt.iteritems():
         np.testing.assert_array_equal(v, self.rdt[k])
     self.assertEquals(m.data_producer_id, self.data_producer_id)
     self.assertEquals(m.provider_metadata_update, self.provider_metadata_update)
     self.assertNotEqual(m.creation_timestamp, None)
     self.event.set()
    def verifier(msg, route, stream_id):
        for k, v in msg.record_dictionary.iteritems():
            if v is not None:
                self.assertIsInstance(v, np.ndarray)
        rdt = RecordDictionaryTool.load_from_granule(msg)
        for field in rdt.fields:
            self.assertIsInstance(rdt[field], np.ndarray)
        verified.set()
    def test_granule(self):
        
        pdict_id = self.dataset_management.read_parameter_dictionary_by_name('ctd_parsed_param_dict', id_only=True)
        stream_def_id = self.pubsub_management.create_stream_definition('ctd', parameter_dictionary_id=pdict_id, stream_configuration={'reference_designator':"GA03FLMA-RI001-13-CTDMOG999"})
        pdict = DatasetManagementService.get_parameter_dictionary_by_name('ctd_parsed_param_dict')
        self.addCleanup(self.pubsub_management.delete_stream_definition,stream_def_id)

        stream_id, route = self.pubsub_management.create_stream('ctd_stream', 'xp1', stream_definition_id=stream_def_id)
        self.addCleanup(self.pubsub_management.delete_stream,stream_id)
        publisher = StandaloneStreamPublisher(stream_id, route)

        subscriber = StandaloneStreamSubscriber('sub', self.verify_incoming)
        subscriber.start()
        self.addCleanup(subscriber.stop)

        subscription_id = self.pubsub_management.create_subscription('sub', stream_ids=[stream_id])
        self.pubsub_management.activate_subscription(subscription_id)


        rdt = RecordDictionaryTool(stream_definition_id=stream_def_id)
        rdt['time'] = np.arange(10)
        rdt['temp'] = np.random.randn(10) * 10 + 30
        rdt['pressure'] = [20] * 10

        self.assertEquals(set(pdict.keys()), set(rdt.fields))
        self.assertEquals(pdict.temporal_parameter_name, rdt.temporal_parameter)

        self.assertEquals(rdt._stream_config['reference_designator'],"GA03FLMA-RI001-13-CTDMOG999")

        self.rdt = rdt
        self.data_producer_id = 'data_producer'
        self.provider_metadata_update = {1:1}

        publisher.publish(rdt.to_granule(data_producer_id='data_producer', provider_metadata_update={1:1}))

        self.assertTrue(self.event.wait(10))
        
        self.pubsub_management.deactivate_subscription(subscription_id)
        self.pubsub_management.delete_subscription(subscription_id)
        
        rdt = RecordDictionaryTool(stream_definition_id=stream_def_id)
        rdt['time'] = np.array([None,None,None])
        self.assertTrue(rdt['time'] is None)
        
        rdt['time'] = np.array([None, 1, 2])
        self.assertEquals(rdt['time'][0], rdt.fill_value('time'))


        stream_def_obj = self.pubsub_management.read_stream_definition(stream_def_id)
        rdt = RecordDictionaryTool(stream_definition=stream_def_obj)
        rdt['time'] = np.arange(20)
        rdt['temp'] = np.arange(20)


        granule = rdt.to_granule()
        rdt = RecordDictionaryTool.load_from_granule(granule)
        np.testing.assert_array_equal(rdt['time'], np.arange(20))
        np.testing.assert_array_equal(rdt['temp'], np.arange(20))
    def check_rsn_instrument_data_product(self):
        passing = True
        # for RS03AXBS-MJ03A-06-PRESTA301 (PREST-A) there are a few listed data products
        # Parsed, Engineering
        # SFLPRES-0 SFLPRES-1
        # Check for the two data products and make sure they have the proper parameters
        # SFLPRES-0 should 
        data_products, _ = self.RR.find_resources_ext(alt_id_ns='PRE', alt_id='RS03AXBS-MJ03A-06-PRESTA301_SFLPRES_L0_DPID', id_only=True)
        passing &= self.assertTrue(len(data_products)==1)
        if not data_products:
            return passing

        data_product_id = data_products[0]
        
        stream_defs, _ = self.RR.find_objects(data_product_id,PRED.hasStreamDefinition,id_only=False)
        passing &= self.assertTrue(len(stream_defs)==1)
        if not stream_defs:
            return passing

        # Assert that the stream definition has the correct reference designator
        stream_def = stream_defs[0]
        passing &= self.assertEquals(stream_def.stream_configuration['reference_designator'], 'RS03AXBS-MJ03A-06-PRESTA301')

        # Get the pdict and make sure that the parameters corresponding to the available fields 
        # begin with the appropriate data product identifier

        pdict_ids, _ = self.RR.find_objects(stream_def, PRED.hasParameterDictionary, id_only=True)
        passing &= self.assertEquals(len(pdict_ids), 1)
        if not pdict_ids:
            return passing

        pdict_id = pdict_ids[0]
        
        pdict = DatasetManagementService.get_parameter_dictionary(pdict_id)
        available_params = [pdict.get_context(i) for i in pdict.keys() if i in stream_def.available_fields]
        for p in available_params:
            if p.name=='time': # Ignore the domain parameter
                continue
            passing &= self.assertTrue(p.ooi_short_name.startswith('SFLPRES'))
        passing &= self.check_presta_instrument_data_products('RS01SLBS-MJ01A-06-PRESTA101')
        passing &= self.check_vel3d_instrument_data_products( 'RS01SLBS-MJ01A-12-VEL3DB101')
        passing &= self.check_presta_instrument_data_products('RS03AXBS-MJ03A-06-PRESTA301')
        passing &= self.check_vel3d_instrument_data_products( 'RS03AXBS-MJ03A-12-VEL3DB301')
        passing &= self.check_tempsf_instrument_data_product( 'RS03ASHS-MJ03B-07-TMPSFA301')
        passing &= self.check_vel3d_instrument_data_products( 'RS03INT2-MJ03D-12-VEL3DB304')
        passing &= self.check_trhph_instrument_data_products( 'RS03INT1-MJ03C-10-TRHPHA301')

        self.data_product_management.activate_data_product_persistence(data_product_id)
        dataset_id = self.RR2.find_dataset_id_of_data_product_using_has_dataset(data_product_id)
        granule = self.data_retriever.retrieve(dataset_id)
        rdt = RecordDictionaryTool.load_from_granule(granule)
        self.assert_array_almost_equal(rdt['seafloor_pressure'], [10.2504], 4)
        self.assert_array_almost_equal(rdt['absolute_pressure'], [14.8670], 4)
        self.data_product_management.suspend_data_product_persistence(data_product_id) # Should do nothing and not raise anything

        
        return passing
    def validator(msg, route, stream_id):
        rdt = RecordDictionaryTool.load_from_granule(msg)
        if not np.allclose(rdt['TEMPWAT_L1'], np.array([18.])):
            return
        if not np.allclose(rdt['CONDWAT_L1'], np.array([0.5])):
            return
        if not np.allclose(rdt['PRESWAT_L1'], np.array([0.04536611])):
            return
        validation_event.set()
    def test_add_parameter_function(self):
        # req-tag: NEW SA - 31
        # Make a CTDBP Data Product
        data_product_id = self.make_ctd_data_product()
        self.data_product_id = data_product_id
        dataset_id = self.RR2.find_dataset_id_of_data_product_using_has_dataset(data_product_id)
        dataset_monitor = DatasetMonitor(dataset_id)
        self.addCleanup(dataset_monitor.stop)

        # Throw some data in it
        rdt = self.ph.rdt_for_data_product(data_product_id)
        rdt['time'] = np.arange(30)
        rdt['temp'] = np.arange(30)
        rdt['pressure'] = np.arange(30)
        self.ph.publish_rdt_to_data_product(data_product_id, rdt)
        self.assertTrue(dataset_monitor.wait())
        dataset_monitor.event.clear()

        #--------------------------------------------------------------------------------
        # This is what the user defines either via preload or through the UI
        #--------------------------------------------------------------------------------
        # Where the egg is
        egg_url = self.egg_url

        # Make a parameter function
        owner = 'ion_example.add_arrays'
        func = 'add_arrays'
        arglist = ['a', 'b']
        pf = ParameterFunction(name='add_arrays', function_type=PFT.PYTHON, owner=owner, function=func, args=arglist, egg_uri=egg_url)
        pfunc_id = self.dataset_management.create_parameter_function(pf)
        #--------------------------------------------------------------------------------
        self.addCleanup(self.dataset_management.delete_parameter_function, pfunc_id)

        # Make a data process definition
        dpd = DataProcessDefinition(name='add_arrays', description='Sums two arrays')
        dpd_id = self.data_process_management.create_data_process_definition(dpd, pfunc_id)

        # TODO: assert assoc exists
        argmap = {'a':'temp', 'b':'pressure'}
        dp_id = self.data_process_management.create_data_process(dpd_id, [data_product_id], argument_map=argmap, out_param_name='array_sum')

        # Verify that the function worked!
        granule = self.data_retriever.retrieve(dataset_id)
        rdt = RecordDictionaryTool.load_from_granule(granule)
        np.testing.assert_array_equal(rdt['array_sum'], np.arange(0,60,2))
    
        # Verify that we can inspect it as well
        source_code = self.data_process_management.inspect_data_process_definition(dpd_id)
        self.assertEquals(source_code, 'def add_arrays(a, b):\n    return a+b\n')

        url = self.data_process_management.get_data_process_definition_url(dpd_id) 
        self.assertEquals(url, 'http://sddevrepo.oceanobservatories.org/releases/ion_example-0.1-py2.7.egg')

        dpd_ids, _ = self.resource_registry.find_resources(name='dataqc_spiketest', restype=RT.DataProcessDefinition, id_only=True)
        dpd_id = dpd_ids[0]
        url = self.data_process_management.get_data_process_definition_url(dpd_id) 
        self.assertEquals(url, 'https://github.com/ooici/ion-functions/blob/master/ion_functions/qc/qc_functions.py')
    def recv_packet(self, granule, stream_route, stream_id):
        tool = RecordDictionaryTool.load_from_granule(granule)
        msg = ''
        for (k, v) in tool.iteritems():
            msg += '\n\t' + repr(k) + " => " + repr(v)
        if msg:
            log.debug('have granule with payload:' + msg)
        else:
            log.info('have empty granule')
        def validation(msg, route, stream_id):
            rdt = RecordDictionaryTool.load_from_granule(msg)

            np.testing.assert_array_almost_equal(rdt['conductivity_L1'], np.array([42.914]))
            np.testing.assert_array_almost_equal(rdt['temp_L1'], np.array([20.]))
            np.testing.assert_array_almost_equal(rdt['pressure_L1'], np.array([3.068]))
            np.testing.assert_array_almost_equal(rdt['density'], np.array([1021.7144739593881]))
            np.testing.assert_array_almost_equal(rdt['salinity'], np.array([30.935132729668283]))

            validated.set()
    def process(self, dataset_id, start_time=0, end_time=0):
        if not dataset_id:
            raise BadRequest('No dataset id specified.')
        now = time.time()
        start_time = start_time or (now - (3600 * (self.run_interval + 1))
                                    )  # Every N hours with 1 of overlap
        end_time = end_time or now

        qc_params = [i for i in self.qc_params if i in self.qc_suffixes
                     ] or self.qc_suffixes

        self.qc_publisher = EventPublisher(event_type=OT.ParameterQCEvent)
        log.debug('Iterating over the data blocks')

        for st, et in self.chop(int(start_time), int(end_time)):
            log.debug('Chopping %s:%s', st, et)
            log.debug(
                "Retrieving data: data_retriever.retrieve('%s', query={'start_time':%s, 'end_time':%s')",
                dataset_id, st, et)
            try:
                granule = self.data_retriever.retrieve(dataset_id,
                                                       query={
                                                           'start_time': st,
                                                           'end_time': et
                                                       })
            except BadRequest:
                data_products, _ = self.container.resource_registry.find_subjects(
                    object=dataset_id,
                    predicate=PRED.hasDataset,
                    subject_type=RT.DataProduct)
                for data_product in data_products:
                    log.exception('Failed to perform QC Post Processing on %s',
                                  data_product.name)
                    log.error('Calculated Start Time: %s', st)
                    log.error('Calculated End Time:   %s', et)
                raise
            log.debug('Retrieved Data')
            rdt = RecordDictionaryTool.load_from_granule(granule)
            qc_fields = [
                i for i in rdt.fields
                if any([i.endswith(j) for j in qc_params])
            ]
            log.debug('QC Fields: %s', qc_fields)
            for field in qc_fields:
                val = rdt[field]
                if val is None:
                    continue
                if not np.all(val):
                    log.debug('Found QC Alerts')
                    indexes = np.where(val == 0)
                    timestamps = rdt[rdt.temporal_parameter][indexes[0]]
                    self.flag_qc_parameter(dataset_id, field,
                                           timestamps.tolist(), {})
    def test_ccov_visualization(self):
        '''
        Tests Complex Coverage aggregation of array types and proper splitting of
        coverages; also exercises PyDAP access and the visualization service.
        '''
        if not CFG.get_safe('bootstrap.use_pydap',False):
            raise unittest.SkipTest('PyDAP is off (bootstrap.use_pydap)')

        data_product_id, stream_def_id = self.make_array_data_product()

        # Make a granule with an array type, give it a few values
        # Send it to google_dt transform, verify output

        rdt = RecordDictionaryTool(stream_definition_id=stream_def_id)
        rdt['time'] = np.arange(2208988800, 2208988810)
        rdt['temp_sample'] = np.arange(10*4).reshape(10,4)
        rdt['cond_sample'] = np.arange(10*4).reshape(10,4)

        dataset_id = self.RR2.find_dataset_id_of_data_product_using_has_dataset(data_product_id)
        dataset_monitor = DatasetMonitor(dataset_id)
        self.addCleanup(dataset_monitor.stop)
        self.ph.publish_rdt_to_data_product(data_product_id, rdt, connection_id='abc1', connection_index='1')
        self.assertTrue(dataset_monitor.event.wait(10))
        dataset_monitor.event.clear()


        rdt = RecordDictionaryTool(stream_definition_id=stream_def_id)
        rdt['time'] = np.arange(2208988810, 2208988820)
        rdt['temp_sample'] = np.arange(10*4).reshape(10,4)
        rdt['cond_sample'] = np.arange(10*4).reshape(10,4)
        self.ph.publish_rdt_to_data_product(data_product_id, rdt, connection_id='abc2', connection_index='1')
        self.assertTrue(dataset_monitor.event.wait(10))
        dataset_monitor.event.clear()

        qstring = '{"stride_time": 1, "parameters": [], "query_type": "highcharts_data", "start_time": 0, "use_direct_access": 0, "end_time": 19}'
        graph = self.visualization.get_visualization_data(data_product_id, qstring)
        self.assertIn('temp_sample[3]', graph)

        granule = self.data_retriever.retrieve(dataset_id)
        rdt = RecordDictionaryTool.load_from_granule(granule)

        np.testing.assert_array_equal(rdt['temp_sample'][0], np.arange(4))

        pydap_host = CFG.get_safe('server.pydap.host','localhost')
        pydap_port = CFG.get_safe('server.pydap.port',8001)
        url = 'http://%s:%s/%s' %(pydap_host, pydap_port, dataset_id)

        ds = open_url(url)

        temp_sample, time = ds['temp_sample']
        temp_values, dim = temp_sample[0]
        np.testing.assert_array_equal(temp_values, np.array(['0.0,1.0,2.0,3.0']))
    def recv_packet(self, msg, route, stream_id):
        rdt = RecordDictionaryTool.load_from_granule(msg)

        document = {}

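        # dtype.str is e.g. '<f8' for floats or '<i4' for ints; keep only the most
        # recent scalar value of each numeric field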
        for k,v in rdt.iteritems():
            value_array = np.atleast_1d(v[:])
            if 'f' in value_array.dtype.str:
                document[k] = float(value_array[-1])
            elif 'i' in value_array.dtype.str:
                document[k] = int(value_array[-1])

        self.stored_value_manager.stored_value_cas(self.document_key, document)
    def wait_until_we_have_enough_granules(self, dataset_id='', data_size=40):
        '''
        Loops until there is a sufficient amount of data in the dataset
        '''
        done = False
        with gevent.Timeout(40):
            while not done:
                granule = self.data_retriever.retrieve_last_data_points(dataset_id, 1)
                rdt = RecordDictionaryTool.load_from_granule(granule)
                tname = rdt._pdict.temporal_parameter_name
                extents = self.dataset_management.dataset_extents(dataset_id, tname)[0]
                fill_value = rdt._pdict.get_context(tname).fill_value
                if rdt[tname] and rdt[tname][0] != fill_value and extents >= data_size:
                    done = True
                else:
                    gevent.sleep(0.2)
    def write_to_data_product(self, data_product_id):

        dataset_ids, _ = self.resource_registry.find_objects(data_product_id,
                                                             'hasDataset',
                                                             id_only=True)
        dataset_id = dataset_ids.pop()

        stream_ids, _ = self.resource_registry.find_objects(data_product_id,
                                                            'hasStream',
                                                            id_only=True)
        stream_id = stream_ids.pop()
        stream_def_ids, _ = self.resource_registry.find_objects(
            stream_id, 'hasStreamDefinition', id_only=True)
        stream_def_id = stream_def_ids.pop()

        route = self.pubsub_management.read_stream_route(stream_id)

        rdt = RecordDictionaryTool(stream_definition_id=stream_def_id)

        time_param = rdt._pdict.temporal_parameter_name
        if time_param is None:
            print '%s has no temporal parameter' % self.resource_registry.read(
                data_product_id).name
            return
        rdt[time_param] = np.arange(40)

        for field in rdt.fields:
            if field == rdt._pdict.temporal_parameter_name:
                continue
            rdt[field] = self.fill_values(
                rdt._pdict.get_context(field).param_type, 40)

        publisher = StandaloneStreamPublisher(stream_id, route)
        publisher.publish(rdt.to_granule())

        self.wait_until_we_have_enough_granules(dataset_id, 40)

        granule = self.data_retriever.retrieve(dataset_id)
        rdt_out = RecordDictionaryTool.load_from_granule(granule)

        bad = []
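        # Collect any fields whose retrieved values differ from what was published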

        for field in rdt.fields:
            if not np.array_equal(rdt[field], rdt_out[field]):
                print '%s' % field
                print '%s != %s' % (rdt[field], rdt_out[field])
                bad.append(field)

        return bad
    def test_array_visualization(self):
        data_product_id, stream_def_id = self.make_array_data_product()

        # Make a granule with an array type, give it a few values
        # Send it to google_dt transform, verify output

        rdt = RecordDictionaryTool(stream_definition_id=stream_def_id)
        rdt['time'] = np.arange(2208988800, 2208988810)
        rdt['temp_sample'] = np.arange(10*4).reshape(10,4)
        rdt['cond_sample'] = np.arange(10*4).reshape(10,4)

        granule = rdt.to_granule()
        dataset_monitor = DatasetMonitor(self.RR2.find_dataset_id_of_data_product_using_has_dataset(data_product_id))
        self.addCleanup(dataset_monitor.stop)
        self.ph.publish_rdt_to_data_product(data_product_id, rdt)
        dataset_monitor.event.wait(10)

        gdt_pdict_id = self.dataset_management.read_parameter_dictionary_by_name('google_dt',id_only=True)
        gdt_stream_def = self.create_stream_definition('gdt', parameter_dictionary_id=gdt_pdict_id)

        gdt_data_granule = VizTransformGoogleDTAlgorithm.execute(granule, params=gdt_stream_def)

        rdt = RecordDictionaryTool.load_from_granule(gdt_data_granule)
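        # Each data_content row is [time, temp_sample[0..3], temp_offset[0..3],
        # cond_sample[0..3]]; temp_offset appears to be a derived parameter
        # (2x temp_sample in this data)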
        testval = {'data_content': [
            [0.0 , 0.0  , 1.0  , 2.0  , 3.0  , 0.0  , 2.0  , 4.0  , 6.0  , 0.0  , 1.0  , 2.0  , 3.0]   ,
            [1.0 , 4.0  , 5.0  , 6.0  , 7.0  , 8.0  , 10.0 , 12.0 , 14.0 , 4.0  , 5.0  , 6.0  , 7.0]   ,
            [2.0 , 8.0  , 9.0  , 10.0 , 11.0 , 16.0 , 18.0 , 20.0 , 22.0 , 8.0  , 9.0  , 10.0 , 11.0]  ,
            [3.0 , 12.0 , 13.0 , 14.0 , 15.0 , 24.0 , 26.0 , 28.0 , 30.0 , 12.0 , 13.0 , 14.0 , 15.0]  ,
            [4.0 , 16.0 , 17.0 , 18.0 , 19.0 , 32.0 , 34.0 , 36.0 , 38.0 , 16.0 , 17.0 , 18.0 , 19.0]  ,
            [5.0 , 20.0 , 21.0 , 22.0 , 23.0 , 40.0 , 42.0 , 44.0 , 46.0 , 20.0 , 21.0 , 22.0 , 23.0]  ,
            [6.0 , 24.0 , 25.0 , 26.0 , 27.0 , 48.0 , 50.0 , 52.0 , 54.0 , 24.0 , 25.0 , 26.0 , 27.0]  ,
            [7.0 , 28.0 , 29.0 , 30.0 , 31.0 , 56.0 , 58.0 , 60.0 , 62.0 , 28.0 , 29.0 , 30.0 , 31.0]  ,
            [8.0 , 32.0 , 33.0 , 34.0 , 35.0 , 64.0 , 66.0 , 68.0 , 70.0 , 32.0 , 33.0 , 34.0 , 35.0]  ,
            [9.0 , 36.0 , 37.0 , 38.0 , 39.0 , 72.0 , 74.0 , 76.0 , 78.0 , 36.0 , 37.0 , 38.0 , 39.0]] ,
                 'data_description': [('time', 'number', 'time'),
              ('temp_sample[0]', 'number', 'temp_sample[0]', {'precision': '5'}),
              ('temp_sample[1]', 'number', 'temp_sample[1]', {'precision': '5'}),
              ('temp_sample[2]', 'number', 'temp_sample[2]', {'precision': '5'}),
              ('temp_sample[3]', 'number', 'temp_sample[3]', {'precision': '5'}),
              ('temp_offset[0]', 'number', 'temp_offset[0]', {'precision': '5'}),
              ('temp_offset[1]', 'number', 'temp_offset[1]', {'precision': '5'}),
              ('temp_offset[2]', 'number', 'temp_offset[2]', {'precision': '5'}),
              ('temp_offset[3]', 'number', 'temp_offset[3]', {'precision': '5'}),
              ('cond_sample[0]', 'number', 'cond_sample[0]', {'precision': '5'}),
              ('cond_sample[1]', 'number', 'cond_sample[1]', {'precision': '5'}),
              ('cond_sample[2]', 'number', 'cond_sample[2]', {'precision': '5'}),
              ('cond_sample[3]', 'number', 'cond_sample[3]', {'precision': '5'})],
             'viz_product_type': 'google_dt'}
        self.assertEquals(rdt['google_dt_components'][0], testval)
    def test_add_parameter_to_data_product(self):
        #self.preload_ui()
        self.test_add_parameter_function()
        data_product_id = self.data_product_id
        stream_def_id = self.resource_registry.find_objects(data_product_id, PRED.hasStreamDefinition, id_only=True)[0][0]
        pdict_id = self.resource_registry.find_objects(stream_def_id, PRED.hasParameterDictionary, id_only=True)[0][0]
        # Create a new data product that represents the L1 temp from the ctd simulator
        dp = DataProduct(name='CTD Simulator TEMPWAT L1', category=DataProductTypeEnum.DERIVED)
        stream_def_id = self.pubsub_management.create_stream_definition(name='tempwat_l1', parameter_dictionary_id=pdict_id, available_fields=['time','temp'])
        dp_id = self.data_product_management.create_data_product(dp, stream_definition_id=stream_def_id, parent_data_product_id=data_product_id)

        parameter_function = ParameterFunction(name='linear_corr',
                                               function_type=PFT.NUMEXPR,
                                               function='a * x + b',
                                               args=['x','a','b'])
        pf_id = self.dataset_management.create_parameter_function(parameter_function)

        dpd = DataProcessDefinition(name='linear_corr', description='Linear Correction')
        self.data_process_management.create_data_process_definition(dpd, pf_id)

        parameter = ParameterContext(name='temperature_corrected',
                                     parameter_type='function',
                                     parameter_function_id=pf_id,
                                     parameter_function_map={'x':'temp', 'a':1.03, 'b':0.25},
                                     value_encoding='float32',
                                     units='deg_C',
                                     display_name='Temperature Corrected')
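        # With the NUMEXPR function 'a * x + b' and the map above, the derived parameter
        # evaluates to 1.03 * temp + 0.25, which is what the retrieval below asserts.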
        p_id = self.dataset_management.create_parameter(parameter)

        # Add it to the parent (parsed) data product
        self.data_product_management.add_parameter_to_data_product(p_id, data_product_id)
        
        # Then update the child's stream definition to include it
        stream_def = self.pubsub_management.read_stream_definition(stream_def_id)
        stream_def.available_fields.append('temperature_corrected')
        self.resource_registry.update(stream_def)
        

        dataset_id = self.RR2.find_dataset_id_of_data_product_using_has_dataset(data_product_id)

        # For some reason, it takes numerous seconds of yielding with gevent for the coverage to actually save...
        gevent.sleep(10)

        granule = self.data_retriever.retrieve(dataset_id)
        rdt = RecordDictionaryTool.load_from_granule(granule)
        np.testing.assert_array_almost_equal(rdt['temperature_corrected'], np.arange(30,dtype=np.float32) * 1.03 + 0.25, decimal=5)
Example #37
0
    def test_filter(self):
        pdict_id = self.dataset_management.read_parameter_dictionary_by_name('ctd_parsed_param_dict', id_only=True)
        filtered_stream_def_id = self.pubsub_management.create_stream_definition('filtered', parameter_dictionary_id=pdict_id, available_fields=['time', 'temp'])
        self.addCleanup(self.pubsub_management.delete_stream_definition, filtered_stream_def_id)
        rdt = RecordDictionaryTool(stream_definition_id=filtered_stream_def_id)
        self.assertEquals(rdt._available_fields,['time','temp'])
        rdt['time'] = np.arange(20)
        rdt['temp'] = np.arange(20)
        with self.assertRaises(KeyError):
            rdt['pressure'] = np.arange(20)

        granule = rdt.to_granule()
        rdt2 = RecordDictionaryTool.load_from_granule(granule)
        self.assertEquals(rdt._available_fields, rdt2._available_fields)
        self.assertEquals(rdt.fields, rdt2.fields)
        for k,v in rdt.iteritems():
            self.assertTrue(np.array_equal(rdt[k], rdt2[k]))
    def test_coefficient_compatibility(self):
        data_product_id = self.create_data_product(name='Calibration Coefficient Test Data product', stream_def_id=self.stream_def_id)

        self.data_product_management.activate_data_product_persistence(data_product_id)
        self.addCleanup(self.data_product_management.suspend_data_product_persistence, data_product_id)

        rdt = RecordDictionaryTool(stream_definition_id=self.stream_def_id)
        rdt['time'] = np.arange(10)
        rdt['temp'] = [10] * 10
        rdt['cc_coefficient'] = [2] * 10
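        # 'offset' is a derived parameter of this stream definition; with temp=10 and
        # cc_coefficient=2 the retrieval below expects 12, so the parameter function is
        # presumably the additive calibration temp + cc_coefficient.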
        dataset_id = self.RR2.find_dataset_id_of_data_product_using_has_dataset(data_product_id)
        dataset_monitor = DatasetMonitor(dataset_id)
        self.addCleanup(dataset_monitor.stop)
        self.ph.publish_rdt_to_data_product(data_product_id, rdt)
        self.assertTrue(dataset_monitor.wait())

        rdt2 = RecordDictionaryTool.load_from_granule(self.data_retriever.retrieve(dataset_id))
        np.testing.assert_array_equal(rdt2['offset'],[12]*10)
Example #39
0
    def rdt_to_granule(self, context, value_array, comp_val=None):

        pdict = ParameterDictionary()
        pdict.add_context(context)

        rdt = RecordDictionaryTool(param_dictionary=pdict)
        rdt['test'] = value_array

        granule = rdt.to_granule()
        rdt2 = RecordDictionaryTool.load_from_granule(granule)

        testval = comp_val if comp_val is not None else value_array
        actual = rdt2['test']

        if isinstance(testval, basestring):
            self.assertEquals(testval, actual)
        else:
            np.testing.assert_array_equal(testval, actual)
Example #41
0
    def write_to_data_product(self,data_product_id):

        dataset_ids, _ = self.resource_registry.find_objects(data_product_id, 'hasDataset', id_only=True)
        dataset_id = dataset_ids.pop()

        stream_ids , _ = self.resource_registry.find_objects(data_product_id, 'hasStream', id_only=True)
        stream_id = stream_ids.pop()
        stream_def_ids, _ = self.resource_registry.find_objects(stream_id, 'hasStreamDefinition', id_only=True)
        stream_def_id = stream_def_ids.pop()

        route = self.pubsub_management.read_stream_route(stream_id)

        rdt = RecordDictionaryTool(stream_definition_id=stream_def_id)

        time_param = rdt._pdict.temporal_parameter_name
        if time_param is None:
            print '%s has no temporal parameter' % self.resource_registry.read(data_product_id).name 
            return
        rdt[time_param] = np.arange(40)


        for field in rdt.fields:
            if field == rdt._pdict.temporal_parameter_name:
                continue
            rdt[field] = self.fill_values(rdt._pdict.get_context(field).param_type,40)
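        # fill_values (a helper on this test class, assumed to map each parameter type to
        # 40 sample values) populates every non-temporal field.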

        publisher = StandaloneStreamPublisher(stream_id, route)
        publisher.publish(rdt.to_granule())

        self.wait_until_we_have_enough_granules(dataset_id,40)


        granule = self.data_retriever.retrieve(dataset_id)
        rdt_out = RecordDictionaryTool.load_from_granule(granule)

        bad = []

        for field in rdt.fields:
            if not np.array_equal(rdt[field], rdt_out[field]):
                print '%s' % field
                print '%s != %s' % (rdt[field], rdt_out[field])
                bad.append(field)

        return bad
Example #42
0
    def check_presta_instrument_data_products(self, reference_designator):
        # Check the parsed data product; make sure it's got everything it needs and can be published, persisted, etc.

        # Absolute Pressure (SFLPRES_L0) is what comes off the instrument; SFLPRES_L1 is a pfunc
        # Let's go ahead and publish some fake data!!!
        # According to https://alfresco.oceanobservatories.org/alfresco/d/d/workspace/SpacesStore/63e16865-9d9e-4b11-b0b3-d5658faa5080/1341-00230_Data_Product_Spec_SFLPRES_OOI.pdf
        # Appendix A. Example 1.
        # p_psia_tide = 14.8670
        # the tide should be 10.2504
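        # A worked sketch of the expected L1 value, assuming the DPS conversion is a
        # plain psia -> dbar scaling (1 psi ~= 0.689475728 dbar):
        #   14.8670 psia * 0.689475728 ~= 10.2504 dbar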
        passing = True

        info_list = []
        passing &= self.check_data_product_reference(reference_designator,
                                                     info_list)
        if not passing:
            return passing
        data_product_id, stream_def_id, dataset_id = info_list.pop()

        now = time.time()
        ntp_now = now + 2208988800.
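        # 2208988800 is the offset in seconds between the NTP epoch (1900-01-01) and the
        # Unix epoch (1970-01-01), so ntp_now is the current time on the NTP timescale.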

        rdt = RecordDictionaryTool(stream_definition_id=stream_def_id)
        rdt['time'] = [ntp_now]
        rdt['absolute_pressure'] = [14.8670]
        passing &= self.assert_array_almost_equal(rdt['seafloor_pressure'],
                                                  [10.2504], 4)
        dataset_monitor = DatasetMonitor(dataset_id)
        self.addCleanup(dataset_monitor.stop)

        ParameterHelper.publish_rdt_to_data_product(data_product_id, rdt)
        self.assertTrue(
            dataset_monitor.wait())  # Bumped to 20 to keep buildbot happy
        if not passing: return passing

        granule = self.data_retriever.retrieve(dataset_id)

        rdt = RecordDictionaryTool.load_from_granule(granule)
        passing &= self.assert_array_almost_equal(rdt['time'], [ntp_now])
        passing &= self.assert_array_almost_equal(rdt['seafloor_pressure'],
                                                  [10.2504], 4)
        passing &= self.assert_array_almost_equal(rdt['absolute_pressure'],
                                                  [14.8670], 4)

        return passing
Example #43
0
    def wait_until_we_have_enough_granules(self, dataset_id='', data_size=40):
        '''
        Loops until there is a sufficient amount of data in the dataset
        '''
        done = False
        with gevent.Timeout(40):
            while not done:
                granule = self.data_retriever.retrieve_last_data_points(dataset_id, 1)
                rdt = RecordDictionaryTool.load_from_granule(granule)
                tname = rdt._pdict.temporal_parameter_name
                extents = self.dataset_management.dataset_extents(dataset_id, tname)[0]
                fill_value = rdt._pdict.get_context(tname).fill_value
                if rdt[tname] and rdt[tname][0] != fill_value and extents >= data_size:
                    done = True
                else:
                    gevent.sleep(0.2)
Example #44
0
    def process(self, dataset_id, start_time=0, end_time=0):
        if not dataset_id:
            raise BadRequest('No dataset id specified.')
        now = time.time()
        start_time = start_time or (now - (3600 * (self.run_interval + 1))
                                    )  # Every N hours with 1 of overlap
        end_time = end_time or now

        qc_params = [i for i in self.qc_params if i in self.qc_suffixes
                     ] or self.qc_suffixes
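        # qc_params now holds the configured QC suffixes (or every known suffix when none
        # were configured); fields ending in one of these are checked below.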

        self.qc_publisher = EventPublisher(event_type=OT.ParameterQCEvent)
        log.debug('Iterating over the data blocks')

        for st, et in self.chop(int(start_time), int(end_time)):
            log.debug('Chopping %s:%s', st, et)
            log.debug(
                "Retrieving data: data_retriever.retrieve('%s', query={'start_time': %s, 'end_time': %s})",
                dataset_id, st, et)
            granule = self.data_retriever.retrieve(dataset_id,
                                                   query={
                                                       'start_time': st,
                                                       'end_time': et
                                                   })
            log.debug('Retrieved Data')
            rdt = RecordDictionaryTool.load_from_granule(granule)
            qc_fields = [
                i for i in rdt.fields
                if any([i.endswith(j) for j in qc_params])
            ]
            log.debug('QC Fields: %s', qc_fields)
            for field in qc_fields:
                val = rdt[field]
                if val is None:
                    continue
                if not np.all(val):
                    log.debug('Found QC Alerts')
                    indexes = np.where(val == 0)
                    timestamps = rdt[rdt.temporal_parameter][indexes[0]]
                    self.flag_qc_parameter(dataset_id, field,
                                           timestamps.tolist(), {})
Example #45
0
    def rdt_to_granule(self, context, value_array, comp_val=None):
        time = ParameterContext(name='time', param_type=QuantityType(value_encoding=np.float64))
        
        pdict = ParameterDictionary()
        pdict.add_context(time, is_temporal=True)
        pdict.add_context(context)

        rdt = RecordDictionaryTool(param_dictionary=pdict)
        rdt['time'] = np.arange(len(value_array))
        rdt['test'] = value_array

        granule = rdt.to_granule()
        rdt2 = RecordDictionaryTool.load_from_granule(granule)

        testval = comp_val if comp_val is not None else value_array
        actual = rdt2['test']

        if isinstance(testval, basestring):
            self.assertEquals(testval, actual)
        else:
            np.testing.assert_array_equal(testval, actual)
Example #47
0
    def check_tempsf_instrument_data_product(self, reference_designator):
        passing = True
        info_list = []
        passing &= self.check_data_product_reference(reference_designator,
                                                     info_list)
        if not passing: return passing
        data_product_id, stream_def_id, dataset_id = info_list.pop()

        now = time.time()
        ntp_now = now + 2208988800

        rdt = RecordDictionaryTool(stream_definition_id=stream_def_id)
        rdt['time'] = [ntp_now]
        rdt['temperature'] = [[
            25.3884, 26.9384, 24.3394, 23.3401, 22.9832, 29.4434, 26.9873,
            15.2883, 16.3374, 14.5883, 15.7253, 18.4383, 15.3488, 17.2993,
            10.2111, 11.5993, 10.9345, 9.4444, 9.9876, 10.9834, 11.0098,
            5.3456, 4.2994, 4.3009
        ]]

        dataset_monitor = DatasetMonitor(dataset_id)
        self.addCleanup(dataset_monitor.stop)
        ParameterHelper.publish_rdt_to_data_product(data_product_id, rdt)
        passing &= self.assertTrue(dataset_monitor.wait())
        if not passing: return passing

        granule = self.data_retriever.retrieve(dataset_id)
        rdt = RecordDictionaryTool.load_from_granule(granule)
        passing &= self.assert_array_almost_equal(rdt['time'], [ntp_now])
        passing &= self.assert_array_almost_equal(rdt['temperature'], [[
            25.3884, 26.9384, 24.3394, 23.3401, 22.9832, 29.4434, 26.9873,
            15.2883, 16.3374, 14.5883, 15.7253, 18.4383, 15.3488, 17.2993,
            10.2111, 11.5993, 10.9345, 9.4444, 9.9876, 10.9834, 11.0098,
            5.3456, 4.2994, 4.3009
        ]])
        return passing
Example #48
0
    def test_add_parameter_to_data_product(self):
        #self.preload_ui()
        self.test_add_parameter_function()
        data_product_id = self.data_product_id
        stream_def_id = self.resource_registry.find_objects(
            data_product_id, PRED.hasStreamDefinition, id_only=True)[0][0]
        pdict_id = self.resource_registry.find_objects(
            stream_def_id, PRED.hasParameterDictionary, id_only=True)[0][0]
        # Create a new data product that represents the L1 temp from the CTD simulator
        dp = DataProduct(name='CTD Simulator TEMPWAT L1',
                         category=DataProductTypeEnum.DERIVED)
        stream_def_id = self.pubsub_management.create_stream_definition(
            name='tempwat_l1',
            parameter_dictionary_id=pdict_id,
            available_fields=['time', 'temp'])
        dp_id = self.data_product_management.create_data_product(
            dp,
            stream_definition_id=stream_def_id,
            parent_data_product_id=data_product_id)

        parameter_function = ParameterFunction(name='linear_corr',
                                               function_type=PFT.NUMEXPR,
                                               function='a * x + b',
                                               args=['x', 'a', 'b'])
        pf_id = self.dataset_management.create_parameter_function(
            parameter_function)

        dpd = DataProcessDefinition(name='linear_corr',
                                    description='Linear Correction')
        self.data_process_management.create_data_process_definition(dpd, pf_id)

        parameter = ParameterContext(name='temperature_corrected',
                                     parameter_type='function',
                                     parameter_function_id=pf_id,
                                     parameter_function_map={
                                         'x': 'temp',
                                         'a': 1.03,
                                         'b': 0.25
                                     },
                                     value_encoding='float32',
                                     units='deg_C',
                                     display_name='Temperature Corrected')
        p_id = self.dataset_management.create_parameter(parameter)

        # Add it to the parent (parsed) data product
        self.data_product_management.add_parameter_to_data_product(
            p_id, data_product_id)

        # Then update the child's stream definition to include it
        stream_def = self.pubsub_management.read_stream_definition(
            stream_def_id)
        stream_def.available_fields.append('temperature_corrected')
        self.resource_registry.update(stream_def)

        dataset_id = self.RR2.find_dataset_id_of_data_product_using_has_dataset(
            data_product_id)

        # For some reason, it takes numerous seconds of yielding with gevent for the coverage to actually save...
        gevent.sleep(10)

        granule = self.data_retriever.retrieve(dataset_id)
        rdt = RecordDictionaryTool.load_from_granule(granule)
        np.testing.assert_array_almost_equal(
            rdt['temperature_corrected'],
            np.arange(30, dtype=np.float32) * 1.03 + 0.25,
            decimal=5)
Example #49
0
    def test_add_parameter_function(self):
        # req-tag: NEW SA - 31
        # Make a CTDBP Data Product
        data_product_id = self.make_ctd_data_product()
        self.data_product_id = data_product_id
        dataset_id = self.RR2.find_dataset_id_of_data_product_using_has_dataset(
            data_product_id)
        dataset_monitor = DatasetMonitor(dataset_id)
        self.addCleanup(dataset_monitor.stop)

        # Throw some data in it
        rdt = self.ph.rdt_for_data_product(data_product_id)
        rdt['time'] = np.arange(30)
        rdt['temp'] = np.arange(30)
        rdt['pressure'] = np.arange(30)
        self.ph.publish_rdt_to_data_product(data_product_id, rdt)
        self.assertTrue(dataset_monitor.wait())
        dataset_monitor.event.clear()

        #--------------------------------------------------------------------------------
        # This is what the user defines either via preload or through the UI
        #--------------------------------------------------------------------------------
        # Where the egg is
        egg_url = self.egg_url

        # Make a parameter function
        owner = 'ion_example.add_arrays'
        func = 'add_arrays'
        arglist = ['a', 'b']
        pf = ParameterFunction(name='add_arrays',
                               function_type=PFT.PYTHON,
                               owner=owner,
                               function=func,
                               args=arglist,
                               egg_uri=egg_url)
        pfunc_id = self.dataset_management.create_parameter_function(pf)
        #--------------------------------------------------------------------------------
        self.addCleanup(self.dataset_management.delete_parameter_function,
                        pfunc_id)

        # Make a data process definition
        dpd = DataProcessDefinition(name='add_arrays',
                                    description='Sums two arrays')
        dpd_id = self.data_process_management.create_data_process_definition(
            dpd, pfunc_id)

        # TODO: assert assoc exists
        argmap = {'a': 'temp', 'b': 'pressure'}
        dp_id = self.data_process_management.create_data_process(
            dpd_id, [data_product_id],
            argument_map=argmap,
            out_param_name='array_sum')

        # Verify that the function worked!
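        # add_arrays sums its two inputs (see the inspected source below); with a='temp'
        # and b='pressure' both equal to np.arange(30), 'array_sum' should be np.arange(0, 60, 2).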
        granule = self.data_retriever.retrieve(dataset_id)
        rdt = RecordDictionaryTool.load_from_granule(granule)
        np.testing.assert_array_equal(rdt['array_sum'], np.arange(0, 60, 2))

        # Verify that we can inspect it as well
        source_code = self.data_process_management.inspect_data_process_definition(
            dpd_id)
        self.assertEquals(source_code,
                          'def add_arrays(a, b):\n    return a+b\n')

        url = self.data_process_management.get_data_process_definition_url(
            dpd_id)
        self.assertEquals(
            url,
            'http://sddevrepo.oceanobservatories.org/releases/ion_example-0.1-py2.7.egg'
        )

        dpd_ids, _ = self.resource_registry.find_resources(
            name='dataqc_spiketest',
            restype=RT.DataProcessDefinition,
            id_only=True)
        dpd_id = dpd_ids[0]
        url = self.data_process_management.get_data_process_definition_url(
            dpd_id)
        self.assertEquals(
            url,
            'https://github.com/ooici/ion-functions/blob/master/ion_functions/qc/qc_functions.py'
        )
Example #50
0
    def test_granule(self):

        pdict_id = self.dataset_management.read_parameter_dictionary_by_name(
            'ctd_parsed_param_dict', id_only=True)
        stream_def_id = self.pubsub_management.create_stream_definition(
            'ctd', parameter_dictionary_id=pdict_id)
        pdict = DatasetManagementService.get_parameter_dictionary_by_name(
            'ctd_parsed_param_dict')
        self.addCleanup(self.pubsub_management.delete_stream_definition,
                        stream_def_id)

        stream_id, route = self.pubsub_management.create_stream(
            'ctd_stream', 'xp1', stream_definition_id=stream_def_id)
        self.addCleanup(self.pubsub_management.delete_stream, stream_id)
        self.xps.append('xp1')
        publisher = StandaloneStreamPublisher(stream_id, route)

        subscriber = StandaloneStreamSubscriber('sub', self.verify_incoming)
        subscriber.start()

        subscription_id = self.pubsub_management.create_subscription(
            'sub', stream_ids=[stream_id])
        self.xns.append('sub')
        self.pubsub_management.activate_subscription(subscription_id)

        rdt = RecordDictionaryTool(stream_definition_id=stream_def_id)
        rdt['time'] = np.arange(10)
        rdt['temp'] = np.random.randn(10) * 10 + 30
        rdt['pressure'] = [20] * 10

        self.assertEquals(set(pdict.keys()), set(rdt.fields))
        self.assertEquals(pdict.temporal_parameter_name,
                          rdt.temporal_parameter)

        self.rdt = rdt
        self.data_producer_id = 'data_producer'
        self.provider_metadata_update = {1: 1}

        publisher.publish(
            rdt.to_granule(data_producer_id='data_producer',
                           provider_metadata_update={1: 1}))

        self.assertTrue(self.event.wait(10))

        self.pubsub_management.deactivate_subscription(subscription_id)
        self.pubsub_management.delete_subscription(subscription_id)

        filtered_stream_def_id = self.pubsub_management.create_stream_definition(
            'filtered',
            parameter_dictionary_id=pdict_id,
            available_fields=['time', 'temp'])
        self.addCleanup(self.pubsub_management.delete_stream_definition,
                        filtered_stream_def_id)
        rdt = RecordDictionaryTool(stream_definition_id=filtered_stream_def_id)
        self.assertEquals(rdt._available_fields, ['time', 'temp'])
        rdt['time'] = np.arange(20)
        rdt['temp'] = np.arange(20)
        with self.assertRaises(KeyError):
            rdt['pressure'] = np.arange(20)

        granule = rdt.to_granule()
        rdt2 = RecordDictionaryTool.load_from_granule(granule)
        self.assertEquals(rdt._available_fields, rdt2._available_fields)
        self.assertEquals(rdt.fields, rdt2.fields)
        for k, v in rdt.iteritems():
            self.assertTrue(np.array_equal(rdt[k], rdt2[k]))

        rdt = RecordDictionaryTool(stream_definition_id=stream_def_id)
        rdt['time'] = np.array([None, None, None])
        self.assertTrue(rdt['time'] is None)

        rdt['time'] = np.array([None, 1, 2])
        self.assertEquals(rdt['time'][0], rdt.fill_value('time'))
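        # An assignment that is entirely None is dropped (the field reads back as None),
        # while individual Nones within a value are replaced by the parameter's fill value.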
    def validator(msg, route, stream_id):
        rdt = RecordDictionaryTool.load_from_granule(msg)
        if not np.allclose(rdt['rho'], np.array([1001.0055034])):
            return
        validation_event.set()
Example #52
0
    def check_vel3d_instrument_data_products(self, reference_designator):
        passing = True
        info_list = []
        passing &= self.check_data_product_reference(reference_designator,
                                                     info_list)
        if not passing:
            return passing
        data_product_id, stream_def_id, dataset_id = info_list.pop()

        pdict = self.RR2.find_parameter_dictionary_of_stream_definition_using_has_parameter_dictionary(
            stream_def_id)
        self.assertEquals(pdict.name, 'vel3d_b_sample')

        rdt = RecordDictionaryTool(stream_definition_id=stream_def_id)
        lat = 14.6846
        lon = -51.044
        ts = np.array([
            3319563600, 3319567200, 3319570800, 3319574400, 3319578000,
            3319581600, 3319585200, 3319588800, 3319592400, 3319596000
        ],
                      dtype=np.float)

        ve = np.array([-3.2, 0.1, 0., 2.3, -0.1, 5.6, 5.1, 5.8, 8.8, 10.3])

        vn = np.array([18.2, 9.9, 12., 6.6, 7.4, 3.4, -2.6, 0.2, -1.5, 4.1])
        vu = np.array([-1.1, -0.6, -1.4, -2, -1.7, -2, 1.3, -1.6, -1.1, -4.5])
        ve_expected = np.array([
            -0.085136, -0.028752, -0.036007, 0.002136, -0.023158, 0.043218,
            0.056451, 0.054727, 0.088446, 0.085952
        ])
        vn_expected = np.array([
            0.164012, 0.094738, 0.114471, 0.06986, 0.07029, 0.049237,
            -0.009499, 0.019311, 0.012096, 0.070017
        ])
        vu_expected = np.array([
            -0.011, -0.006, -0.014, -0.02, -0.017, -0.02, 0.013, -0.016,
            -0.011, -0.045
        ])
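        # The expected values are presumably the raw turbulent velocities rotated through
        # the magnetic declination at this lat/lon and scaled from cm/s to m/s; the actual
        # transform is defined by the vel3d_b parameter functions, not in this test.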

        rdt['time'] = ts
        rdt['lat'] = [lat] * 10
        rdt['lon'] = [lon] * 10
        rdt['turbulent_velocity_east'] = ve
        rdt['turbulent_velocity_north'] = vn
        rdt['turbulent_velocity_up'] = vu

        passing &= self.assert_array_almost_equal(
            rdt['eastward_turbulent_velocity'], ve_expected)
        passing &= self.assert_array_almost_equal(
            rdt['northward_turbulent_velocity'], vn_expected)
        passing &= self.assert_array_almost_equal(
            rdt['upward_turbulent_velocity'], vu_expected)

        dataset_monitor = DatasetMonitor(dataset_id)
        self.addCleanup(dataset_monitor.stop)
        ParameterHelper.publish_rdt_to_data_product(data_product_id, rdt)
        passing &= self.assertTrue(dataset_monitor.wait())
        if not passing: return passing

        granule = self.data_retriever.retrieve(dataset_id)
        rdt = RecordDictionaryTool.load_from_granule(granule)
        passing &= self.assert_array_almost_equal(
            rdt['eastward_turbulent_velocity'], ve_expected)
        passing &= self.assert_array_almost_equal(
            rdt['northward_turbulent_velocity'], vn_expected)
        passing &= self.assert_array_almost_equal(
            rdt['upward_turbulent_velocity'], vu_expected)
        return passing
Example #53
0
    def sync_rdt_with_granule(self, granule):
        rdt = RecordDictionaryTool.load_from_granule(granule)
        self.rdt = rdt
        return rdt
Example #54
0
    def check_rsn_instrument_data_product(self):
        passing = True
        # for RS03AXBS-MJ03A-06-PRESTA301 (PREST-A) there are a few listed data products
        # Parsed, Engineering
        # SFLPRES-0 SFLPRES-1
        # Check for the two data products and make sure they have the proper parameters
        # SFLPRES-0 should
        data_products, _ = self.RR.find_resources_ext(
            alt_id_ns='PRE',
            alt_id='RS03AXBS-MJ03A-06-PRESTA301_SFLPRES_L0_DPID',
            id_only=True)
        passing &= self.assertTrue(len(data_products) == 1)
        if not data_products:
            return passing

        data_product_id = data_products[0]

        stream_defs, _ = self.RR.find_objects(data_product_id,
                                              PRED.hasStreamDefinition,
                                              id_only=False)
        passing &= self.assertTrue(len(stream_defs) == 1)
        if not stream_defs:
            return passing

        # Assert that the stream definition has the correct reference designator
        stream_def = stream_defs[0]
        passing &= self.assertEquals(
            stream_def.stream_configuration['reference_designator'],
            'RS03AXBS-MJ03A-06-PRESTA301')

        # Get the pdict and make sure that the parameters corresponding to the available fields
        # begin with the appropriate data product identifier

        pdict_ids, _ = self.RR.find_objects(stream_def,
                                            PRED.hasParameterDictionary,
                                            id_only=True)
        passing &= self.assertEquals(len(pdict_ids), 1)
        if not pdict_ids:
            return passing

        pdict_id = pdict_ids[0]

        pdict = DatasetManagementService.get_parameter_dictionary(pdict_id)
        available_params = [
            pdict.get_context(i) for i in pdict.keys()
            if i in stream_def.available_fields
        ]
        for p in available_params:
            if p.name == 'time':  # Ignore the domain parameter
                continue
            passing &= self.assertTrue(p.ooi_short_name.startswith('SFLPRES'))
        passing &= self.check_presta_instrument_data_products(
            'RS01SLBS-MJ01A-06-PRESTA101')
        passing &= self.check_vel3d_instrument_data_products(
            'RS01SLBS-MJ01A-12-VEL3DB101')
        passing &= self.check_presta_instrument_data_products(
            'RS03AXBS-MJ03A-06-PRESTA301')
        passing &= self.check_vel3d_instrument_data_products(
            'RS03AXBS-MJ03A-12-VEL3DB301')
        passing &= self.check_tempsf_instrument_data_product(
            'RS03ASHS-MJ03B-07-TMPSFA301')
        passing &= self.check_vel3d_instrument_data_products(
            'RS03INT2-MJ03D-12-VEL3DB304')
        passing &= self.check_trhph_instrument_data_products(
            'RS03INT1-MJ03C-10-TRHPHA301')

        self.data_product_management.activate_data_product_persistence(
            data_product_id)
        dataset_id = self.RR2.find_dataset_id_of_data_product_using_has_dataset(
            data_product_id)
        granule = self.data_retriever.retrieve(dataset_id)
        rdt = RecordDictionaryTool.load_from_granule(granule)
        self.assert_array_almost_equal(rdt['seafloor_pressure'], [10.2504], 4)
        self.assert_array_almost_equal(rdt['absolute_pressure'], [14.8670], 4)
        self.data_product_management.suspend_data_product_persistence(
            data_product_id)  # Should do nothing and not raise anything

        return passing
Example #55
0
    def test_activate_suspend_data_product(self):

        #------------------------------------------------------------------------------------------------
        # create a stream definition for the data from the ctd simulator
        #------------------------------------------------------------------------------------------------
        pdict_id = self.dataset_management.read_parameter_dictionary_by_name(
            'ctd_parsed_param_dict', id_only=True)
        ctd_stream_def_id = self.pubsubcli.create_stream_definition(
            name='Simulated CTD data', parameter_dictionary_id=pdict_id)
        log.debug("Created stream def id %s" % ctd_stream_def_id)

        #------------------------------------------------------------------------------------------------
        # test creating a new data product; the stream definition created above is attached at creation time
        #------------------------------------------------------------------------------------------------

        dp_obj = IonObject(RT.DataProduct,
                           name='DP1',
                           description='some new dp')

        log.debug("Created an IonObject for a data product: %s" % dp_obj)

        #------------------------------------------------------------------------------------------------
        # Create the data product, passing in the stream definition created above
        #------------------------------------------------------------------------------------------------

        dp_id = self.dpsc_cli.create_data_product(
            data_product=dp_obj, stream_definition_id=ctd_stream_def_id)

        #------------------------------------------------------------------------------------------------
        # Subscribe to persist events
        #------------------------------------------------------------------------------------------------
        queue = gevent.queue.Queue()

        def info_event_received(message, headers):
            queue.put(message)

        es = EventSubscriber(event_type=OT.InformationContentStatusEvent,
                             callback=info_event_received,
                             origin=dp_id,
                             auto_delete=True)
        es.start()
        self.addCleanup(es.stop)

        #------------------------------------------------------------------------------------------------
        # test activate and suspend data product persistence
        #------------------------------------------------------------------------------------------------
        self.dpsc_cli.activate_data_product_persistence(dp_id)

        dp_obj = self.dpsc_cli.read_data_product(dp_id)
        self.assertIsNotNone(dp_obj)

        dataset_ids, _ = self.rrclient.find_objects(subject=dp_id,
                                                    predicate=PRED.hasDataset,
                                                    id_only=True)
        if not dataset_ids:
            raise NotFound("Data Product %s dataset  does not exist" %
                           str(dp_id))
        dataset_id = dataset_ids[0]

        # Check that the streams associated with the data product are persisted with
        stream_ids, _ = self.rrclient.find_objects(dp_id, PRED.hasStream,
                                                   RT.Stream, True)
        for stream_id in stream_ids:
            self.assertTrue(self.ingestclient.is_persisted(stream_id))

        stream_id = stream_ids[0]
        route = self.pubsubcli.read_stream_route(stream_id=stream_id)

        rdt = RecordDictionaryTool(stream_definition_id=ctd_stream_def_id)
        rdt['time'] = np.arange(20)
        rdt['temp'] = np.arange(20)

        publisher = StandaloneStreamPublisher(stream_id, route)

        dataset_modified = Event()

        def cb(*args, **kwargs):
            dataset_modified.set()

        es = EventSubscriber(event_type=OT.DatasetModified,
                             callback=cb,
                             origin=dataset_id,
                             auto_delete=True)
        es.start()
        self.addCleanup(es.stop)

        publisher.publish(rdt.to_granule())

        self.assertTrue(dataset_modified.wait(30))
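        # Publication is asynchronous; waiting on the DatasetModified event above ensures
        # the data was ingested before we retrieve it.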

        #--------------------------------------------------------------------------------
        # Now get the data in one chunk using an RPC call to retrieve
        #--------------------------------------------------------------------------------

        replay_data = self.data_retriever.retrieve(dataset_ids[0])
        self.assertIsInstance(replay_data, Granule)

        log.debug(
            "The data retriever was able to replay the dataset that was attached to the data product "
            "we wanted to be persisted. The data product was therefore indeed persisted; otherwise "
            "we could not have retrieved its dataset using the data retriever. This demonstrates "
            "that L4-CI-SA-RQ-267 is satisfied: 'Data product management shall persist data products'"
        )

        data_product_object = self.rrclient.read(dp_id)
        self.assertEquals(data_product_object.name, 'DP1')
        self.assertEquals(data_product_object.description, 'some new dp')

        log.debug(
            "Towards L4-CI-SA-RQ-308: 'Data product management shall persist data product metadata'. "
            "The attributes passed at creation, name='%s', description='%s', match those of the object in the "
            "resource registry, name='%s', desc='%s'" %
            (dp_obj.name, dp_obj.description, data_product_object.name,
             data_product_object.description))

        #------------------------------------------------------------------------------------------------
        # test suspend data product persistence
        #------------------------------------------------------------------------------------------------
        self.dpsc_cli.suspend_data_product_persistence(dp_id)

        dataset_modified.clear()

        rdt['time'] = np.arange(20, 40)

        publisher.publish(rdt.to_granule())
        self.assertFalse(dataset_modified.wait(2))

        self.dpsc_cli.activate_data_product_persistence(dp_id)
        dataset_modified.clear()

        publisher.publish(rdt.to_granule())
        self.assertTrue(dataset_modified.wait(30))

        granule = self.data_retriever.retrieve(dataset_id)
        rdt = RecordDictionaryTool.load_from_granule(granule)
        np.testing.assert_array_almost_equal(rdt['time'], np.arange(40))
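        # Forty points total: the granule published while persistence was suspended was
        # dropped; only the republished batch after reactivation made it into the dataset.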

        dataset_ids, _ = self.rrclient.find_objects(dp_id,
                                                    PRED.hasDataset,
                                                    id_only=True)
        self.assertEquals(len(dataset_ids), 1)

        self.dpsc_cli.suspend_data_product_persistence(dp_id)
        self.dpsc_cli.force_delete_data_product(dp_id)
        # now try to get the deleted dp object

        with self.assertRaises(NotFound):
            dp_obj = self.rrclient.read(dp_id)

        info_event_counter = 0
        runtime = 0
        starttime = time.time()
        caught_events = []

        # Check that the four InformationContentStatusEvents were received
        while info_event_counter < 4 and runtime < 60:
            a = queue.get(timeout=60)
            caught_events.append(a)
            info_event_counter += 1
            runtime = time.time() - starttime

        self.assertEquals(info_event_counter, 4)
    def test_single_device_single_site(self):
        from interface.objects import DataProductTypeEnum

        starting = str(calendar.timegm(datetime(2014, 1, 1, 0).timetuple()))
        site_1_id, device_1_id, dataset_1_id, deployment_1_id, param_dict_a, data_product_1_id = self.create_device_site_deployment(
            dep_name="Site 1 - Device 1", starting=starting)
        site = self.resource_registry.read(site_1_id)

        # Create SDPs
        # This logic is also in preload, but testing preload is painful.
        # Testing it manually here for now.
        for i, scfg in enumerate(site.stream_configurations):
            pdict = self.container.resource_registry.find_resources(
                name=scfg.parameter_dictionary_name,
                restype=RT.ParameterDictionary,
                id_only=False)[0][0]
            # Clone/Create the new ParameterDictionary
            del pdict._id
            del pdict._rev
            sdp_pdict_id, _ = self.container.resource_registry.create(pdict)
            stream_def_id = self.create_stream_definition(
                name='CTD 1 - SDP', parameter_dictionary_id=sdp_pdict_id)
            sdp_id = self.create_data_product(name="SDP_%d" % i,
                                              stream_def_id=stream_def_id,
                                              stream_configuration=scfg)
            self.activate_data_product(sdp_id)
            self.container.resource_registry.create_association(
                subject=site_1_id,
                predicate=PRED.hasOutputProduct,
                object=sdp_id,
                assoc_type=RT.DataProduct)
            sdp = self.resource_registry.read(sdp_id)
            sdp.category = DataProductTypeEnum.SITE
            self.resource_registry.update(sdp)

        self.observatory_management.activate_deployment(
            deployment_id=deployment_1_id)

        # Get Deployment start time
        deployment_obj = self.resource_registry.read(deployment_1_id)
        for constraint in deployment_obj.constraint_list:
            if constraint.type_ == OT.TemporalBounds:
                assert constraint.start_datetime == starting

        # Get information about the new SiteDataProduct that should have been created
        site_data_product_1_id = self.resource_registry.find_objects(
            site_1_id, PRED.hasOutputProduct, id_only=True)[0][0]
        stream_def_2_id = self.resource_registry.find_objects(
            site_data_product_1_id, PRED.hasStreamDefinition,
            id_only=True)[0][0]
        param_dict_b = self.resource_registry.find_objects(
            stream_def_2_id, PRED.hasParameterDictionary)[0][0]

        # Check associations
        self.assertEquals(
            self.resource_registry.find_objects(site_1_id,
                                                PRED.hasDevice,
                                                id_only=True)[0][0],
            device_1_id)
        self.assertEquals(
            self.resource_registry.find_objects(site_1_id,
                                                PRED.hasDeployment,
                                                id_only=True)[0][0],
            deployment_1_id)
        self.assertEquals(
            self.resource_registry.find_objects(device_1_id,
                                                PRED.hasDeployment,
                                                id_only=True)[0][0],
            deployment_1_id)
        self.assertEquals(
            self.resource_registry.find_objects(device_1_id,
                                                PRED.hasOutputProduct,
                                                id_only=True)[0][0],
            data_product_1_id)
        self.assertEquals(
            self.resource_registry.find_objects(site_1_id,
                                                PRED.hasOutputProduct,
                                                id_only=True)[0][0],
            site_data_product_1_id)

        site_data_product_1 = self.resource_registry.find_objects(
            site_1_id, PRED.hasOutputProduct)[0][0]
        self.assertEquals(site_data_product_1.category,
                          DataProductTypeEnum.SITE)
        self.assertEquals(len(site_data_product_1.dataset_windows), 1)
        assert site_data_product_1.dataset_windows[0].dataset_id == dataset_1_id
        assert site_data_product_1.dataset_windows[0].bounds.start_datetime == starting
        assert site_data_product_1.dataset_windows[0].bounds.end_datetime == ''
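        # The window's end_datetime stays empty until the deployment is deactivated below.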

        # Check that param dicts have equal members
        self.assertEquals(param_dict_a.name, param_dict_b.name)

        self.observatory_management.deactivate_deployment(
            deployment_id=deployment_1_id)

        # Verify the window has an ending time
        site_data_product_1 = self.resource_registry.find_objects(
            site_1_id, PRED.hasOutputProduct)[0][0]
        self.assertEquals(site_data_product_1.category,
                          DataProductTypeEnum.SITE)
        self.assertEquals(len(site_data_product_1.dataset_windows), 1)
        assert site_data_product_1.dataset_windows[0].dataset_id == dataset_1_id
        assert site_data_product_1.dataset_windows[0].bounds.start_datetime == starting
        assert int(site_data_product_1.dataset_windows[0].bounds.end_datetime) - calendar.timegm(datetime.utcnow().timetuple()) < 10

        # Verify that data is there
        granule = self.data_retriever.retrieve(dataset_1_id)
        rdt = RecordDictionaryTool.load_from_granule(granule)
        np.testing.assert_allclose(rdt['time'], np.arange(4))
        np.testing.assert_allclose(rdt['temp'], np.arange(10, 14))
Example #57
0
    def check_trhph_instrument_data_products(self, reference_designator):
        passing = True
        info_list = []
        passing &= self.check_data_product_reference(reference_designator,
                                                     info_list)
        if not passing:
            return passing

        data_product_id, stream_def_id, dataset_id = info_list.pop()

        pdict = self.RR2.find_parameter_dictionary_of_stream_definition_using_has_parameter_dictionary(
            stream_def_id)
        passing &= self.assertEquals(pdict.name, 'trhph_sample')

        rdt = RecordDictionaryTool(stream_definition_id=stream_def_id)

        # calibration constants
        a = 1.98e-9
        b = -2.45e-6
        c = 9.28e-4
        d = -0.0888
        e = 0.731

        V_s = 1.506
        V_c = 0.
        T = 11.8

        r1 = 0.906
        r2 = 4.095
        r3 = 4.095

        ORP_V = 1.806
        Cl = np.nan

        offset = 2008
        gain = 4.0
        # Normally this would be 50 per the DPS but the precision is %4.0f which truncates the values to the nearest 1...
        ORP = ((ORP_V * 1000.) - offset) / gain
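        # i.e. ((1.806 * 1000.) - 2008) / 4.0 = -50.5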

        ntp_now = time.time() + 2208988800

        rdt['cc_a'] = [a]
        rdt['cc_b'] = [b]
        rdt['cc_c'] = [c]
        rdt['cc_d'] = [d]
        rdt['cc_e'] = [e]
        rdt['ref_temp_volts'] = [V_s]
        rdt['resistivity_temp_volts'] = [V_c]
        rdt['eh_sensor'] = [ORP_V]
        rdt['resistivity_5'] = [r1]
        rdt['resistivity_x1'] = [r2]
        rdt['resistivity_x5'] = [r3]
        rdt['cc_offset'] = [offset]
        rdt['cc_gain'] = [gain]
        rdt['time'] = [ntp_now]

        passing &= self.assert_array_almost_equal(rdt['vent_fluid_temperaure'],
                                                  [T], 2)
        passing &= self.assert_array_almost_equal(
            rdt['vent_fluid_chloride_conc'], [Cl], 4)
        passing &= self.assert_array_almost_equal(rdt['vent_fluid_orp'], [ORP],
                                                  4)

        dataset_monitor = DatasetMonitor(dataset_id)
        self.addCleanup(dataset_monitor.stop)
        ParameterHelper.publish_rdt_to_data_product(data_product_id, rdt)
        passing &= self.assertTrue(dataset_monitor.wait())
        if not passing: return passing

        granule = self.data_retriever.retrieve(dataset_id)
        rdt = RecordDictionaryTool.load_from_granule(granule)

        passing &= self.assert_array_almost_equal(rdt['vent_fluid_temperaure'],
                                                  [T], 2)
        passing &= self.assert_array_almost_equal(
            rdt['vent_fluid_chloride_conc'], [Cl], 4)
        passing &= self.assert_array_almost_equal(rdt['vent_fluid_orp'], [ORP],
                                                  4)

        return passing