def build_param_contexts(self):
        context_ids = []
        t_ctxt = ParameterContext(
            'time', param_type=QuantityType(value_encoding=np.dtype('int64')))
        t_ctxt.uom = 'seconds since 01-01-1970'
        context_ids.append(
            self.dataset_management.create_parameter_context(
                name='time', parameter_context=t_ctxt.dump()))

        cnd_ctxt = ParameterContext('conductivity', param_type=ArrayType())
        cnd_ctxt.uom = 'mmho/cm'
        context_ids.append(
            self.dataset_management.create_parameter_context(
                name='conductivity', parameter_context=cnd_ctxt.dump()))

        temp_ctxt = ParameterContext('temperature', param_type=ArrayType())
        temp_ctxt.uom = 'degC'
        context_ids.append(
            self.dataset_management.create_parameter_context(
                name='temperature', parameter_context=temp_ctxt.dump()))

        press_ctxt = ParameterContext('pressure', param_type=ArrayType())
        press_ctxt.uom = 'decibars'
        context_ids.append(
            self.dataset_management.create_parameter_context(
                name='pressure', parameter_context=press_ctxt.dump()))

        oxy_ctxt = ParameterContext('oxygen', param_type=ArrayType())
        oxy_ctxt.uom = 'Hz'
        context_ids.append(
            self.dataset_management.create_parameter_context(
                name='oxygen', parameter_context=oxy_ctxt.dump()))

        return context_ids
Esempio n. 2
0
 def param_type(self, s):
     if s == 'record':
         return RecordType()
     elif s == 'array':
         return ArrayType()
     else:
         return QuantityType(value_encoding=np.dtype(s))
Esempio n. 3
0
 def get_array_type(self, parameter_type=None, encoding=None):
     if encoding in ('string', 'char', 'str', '', 'opaque'):
         return self.get_string_type()
     if 'int' in encoding:
         fill = -9999
     elif 'float' in encoding:
         fill = np.nan
     else:
         raise TypeError("Unknown encoding for array types %s" % encoding)
     return ArrayType(inner_encoding=encoding, inner_fill_value=fill)
    def test_replay_with_parameters(self):
        #--------------------------------------------------------------------------------
        # Create the configurations and the dataset
        #--------------------------------------------------------------------------------
        # Get a precompiled parameter dictionary with basic ctd fields
        pdict_id = self.dataset_management.read_parameter_dictionary_by_name('ctd_parsed_param_dict',id_only=True)
        context_ids = self.dataset_management.read_parameter_contexts(pdict_id, id_only=True)

        # Add a field that supports binary data input.
        bin_context = ParameterContext('binary',  param_type=ArrayType())
        context_ids.append(self.dataset_management.create_parameter_context('binary', bin_context.dump()))
        # Add another field that supports dictionary elements.
        rec_context = ParameterContext('records', param_type=RecordType())
        context_ids.append(self.dataset_management.create_parameter_context('records', rec_context.dump()))

        pdict_id = self.dataset_management.create_parameter_dictionary('replay_pdict', parameter_context_ids=context_ids, temporal_context='time')
        

        stream_def_id = self.pubsub_management.create_stream_definition('replay_stream', parameter_dictionary_id=pdict_id)
        
        stream_id, route  = self.pubsub_management.create_stream('replay_with_params', exchange_point=self.exchange_point_name, stream_definition_id=stream_def_id)
        config_id  = self.get_ingestion_config()
        dataset_id = self.create_dataset(pdict_id)
        self.ingestion_management.persist_data_stream(stream_id=stream_id, ingestion_configuration_id=config_id, dataset_id=dataset_id)
        self.addCleanup(self.stop_ingestion, stream_id)

        dataset_monitor = DatasetMonitor(dataset_id)
        self.addCleanup(dataset_monitor.stop)

        self.publish_fake_data(stream_id, route)

        self.assertTrue(dataset_monitor.wait())

        query = {
            'start_time': 0 - 2208988800,
            'end_time':   19 - 2208988800,
            'stride_time' : 2,
            'parameters': ['time','temp']
        }
        retrieved_data = self.data_retriever.retrieve(dataset_id=dataset_id,query=query)

        rdt = RecordDictionaryTool.load_from_granule(retrieved_data)
        np.testing.assert_array_equal(rdt['time'], np.arange(0,20,2))
        self.assertEquals(set(rdt.iterkeys()), set(['time','temp']))

        extents = self.dataset_management.dataset_extents(dataset_id=dataset_id, parameters=['time','temp'])
        self.assertTrue(extents['time']>=20)
        self.assertTrue(extents['temp']>=20)
    def _create_param_contexts(self):
        context_ids = []
        t_ctxt = ParameterContext(
            'ingestion_timestamp',
            param_type=QuantityType(value_encoding=numpy.dtype('float64')))
        t_ctxt.uom = 'seconds since 1900-01-01'
        t_ctxt.fill_value = -9999
        t_ctxt_id = self.dataset_management_client.create_parameter_context(
            name='ingestion_timestamp', parameter_context=t_ctxt.dump())
        context_ids.append(t_ctxt_id)

        raw_ctxt = ParameterContext('raw', param_type=ArrayType())
        raw_ctxt.uom = ''
        context_ids.append(
            self.dataset_management_client.create_parameter_context(
                name='raw', parameter_context=raw_ctxt.dump()))

        return context_ids, t_ctxt_id
    def test_dm_end_2_end(self):
        #--------------------------------------------------------------------------------
        # Set up a stream and have a mock instrument (producer) send data
        #--------------------------------------------------------------------------------
        self.event.clear()

        # Get a precompiled parameter dictionary with basic ctd fields
        pdict_id = self.dataset_management.read_parameter_dictionary_by_name('ctd_parsed_param_dict',id_only=True)
        context_ids = self.dataset_management.read_parameter_contexts(pdict_id, id_only=True)

        # Add a field that supports binary data input.
        bin_context = ParameterContext('binary',  param_type=ArrayType())
        context_ids.append(self.dataset_management.create_parameter_context('binary', bin_context.dump()))
        # Add another field that supports dictionary elements.
        rec_context = ParameterContext('records', param_type=RecordType())
        context_ids.append(self.dataset_management.create_parameter_context('records', rec_context.dump()))

        pdict_id = self.dataset_management.create_parameter_dictionary('replay_pdict', parameter_context_ids=context_ids, temporal_context='time')
        
        stream_definition = self.pubsub_management.create_stream_definition('ctd data', parameter_dictionary_id=pdict_id)


        stream_id, route = self.pubsub_management.create_stream('producer', exchange_point=self.exchange_point_name, stream_definition_id=stream_definition)

        #--------------------------------------------------------------------------------
        # Start persisting the data on the stream 
        # - Get the ingestion configuration from the resource registry
        # - Create the dataset
        # - call persist_data_stream to setup the subscription for the ingestion workers
        #   on the stream that you specify which causes the data to be persisted
        #--------------------------------------------------------------------------------

        ingest_config_id = self.get_ingestion_config()
        dataset_id = self.create_dataset(pdict_id)
        self.ingestion_management.persist_data_stream(stream_id=stream_id, ingestion_configuration_id=ingest_config_id, dataset_id=dataset_id)
        self.addCleanup(self.stop_ingestion, stream_id)

        #--------------------------------------------------------------------------------
        # Now the granules are ingesting and persisted
        #--------------------------------------------------------------------------------

        self.launch_producer(stream_id)
        self.wait_until_we_have_enough_granules(dataset_id,40)
        
        #--------------------------------------------------------------------------------
        # Now get the data in one chunk using an RPC Call to start_retreive
        #--------------------------------------------------------------------------------
        
        replay_data = self.data_retriever.retrieve(dataset_id)
        self.assertIsInstance(replay_data, Granule)
        rdt = RecordDictionaryTool.load_from_granule(replay_data)
        self.assertTrue((rdt['time'][:10] == np.arange(10)).all(),'%s' % rdt['time'][:])
        self.assertTrue((rdt['binary'][:10] == np.array(['hi']*10, dtype='object')).all())

        
        #--------------------------------------------------------------------------------
        # Now to try the streamed approach
        #--------------------------------------------------------------------------------
        replay_stream_id, replay_route = self.pubsub_management.create_stream('replay_out', exchange_point=self.exchange_point_name, stream_definition_id=stream_definition)
        self.replay_id, process_id =  self.data_retriever.define_replay(dataset_id=dataset_id, stream_id=replay_stream_id)
        log.info('Process ID: %s', process_id)

        replay_client = ReplayClient(process_id)

    
        #--------------------------------------------------------------------------------
        # Create the listening endpoint for the the retriever to talk to 
        #--------------------------------------------------------------------------------
        sub_id = self.pubsub_management.create_subscription(self.exchange_space_name,stream_ids=[replay_stream_id])
        self.addCleanup(self.pubsub_management.delete_subscription, sub_id)
        self.pubsub_management.activate_subscription(sub_id)
        self.addCleanup(self.pubsub_management.deactivate_subscription, sub_id)
        subscriber = StandaloneStreamSubscriber(self.exchange_space_name, self.validate_granule_subscription)
        subscriber.start()
        self.addCleanup(subscriber.stop)

        self.data_retriever.start_replay_agent(self.replay_id)

        self.assertTrue(replay_client.await_agent_ready(5), 'The process never launched')
        replay_client.start_replay()
        
        self.assertTrue(self.event.wait(10))

        self.data_retriever.cancel_replay_agent(self.replay_id)


        #--------------------------------------------------------------------------------
        # Test the slicing capabilities
        #--------------------------------------------------------------------------------

        granule = self.data_retriever.retrieve(dataset_id=dataset_id, query={'tdoa':slice(0,5)})
        rdt = RecordDictionaryTool.load_from_granule(granule)
        b = rdt['time'] == np.arange(5)
        self.assertTrue(b.all() if not isinstance(b,bool) else b)
Esempio n. 7
0
 def get_array_type(self, parameter_type=None, encoding=None):
     if encoding in ('str', '', 'opaque'):
         encoding = None
     return ArrayType(inner_encoding=encoding)
Esempio n. 8
0
 def get_string_array_lookup_value(self, value):
     return self._placeholder(
         value, lambda placeholder: ParameterContext(
             name=placeholder,
             param_type=SparseConstantType(base_type=ArrayType())))
Esempio n. 9
0
 def get_array_lookup_value(self, value):
     return self._placeholder(
         value, lambda placeholder: ParameterContext(
             name=placeholder,
             param_type=SparseConstantType(base_type=ArrayType(
                 inner_encoding='float64', inner_fill_value=-9999.))))
Esempio n. 10
0
    def build_param_contexts(self):
        context_ids = []
        t_ctxt = ParameterContext(
            'time', param_type=QuantityType(value_encoding=np.dtype('int64')))
        t_ctxt.uom = 'seconds since 01-01-1970'
        context_ids.append(
            self.dataset_management.create_parameter_context(
                name='time', parameter_context=t_ctxt.dump()))

        ut_ctxt = ParameterContext(
            'upload_time',
            param_type=QuantityType(value_encoding=np.dtype('int64')))
        ut_ctxt.uom = 'seconds since 01-01-1970'
        context_ids.append(
            self.dataset_management.create_parameter_context(
                name='upload_time', parameter_context=ut_ctxt.dump()))

        vela_ctxt = ParameterContext('VelA', param_type=ArrayType())
        vela_ctxt.uom = 'unknown'
        context_ids.append(
            self.dataset_management.create_parameter_context(
                name='VelA', parameter_context=vela_ctxt.dump()))

        velb_ctxt = ParameterContext('VelB', param_type=ArrayType())
        velb_ctxt.uom = 'unknown'
        context_ids.append(
            self.dataset_management.create_parameter_context(
                name='VelB', parameter_context=velb_ctxt.dump()))

        velc_ctxt = ParameterContext('VelC', param_type=ArrayType())
        velc_ctxt.uom = 'unknown'
        context_ids.append(
            self.dataset_management.create_parameter_context(
                name='VelC', parameter_context=velc_ctxt.dump()))

        veld_ctxt = ParameterContext('VelD', param_type=ArrayType())
        veld_ctxt.uom = 'unknown'
        context_ids.append(
            self.dataset_management.create_parameter_context(
                name='VelD', parameter_context=veld_ctxt.dump()))

        mx_ctxt = ParameterContext('Mx', param_type=ArrayType())
        mx_ctxt.uom = 'unknown'
        context_ids.append(
            self.dataset_management.create_parameter_context(
                name='Mx', parameter_context=mx_ctxt.dump()))

        my_ctxt = ParameterContext('My', param_type=ArrayType())
        my_ctxt.uom = 'unknown'
        context_ids.append(
            self.dataset_management.create_parameter_context(
                name='My', parameter_context=my_ctxt.dump()))

        mz_ctxt = ParameterContext('Mz', param_type=ArrayType())
        mz_ctxt.uom = 'unknown'
        context_ids.append(
            self.dataset_management.create_parameter_context(
                name='Mz', parameter_context=mz_ctxt.dump()))

        pitch_ctxt = ParameterContext('Pitch', param_type=ArrayType())
        pitch_ctxt.uom = 'unknown'
        context_ids.append(
            self.dataset_management.create_parameter_context(
                name='Pitch', parameter_context=pitch_ctxt.dump()))

        roll_ctxt = ParameterContext('Roll', param_type=ArrayType())
        roll_ctxt.uom = 'unknown'
        context_ids.append(
            self.dataset_management.create_parameter_context(
                name='Roll', parameter_context=roll_ctxt.dump()))

        return context_ids
Esempio n. 11
0
def build_contexts():
    '''
    Builds the relevant parameter context objects
    '''

    contexts = []

    cond_ctxt = ParameterContext(
        'conductivity', param_type=QuantityType(value_encoding=np.float32))
    cond_ctxt.uom = 'unknown'
    cond_ctxt.fill_value = 0e0
    contexts.append(cond_ctxt)

    pres_ctxt = ParameterContext(
        'pressure', param_type=QuantityType(value_encoding=np.float32))
    pres_ctxt.uom = 'Pascal'
    pres_ctxt.fill_value = 0x0
    contexts.append(pres_ctxt)

    sal_ctxt = ParameterContext(
        'salinity', param_type=QuantityType(value_encoding=np.float32))
    sal_ctxt.uom = 'PSU'
    sal_ctxt.fill_value = 0x0
    contexts.append(sal_ctxt)

    den_ctxt = ParameterContext(
        'density', param_type=QuantityType(value_encoding=np.float32))
    den_ctxt.uom = 'kg/m3'
    den_ctxt.fill_value = 0x0
    contexts.append(den_ctxt)

    temp_ctxt = ParameterContext(
        'temp', param_type=QuantityType(value_encoding=np.float32))
    temp_ctxt.uom = 'degree_Celsius'
    temp_ctxt.fill_value = 0e0
    contexts.append(temp_ctxt)

    t_ctxt = ParameterContext('time',
                              param_type=QuantityType(value_encoding=np.int64))
    t_ctxt.uom = 'seconds since 1970-01-01'
    t_ctxt.fill_value = 0x0
    contexts.append(t_ctxt)

    lat_ctxt = ParameterContext(
        'lat', param_type=QuantityType(value_encoding=np.float32))
    lat_ctxt.axis = AxisTypeEnum.LAT
    lat_ctxt.uom = 'degree_north'
    lat_ctxt.fill_value = 0e0
    contexts.append(lat_ctxt)

    lon_ctxt = ParameterContext(
        'lon', param_type=QuantityType(value_encoding=np.float32))
    lon_ctxt.axis = AxisTypeEnum.LON
    lon_ctxt.uom = 'degree_east'
    lon_ctxt.fill_value = 0e0
    contexts.append(lon_ctxt)

    raw_ctxt = ParameterContext('raw', param_type=ArrayType())
    raw_ctxt.description = 'raw binary string values'
    raw_ctxt.uom = 'utf-8 byte string'
    raw_ctxt.fill_value = 0x0
    contexts.append(raw_ctxt)

    port_ts_ctxt = ParameterContext(
        name='port_timestamp',
        param_type=QuantityType(value_encoding=np.float64))
    port_ts_ctxt._derived_from_name = 'time'
    port_ts_ctxt.uom = 'seconds'
    port_ts_ctxt.fill_value = -1
    contexts.append(port_ts_ctxt)

    driver_ts_ctxt = ParameterContext(
        name='driver_timestamp',
        param_type=QuantityType(value_encoding=np.float64))
    driver_ts_ctxt._derived_from_name = 'time'
    driver_ts_ctxt.uom = 'seconds'
    driver_ts_ctxt.fill_value = -1
    contexts.append(driver_ts_ctxt)

    internal_ts_ctxt = ParameterContext(
        name='internal_timestamp',
        param_type=QuantityType(value_encoding=np.float64))
    internal_ts_ctxt._derived_from_name = 'time'
    internal_ts_ctxt.uom = 'seconds'
    internal_ts_ctxt.fill_value = -1
    contexts.append(internal_ts_ctxt)

    timer_num_ctxt = ParameterContext(
        name='timer', param_type=QuantityType(value_encoding=np.float64))
    timer_num_ctxt.fill_value = -1
    contexts.append(timer_num_ctxt)

    serial_num_ctxt = ParameterContext(
        name='serial_num', param_type=QuantityType(value_encoding=np.int32))
    serial_num_ctxt.fill_value = -1
    contexts.append(serial_num_ctxt)

    count_ctxt = ParameterContext(
        name='counts', param_type=QuantityType(value_encoding=np.uint64))
    count_ctxt.fill_value = -1
    contexts.append(count_ctxt)

    checksum_ctxt = ParameterContext(
        name='checksum', param_type=QuantityType(value_encoding=np.int32))
    checksum_ctxt.fill_value = -1
    contexts.append(checksum_ctxt)

    pref_ts_ctxt = ParameterContext(
        name='preferred_timestamp',
        param_type=QuantityType(value_encoding=np.uint64))
    pref_ts_ctxt.description = 'name of preferred timestamp'
    pref_ts_ctxt.fill_value = 0x0
    contexts.append(pref_ts_ctxt)

    # TODO: This should probably be of type CategoryType when implemented
    qual_flag_ctxt = ParameterContext(name='quality_flag',
                                      param_type=ArrayType())
    qual_flag_ctxt.description = 'flag indicating quality'
    qual_flag_ctxt.fill_value = None
    contexts.append(qual_flag_ctxt)

    viz_ts_ctxt = ParameterContext(
        name='viz_timestamp',
        param_type=QuantityType(value_encoding=np.float64))
    viz_ts_ctxt._derived_from_name = 'time'
    viz_ts_ctxt.uom = 'seconds'
    viz_ts_ctxt.fill_value = -1
    contexts.append(viz_ts_ctxt)

    viz_prod_type_ctxt = ParameterContext(name='viz_product_type',
                                          param_type=ArrayType())
    viz_prod_type_ctxt.fill_value = None
    contexts.append(viz_prod_type_ctxt)

    image_obj_ctxt = ParameterContext(name='image_obj', param_type=ArrayType())
    image_obj_ctxt.fill_value = None
    contexts.append(image_obj_ctxt)

    image_name_ctxt = ParameterContext(name='image_name',
                                       param_type=ArrayType())
    contexts.append(image_name_ctxt)

    content_type_ctxt = ParameterContext(name='content_type',
                                         param_type=ArrayType())
    contexts.append(content_type_ctxt)

    gdt_ctxt = ParameterContext(name='google_dt_components',
                                param_type=RecordType())
    contexts.append(gdt_ctxt)

    mpl_ctxt = ParameterContext(name='mpl_graph', param_type=RecordType())
    contexts.append(mpl_ctxt)

    dummy_ctxt = ParameterContext(
        name='dummy', param_type=QuantityType(value_encoding=np.int64))
    contexts.append(dummy_ctxt)

    return contexts
Esempio n. 12
0
    def test_replay_pause(self):
        # Get a precompiled parameter dictionary with basic ctd fields
        pdict_id = self.dataset_management.read_parameter_dictionary_by_name(
            'ctd_parsed_param_dict', id_only=True)
        context_ids = self.dataset_management.read_parameter_contexts(
            pdict_id, id_only=True)

        # Add a field that supports binary data input.
        bin_context = ParameterContext('binary', param_type=ArrayType())
        context_ids.append(
            self.dataset_management.create_parameter_context(
                'binary', bin_context.dump()))
        # Add another field that supports dictionary elements.
        rec_context = ParameterContext('records', param_type=RecordType())
        context_ids.append(
            self.dataset_management.create_parameter_context(
                'records', rec_context.dump()))

        pdict_id = self.dataset_management.create_parameter_dictionary(
            'replay_pdict',
            parameter_context_ids=context_ids,
            temporal_context='time')

        stream_def_id = self.pubsub_management.create_stream_definition(
            'replay_stream', parameter_dictionary_id=pdict_id)
        replay_stream, replay_route = self.pubsub_management.create_stream(
            'replay', 'xp1', stream_definition_id=stream_def_id)
        dataset_id = self.create_dataset(pdict_id)
        scov = DatasetManagementService._get_coverage(dataset_id)

        bb = CoverageCraft(scov)
        bb.rdt['time'] = np.arange(100)
        bb.rdt['temp'] = np.random.random(100) + 30
        bb.sync_with_granule()

        DatasetManagementService._persist_coverage(
            dataset_id,
            bb.coverage)  # This invalidates it for multi-host configurations
        # Set up the subscriber to verify the data
        subscriber = StandaloneStreamSubscriber(
            self.exchange_space_name, self.validate_granule_subscription)
        xp = self.container.ex_manager.create_xp('xp1')
        self.queue_buffer.append(self.exchange_space_name)
        subscriber.start()
        subscriber.xn.bind(replay_route.routing_key, xp)

        # Set up the replay agent and the client wrapper

        # 1) Define the Replay (dataset and stream to publish on)
        self.replay_id, process_id = self.data_retriever.define_replay(
            dataset_id=dataset_id, stream_id=replay_stream)
        # 2) Make a client to the interact with the process (optionall provide it a process to bind with)
        replay_client = ReplayClient(process_id)
        # 3) Start the agent (launch the process)
        self.data_retriever.start_replay_agent(self.replay_id)
        # 4) Start replaying...
        replay_client.start_replay()

        # Wait till we get some granules
        self.assertTrue(self.event.wait(5))

        # We got granules, pause the replay, clear the queue and allow the process to finish consuming
        replay_client.pause_replay()
        gevent.sleep(1)
        subscriber.xn.purge()
        self.event.clear()

        # Make sure there's no remaining messages being consumed
        self.assertFalse(self.event.wait(1))

        # Resume the replay and wait until we start getting granules again
        replay_client.resume_replay()
        self.assertTrue(self.event.wait(5))

        # Stop the replay, clear the queues
        replay_client.stop_replay()
        gevent.sleep(1)
        subscriber.xn.purge()
        self.event.clear()

        # Make sure that it did indeed stop
        self.assertFalse(self.event.wait(1))

        subscriber.stop()
Esempio n. 13
0
def get_array_type(parameter_type=None):
    return ArrayType()