def _setup_resources(self):
    """Create and register every resource the slocum dataset test needs.

    In order, this method:

    * creates an external dataset agent and an agent instance,
    * builds the data provider, data source, external dataset and data
      source model objects and registers them through the data acquisition
      management client,
    * creates parameter contexts, a parameter dictionary and a stream
      definition,
    * creates a data product, assigns it to the external dataset and looks
      up the stream attached to it,
    * starts a stream logger via ``self.create_stream_and_logger``.

    Side effects on ``self``: sets ``EDA_RESOURCE_ID``, ``EDA_NAME`` and
    ``DVR_CONFIG['dh_cfg']``. Returns nothing.
    """
    # TODO: some or all of this (or some variation) should move to DAMS

    # Service clients used to build the test resources for the dataset
    dms_cli = DatasetManagementServiceClient()
    dams_cli = DataAcquisitionManagementServiceClient()
    dpms_cli = DataProductManagementServiceClient()
    rr_cli = ResourceRegistryServiceClient()
    pubsub_cli = PubsubManagementServiceClient()

    # External dataset agent and its instance
    agent = ExternalDatasetAgent(
        handler_module=self.DVR_CONFIG['dvr_mod'],
        handler_class=self.DVR_CONFIG['dvr_cls'],
    )
    eda_id = dams_cli.create_external_dataset_agent(agent)

    agent_instance = ExternalDatasetAgentInstance()
    eda_inst_id = dams_cli.create_external_dataset_agent_instance(
        agent_instance,
        external_dataset_agent_id=eda_id,
    )

    # ---- Build the resource objects (registered further below) ----

    # DataProvider
    dprov = ExternalDataProvider(institution=Institution(), contact=ContactInformation())
    dprov.contact.individual_names_given = 'Christopher Mueller'
    dprov.contact.email = '*****@*****.**'

    # DataSource
    dsrc = DataSource(protocol_type='FILE', institution=Institution(), contact=ContactInformation())
    dsrc.connection_params['base_data_url'] = ''
    dsrc.contact.individual_names_given = 'Tim Giguere'
    dsrc.contact.email = '*****@*****.**'

    # ExternalDataset
    ds_name = 'slocum_test_dataset'
    dset = ExternalDataset(
        name=ds_name,
        dataset_description=DatasetDescription(),
        update_description=UpdateDescription(),
        contact=ContactInformation(),
    )

    # Dataset parameters, applied one at a time in the listed order.
    dataset_parameters = (
        ('base_url', 'test_data/slocum/'),
        ('list_pattern', 'ru05-2012-021-0-0-sbd.dat'),
        ('date_pattern', '%Y %j'),
        ('date_extraction_pattern', 'ru05-([\d]{4})-([\d]{3})-\d-\d-sbd.dat'),
        ('temporal_dimension', None),
        ('zonal_dimension', None),
        ('meridional_dimension', None),
        ('vertical_dimension', None),
        ('variables', [
            'c_wpt_y_lmc',
            'sci_water_cond',
            'm_y_lmc',
            'u_hd_fin_ap_inflection_holdoff',
            'sci_m_present_time',
            'm_leakdetect_voltage_forward',
            'sci_bb3slo_b660_scaled',
            'c_science_send_all',
            'm_gps_status',
            'm_water_vx',
            'm_water_vy',
            'c_heading',
            'sci_fl3slo_chlor_units',
            'u_hd_fin_ap_gain',
            'm_vacuum',
            'u_min_water_depth',
            'm_gps_lat',
            'm_veh_temp',
            'f_fin_offset',
            'u_hd_fin_ap_hardover_holdoff',
            'c_alt_time',
            'm_present_time',
            'm_heading',
            'sci_bb3slo_b532_scaled',
            'sci_fl3slo_cdom_units',
            'm_fin',
            'x_cycle_overrun_in_ms',
            'sci_water_pressure',
            'u_hd_fin_ap_igain',
            'sci_fl3slo_phyco_units',
            'm_battpos',
            'sci_bb3slo_b470_scaled',
            'm_lat',
            'm_gps_lon',
            'sci_ctd41cp_timestamp',
            'm_pressure',
            'c_wpt_x_lmc',
            'c_ballast_pumped',
            'x_lmc_xy_source',
            'm_lon',
            'm_avg_speed',
            'sci_water_temp',
            'u_pitch_ap_gain',
            'm_roll',
            'm_tot_num_inflections',
            'm_x_lmc',
            'u_pitch_ap_deadband',
            'm_final_water_vy',
            'm_final_water_vx',
            'm_water_depth',
            'm_leakdetect_voltage',
            'u_pitch_max_delta_battpos',
            'm_coulomb_amphr',
            'm_pitch',
        ]),
    )
    for param_name, param_value in dataset_parameters:
        dset.dataset_description.parameters[param_name] = param_value

    # DataSourceModel
    dsrc_model = DataSourceModel(name='slocum_model')
    # dsrc_model.model = 'SLOCUM'
    dsrc_model.data_handler_module = 'N/A'
    dsrc_model.data_handler_class = 'N/A'

    # ---- Run everything through DAMS ----
    ds_id = dams_cli.create_external_dataset(external_dataset=dset)
    ext_dprov_id = dams_cli.create_external_data_provider(external_data_provider=dprov)
    ext_dsrc_id = dams_cli.create_data_source(data_source=dsrc)
    ext_dsrc_model_id = dams_cli.create_data_source_model(dsrc_model)

    # Register the ExternalDataset
    dproducer_id = dams_cli.register_external_data_set(external_dataset_id=ds_id)

    # Or using each method
    dams_cli.assign_data_source_to_external_data_provider(
        data_source_id=ext_dsrc_id,
        external_data_provider_id=ext_dprov_id,
    )
    dams_cli.assign_data_source_to_data_model(
        data_source_id=ext_dsrc_id,
        data_source_model_id=ext_dsrc_model_id,
    )
    dams_cli.assign_external_dataset_to_data_source(
        external_dataset_id=ds_id,
        data_source_id=ext_dsrc_id,
    )
    dams_cli.assign_external_dataset_to_agent_instance(
        external_dataset_id=ds_id,
        agent_instance_id=eda_inst_id,
    )
    # dams_cli.assign_external_data_agent_to_agent_instance(external_data_agent_id=self.eda_id, agent_instance_id=self.eda_inst_id)

    # Temporary stream definition so the data product can create the stream
    context_ids = [
        dms_cli.create_parameter_context(context_name, context_entry[1].dump())
        for context_name, context_entry in self._create_parameter_dictionary().iteritems()
    ]
    pdict_id = dms_cli.create_parameter_dictionary('slocum_param_dict', context_ids)

    streamdef_id = pubsub_cli.create_stream_definition(
        name="slocum_stream_def",
        description="stream def for slocum testing",
        parameter_dictionary_id=pdict_id,
    )

    # dpms_cli.create_data_product()

    # Generate the data product and associate it to the ExternalDataset
    tdom, sdom = time_series_domain()
    tdom = tdom.dump()
    sdom = sdom.dump()

    dprod = IonObject(
        RT.DataProduct,
        name='slocum_parsed_product',
        description='parsed slocum product',
        temporal_domain=tdom,
        spatial_domain=sdom,
    )
    dproduct_id = dpms_cli.create_data_product(data_product=dprod, stream_definition_id=streamdef_id)

    dams_cli.assign_data_product(input_resource_id=ds_id, data_product_id=dproduct_id)

    # The data product's stream is found through its hasStream association.
    stream_ids, _ = rr_cli.find_objects(
        subject=dproduct_id,
        predicate=PRED.hasStream,
        object_type=RT.Stream,
        id_only=True,
    )
    stream_id = stream_ids[0]

    created = {
        'ExternalDataset': ds_id,
        'ExternalDataProvider': ext_dprov_id,
        'DataSource': ext_dsrc_id,
        'DataSourceModel': ext_dsrc_model_id,
        'DataProducer': dproducer_id,
        'DataProduct': dproduct_id,
        'Stream': stream_id,
    }
    log.info('Created resources: {0}'.format(created))

    # Create the logger for receiving publications
    _, stream_route, _ = self.create_stream_and_logger(name='slocum', stream_id=stream_id)

    self.EDA_RESOURCE_ID = ds_id
    self.EDA_NAME = ds_name
    self.DVR_CONFIG['dh_cfg'] = {
        'TESTING': True,
        'stream_id': stream_id,
        'stream_route': stream_route,
        'stream_def': streamdef_id,
        'external_dataset_res': dset,
        'data_producer_id': dproducer_id,
        # CBM: Should this be put in the main body of the config - with mod & cls?
        'max_records': 20,
    }
class TestCTDPChain(IonIntegrationTestCase):
    """Integration test for a chain of CTDBP transforms.

    The chain wired up here is L0 -> L1 -> {L2 density, L2 salinity}: the
    output data product of L0 is the input of L1, and the output of L1 is the
    input of both L2 transforms. A single granule is published to the L0
    input stream and one granule is expected on the output of every
    transform.
    """

    def setUp(self):
        """Start the container, deploy the services and create the clients."""
        super(TestCTDPChain, self).setUp()

        self._start_container()
        self.container.start_rel_from_url('res/deploy/r2deploy.yml')

        self.pubsub = PubsubManagementServiceClient()
        self.process_dispatcher = ProcessDispatcherServiceClient()
        self.dataset_management = DatasetManagementServiceClient()
        self.data_process_management = DataProcessManagementServiceClient()
        self.dataproduct_management = DataProductManagementServiceClient()
        self.resource_registry = ResourceRegistryServiceClient()

        # This is for the time values inside the packets going into the transform
        self.i = 0
        self.cnt = 0

        # Cleanup of queue created by the subscriber
        self.queue_cleanup = []
        self.data_process_cleanup = []

    def _get_new_ctd_L0_packet(self, stream_definition_id, length):
        """Build a granule with ``length`` records of random data.

        ``time`` is first set to ``arange(self.i, self.i + length)``; then
        every field whose parameter type is a ``QuantityType`` is filled with
        uniform random values in [0.0, 75.0]. ``self.i`` is advanced by
        ``length`` so successive packets get increasing time ranges.

        NOTE(review): the parameter dictionaries built by
        ``_create_input_param_dict_for_test`` declare ``time`` as a
        ``QuantityType`` too, so the random fill presumably overwrites the
        ``arange`` time values -- confirm whether that is intended.

        :param stream_definition_id: stream definition used to build the
            record dictionary
        :param length: number of records in the granule
        :return: the granule produced by ``rdt.to_granule()``
        """
        rdt = RecordDictionaryTool(stream_definition_id=stream_definition_id)
        rdt['time'] = numpy.arange(self.i, self.i + length)

        for field in rdt:
            if isinstance(rdt._pdict.get_context(field).param_type, QuantityType):
                rdt[field] = numpy.array([random.uniform(0.0, 75.0) for _ in xrange(length)])

        granule = rdt.to_granule()
        self.i += length
        return granule

    def clean_queues(self):
        """Delete every queue whose name was recorded in ``self.queue_cleanup``."""
        for queue in self.queue_cleanup:
            xn = self.container.ex_manager.create_xn_queue(queue)
            xn.delete()

    def cleaning_operations(self):
        """Delete every data process recorded in ``self.data_process_cleanup``."""
        for dproc_id in self.data_process_cleanup:
            self.data_process_management.delete_data_process(dproc_id)

    def test_ctdp_chain(self):
        """
        Test that packets are processed by a chain of CTDP transforms: L0, L1 and L2
        """

        #-------------------------------------------------------------------------------------
        # Prepare the stream def to be used for transform chain
        #-------------------------------------------------------------------------------------

        #todo Check whether the right parameter dictionary is being used
        self._prepare_stream_def_for_transform_chain()

        #-------------------------------------------------------------------------------------
        # Prepare the data proc defs and in and out data products for the transforms
        #-------------------------------------------------------------------------------------

        # list_args_L0 = [data_proc_def_id, input_dpod_id, output_dpod_id]
        # The order matters: each call stores its output product on self as
        # the input product of the next stage.
        list_args_L0 = self._prepare_things_you_need_to_launch_transform(name_of_transform='L0')
        list_args_L1 = self._prepare_things_you_need_to_launch_transform(name_of_transform='L1')
        list_args_L2_density = self._prepare_things_you_need_to_launch_transform(name_of_transform='L2_density')
        list_args_L2_salinity = self._prepare_things_you_need_to_launch_transform(name_of_transform='L2_salinity')

        log.debug("Got the following args: L0 = %s, L1 = %s, L2 density = %s, L2 salinity = %s",
                  list_args_L0, list_args_L1, list_args_L2_density, list_args_L2_salinity)

        #-------------------------------------------------------------------------------------
        # Launch the CTDP transforms
        #-------------------------------------------------------------------------------------

        L0_data_proc_id = self._launch_transform('L0', *list_args_L0)
        L1_data_proc_id = self._launch_transform('L1', *list_args_L1)
        self._launch_transform('L2_density', *list_args_L2_density)
        self._launch_transform('L2_salinity', *list_args_L2_salinity)

        log.debug("Launched the transforms: L0 = %s, L1 = %s", L0_data_proc_id, L1_data_proc_id)

        #-------------------------------------------------------------------------
        # Start a subscriber listening to the output of each of the transforms
        #-------------------------------------------------------------------------

        ar_L0 = self.start_subscriber_listening_to_L0_transform(out_data_prod_id=list_args_L0[2])
        ar_L1 = self.start_subscriber_listening_to_L1_transform(out_data_prod_id=list_args_L1[2])
        ar_L2_density = self.start_subscriber_listening_to_L2_density_transform(
            out_data_prod_id=list_args_L2_density[2])
        # The salinity output must get its own subscriber and exchange name;
        # routing it through the density helper would register a second
        # 'sub_L2_density' subscriber instead.
        ar_L2_salinity = self.start_subscriber_listening_to_L2_salinity_transform(
            out_data_prod_id=list_args_L2_salinity[2])

        #-------------------------------------------------------------------
        # Publish the parsed packets that the L0 transform is listening for
        #-------------------------------------------------------------------

        stream_id, stream_route = self.get_stream_and_route_for_data_prod(data_prod_id=list_args_L0[1])
        self._publish_for_L0_transform(stream_id, stream_route)

        #-------------------------------------------------------------------
        # Check the granules being outputted by the transforms
        #-------------------------------------------------------------------

        self._check_granule_from_L0_transform(ar_L0)
        self._check_granule_from_L1_transform(ar_L1)
        self._check_granule_from_L2_density_transform(ar_L2_density)
        self._check_granule_from_L2_salinity_transform(ar_L2_salinity)

    def _prepare_stream_def_for_transform_chain(self):
        """Create the two stream definitions used by the chain.

        Sets ``self.in_stream_def_id_for_L0`` (built from the
        'input_param_for_L0' parameter dictionary) and ``self.stream_def_id``
        (built from 'params_for_other_transforms'). Both stream definitions
        are registered for deletion at test cleanup.
        """
        # Get the stream definition for the stream using the parameter dictionary
        # pdict_id = self.dataset_management.read_parameter_dictionary_by_name(parameter_dict_name, id_only=True)
        pdict_id = self._create_input_param_dict_for_test(parameter_dict_name='input_param_for_L0')
        self.in_stream_def_id_for_L0 = self.pubsub.create_stream_definition(
            name='stream_def_for_L0', parameter_dictionary_id=pdict_id)
        self.addCleanup(self.pubsub.delete_stream_definition, self.in_stream_def_id_for_L0)

        pdict_id = self._create_input_param_dict_for_test(parameter_dict_name='params_for_other_transforms')
        self.stream_def_id = self.pubsub.create_stream_definition(
            name='stream_def_for_CTDBP_transforms', parameter_dictionary_id=pdict_id)
        self.addCleanup(self.pubsub.delete_stream_definition, self.stream_def_id)

        # pdict_id here is the id of the second ('params_for_other_transforms') dictionary.
        log.debug("Got the parsed parameter dictionary: id: %s", pdict_id)
        log.debug("Got the stream def for parsed input to L0: %s", self.in_stream_def_id_for_L0)
        log.debug("Got the stream def for other other streams: %s", self.stream_def_id)

    def _prepare_things_you_need_to_launch_transform(self, name_of_transform=''):
        """Create the data process definition and data products for a transform.

        The calls must be made in chain order (L0, L1, then the L2
        transforms) because each stage reads the product id that the
        previous stage stored on ``self`` (``in_prod_for_L1``,
        ``in_prod_for_L2``).

        :param name_of_transform: one of 'L0', 'L1', 'L2_density',
            'L2_salinity'; an unknown name raises ``KeyError`` from the
            module/class lookup before anything is created
        :return: ``[data_proc_def_id, input_dpod_id, output_dpod_id]``
        """
        module, class_name = self._get_class_module(name_of_transform)

        #-------------------------------------------------------------------------
        # Data Process Definition
        #-------------------------------------------------------------------------

        dpd_obj = IonObject(RT.DataProcessDefinition,
                            name='CTDBP_%s_Transform' % name_of_transform,
                            description='Data Process Definition for the CTDBP %s transform.' % name_of_transform,
                            module=module,
                            class_name=class_name)

        data_proc_def_id = self.data_process_management.create_data_process_definition(dpd_obj)
        self.addCleanup(self.data_process_management.delete_data_process_definition, data_proc_def_id)
        log.debug("created data process definition: id = %s", data_proc_def_id)

        #-------------------------------------------------------------------------
        # Construct temporal and spatial Coordinate Reference System objects for the data product objects
        #-------------------------------------------------------------------------

        tdom, sdom = time_series_domain()
        sdom = sdom.dump()
        tdom = tdom.dump()

        #-------------------------------------------------------------------------
        # Get the names of the input and output data products
        #-------------------------------------------------------------------------

        input_dpod_id = ''
        output_dpod_id = ''

        if name_of_transform == 'L0':
            input_dpod_id = self._create_input_data_product('parsed', tdom, sdom)
            output_dpod_id = self._create_output_data_product('L0', tdom, sdom)
            self.in_prod_for_L1 = output_dpod_id

        elif name_of_transform == 'L1':
            input_dpod_id = self.in_prod_for_L1
            output_dpod_id = self._create_output_data_product('L1', tdom, sdom)
            self.in_prod_for_L2 = output_dpod_id

        elif name_of_transform == 'L2_density':
            input_dpod_id = self.in_prod_for_L2
            output_dpod_id = self._create_output_data_product('L2_density', tdom, sdom)

        elif name_of_transform == 'L2_salinity':
            input_dpod_id = self.in_prod_for_L2
            output_dpod_id = self._create_output_data_product('L2_salinity', tdom, sdom)

        else:
            self.fail("something bad happened")

        return [data_proc_def_id, input_dpod_id, output_dpod_id]

    def _get_class_module(self, name_of_transform):
        """Return ``(module, class_name)`` for the named transform.

        Raises ``KeyError`` if ``name_of_transform`` is not one of the four
        known transform names.
        """
        options = {'L0': self._class_module_L0,
                   'L1': self._class_module_L1,
                   'L2_density': self._class_module_L2_density,
                   'L2_salinity': self._class_module_L2_salinity}

        return options[name_of_transform]()

    def _class_module_L0(self):
        """Return the module path and class name of the L0 transform."""
        module = 'ion.processes.data.transforms.ctdbp.ctdbp_L0'
        class_name = 'CTDBP_L0_all'
        return module, class_name

    def _class_module_L1(self):
        """Return the module path and class name of the L1 transform."""
        module = 'ion.processes.data.transforms.ctdbp.ctdbp_L1'
        class_name = 'CTDBP_L1_Transform'
        return module, class_name

    def _class_module_L2_density(self):
        """Return the module path and class name of the L2 density transform."""
        module = 'ion.processes.data.transforms.ctdbp.ctdbp_L2_density'
        class_name = 'CTDBP_DensityTransform'
        return module, class_name

    def _class_module_L2_salinity(self):
        """Return the module path and class name of the L2 salinity transform."""
        module = 'ion.processes.data.transforms.ctdbp.ctdbp_L2_salinity'
        class_name = 'CTDBP_SalinityTransform'
        return module, class_name

    def _create_input_data_product(self, name_of_transform='', tdom=None, sdom=None):
        """Create an input data product and register it for cleanup.

        The product uses ``self.in_stream_def_id_for_L0`` when
        ``name_of_transform`` is 'L0' and ``self.stream_def_id`` otherwise.

        :param name_of_transform: label used in the product name/description
        :param tdom: dumped temporal domain
        :param sdom: dumped spatial domain
        :return: id of the created data product
        """
        dpod_obj = IonObject(RT.DataProduct,
                             name='dprod_%s' % name_of_transform,
                             description='for_%s' % name_of_transform,
                             temporal_domain=tdom,
                             spatial_domain=sdom)

        log.debug("the stream def id: %s", self.stream_def_id)

        if name_of_transform == 'L0':
            stream_def_id = self.in_stream_def_id_for_L0
        else:
            stream_def_id = self.stream_def_id

        dpod_id = self.dataproduct_management.create_data_product(data_product=dpod_obj,
                                                                  stream_definition_id=stream_def_id)
        self.addCleanup(self.dataproduct_management.delete_data_product, dpod_id)

        log.debug("got the data product out. id: %s", dpod_id)

        return dpod_id

    def _create_output_data_product(self, name_of_transform='', tdom=None, sdom=None):
        """Create an output data product and register it for cleanup.

        The product uses ``self.in_stream_def_id_for_L0`` when
        ``name_of_transform`` is 'L0' and ``self.stream_def_id`` otherwise.

        :param name_of_transform: label used in the product name/description
        :param tdom: dumped temporal domain
        :param sdom: dumped spatial domain
        :return: id of the created data product
        """
        dpod_obj = IonObject(RT.DataProduct,
                             name='dprod_%s' % name_of_transform,
                             description='for_%s' % name_of_transform,
                             temporal_domain=tdom,
                             spatial_domain=sdom)

        if name_of_transform == 'L0':
            stream_def_id = self.in_stream_def_id_for_L0
        else:
            stream_def_id = self.stream_def_id

        dpod_id = self.dataproduct_management.create_data_product(data_product=dpod_obj,
                                                                  stream_definition_id=stream_def_id)
        self.addCleanup(self.dataproduct_management.delete_data_product, dpod_id)

        return dpod_id

    def _launch_transform(self, name_of_transform='', data_proc_def_id=None, input_dpod_id=None,
                          output_dpod_id=None):
        """Create and activate the data process for one transform.

        L1 is launched with the calibration coefficients configuration, L2
        density with a lat/lon configuration, and the others with no
        configuration. Cleanups are registered so that the process is
        deactivated before it is deleted (unittest runs cleanups in reverse
        registration order).

        :return: id of the created data process
        """
        # We need the key name here to be "L2_stream", since when the data process is launched, this name goes into
        # the config as in config.process.publish_streams.L2_stream when the config is used to launch the data process
        # NOTE(review): ``binding`` is computed but never passed on to
        # create_data_process below -- confirm whether it is still needed.
        if name_of_transform in ['L0', 'L1']:
            binding = '%s_stream' % name_of_transform
        elif name_of_transform == 'L2_salinity':
            binding = 'salinity'
        elif name_of_transform == 'L2_density':
            binding = 'density'

        config = None
        if name_of_transform == 'L1':
            config = self._create_calibration_coefficients_dict()
        elif name_of_transform == 'L2_density':
            config = DotDict()
            config.process = {'lat': 32.7153, 'lon': 117.1564}

        log.debug("launching transform for name: %s", name_of_transform)
        log.debug("launching transform for data_proc_def_id: %s\ninput_dpod_id: %s\noutput_dpod_id: %s",
                  data_proc_def_id, input_dpod_id, output_dpod_id)

        data_proc_id = self.data_process_management.create_data_process(
            data_process_definition_id=data_proc_def_id,
            in_data_product_ids=[input_dpod_id],
            out_data_product_ids=[output_dpod_id],
            configuration=config)

        self.addCleanup(self.data_process_management.delete_data_process, data_proc_id)

        self.data_process_management.activate_data_process(data_proc_id)
        self.addCleanup(self.data_process_management.deactivate_data_process, data_proc_id)

        log.debug("Created a data process for ctdbp %s transform: id = %s", name_of_transform, data_proc_id)

        return data_proc_id

    def get_stream_and_route_for_data_prod(self, data_prod_id=''):
        """Return ``(stream_id, stream_route)`` of the first stream of a data product."""
        stream_ids, _ = self.resource_registry.find_objects(data_prod_id, PRED.hasStream, RT.Stream, True)
        stream_id = stream_ids[0]
        input_stream = self.resource_registry.read(stream_id)
        stream_route = input_stream.stream_route

        return stream_id, stream_route

    def _subscribe_to_transform_output(self, name_of_transform, out_data_prod_id):
        """Subscribe to the first stream of a transform's output data product.

        :param name_of_transform: used to name the subscriber's exchange and
            subscription
        :param out_data_prod_id: id of the transform's output data product
        :return: the ``AsyncResult`` that receives the first message
        """
        stream_ids, _ = self.resource_registry.find_objects(out_data_prod_id, PRED.hasStream, RT.Stream, True)
        output_stream_id_of_transform = stream_ids[0]

        return self._start_subscriber_to_transform(name_of_transform=name_of_transform,
                                                   stream_id=output_stream_id_of_transform)

    def start_subscriber_listening_to_L0_transform(self, out_data_prod_id=''):
        """Subscribe to the output of the L0 transform; returns an ``AsyncResult``."""
        return self._subscribe_to_transform_output('L0', out_data_prod_id)

    def start_subscriber_listening_to_L1_transform(self, out_data_prod_id=''):
        """Subscribe to the output of the L1 transform; returns an ``AsyncResult``."""
        return self._subscribe_to_transform_output('L1', out_data_prod_id)

    def start_subscriber_listening_to_L2_density_transform(self, out_data_prod_id=''):
        """Subscribe to the output of the L2 density transform; returns an ``AsyncResult``."""
        return self._subscribe_to_transform_output('L2_density', out_data_prod_id)

    def start_subscriber_listening_to_L2_salinity_transform(self, out_data_prod_id=''):
        """Subscribe to the output of the L2 salinity transform; returns an ``AsyncResult``."""
        return self._subscribe_to_transform_output('L2_salinity', out_data_prod_id)

    def _start_subscriber_to_transform(self, name_of_transform='', stream_id=''):
        """Start a standalone subscriber on ``stream_id``.

        The exchange is named ``'sub_<name_of_transform>'`` and the
        subscription ``'subscriber_to_transform_<name_of_transform>'``.
        Cleanups are registered so that, in reverse order, the subscriber is
        stopped, the subscription deactivated and then deleted.

        :return: an ``AsyncResult`` set by the subscriber callback with each
            received message
        """
        ar = gevent.event.AsyncResult()

        def subscriber(m, r, s):
            ar.set(m)

        sub = StandaloneStreamSubscriber(exchange_name='sub_%s' % name_of_transform, callback=subscriber)

        # Note that this running the below line creates an exchange since none of that name exists before
        sub_id = self.pubsub.create_subscription('subscriber_to_transform_%s' % name_of_transform,
                                                 stream_ids=[stream_id],
                                                 exchange_name='sub_%s' % name_of_transform)
        self.addCleanup(self.pubsub.delete_subscription, sub_id)

        self.pubsub.activate_subscription(sub_id)
        self.addCleanup(self.pubsub.deactivate_subscription, sub_id)

        sub.start()
        self.addCleanup(sub.stop)

        return ar

    def _check_granule_from_L0_transform(self, ar=None):
        """Wait up to 20 seconds for the L0 granule and check its fields."""
        granule_from_transform = ar.get(timeout=20)
        log.debug("Got the following granule from the L0 transform: %s", granule_from_transform)

        # Check the algorithm being applied
        self._check_application_of_L0_algorithm(granule_from_transform)

    def _check_granule_from_L1_transform(self, ar=None):
        """Wait up to 20 seconds for the L1 granule and check its fields."""
        granule_from_transform = ar.get(timeout=20)
        log.debug("Got the following granule from the L1 transform: %s", granule_from_transform)

        # Check the algorithm being applied
        self._check_application_of_L1_algorithm(granule_from_transform)

    def _check_granule_from_L2_density_transform(self, ar=None):
        """Wait up to 20 seconds for the L2 density granule and check its fields."""
        granule_from_transform = ar.get(timeout=20)
        log.debug("Got the following granule from the L2 transform: %s", granule_from_transform)

        # Check the algorithm being applied
        self._check_application_of_L2_density_algorithm(granule_from_transform)

    def _check_granule_from_L2_salinity_transform(self, ar=None):
        """Wait up to 20 seconds for the L2 salinity granule and check its fields."""
        granule_from_transform = ar.get(timeout=20)
        log.debug("Got the following granule from the L2 transform: %s", granule_from_transform)

        # Check the algorithm being applied
        self._check_application_of_L2_salinity_algorithm(granule_from_transform)

    def _check_application_of_L0_algorithm(self, granule=None):
        """ Check the algorithm applied by the L0 transform """
        rdt = RecordDictionaryTool.load_from_granule(granule)

        list_of_expected_keys = ['time', 'pressure', 'conductivity', 'temperature']

        for key in list_of_expected_keys:
            self.assertIn(key, rdt)

    def _check_application_of_L1_algorithm(self, granule=None):
        """ Check the algorithm applied by the L1 transform """
        rdt = RecordDictionaryTool.load_from_granule(granule)

        list_of_expected_keys = ['time', 'pressure', 'conductivity', 'temp']

        for key in list_of_expected_keys:
            self.assertIn(key, rdt)

    def _check_application_of_L2_density_algorithm(self, granule=None):
        """ Check the algorithm applied by the L2 density transform """
        rdt = RecordDictionaryTool.load_from_granule(granule)

        list_of_expected_keys = ['time', 'density']

        for key in list_of_expected_keys:
            self.assertIn(key, rdt)

    def _check_application_of_L2_salinity_algorithm(self, granule=None):
        """ Check the algorithm applied by the L2 salinity transform """
        rdt = RecordDictionaryTool.load_from_granule(granule)

        list_of_expected_keys = ['time', 'salinity']

        for key in list_of_expected_keys:
            self.assertIn(key, rdt)

    def _publish_for_L0_transform(self, input_stream_id=None, stream_route=None):
        """Publish one granule on the L0 input stream so the transform can receive it."""
        self._publish_to_transform(input_stream_id, stream_route)

    def _publish_to_transform(self, stream_id='', stream_route=None):
        """Publish a 5-record granule built from the L0 input stream definition."""
        pub = StandaloneStreamPublisher(stream_id, stream_route)
        publish_granule = self._get_new_ctd_L0_packet(stream_definition_id=self.in_stream_def_id_for_L0, length=5)
        pub.publish(publish_granule)

        log.debug("Published the following granule: %s", publish_granule)

    def _create_input_param_dict_for_test(self, parameter_dict_name=''):
        """Create a parameter dictionary resource for the test.

        The dictionary holds time, conductivity, pressure, a temperature
        context, density and salinity. The temperature context is named
        'temperature' when ``parameter_dict_name`` is 'input_param_for_L0'
        and 'temp' otherwise.

        :param parameter_dict_name: name of the parameter dictionary resource
        :return: id of the created parameter dictionary
        """
        pdict = ParameterDictionary()

        t_ctxt = ParameterContext('time', param_type=QuantityType(value_encoding=numpy.dtype('float64')))
        t_ctxt.axis = AxisTypeEnum.TIME
        t_ctxt.uom = 'seconds since 01-01-1900'
        pdict.add_context(t_ctxt)

        cond_ctxt = ParameterContext('conductivity', param_type=QuantityType(value_encoding=numpy.dtype('float32')))
        cond_ctxt.uom = 'Siemens_per_meter'
        pdict.add_context(cond_ctxt)

        pres_ctxt = ParameterContext('pressure', param_type=QuantityType(value_encoding=numpy.dtype('float32')))
        pres_ctxt.uom = 'Pascal'
        pdict.add_context(pres_ctxt)

        if parameter_dict_name == 'input_param_for_L0':
            temp_ctxt = ParameterContext('temperature',
                                         param_type=QuantityType(value_encoding=numpy.dtype('float32')))
        else:
            temp_ctxt = ParameterContext('temp', param_type=QuantityType(value_encoding=numpy.dtype('float32')))

        temp_ctxt.uom = 'degree_kelvin'
        pdict.add_context(temp_ctxt)

        dens_ctxt = ParameterContext('density', param_type=QuantityType(value_encoding=numpy.dtype('float32')))
        dens_ctxt.uom = 'g/m'
        pdict.add_context(dens_ctxt)

        sal_ctxt = ParameterContext('salinity', param_type=QuantityType(value_encoding=numpy.dtype('float32')))
        sal_ctxt.uom = 'PSU'
        pdict.add_context(sal_ctxt)

        #create temp streamdef so the data product can create the stream
        pc_list = []
        for pc_k, pc in pdict.iteritems():
            ctxt_id = self.dataset_management.create_parameter_context(pc_k, pc[1].dump())
            pc_list.append(ctxt_id)
            # Every context of the L0 dictionary is cleaned up; for any other
            # dictionary only 'temp' is.
            # NOTE(review): presumably the remaining contexts are shared with
            # the L0 dictionary and already scheduled for deletion -- confirm.
            if parameter_dict_name == 'input_param_for_L0' or pc[1].name == 'temp':
                self.addCleanup(self.dataset_management.delete_parameter_context, ctxt_id)

        pdict_id = self.dataset_management.create_parameter_dictionary(parameter_dict_name, pc_list)
        self.addCleanup(self.dataset_management.delete_parameter_dictionary, pdict_id)

        return pdict_id

    def _create_calibration_coefficients_dict(self):
        """Return the configuration holding the L1 calibration coefficients.

        :return: a ``DotDict`` whose ``process.calibration_coeffs`` maps
            'temp_calibration_coeffs', 'cond_calibration_coeffs' and
            'pres_calibration_coeffs' to their coefficient dictionaries
        """
        config = DotDict()
        config.process.calibration_coeffs = {
            'temp_calibration_coeffs': {
                'TA0': 1.561342e-03,
                'TA1': 2.561486e-04,
                'TA2': 1.896537e-07,
                'TA3': 1.301189e-07,
                'TOFFSET': 0.000000e+00
            },

            'cond_calibration_coeffs': {
                'G': -9.896568e-01,
                'H': 1.316599e-01,
                'I': -2.213854e-04,
                'J': 3.292199e-05,
                'CPCOR': -9.570000e-08,
                'CTCOR': 3.250000e-06,
                'CSLOPE': 1.000000e+00
            },

            'pres_calibration_coeffs': {
                'PA0': 4.960417e-02,
                'PA1': 4.883682e-04,
                'PA2': -5.687309e-12,
                'PTCA0': 5.249802e+05,
                'PTCA1': 7.595719e+00,
                'PTCA2': -1.322776e-01,
                'PTCB0': 2.503125e+01,
                'PTCB1': 5.000000e-05,
                'PTCB2': 0.000000e+00,
                'PTEMPA0': -6.431504e+01,
                'PTEMPA1': 5.168177e+01,
                'PTEMPA2': -2.847757e-01,
                'POFFSET': 0.000000e+00
            }
        }

        return config
class PubsubManagementIntTest(IonIntegrationTestCase): def setUp(self): self._start_container() self.container.start_rel_from_url('res/deploy/r2deploy.yml') self.pubsub_management = PubsubManagementServiceClient() self.resource_registry = ResourceRegistryServiceClient() self.dataset_management = DatasetManagementServiceClient() self.data_product_management = DataProductManagementServiceClient() self.pdicts = {} self.queue_cleanup = list() self.exchange_cleanup = list() self.context_ids = set() def tearDown(self): for queue in self.queue_cleanup: xn = self.container.ex_manager.create_xn_queue(queue) xn.delete() for exchange in self.exchange_cleanup: xp = self.container.ex_manager.create_xp(exchange) xp.delete() self.cleanup_contexts() def test_stream_def_crud(self): # Test Creation pdict = DatasetManagementService.get_parameter_dictionary_by_name('ctd_parsed_param_dict') stream_definition_id = self.pubsub_management.create_stream_definition('ctd parsed', parameter_dictionary_id=pdict.identifier) self.addCleanup(self.pubsub_management.delete_stream_definition, stream_definition_id) # Make sure there is an assoc self.assertTrue(self.resource_registry.find_associations(subject=stream_definition_id, predicate=PRED.hasParameterDictionary, object=pdict.identifier, id_only=True)) # Test Reading stream_definition = self.pubsub_management.read_stream_definition(stream_definition_id) self.assertTrue(PubsubManagementService._compare_pdicts(pdict.dump(), stream_definition.parameter_dictionary)) # Test comparisons in_stream_definition_id = self.pubsub_management.create_stream_definition('L0 products', parameter_dictionary_id=pdict.identifier, available_fields=['time','temp','conductivity','pressure']) self.addCleanup(self.pubsub_management.delete_stream_definition, in_stream_definition_id) out_stream_definition_id = in_stream_definition_id self.assertTrue(self.pubsub_management.compare_stream_definition(in_stream_definition_id, out_stream_definition_id)) 
self.assertTrue(self.pubsub_management.compatible_stream_definitions(in_stream_definition_id, out_stream_definition_id)) out_stream_definition_id = self.pubsub_management.create_stream_definition('L2 Products', parameter_dictionary_id=pdict.identifier, available_fields=['time','salinity','density']) self.addCleanup(self.pubsub_management.delete_stream_definition, out_stream_definition_id) self.assertFalse(self.pubsub_management.compare_stream_definition(in_stream_definition_id, out_stream_definition_id)) self.assertTrue(self.pubsub_management.compatible_stream_definitions(in_stream_definition_id, out_stream_definition_id)) @unittest.skip('Needs to be refactored for cleanup') def test_validate_stream_defs(self): self.addCleanup(self.cleanup_contexts) #test no input incoming_pdict_id = self._get_pdict(['TIME', 'LAT', 'LON', 'TEMPWAT_L0', 'CONDWAT_L0', 'PRESWAT_L0']) outgoing_pdict_id = self._get_pdict(['DENSITY', 'PRACSAL', 'TEMPWAT_L1', 'CONDWAT_L1', 'PRESWAT_L1']) available_fields_in = [] available_fields_out = [] incoming_stream_def_id = self.pubsub_management.create_stream_definition('in_sd_0', parameter_dictionary_id=incoming_pdict_id, available_fields=available_fields_in) self.addCleanup(self.pubsub_management.delete_stream_definition, incoming_stream_def_id) outgoing_stream_def_id = self.pubsub_management.create_stream_definition('out_sd_0', parameter_dictionary_id=outgoing_pdict_id, available_fields=available_fields_out) self.addCleanup(self.pubsub_management.delete_stream_definition, outgoing_stream_def_id) result = self.pubsub_management.validate_stream_defs(incoming_stream_def_id, outgoing_stream_def_id) self.assertFalse(result) #test input with no output incoming_pdict_id = self._get_pdict(['TIME', 'LAT', 'LON', 'TEMPWAT_L0', 'CONDWAT_L0', 'PRESWAT_L0']) outgoing_pdict_id = self._get_pdict(['DENSITY', 'PRACSAL', 'TEMPWAT_L1', 'CONDWAT_L1', 'PRESWAT_L1']) available_fields_in = ['TIME', 'LAT', 'LON', 'TEMPWAT_L0', 'CONDWAT_L0', 'PRESWAT_L0'] 
available_fields_out = [] incoming_stream_def_id = self.pubsub_management.create_stream_definition('in_sd_1', parameter_dictionary_id=incoming_pdict_id, available_fields=available_fields_in) self.addCleanup(self.pubsub_management.delete_stream_definition, incoming_stream_def_id) outgoing_stream_def_id = self.pubsub_management.create_stream_definition('out_sd_1', parameter_dictionary_id=outgoing_pdict_id, available_fields=available_fields_out) self.addCleanup(self.pubsub_management.delete_stream_definition, outgoing_stream_def_id) result = self.pubsub_management.validate_stream_defs(incoming_stream_def_id, outgoing_stream_def_id) self.assertTrue(result) #test available field missing parameter context definition -- missing PRESWAT_L0 incoming_pdict_id = self._get_pdict(['TIME', 'LAT', 'LON', 'TEMPWAT_L0', 'CONDWAT_L0']) outgoing_pdict_id = self._get_pdict(['DENSITY', 'PRACSAL', 'TEMPWAT_L1', 'CONDWAT_L1', 'PRESWAT_L1']) available_fields_in = ['TIME', 'LAT', 'LON', 'TEMPWAT_L0', 'CONDWAT_L0', 'PRESWAT_L0'] available_fields_out = ['DENSITY'] incoming_stream_def_id = self.pubsub_management.create_stream_definition('in_sd_2', parameter_dictionary_id=incoming_pdict_id, available_fields=available_fields_in) self.addCleanup(self.pubsub_management.delete_stream_definition, incoming_stream_def_id) outgoing_stream_def_id = self.pubsub_management.create_stream_definition('out_sd_2', parameter_dictionary_id=outgoing_pdict_id, available_fields=available_fields_out) self.addCleanup(self.pubsub_management.delete_stream_definition, outgoing_stream_def_id) result = self.pubsub_management.validate_stream_defs(incoming_stream_def_id, outgoing_stream_def_id) self.assertFalse(result) #test l1 from l0 incoming_pdict_id = self._get_pdict(['TIME', 'LAT', 'LON', 'TEMPWAT_L0', 'CONDWAT_L0', 'PRESWAT_L0']) outgoing_pdict_id = self._get_pdict(['TEMPWAT_L1', 'CONDWAT_L1', 'PRESWAT_L1']) available_fields_in = ['TIME', 'LAT', 'LON', 'TEMPWAT_L0', 'CONDWAT_L0', 'PRESWAT_L0'] available_fields_out = 
['TEMPWAT_L1', 'CONDWAT_L1', 'PRESWAT_L1'] incoming_stream_def_id = self.pubsub_management.create_stream_definition('in_sd_3', parameter_dictionary_id=incoming_pdict_id, available_fields=available_fields_in) self.addCleanup(self.pubsub_management.delete_stream_definition, incoming_stream_def_id) outgoing_stream_def_id = self.pubsub_management.create_stream_definition('out_sd_3', parameter_dictionary_id=outgoing_pdict_id, available_fields=available_fields_out) self.addCleanup(self.pubsub_management.delete_stream_definition, outgoing_stream_def_id) result = self.pubsub_management.validate_stream_defs(incoming_stream_def_id, outgoing_stream_def_id) self.assertTrue(result) #test l2 from l0 incoming_pdict_id = self._get_pdict(['TIME', 'LAT', 'LON', 'TEMPWAT_L0', 'CONDWAT_L0', 'PRESWAT_L0']) outgoing_pdict_id = self._get_pdict(['TEMPWAT_L1', 'CONDWAT_L1', 'PRESWAT_L1', 'DENSITY', 'PRACSAL']) available_fields_in = ['TIME', 'LAT', 'LON', 'TEMPWAT_L0', 'CONDWAT_L0', 'PRESWAT_L0'] available_fields_out = ['DENSITY', 'PRACSAL'] incoming_stream_def_id = self.pubsub_management.create_stream_definition('in_sd_4', parameter_dictionary_id=incoming_pdict_id, available_fields=available_fields_in) self.addCleanup(self.pubsub_management.delete_stream_definition, incoming_stream_def_id) outgoing_stream_def_id = self.pubsub_management.create_stream_definition('out_sd_4', parameter_dictionary_id=outgoing_pdict_id, available_fields=available_fields_out) self.addCleanup(self.pubsub_management.delete_stream_definition, outgoing_stream_def_id) result = self.pubsub_management.validate_stream_defs(incoming_stream_def_id, outgoing_stream_def_id) self.assertTrue(result) #test Ln from L0 incoming_pdict_id = self._get_pdict(['TIME', 'LAT', 'LON', 'TEMPWAT_L0', 'CONDWAT_L0', 'PRESWAT_L0']) outgoing_pdict_id = self._get_pdict(['DENSITY','PRACSAL','TEMPWAT_L1', 'CONDWAT_L1', 'PRESWAT_L1']) available_fields_in = ['TIME', 'LAT', 'LON', 'TEMPWAT_L0', 'CONDWAT_L0', 'PRESWAT_L0'] available_fields_out = 
['DENSITY', 'PRACSAL', 'TEMPWAT_L1', 'CONDWAT_L1', 'PRESWAT_L1'] incoming_stream_def_id = self.pubsub_management.create_stream_definition('in_sd_5', parameter_dictionary_id=incoming_pdict_id, available_fields=available_fields_in) self.addCleanup(self.pubsub_management.delete_stream_definition, incoming_stream_def_id) outgoing_stream_def_id = self.pubsub_management.create_stream_definition('out_sd_5', parameter_dictionary_id=outgoing_pdict_id, available_fields=available_fields_out) self.addCleanup(self.pubsub_management.delete_stream_definition, outgoing_stream_def_id) result = self.pubsub_management.validate_stream_defs(incoming_stream_def_id, outgoing_stream_def_id) self.assertTrue(result) #test L2 from L1 incoming_pdict_id = self._get_pdict(['TIME', 'LAT', 'LON', 'TEMPWAT_L1', 'CONDWAT_L1', 'PRESWAT_L1']) outgoing_pdict_id = self._get_pdict(['DENSITY','PRACSAL','TEMPWAT_L1', 'CONDWAT_L1', 'PRESWAT_L1']) available_fields_in = ['TIME', 'LAT', 'LON', 'TEMPWAT_L1', 'CONDWAT_L1', 'PRESWAT_L1'] available_fields_out = ['DENSITY', 'PRACSAL'] incoming_stream_def_id = self.pubsub_management.create_stream_definition('in_sd_6', parameter_dictionary_id=incoming_pdict_id, available_fields=available_fields_in) self.addCleanup(self.pubsub_management.delete_stream_definition, incoming_stream_def_id) outgoing_stream_def_id = self.pubsub_management.create_stream_definition('out_sd_6', parameter_dictionary_id=outgoing_pdict_id, available_fields=available_fields_out) self.addCleanup(self.pubsub_management.delete_stream_definition, outgoing_stream_def_id) result = self.pubsub_management.validate_stream_defs(incoming_stream_def_id, outgoing_stream_def_id) self.assertTrue(result) #test L1 from L0 missing L0 incoming_pdict_id = self._get_pdict(['TIME', 'LAT', 'LON']) outgoing_pdict_id = self._get_pdict(['TEMPWAT_L1', 'CONDWAT_L1', 'PRESWAT_L1']) available_fields_in = ['TIME', 'LAT', 'LON'] available_fields_out = ['DENSITY', 'PRACSAL'] incoming_stream_def_id = 
self.pubsub_management.create_stream_definition('in_sd_7', parameter_dictionary_id=incoming_pdict_id, available_fields=available_fields_in) self.addCleanup(self.pubsub_management.delete_stream_definition, incoming_stream_def_id) outgoing_stream_def_id = self.pubsub_management.create_stream_definition('out_sd_7', parameter_dictionary_id=outgoing_pdict_id, available_fields=available_fields_out) self.addCleanup(self.pubsub_management.delete_stream_definition, outgoing_stream_def_id) result = self.pubsub_management.validate_stream_defs(incoming_stream_def_id, outgoing_stream_def_id) self.assertFalse(result) #test L2 from L0 missing L0 incoming_pdict_id = self._get_pdict(['TIME', 'LAT', 'LON']) outgoing_pdict_id = self._get_pdict(['DENSITY', 'PRACSAL', 'TEMPWAT_L1', 'CONDWAT_L1', 'PRESWAT_L1']) available_fields_in = ['TIME', 'LAT', 'LON'] available_fields_out = ['DENSITY', 'PRACSAL'] incoming_stream_def_id = self.pubsub_management.create_stream_definition('in_sd_8', parameter_dictionary_id=incoming_pdict_id, available_fields=available_fields_in) self.addCleanup(self.pubsub_management.delete_stream_definition, incoming_stream_def_id) outgoing_stream_def_id = self.pubsub_management.create_stream_definition('out_sd_8', parameter_dictionary_id=outgoing_pdict_id, available_fields=available_fields_out) self.addCleanup(self.pubsub_management.delete_stream_definition, outgoing_stream_def_id) result = self.pubsub_management.validate_stream_defs(incoming_stream_def_id, outgoing_stream_def_id) self.assertFalse(result) #test L2 from L0 missing L1 incoming_pdict_id = self._get_pdict(['TIME', 'LAT', 'LON', 'TEMPWAT_L0', 'CONDWAT_L0', 'PRESWAT_L0']) outgoing_pdict_id = self._get_pdict(['DENSITY', 'PRACSAL']) available_fields_in = ['TIME', 'LAT', 'LON', 'TEMPWAT_L0', 'CONDWAT_L0', 'PRESWAT_L0'] available_fields_out = ['DENSITY', 'PRACSAL'] incoming_stream_def_id = self.pubsub_management.create_stream_definition('in_sd_9', parameter_dictionary_id=incoming_pdict_id, 
available_fields=available_fields_in) self.addCleanup(self.pubsub_management.delete_stream_definition, incoming_stream_def_id) outgoing_stream_def_id = self.pubsub_management.create_stream_definition('out_sd_9', parameter_dictionary_id=outgoing_pdict_id, available_fields=available_fields_out) self.addCleanup(self.pubsub_management.delete_stream_definition, outgoing_stream_def_id) result = self.pubsub_management.validate_stream_defs(incoming_stream_def_id, outgoing_stream_def_id) self.assertFalse(result) def publish_on_stream(self, stream_id, msg): stream = self.pubsub_management.read_stream(stream_id) stream_route = stream.stream_route publisher = StandaloneStreamPublisher(stream_id=stream_id, stream_route=stream_route) publisher.publish(msg) def test_stream_crud(self): stream_def_id = self.pubsub_management.create_stream_definition('test_definition', stream_type='stream') self.addCleanup(self.pubsub_management.delete_stream_definition, stream_def_id) topic_id = self.pubsub_management.create_topic(name='test_topic', exchange_point='test_exchange') self.addCleanup(self.pubsub_management.delete_topic, topic_id) self.exchange_cleanup.append('test_exchange') topic2_id = self.pubsub_management.create_topic(name='another_topic', exchange_point='outside') self.addCleanup(self.pubsub_management.delete_topic, topic2_id) stream_id, route = self.pubsub_management.create_stream(name='test_stream', topic_ids=[topic_id, topic2_id], exchange_point='test_exchange', stream_definition_id=stream_def_id) topics, assocs = self.resource_registry.find_objects(subject=stream_id, predicate=PRED.hasTopic, id_only=True) self.assertEquals(topics,[topic_id]) defs, assocs = self.resource_registry.find_objects(subject=stream_id, predicate=PRED.hasStreamDefinition, id_only=True) self.assertTrue(len(defs)) stream = self.pubsub_management.read_stream(stream_id) self.assertEquals(stream.name,'test_stream') self.pubsub_management.delete_stream(stream_id) with self.assertRaises(NotFound): 
self.pubsub_management.read_stream(stream_id) defs, assocs = self.resource_registry.find_objects(subject=stream_id, predicate=PRED.hasStreamDefinition, id_only=True) self.assertFalse(len(defs)) topics, assocs = self.resource_registry.find_objects(subject=stream_id, predicate=PRED.hasTopic, id_only=True) self.assertFalse(len(topics)) def test_data_product_subscription(self): pdict_id = self.dataset_management.read_parameter_dictionary_by_name('ctd_parsed_param_dict', id_only=True) stream_def_id = self.pubsub_management.create_stream_definition('ctd parsed', parameter_dictionary_id=pdict_id) self.addCleanup(self.pubsub_management.delete_stream_definition, stream_def_id) tdom, sdom = time_series_domain() dp = DataProduct(name='ctd parsed') dp.spatial_domain = sdom.dump() dp.temporal_domain = tdom.dump() data_product_id = self.data_product_management.create_data_product(data_product=dp, stream_definition_id=stream_def_id) self.addCleanup(self.data_product_management.delete_data_product, data_product_id) subscription_id = self.pubsub_management.create_subscription('validator', data_product_ids=[data_product_id]) self.addCleanup(self.pubsub_management.delete_subscription, subscription_id) validated = Event() def validation(msg, route, stream_id): validated.set() stream_ids, _ = self.resource_registry.find_objects(subject=data_product_id, predicate=PRED.hasStream, id_only=True) dp_stream_id = stream_ids.pop() validator = StandaloneStreamSubscriber('validator', callback=validation) validator.start() self.addCleanup(validator.stop) self.pubsub_management.activate_subscription(subscription_id) self.addCleanup(self.pubsub_management.deactivate_subscription, subscription_id) route = self.pubsub_management.read_stream_route(dp_stream_id) publisher = StandaloneStreamPublisher(dp_stream_id, route) publisher.publish('hi') self.assertTrue(validated.wait(10)) def test_subscription_crud(self): stream_def_id = self.pubsub_management.create_stream_definition('test_definition', 
stream_type='stream') stream_id, route = self.pubsub_management.create_stream(name='test_stream', exchange_point='test_exchange', stream_definition_id=stream_def_id) subscription_id = self.pubsub_management.create_subscription(name='test subscription', stream_ids=[stream_id], exchange_name='test_queue') self.exchange_cleanup.append('test_exchange') subs, assocs = self.resource_registry.find_objects(subject=subscription_id,predicate=PRED.hasStream,id_only=True) self.assertEquals(subs,[stream_id]) res, _ = self.resource_registry.find_resources(restype=RT.ExchangeName, name='test_queue', id_only=True) self.assertEquals(len(res),1) subs, assocs = self.resource_registry.find_subjects(object=subscription_id, predicate=PRED.hasSubscription, id_only=True) self.assertEquals(subs[0], res[0]) subscription = self.pubsub_management.read_subscription(subscription_id) self.assertEquals(subscription.exchange_name, 'test_queue') self.pubsub_management.delete_subscription(subscription_id) subs, assocs = self.resource_registry.find_objects(subject=subscription_id,predicate=PRED.hasStream,id_only=True) self.assertFalse(len(subs)) subs, assocs = self.resource_registry.find_subjects(object=subscription_id, predicate=PRED.hasSubscription, id_only=True) self.assertFalse(len(subs)) self.pubsub_management.delete_stream(stream_id) self.pubsub_management.delete_stream_definition(stream_def_id) def test_move_before_activate(self): stream_id, route = self.pubsub_management.create_stream(name='test_stream', exchange_point='test_xp') #-------------------------------------------------------------------------------- # Test moving before activate #-------------------------------------------------------------------------------- subscription_id = self.pubsub_management.create_subscription('first_queue', stream_ids=[stream_id]) xn_ids, _ = self.resource_registry.find_resources(restype=RT.ExchangeName, name='first_queue', id_only=True) subjects, _ = 
self.resource_registry.find_subjects(object=subscription_id, predicate=PRED.hasSubscription, id_only=True) self.assertEquals(xn_ids[0], subjects[0]) self.pubsub_management.move_subscription(subscription_id, exchange_name='second_queue') xn_ids, _ = self.resource_registry.find_resources(restype=RT.ExchangeName, name='second_queue', id_only=True) subjects, _ = self.resource_registry.find_subjects(object=subscription_id, predicate=PRED.hasSubscription, id_only=True) self.assertEquals(len(subjects),1) self.assertEquals(subjects[0], xn_ids[0]) self.pubsub_management.delete_subscription(subscription_id) self.pubsub_management.delete_stream(stream_id) def test_move_activated_subscription(self): stream_id, route = self.pubsub_management.create_stream(name='test_stream', exchange_point='test_xp') #-------------------------------------------------------------------------------- # Test moving after activate #-------------------------------------------------------------------------------- subscription_id = self.pubsub_management.create_subscription('first_queue', stream_ids=[stream_id]) self.pubsub_management.activate_subscription(subscription_id) xn_ids, _ = self.resource_registry.find_resources(restype=RT.ExchangeName, name='first_queue', id_only=True) subjects, _ = self.resource_registry.find_subjects(object=subscription_id, predicate=PRED.hasSubscription, id_only=True) self.assertEquals(xn_ids[0], subjects[0]) self.verified = Event() def verify(m,r,s): self.assertEquals(m,'verified') self.verified.set() subscriber = StandaloneStreamSubscriber('second_queue', verify) subscriber.start() self.pubsub_management.move_subscription(subscription_id, exchange_name='second_queue') xn_ids, _ = self.resource_registry.find_resources(restype=RT.ExchangeName, name='second_queue', id_only=True) subjects, _ = self.resource_registry.find_subjects(object=subscription_id, predicate=PRED.hasSubscription, id_only=True) self.assertEquals(len(subjects),1) self.assertEquals(subjects[0], xn_ids[0]) 
publisher = StandaloneStreamPublisher(stream_id, route) publisher.publish('verified') self.assertTrue(self.verified.wait(2)) self.pubsub_management.deactivate_subscription(subscription_id) self.pubsub_management.delete_subscription(subscription_id) self.pubsub_management.delete_stream(stream_id) def test_queue_cleanup(self): stream_id, route = self.pubsub_management.create_stream('test_stream','xp1') xn_objs, _ = self.resource_registry.find_resources(restype=RT.ExchangeName, name='queue1') for xn_obj in xn_objs: xn = self.container.ex_manager.create_xn_queue(xn_obj.name) xn.delete() subscription_id = self.pubsub_management.create_subscription('queue1',stream_ids=[stream_id]) xn_ids, _ = self.resource_registry.find_resources(restype=RT.ExchangeName, name='queue1') self.assertEquals(len(xn_ids),1) self.pubsub_management.delete_subscription(subscription_id) xn_ids, _ = self.resource_registry.find_resources(restype=RT.ExchangeName, name='queue1') self.assertEquals(len(xn_ids),0) def test_activation_and_deactivation(self): stream_id, route = self.pubsub_management.create_stream('stream1','xp1') subscription_id = self.pubsub_management.create_subscription('sub1', stream_ids=[stream_id]) self.check1 = Event() def verifier(m,r,s): self.check1.set() subscriber = StandaloneStreamSubscriber('sub1',verifier) subscriber.start() publisher = StandaloneStreamPublisher(stream_id, route) publisher.publish('should not receive') self.assertFalse(self.check1.wait(0.25)) self.pubsub_management.activate_subscription(subscription_id) publisher.publish('should receive') self.assertTrue(self.check1.wait(2)) self.check1.clear() self.assertFalse(self.check1.is_set()) self.pubsub_management.deactivate_subscription(subscription_id) publisher.publish('should not receive') self.assertFalse(self.check1.wait(0.5)) self.pubsub_management.activate_subscription(subscription_id) publisher.publish('should receive') self.assertTrue(self.check1.wait(2)) subscriber.stop() 
self.pubsub_management.deactivate_subscription(subscription_id) self.pubsub_management.delete_subscription(subscription_id) self.pubsub_management.delete_stream(stream_id) def test_topic_crud(self): topic_id = self.pubsub_management.create_topic(name='test_topic', exchange_point='test_xp') self.exchange_cleanup.append('test_xp') topic = self.pubsub_management.read_topic(topic_id) self.assertEquals(topic.name,'test_topic') self.assertEquals(topic.exchange_point, 'test_xp') self.pubsub_management.delete_topic(topic_id) with self.assertRaises(NotFound): self.pubsub_management.read_topic(topic_id) def test_full_pubsub(self): self.sub1_sat = Event() self.sub2_sat = Event() def subscriber1(m,r,s): self.sub1_sat.set() def subscriber2(m,r,s): self.sub2_sat.set() sub1 = StandaloneStreamSubscriber('sub1', subscriber1) sub1.start() self.addCleanup(sub1.stop) sub2 = StandaloneStreamSubscriber('sub2', subscriber2) sub2.start() self.addCleanup(sub2.stop) log_topic = self.pubsub_management.create_topic('instrument_logs', exchange_point='instruments') self.addCleanup(self.pubsub_management.delete_topic, log_topic) science_topic = self.pubsub_management.create_topic('science_data', exchange_point='instruments') self.addCleanup(self.pubsub_management.delete_topic, science_topic) events_topic = self.pubsub_management.create_topic('notifications', exchange_point='events') self.addCleanup(self.pubsub_management.delete_topic, events_topic) log_stream, route = self.pubsub_management.create_stream('instrument1-logs', topic_ids=[log_topic], exchange_point='instruments') self.addCleanup(self.pubsub_management.delete_stream, log_stream) ctd_stream, route = self.pubsub_management.create_stream('instrument1-ctd', topic_ids=[science_topic], exchange_point='instruments') self.addCleanup(self.pubsub_management.delete_stream, ctd_stream) event_stream, route = self.pubsub_management.create_stream('notifications', topic_ids=[events_topic], exchange_point='events') 
self.addCleanup(self.pubsub_management.delete_stream, event_stream) raw_stream, route = self.pubsub_management.create_stream('temp', exchange_point='global.data') self.addCleanup(self.pubsub_management.delete_stream, raw_stream) subscription1 = self.pubsub_management.create_subscription('subscription1', stream_ids=[log_stream,event_stream], exchange_name='sub1') self.addCleanup(self.pubsub_management.delete_subscription, subscription1) subscription2 = self.pubsub_management.create_subscription('subscription2', exchange_points=['global.data'], stream_ids=[ctd_stream], exchange_name='sub2') self.addCleanup(self.pubsub_management.delete_subscription, subscription2) self.pubsub_management.activate_subscription(subscription1) self.addCleanup(self.pubsub_management.deactivate_subscription, subscription1) self.pubsub_management.activate_subscription(subscription2) self.addCleanup(self.pubsub_management.deactivate_subscription, subscription2) self.publish_on_stream(log_stream, 1) self.assertTrue(self.sub1_sat.wait(4)) self.assertFalse(self.sub2_sat.is_set()) self.publish_on_stream(raw_stream,1) self.assertTrue(self.sub1_sat.wait(4)) def test_topic_craziness(self): self.msg_queue = Queue() def subscriber1(m,r,s): self.msg_queue.put(m) sub1 = StandaloneStreamSubscriber('sub1', subscriber1) sub1.start() self.addCleanup(sub1.stop) topic1 = self.pubsub_management.create_topic('topic1', exchange_point='xp1') self.addCleanup(self.pubsub_management.delete_topic, topic1) topic2 = self.pubsub_management.create_topic('topic2', exchange_point='xp1', parent_topic_id=topic1) self.addCleanup(self.pubsub_management.delete_topic, topic2) topic3 = self.pubsub_management.create_topic('topic3', exchange_point='xp1', parent_topic_id=topic1) self.addCleanup(self.pubsub_management.delete_topic, topic3) topic4 = self.pubsub_management.create_topic('topic4', exchange_point='xp1', parent_topic_id=topic2) self.addCleanup(self.pubsub_management.delete_topic, topic4) topic5 = 
self.pubsub_management.create_topic('topic5', exchange_point='xp1', parent_topic_id=topic2) self.addCleanup(self.pubsub_management.delete_topic, topic5) topic6 = self.pubsub_management.create_topic('topic6', exchange_point='xp1', parent_topic_id=topic3) self.addCleanup(self.pubsub_management.delete_topic, topic6) topic7 = self.pubsub_management.create_topic('topic7', exchange_point='xp1', parent_topic_id=topic3) self.addCleanup(self.pubsub_management.delete_topic, topic7) # Tree 2 topic8 = self.pubsub_management.create_topic('topic8', exchange_point='xp2') self.addCleanup(self.pubsub_management.delete_topic, topic8) topic9 = self.pubsub_management.create_topic('topic9', exchange_point='xp2', parent_topic_id=topic8) self.addCleanup(self.pubsub_management.delete_topic, topic9) topic10 = self.pubsub_management.create_topic('topic10', exchange_point='xp2', parent_topic_id=topic9) self.addCleanup(self.pubsub_management.delete_topic, topic10) topic11 = self.pubsub_management.create_topic('topic11', exchange_point='xp2', parent_topic_id=topic9) self.addCleanup(self.pubsub_management.delete_topic, topic11) topic12 = self.pubsub_management.create_topic('topic12', exchange_point='xp2', parent_topic_id=topic11) self.addCleanup(self.pubsub_management.delete_topic, topic12) topic13 = self.pubsub_management.create_topic('topic13', exchange_point='xp2', parent_topic_id=topic11) self.addCleanup(self.pubsub_management.delete_topic, topic13) self.exchange_cleanup.extend(['xp1','xp2']) stream1_id, route = self.pubsub_management.create_stream('stream1', topic_ids=[topic7, topic4, topic5], exchange_point='xp1') self.addCleanup(self.pubsub_management.delete_stream, stream1_id) stream2_id, route = self.pubsub_management.create_stream('stream2', topic_ids=[topic8], exchange_point='xp2') self.addCleanup(self.pubsub_management.delete_stream, stream2_id) stream3_id, route = self.pubsub_management.create_stream('stream3', topic_ids=[topic10,topic13], exchange_point='xp2') 
self.addCleanup(self.pubsub_management.delete_stream, stream3_id) stream4_id, route = self.pubsub_management.create_stream('stream4', topic_ids=[topic9], exchange_point='xp2') self.addCleanup(self.pubsub_management.delete_stream, stream4_id) stream5_id, route = self.pubsub_management.create_stream('stream5', topic_ids=[topic11], exchange_point='xp2') self.addCleanup(self.pubsub_management.delete_stream, stream5_id) subscription1 = self.pubsub_management.create_subscription('sub1', topic_ids=[topic1]) self.addCleanup(self.pubsub_management.delete_subscription, subscription1) subscription2 = self.pubsub_management.create_subscription('sub2', topic_ids=[topic8], exchange_name='sub1') self.addCleanup(self.pubsub_management.delete_subscription, subscription2) subscription3 = self.pubsub_management.create_subscription('sub3', topic_ids=[topic9], exchange_name='sub1') self.addCleanup(self.pubsub_management.delete_subscription, subscription3) subscription4 = self.pubsub_management.create_subscription('sub4', topic_ids=[topic10,topic13, topic11], exchange_name='sub1') self.addCleanup(self.pubsub_management.delete_subscription, subscription4) #-------------------------------------------------------------------------------- self.pubsub_management.activate_subscription(subscription1) self.publish_on_stream(stream1_id,1) self.assertEquals(self.msg_queue.get(timeout=10), 1) with self.assertRaises(Empty): self.msg_queue.get(timeout=0.1) self.pubsub_management.deactivate_subscription(subscription1) #-------------------------------------------------------------------------------- self.pubsub_management.activate_subscription(subscription2) self.publish_on_stream(stream2_id,2) self.assertEquals(self.msg_queue.get(timeout=10), 2) with self.assertRaises(Empty): self.msg_queue.get(timeout=0.1) self.pubsub_management.deactivate_subscription(subscription2) #-------------------------------------------------------------------------------- 
self.pubsub_management.activate_subscription(subscription3) self.publish_on_stream(stream2_id, 3) with self.assertRaises(Empty): self.msg_queue.get(timeout=0.3) self.publish_on_stream(stream3_id, 4) self.assertEquals(self.msg_queue.get(timeout=10),4) self.pubsub_management.deactivate_subscription(subscription3) #-------------------------------------------------------------------------------- self.pubsub_management.activate_subscription(subscription4) self.publish_on_stream(stream4_id, 5) with self.assertRaises(Empty): self.msg_queue.get(timeout=0.3) self.publish_on_stream(stream5_id, 6) self.assertEquals(self.msg_queue.get(timeout=10),6) with self.assertRaises(Empty): self.msg_queue.get(timeout=0.3) self.pubsub_management.deactivate_subscription(subscription4) #-------------------------------------------------------------------------------- def cleanup_contexts(self): for context_id in self.context_ids: self.dataset_management.delete_parameter_context(context_id) def add_context_to_cleanup(self, context_id): self.context_ids.add(context_id) def _get_pdict(self, filter_values): t_ctxt = ParameterContext('TIME', param_type=QuantityType(value_encoding=np.dtype('int64'))) t_ctxt.uom = 'seconds since 01-01-1900' t_ctxt_id = self.dataset_management.create_parameter_context(name='TIME', parameter_context=t_ctxt.dump(), parameter_type='quantity<int64>', units=t_ctxt.uom) self.add_context_to_cleanup(t_ctxt_id) lat_ctxt = ParameterContext('LAT', param_type=ConstantType(QuantityType(value_encoding=np.dtype('float32'))), fill_value=-9999) lat_ctxt.axis = AxisTypeEnum.LAT lat_ctxt.uom = 'degree_north' lat_ctxt_id = self.dataset_management.create_parameter_context(name='LAT', parameter_context=lat_ctxt.dump(), parameter_type='quantity<float32>', units=lat_ctxt.uom) self.add_context_to_cleanup(lat_ctxt_id) lon_ctxt = ParameterContext('LON', param_type=ConstantType(QuantityType(value_encoding=np.dtype('float32'))), fill_value=-9999) lon_ctxt.axis = AxisTypeEnum.LON lon_ctxt.uom = 
'degree_east' lon_ctxt_id = self.dataset_management.create_parameter_context(name='LON', parameter_context=lon_ctxt.dump(), parameter_type='quantity<float32>', units=lon_ctxt.uom) self.add_context_to_cleanup(lon_ctxt_id) # Independent Parameters # Temperature - values expected to be the decimal results of conversion from hex temp_ctxt = ParameterContext('TEMPWAT_L0', param_type=QuantityType(value_encoding=np.dtype('float32')), fill_value=-9999) temp_ctxt.uom = 'deg_C' temp_ctxt_id = self.dataset_management.create_parameter_context(name='TEMPWAT_L0', parameter_context=temp_ctxt.dump(), parameter_type='quantity<float32>', units=temp_ctxt.uom) self.add_context_to_cleanup(temp_ctxt_id) # Conductivity - values expected to be the decimal results of conversion from hex cond_ctxt = ParameterContext('CONDWAT_L0', param_type=QuantityType(value_encoding=np.dtype('float32')), fill_value=-9999) cond_ctxt.uom = 'S m-1' cond_ctxt_id = self.dataset_management.create_parameter_context(name='CONDWAT_L0', parameter_context=cond_ctxt.dump(), parameter_type='quantity<float32>', units=cond_ctxt.uom) self.add_context_to_cleanup(cond_ctxt_id) # Pressure - values expected to be the decimal results of conversion from hex press_ctxt = ParameterContext('PRESWAT_L0', param_type=QuantityType(value_encoding=np.dtype('float32')), fill_value=-9999) press_ctxt.uom = 'dbar' press_ctxt_id = self.dataset_management.create_parameter_context(name='PRESWAT_L0', parameter_context=press_ctxt.dump(), parameter_type='quantity<float32>', units=press_ctxt.uom) self.add_context_to_cleanup(press_ctxt_id) # Dependent Parameters # TEMPWAT_L1 = (TEMPWAT_L0 / 10000) - 10 tl1_func = '(T / 10000) - 10' tl1_pmap = {'T': 'TEMPWAT_L0'} expr = NumexprFunction('TEMPWAT_L1', tl1_func, ['T'], param_map=tl1_pmap) tempL1_ctxt = ParameterContext('TEMPWAT_L1', param_type=ParameterFunctionType(function=expr), variability=VariabilityEnum.TEMPORAL) tempL1_ctxt.uom = 'deg_C' tempL1_ctxt_id = 
self.dataset_management.create_parameter_context(name=tempL1_ctxt.name, parameter_context=tempL1_ctxt.dump(), parameter_type='pfunc', units=tempL1_ctxt.uom) self.add_context_to_cleanup(tempL1_ctxt_id) # CONDWAT_L1 = (CONDWAT_L0 / 100000) - 0.5 cl1_func = '(C / 100000) - 0.5' cl1_pmap = {'C': 'CONDWAT_L0'} expr = NumexprFunction('CONDWAT_L1', cl1_func, ['C'], param_map=cl1_pmap) condL1_ctxt = ParameterContext('CONDWAT_L1', param_type=ParameterFunctionType(function=expr), variability=VariabilityEnum.TEMPORAL) condL1_ctxt.uom = 'S m-1' condL1_ctxt_id = self.dataset_management.create_parameter_context(name=condL1_ctxt.name, parameter_context=condL1_ctxt.dump(), parameter_type='pfunc', units=condL1_ctxt.uom) self.add_context_to_cleanup(condL1_ctxt_id) # Equation uses p_range, which is a calibration coefficient - Fixing to 679.34040721 # PRESWAT_L1 = (PRESWAT_L0 * p_range / (0.85 * 65536)) - (0.05 * p_range) pl1_func = '(P * p_range / (0.85 * 65536)) - (0.05 * p_range)' pl1_pmap = {'P': 'PRESWAT_L0', 'p_range': 679.34040721} expr = NumexprFunction('PRESWAT_L1', pl1_func, ['P', 'p_range'], param_map=pl1_pmap) presL1_ctxt = ParameterContext('PRESWAT_L1', param_type=ParameterFunctionType(function=expr), variability=VariabilityEnum.TEMPORAL) presL1_ctxt.uom = 'S m-1' presL1_ctxt_id = self.dataset_management.create_parameter_context(name=presL1_ctxt.name, parameter_context=presL1_ctxt.dump(), parameter_type='pfunc', units=presL1_ctxt.uom) self.add_context_to_cleanup(presL1_ctxt_id) # Density & practical salinity calucluated using the Gibbs Seawater library - available via python-gsw project: # https://code.google.com/p/python-gsw/ & http://pypi.python.org/pypi/gsw/3.0.1 # PRACSAL = gsw.SP_from_C((CONDWAT_L1 * 10), TEMPWAT_L1, PRESWAT_L1) owner = 'gsw' sal_func = 'SP_from_C' sal_arglist = ['C', 't', 'p'] sal_pmap = {'C': NumexprFunction('CONDWAT_L1*10', 'C*10', ['C'], param_map={'C': 'CONDWAT_L1'}), 't': 'TEMPWAT_L1', 'p': 'PRESWAT_L1'} sal_kwargmap = None expr = 
PythonFunction('PRACSAL', owner, sal_func, sal_arglist, sal_kwargmap, sal_pmap) sal_ctxt = ParameterContext('PRACSAL', param_type=ParameterFunctionType(expr), variability=VariabilityEnum.TEMPORAL) sal_ctxt.uom = 'g kg-1' sal_ctxt_id = self.dataset_management.create_parameter_context(name=sal_ctxt.name, parameter_context=sal_ctxt.dump(), parameter_type='pfunc', units=sal_ctxt.uom) self.add_context_to_cleanup(sal_ctxt_id) # absolute_salinity = gsw.SA_from_SP(PRACSAL, PRESWAT_L1, longitude, latitude) # conservative_temperature = gsw.CT_from_t(absolute_salinity, TEMPWAT_L1, PRESWAT_L1) # DENSITY = gsw.rho(absolute_salinity, conservative_temperature, PRESWAT_L1) owner = 'gsw' abs_sal_expr = PythonFunction('abs_sal', owner, 'SA_from_SP', ['PRACSAL', 'PRESWAT_L1', 'LON','LAT']) cons_temp_expr = PythonFunction('cons_temp', owner, 'CT_from_t', [abs_sal_expr, 'TEMPWAT_L1', 'PRESWAT_L1']) dens_expr = PythonFunction('DENSITY', owner, 'rho', [abs_sal_expr, cons_temp_expr, 'PRESWAT_L1']) dens_ctxt = ParameterContext('DENSITY', param_type=ParameterFunctionType(dens_expr), variability=VariabilityEnum.TEMPORAL) dens_ctxt.uom = 'kg m-3' dens_ctxt_id = self.dataset_management.create_parameter_context(name=dens_ctxt.name, parameter_context=dens_ctxt.dump(), parameter_type='pfunc', units=dens_ctxt.uom) self.add_context_to_cleanup(dens_ctxt_id) ids = [t_ctxt_id, lat_ctxt_id, lon_ctxt_id, temp_ctxt_id, cond_ctxt_id, press_ctxt_id, tempL1_ctxt_id, condL1_ctxt_id, presL1_ctxt_id, sal_ctxt_id, dens_ctxt_id] contexts = [t_ctxt, lat_ctxt, lon_ctxt, temp_ctxt, cond_ctxt, press_ctxt, tempL1_ctxt, condL1_ctxt, presL1_ctxt, sal_ctxt, dens_ctxt] context_ids = [ids[i] for i,ctxt in enumerate(contexts) if ctxt.name in filter_values] pdict_name = '_'.join([ctxt.name for ctxt in contexts if ctxt.name in filter_values]) try: self.pdicts[pdict_name] return self.pdicts[pdict_name] except KeyError: pdict_id = self.dataset_management.create_parameter_dictionary(pdict_name, 
parameter_context_ids=context_ids, temporal_context='time') self.addCleanup(self.dataset_management.delete_parameter_dictionary, pdict_id) self.pdicts[pdict_name] = pdict_id return pdict_id
class TestDMEnd2End(IonIntegrationTestCase):
    """
    End-to-end integration tests for the data management services.

    Each test starts a container from r2deploy.yml, publishes granules on a
    stream, persists them through ingestion into a dataset, and reads them
    back through the data retriever (RPC retrieve, replay, cache, out-of-band).
    """

    def setUp(self):  # Love the non pep-8 convention
        self._start_container()
        self.container.start_rel_from_url("res/deploy/r2deploy.yml")

        self.process_dispatcher = ProcessDispatcherServiceClient()
        self.pubsub_management = PubsubManagementServiceClient()
        self.resource_registry = ResourceRegistryServiceClient()
        self.dataset_management = DatasetManagementServiceClient()
        self.ingestion_management = IngestionManagementServiceClient()
        self.data_retriever = DataRetrieverServiceClient()

        self.pids = []
        self.event = Event()
        self.exchange_space_name = "test_granules"
        self.exchange_point_name = "science_data"
        self.i = 0

        self.purge_queues()
        self.queue_buffer = []
        self.streams = []
        # Best-effort: unpersist every stream recorded in self.streams.
        self.addCleanup(self.stop_all_ingestion)

    def purge_queues(self):
        """
        Purge the "science_granule_ingestion" queue.
        """
        xn = self.container.ex_manager.create_xn_queue("science_granule_ingestion")
        xn.purge()

    def tearDown(self):
        """
        Purge the ingestion queue, terminate spawned processes, clean the
        ingestion subscriptions and delete every queue recorded in
        self.queue_buffer.
        """
        self.purge_queues()
        for pid in self.pids:
            self.container.proc_manager.terminate_process(pid)
        IngestionManagementIntTest.clean_subscriptions()
        for queue in self.queue_buffer:
            if isinstance(queue, ExchangeNameQueue):
                queue.delete()
            elif isinstance(queue, str):
                # Only the queue name was recorded; get a handle to delete it.
                xn = self.container.ex_manager.create_xn_queue(queue)
                xn.delete()

    # --------------------------------------------------------------------------------
    # Helper/Utility methods
    # --------------------------------------------------------------------------------

    def create_dataset(self, parameter_dict_id=""):
        """
        Creates a time-series dataset

        If no parameter dictionary id is given, "ctd_parsed_param_dict" is
        looked up by name and used. Returns the new dataset id.
        """
        tdom, sdom = time_series_domain()
        sdom = sdom.dump()
        tdom = tdom.dump()
        if not parameter_dict_id:
            parameter_dict_id = self.dataset_management.read_parameter_dictionary_by_name(
                "ctd_parsed_param_dict", id_only=True
            )

        dataset_id = self.dataset_management.create_dataset(
            "test_dataset_%i" % self.i,
            parameter_dictionary_id=parameter_dict_id,
            spatial_domain=sdom,
            temporal_domain=tdom,
        )
        return dataset_id

    def _create_replay_pdict(self):
        """
        Builds the "replay_pdict" parameter dictionary shared by the replay tests

        Starts from the contexts of "ctd_parsed_param_dict" and adds a "binary"
        (ArrayType) and a "records" (RecordType) context. Returns the id of the
        newly created parameter dictionary.
        """
        # Get a precompiled parameter dictionary with basic ctd fields
        base_pdict_id = self.dataset_management.read_parameter_dictionary_by_name(
            "ctd_parsed_param_dict", id_only=True
        )
        context_ids = self.dataset_management.read_parameter_contexts(base_pdict_id, id_only=True)

        # Add a field that supports binary data input.
        bin_context = ParameterContext("binary", param_type=ArrayType())
        context_ids.append(self.dataset_management.create_parameter_context("binary", bin_context.dump()))
        # Add another field that supports dictionary elements.
        rec_context = ParameterContext("records", param_type=RecordType())
        context_ids.append(self.dataset_management.create_parameter_context("records", rec_context.dump()))

        return self.dataset_management.create_parameter_dictionary(
            "replay_pdict", parameter_context_ids=context_ids, temporal_context="time"
        )

    def get_datastore(self, dataset_id):
        """
        Gets an instance of the datastore
            This method is primarily used to defeat a bug where integration tests in multiple containers may sometimes
            delete a CouchDB datastore and the other containers are unaware of the new state of the datastore.
        """
        dataset = self.dataset_management.read_dataset(dataset_id)
        datastore_name = dataset.datastore_name
        datastore = self.container.datastore_manager.get_datastore(datastore_name, DataStore.DS_PROFILE.SCIDATA)
        return datastore

    def get_ingestion_config(self):
        """
        Grab the ingestion configuration from the resource registry

        Returns the id of the first IngestionConfiguration resource found.
        """
        # The ingestion configuration should have been created by the bootstrap service
        # which is configured through r2deploy.yml
        ingest_configs, _ = self.resource_registry.find_resources(restype=RT.IngestionConfiguration, id_only=True)
        return ingest_configs[0]

    def launch_producer(self, stream_id=""):
        """
        Launch the producer

        The spawned process id is recorded in self.pids so tearDown can
        terminate it.
        """
        pid = self.container.spawn_process(
            "better_data_producer",
            "ion.processes.data.example_data_producer",
            "BetterDataProducer",
            {"process": {"stream_id": stream_id}},
        )
        self.pids.append(pid)

    def make_simple_dataset(self):
        """
        Makes a stream, a stream definition and a dataset, the essentials for most of these tests

        Returns (stream_id, route, stream_def_id, dataset_id).
        """
        pdict_id = self.dataset_management.read_parameter_dictionary_by_name("ctd_parsed_param_dict", id_only=True)
        stream_def_id = self.pubsub_management.create_stream_definition("ctd data", parameter_dictionary_id=pdict_id)
        stream_id, route = self.pubsub_management.create_stream(
            "ctd stream %i" % self.i, "xp1", stream_definition_id=stream_def_id
        )

        dataset_id = self.create_dataset(pdict_id)

        self.get_datastore(dataset_id)
        # Bump the counter so the next stream/dataset gets a distinct name.
        self.i += 1
        return stream_id, route, stream_def_id, dataset_id

    def publish_hifi(self, stream_id, stream_route, offset=0):
        """
        Publish deterministic data

        Publishes one granule of ten points whose "time" and "temp" values are
        offset*10 .. offset*10 + 9.
        """
        pub = StandaloneStreamPublisher(stream_id, stream_route)

        stream_def = self.pubsub_management.read_stream_definition(stream_id=stream_id)
        stream_def_id = stream_def._id
        rdt = RecordDictionaryTool(stream_definition_id=stream_def_id)
        rdt["time"] = np.arange(10) + (offset * 10)
        rdt["temp"] = np.arange(10) + (offset * 10)
        pub.publish(rdt.to_granule())

    def publish_fake_data(self, stream_id, route):
        """
        Make four granules
        """
        for i in xrange(4):
            self.publish_hifi(stream_id, route, i)

    def start_ingestion(self, stream_id, dataset_id):
        """
        Starts ingestion/persistence for a given dataset
        """
        ingest_config_id = self.get_ingestion_config()
        self.ingestion_management.persist_data_stream(
            stream_id=stream_id, ingestion_configuration_id=ingest_config_id, dataset_id=dataset_id
        )

    def stop_ingestion(self, stream_id):
        """
        Stops ingestion/persistence for a given stream
        """
        ingest_config_id = self.get_ingestion_config()
        self.ingestion_management.unpersist_data_stream(
            stream_id=stream_id, ingestion_configuration_id=ingest_config_id
        )

    def stop_all_ingestion(self):
        """
        Best-effort cleanup: stops ingestion for every stream in self.streams

        A failure on one stream is logged and does not prevent the remaining
        streams from being stopped. Only Exception is caught so interrupts and
        timeouts still propagate.
        """
        for stream_id in self.streams:
            try:
                self.stop_ingestion(stream_id)
            except Exception as e:
                log.warning("Could not stop ingestion for stream %s: %s", stream_id, e)

    def validate_granule_subscription(self, msg, route, stream_id):
        """
        Validation for granule format

        Subscriber callback: empty dict messages are ignored; anything else
        must be a Granule, after which self.event is set.
        """
        if msg == {}:
            return
        rdt = RecordDictionaryTool.load_from_granule(msg)
        log.info("%s", rdt.pretty_print())
        self.assertIsInstance(msg, Granule, "Message is improperly formatted. (%s)" % type(msg))
        self.event.set()

    def wait_until_we_have_enough_granules(self, dataset_id="", data_size=40):
        """
        Loops until there is a sufficient amount of data in the dataset

        Polls every 0.2 seconds until the last "time" value is not the fill
        value and the "time" extent is at least data_size. The whole wait is
        bounded by a 40 second gevent.Timeout.
        """
        done = False
        with gevent.Timeout(40):
            while not done:
                extents = self.dataset_management.dataset_extents(dataset_id, "time")[0]
                granule = self.data_retriever.retrieve_last_data_points(dataset_id, 1)
                rdt = RecordDictionaryTool.load_from_granule(granule)
                if rdt["time"] and rdt["time"][0] != rdt._pdict.get_context("time").fill_value and extents >= data_size:
                    done = True
                else:
                    gevent.sleep(0.2)

    # --------------------------------------------------------------------------------
    # Test Methods
    # --------------------------------------------------------------------------------

    @attr("SMOKE")
    def test_dm_end_2_end(self):
        # --------------------------------------------------------------------------------
        # Set up a stream and have a mock instrument (producer) send data
        # --------------------------------------------------------------------------------
        self.event.clear()

        pdict_id = self._create_replay_pdict()

        stream_definition = self.pubsub_management.create_stream_definition(
            "ctd data", parameter_dictionary_id=pdict_id
        )

        stream_id, route = self.pubsub_management.create_stream(
            "producer", exchange_point=self.exchange_point_name, stream_definition_id=stream_definition
        )

        # --------------------------------------------------------------------------------
        # Start persisting the data on the stream
        # - Get the ingestion configuration from the resource registry
        # - Create the dataset
        # - call persist_data_stream to setup the subscription for the ingestion workers
        #   on the stream that you specify which causes the data to be persisted
        # --------------------------------------------------------------------------------
        ingest_config_id = self.get_ingestion_config()
        dataset_id = self.create_dataset(pdict_id)
        self.ingestion_management.persist_data_stream(
            stream_id=stream_id, ingestion_configuration_id=ingest_config_id, dataset_id=dataset_id
        )

        # --------------------------------------------------------------------------------
        # Now the granules are ingesting and persisted
        # --------------------------------------------------------------------------------
        self.launch_producer(stream_id)
        self.wait_until_we_have_enough_granules(dataset_id, 40)

        # --------------------------------------------------------------------------------
        # Now get the data in one chunk using an RPC call to retrieve
        # --------------------------------------------------------------------------------
        replay_data = self.data_retriever.retrieve(dataset_id)
        self.assertIsInstance(replay_data, Granule)
        rdt = RecordDictionaryTool.load_from_granule(replay_data)
        self.assertTrue((rdt["time"][:10] == np.arange(10)).all(), "%s" % rdt["time"][:])
        self.assertTrue((rdt["binary"][:10] == np.array(["hi"] * 10, dtype="object")).all())

        # --------------------------------------------------------------------------------
        # Now to try the streamed approach
        # --------------------------------------------------------------------------------
        replay_stream_id, replay_route = self.pubsub_management.create_stream(
            "replay_out", exchange_point=self.exchange_point_name, stream_definition_id=stream_definition
        )
        self.replay_id, process_id = self.data_retriever.define_replay(
            dataset_id=dataset_id, stream_id=replay_stream_id
        )
        log.info("Process ID: %s", process_id)

        replay_client = ReplayClient(process_id)

        # --------------------------------------------------------------------------------
        # Create the listening endpoint for the retriever to talk to
        # --------------------------------------------------------------------------------
        xp = self.container.ex_manager.create_xp(self.exchange_point_name)
        subscriber = StandaloneStreamSubscriber(self.exchange_space_name, self.validate_granule_subscription)
        self.queue_buffer.append(self.exchange_space_name)
        subscriber.start()
        subscriber.xn.bind(replay_route.routing_key, xp)

        self.data_retriever.start_replay_agent(self.replay_id)

        self.assertTrue(replay_client.await_agent_ready(5), "The process never launched")
        replay_client.start_replay()

        self.assertTrue(self.event.wait(10))
        subscriber.stop()

        self.data_retriever.cancel_replay_agent(self.replay_id)

        # --------------------------------------------------------------------------------
        # Test the slicing capabilities
        # --------------------------------------------------------------------------------
        granule = self.data_retriever.retrieve(dataset_id=dataset_id, query={"tdoa": slice(0, 5)})
        rdt = RecordDictionaryTool.load_from_granule(granule)
        b = rdt["time"] == np.arange(5)
        # A shape mismatch makes the comparison collapse to a plain bool.
        self.assertTrue(b.all() if not isinstance(b, bool) else b)
        self.streams.append(stream_id)
        self.stop_ingestion(stream_id)

    @unittest.skip("Doesnt work")
    @attr("LOCOINT")
    @unittest.skipIf(os.getenv("CEI_LAUNCH_TEST", False), "Skip test while in CEI LAUNCH mode")
    def test_replay_pause(self):
        pdict_id = self._create_replay_pdict()

        stream_def_id = self.pubsub_management.create_stream_definition(
            "replay_stream", parameter_dictionary_id=pdict_id
        )
        replay_stream, replay_route = self.pubsub_management.create_stream(
            "replay", "xp1", stream_definition_id=stream_def_id
        )
        dataset_id = self.create_dataset(pdict_id)
        scov = DatasetManagementService._get_coverage(dataset_id)

        bb = CoverageCraft(scov)
        bb.rdt["time"] = np.arange(100)
        bb.rdt["temp"] = np.random.random(100) + 30
        bb.sync_with_granule()

        # This invalidates it for multi-host configurations
        DatasetManagementService._persist_coverage(dataset_id, bb.coverage)

        # Set up the subscriber to verify the data
        subscriber = StandaloneStreamSubscriber(self.exchange_space_name, self.validate_granule_subscription)
        xp = self.container.ex_manager.create_xp("xp1")
        self.queue_buffer.append(self.exchange_space_name)
        subscriber.start()
        subscriber.xn.bind(replay_route.routing_key, xp)

        # Set up the replay agent and the client wrapper

        # 1) Define the Replay (dataset and stream to publish on)
        self.replay_id, process_id = self.data_retriever.define_replay(dataset_id=dataset_id, stream_id=replay_stream)
        # 2) Make a client to interact with the process (optionally provide it a process to bind with)
        replay_client = ReplayClient(process_id)
        # 3) Start the agent (launch the process)
        self.data_retriever.start_replay_agent(self.replay_id)
        # 4) Start replaying...
        replay_client.start_replay()

        # Wait till we get some granules
        self.assertTrue(self.event.wait(5))

        # We got granules, pause the replay, clear the queue and allow the process to finish consuming
        replay_client.pause_replay()
        gevent.sleep(1)
        subscriber.xn.purge()
        self.event.clear()

        # Make sure there's no remaining messages being consumed
        self.assertFalse(self.event.wait(1))

        # Resume the replay and wait until we start getting granules again
        replay_client.resume_replay()
        self.assertTrue(self.event.wait(5))

        # Stop the replay, clear the queues
        replay_client.stop_replay()
        gevent.sleep(1)
        subscriber.xn.purge()
        self.event.clear()

        # Make sure that it did indeed stop
        self.assertFalse(self.event.wait(1))

        subscriber.stop()

    def test_retrieve_and_transform(self):
        # Make a simple dataset and start ingestion, pretty standard stuff.
        ctd_stream_id, route, stream_def_id, dataset_id = self.make_simple_dataset()
        self.start_ingestion(ctd_stream_id, dataset_id)

        # Stream definition for the salinity data
        salinity_pdict_id = self.dataset_management.read_parameter_dictionary_by_name(
            "ctd_parsed_param_dict", id_only=True
        )
        sal_stream_def_id = self.pubsub_management.create_stream_definition(
            "sal data", parameter_dictionary_id=salinity_pdict_id
        )

        rdt = RecordDictionaryTool(stream_definition_id=stream_def_id)
        rdt["time"] = np.arange(10)
        rdt["temp"] = np.random.randn(10) * 10 + 30
        rdt["conductivity"] = np.random.randn(10) * 2 + 10
        rdt["pressure"] = np.random.randn(10) * 1 + 12

        publisher = StandaloneStreamPublisher(ctd_stream_id, route)
        publisher.publish(rdt.to_granule())

        # Second granule: same values, next ten timestamps.
        rdt["time"] = np.arange(10, 20)
        publisher.publish(rdt.to_granule())

        self.wait_until_we_have_enough_granules(dataset_id, 20)

        granule = self.data_retriever.retrieve(
            dataset_id,
            None,
            None,
            "ion.processes.data.transforms.ctd.ctd_L2_salinity",
            "CTDL2SalinityTransformAlgorithm",
            kwargs=dict(params=sal_stream_def_id),
        )
        rdt = RecordDictionaryTool.load_from_granule(granule)
        for i in rdt["salinity"]:
            self.assertNotEquals(i, 0)
        self.streams.append(ctd_stream_id)
        self.stop_ingestion(ctd_stream_id)

    def test_last_granule(self):
        stream_id, route, stream_def_id, dataset_id = self.make_simple_dataset()
        self.start_ingestion(stream_id, dataset_id)

        self.publish_hifi(stream_id, route, 0)
        self.publish_hifi(stream_id, route, 1)

        self.wait_until_we_have_enough_granules(dataset_id, 20)  # I just need two

        def verifier():
            # The last ten points must be the second granule (10..19).
            replay_granule = self.data_retriever.retrieve_last_data_points(dataset_id, 10)
            rdt = RecordDictionaryTool.load_from_granule(replay_granule)
            comp = rdt["time"] == np.arange(10) + 10
            if not isinstance(comp, bool):
                return comp.all()
            return False

        success = poll(verifier)
        self.assertTrue(success)

        def verify_points():
            # The last five points must be 15..19.
            replay_granule = self.data_retriever.retrieve_last_data_points(dataset_id, 5)
            rdt = RecordDictionaryTool.load_from_granule(replay_granule)
            comp = rdt["time"] == np.arange(15, 20)
            if not isinstance(comp, bool):
                return comp.all()
            return False

        success = poll(verify_points)
        self.assertTrue(success)
        self.streams.append(stream_id)
        self.stop_ingestion(stream_id)

    def test_replay_with_parameters(self):
        # --------------------------------------------------------------------------------
        # Create the configurations and the dataset
        # --------------------------------------------------------------------------------
        pdict_id = self._create_replay_pdict()

        stream_def_id = self.pubsub_management.create_stream_definition(
            "replay_stream", parameter_dictionary_id=pdict_id
        )

        stream_id, route = self.pubsub_management.create_stream(
            "replay_with_params", exchange_point=self.exchange_point_name, stream_definition_id=stream_def_id
        )
        config_id = self.get_ingestion_config()
        dataset_id = self.create_dataset(pdict_id)
        self.ingestion_management.persist_data_stream(
            stream_id=stream_id, ingestion_configuration_id=config_id, dataset_id=dataset_id
        )

        # --------------------------------------------------------------------------------
        # Coerce the datastore into existence (beats race condition)
        # --------------------------------------------------------------------------------
        self.get_datastore(dataset_id)

        self.launch_producer(stream_id)

        self.wait_until_we_have_enough_granules(dataset_id, 40)

        query = {
            "start_time": 0 - 2208988800,
            "end_time": 20 - 2208988800,
            "stride_time": 2,
            "parameters": ["time", "temp"],
        }
        retrieved_data = self.data_retriever.retrieve(dataset_id=dataset_id, query=query)

        rdt = RecordDictionaryTool.load_from_granule(retrieved_data)
        comp = np.arange(0, 20, 2) == rdt["time"]
        self.assertTrue(comp.all(), "%s" % rdt.pretty_print())
        self.assertEquals(set(rdt.iterkeys()), set(["time", "temp"]))

        extents = self.dataset_management.dataset_extents(dataset_id=dataset_id, parameters=["time", "temp"])
        self.assertTrue(extents["time"] >= 20)
        self.assertTrue(extents["temp"] >= 20)

        self.streams.append(stream_id)
        self.stop_ingestion(stream_id)

    def test_repersist_data(self):
        stream_id, route, stream_def_id, dataset_id = self.make_simple_dataset()
        self.start_ingestion(stream_id, dataset_id)
        self.publish_hifi(stream_id, route, 0)
        self.publish_hifi(stream_id, route, 1)
        self.wait_until_we_have_enough_granules(dataset_id, 20)

        # Stop and restart persistence on the same stream/dataset pair.
        config_id = self.get_ingestion_config()
        self.ingestion_management.unpersist_data_stream(stream_id=stream_id, ingestion_configuration_id=config_id)
        self.ingestion_management.persist_data_stream(
            stream_id=stream_id, ingestion_configuration_id=config_id, dataset_id=dataset_id
        )

        self.publish_hifi(stream_id, route, 2)
        self.publish_hifi(stream_id, route, 3)
        self.wait_until_we_have_enough_granules(dataset_id, 40)

        success = False
        with gevent.timeout.Timeout(5):
            while not success:
                replay_granule = self.data_retriever.retrieve(dataset_id)

                rdt = RecordDictionaryTool.load_from_granule(replay_granule)

                comp = rdt["time"] == np.arange(0, 40)
                if not isinstance(comp, bool):
                    success = comp.all()
                gevent.sleep(1)

        self.assertTrue(success)
        self.streams.append(stream_id)
        self.stop_ingestion(stream_id)

    @attr("LOCOINT")
    @unittest.skipIf(
        os.getenv("CEI_LAUNCH_TEST", False),
        "Host requires file-system access to coverage files, CEI mode does not support.",
    )
    def test_correct_time(self):
        # There are 2208988800 seconds between Jan 1 1900 and Jan 1 1970, i.e.
        # the conversion factor between unix and NTP time
        unix_now = np.floor(time.time())
        ntp_now = unix_now + 2208988800

        unix_ago = unix_now - 20
        ntp_ago = unix_ago + 2208988800

        stream_id, route, stream_def_id, dataset_id = self.make_simple_dataset()
        coverage = DatasetManagementService._get_coverage(dataset_id)
        coverage.insert_timesteps(20)
        coverage.set_parameter_values("time", np.arange(ntp_ago, ntp_now))

        temporal_bounds = self.dataset_management.dataset_temporal_bounds(dataset_id)

        self.assertTrue(np.abs(temporal_bounds[0] - unix_ago) < 2)
        self.assertTrue(np.abs(temporal_bounds[1] - unix_now) < 2)

    @attr("LOCOINT")
    @unittest.skipIf(
        os.getenv("CEI_LAUNCH_TEST", False),
        "Host requires file-system access to coverage files, CEI mode does not support.",
    )
    def test_empty_coverage_time(self):
        stream_id, route, stream_def_id, dataset_id = self.make_simple_dataset()
        coverage = DatasetManagementService._get_coverage(dataset_id)
        temporal_bounds = self.dataset_management.dataset_temporal_bounds(dataset_id)
        # With no data both bounds are expected to be the time fill value.
        self.assertEquals([coverage.get_parameter_context("time").fill_value] * 2, temporal_bounds)

    @attr("LOCOINT")
    @unittest.skipIf(
        os.getenv("CEI_LAUNCH_TEST", False),
        "Host requires file-system access to coverage files, CEI mode does not support.",
    )
    def test_out_of_band_retrieve(self):
        # Setup the environment
        stream_id, route, stream_def_id, dataset_id = self.make_simple_dataset()
        self.start_ingestion(stream_id, dataset_id)

        # Fill the dataset
        self.publish_fake_data(stream_id, route)
        self.wait_until_we_have_enough_granules(dataset_id, 40)

        # Retrieve the data
        granule = DataRetrieverService.retrieve_oob(dataset_id)
        rdt = RecordDictionaryTool.load_from_granule(granule)
        self.assertTrue((rdt["time"] == np.arange(40)).all())

    @attr("LOCOINT")
    @unittest.skipIf(
        os.getenv("CEI_LAUNCH_TEST", False),
        "Host requires file-system access to coverage files, CEI mode does not support.",
    )
    def test_retrieve_cache(self):
        # _refresh_interval is a class attribute; restore it afterwards so the
        # values set here do not leak into other tests in the same process.
        self.addCleanup(
            setattr, DataRetrieverService, "_refresh_interval", DataRetrieverService._refresh_interval
        )
        DataRetrieverService._refresh_interval = 1
        datasets = [self.make_simple_dataset() for i in xrange(10)]
        for stream_id, route, stream_def_id, dataset_id in datasets:
            coverage = DatasetManagementService._get_coverage(dataset_id)
            coverage.insert_timesteps(10)
            coverage.set_parameter_values("time", np.arange(10))
            coverage.set_parameter_values("temp", np.arange(10))

        # Verify cache hit and refresh
        dataset_ids = [i[3] for i in datasets]
        self.assertTrue(dataset_ids[0] not in DataRetrieverService._retrieve_cache)
        DataRetrieverService._get_coverage(dataset_ids[0])  # Hit the cache
        cov, age = DataRetrieverService._retrieve_cache[dataset_ids[0]]
        # Verify that it was hit and it's now in there
        self.assertTrue(dataset_ids[0] in DataRetrieverService._retrieve_cache)

        # After the refresh interval elapses, the entry's age must change.
        gevent.sleep(DataRetrieverService._refresh_interval + 0.2)

        DataRetrieverService._get_coverage(dataset_ids[0])  # Hit the cache
        cov, age2 = DataRetrieverService._retrieve_cache[dataset_ids[0]]
        self.assertTrue(age2 != age)

        # Loading every dataset is expected to push the first one out.
        for dataset_id in dataset_ids:
            DataRetrieverService._get_coverage(dataset_id)

        self.assertTrue(dataset_ids[0] not in DataRetrieverService._retrieve_cache)

        stream_id, route, stream_def, dataset_id = datasets[0]
        self.start_ingestion(stream_id, dataset_id)
        DataRetrieverService._get_coverage(dataset_id)

        self.assertTrue(dataset_id in DataRetrieverService._retrieve_cache)

        DataRetrieverService._refresh_interval = 100
        self.publish_hifi(stream_id, route, 1)
        self.wait_until_we_have_enough_granules(dataset_id, data_size=20)

        # Wait (at most 20 seconds) for the cache entry to disappear.
        event = gevent.event.Event()
        with gevent.Timeout(20):
            while not event.wait(0.1):
                if dataset_id not in DataRetrieverService._retrieve_cache:
                    event.set()

        self.assertTrue(event.is_set())

    @unittest.skip("Outdated due to ingestion retry")
    @attr("LOCOINT")
    @unittest.skipIf(
        os.getenv("CEI_LAUNCH_TEST", False),
        "Host requires file-system access to coverage files, CEI mode does not support.",
    )
    def test_ingestion_failover(self):
        stream_id, route, stream_def_id, dataset_id = self.make_simple_dataset()
        self.start_ingestion(stream_id, dataset_id)

        event = Event()

        def cb(*args, **kwargs):
            event.set()

        sub = EventSubscriber(event_type="ExceptionEvent", callback=cb, origin="stream_exception")
        sub.start()
        # Always stop the subscriber, even when a step below fails.
        try:
            self.publish_fake_data(stream_id, route)
            self.wait_until_we_have_enough_granules(dataset_id, 40)

            # Corrupt the coverage master file so the next ingest fails.
            file_path = DatasetManagementService._get_coverage_path(dataset_id)
            master_file = os.path.join(file_path, "%s_master.hdf5" % dataset_id)

            with open(master_file, "w") as f:
                f.write("this will crash HDF")

            self.publish_hifi(stream_id, route, 5)

            self.assertTrue(event.wait(10))
        finally:
            sub.stop()
def _setup_resources(self):
    """
    Build and register the resources for the 'usgs_test_dataset' external dataset.

    Creates the agent, agent instance, data provider, data source, external
    dataset and data source model through DAMS, links them together, creates
    a parameter dictionary / stream definition / data product for the
    dataset, and finally stores the resulting ids on the test case
    (EDA_RESOURCE_ID, EDA_NAME and DVR_CONFIG['dh_cfg']).
    """
    # TODO: some or all of this (or some variation) should move to DAMS

    # Service clients used below
    dataset_mgmt = DatasetManagementServiceClient()
    dams = DataAcquisitionManagementServiceClient()
    data_product_mgmt = DataProductManagementServiceClient()
    registry = ResourceRegistryServiceClient()
    pubsub = PubsubManagementServiceClient()

    # External dataset agent and one instance of it
    agent = ExternalDatasetAgent(
        name='example eda',
        handler_module=self.DVR_CONFIG['dvr_mod'],
        handler_class=self.DVR_CONFIG['dvr_cls'],
    )
    agent_id = dams.create_external_dataset_agent(agent)

    agent_instance = ExternalDatasetAgentInstance(name='example eda instance')
    agent_instance_id = dams.create_external_dataset_agent_instance(
        agent_instance, external_dataset_agent_id=agent_id
    )

    # Data provider
    provider = ExternalDataProvider(
        name='example data provider', institution=Institution(), contact=ContactInformation()
    )
    provider.contact.individual_names_given = 'Christopher Mueller'
    provider.contact.email = '*****@*****.**'

    # Data source
    source = DataSource(
        name='example datasource',
        protocol_type='DAP',
        institution=Institution(),
        contact=ContactInformation(),
    )
    source.connection_params['base_data_url'] = ''
    source.contact.individual_names_given = 'Tim Giguere'
    source.contact.email = '*****@*****.**'

    # External dataset
    ds_name = 'usgs_test_dataset'
    external_dataset = ExternalDataset(
        name=ds_name,
        dataset_description=DatasetDescription(),
        update_description=UpdateDescription(),
        contact=ContactInformation(),
    )

    # The usgs.nc test dataset is a download of the R1 dataset found here:
    # http://thredds-test.oceanobservatories.org/thredds/dodsC/ooiciData/E66B1A74-A684-454A-9ADE-8388C2C634E5.ncml
    description_parameters = (
        ('dataset_path', 'test_data/usgs.nc'),
        ('temporal_dimension', 'time'),
        ('zonal_dimension', 'lon'),
        ('meridional_dimension', 'lat'),
        ('vertical_dimension', 'z'),
        ('variables', [
            'water_temperature',
            'streamflow',
            'water_temperature_bottom',
            'water_temperature_middle',
            'specific_conductance',
            'data_qualifier',
        ]),
    )
    for param_name, param_value in description_parameters:
        external_dataset.dataset_description.parameters[param_name] = param_value

    # Data source model (the 'model' attribute is intentionally left unset)
    source_model = DataSourceModel(name='dap_model')
    source_model.data_handler_module = 'N/A'
    source_model.data_handler_class = 'N/A'

    # Push everything through DAMS
    ds_id = dams.create_external_dataset(external_dataset=external_dataset)
    provider_id = dams.create_external_data_provider(external_data_provider=provider)
    source_id = dams.create_data_source(data_source=source)
    source_model_id = dams.create_data_source_model(source_model)

    # Register the ExternalDataset as a data producer
    producer_id = dams.register_external_data_set(external_dataset_id=ds_id)

    # Wire the resources to each other
    dams.assign_data_source_to_external_data_provider(
        data_source_id=source_id, external_data_provider_id=provider_id
    )
    dams.assign_data_source_to_data_model(data_source_id=source_id, data_source_model_id=source_model_id)
    dams.assign_external_dataset_to_data_source(external_dataset_id=ds_id, data_source_id=source_id)
    dams.assign_external_dataset_to_agent_instance(external_dataset_id=ds_id, agent_instance_id=agent_instance_id)

    # Parameter contexts: the existing 'time' context first, then one new
    # context per entry of the test's own parameter dictionary.
    context_ids = [dataset_mgmt.read_parameter_context_by_name('time', id_only=True)]
    for context_name, context_entry in self._create_parameter_dictionary().iteritems():
        new_context_id = dataset_mgmt.create_parameter_context(context_name, context_entry[1].dump())
        context_ids.append(new_context_id)

    pdict_id = dataset_mgmt.create_parameter_dictionary('netcdf_param_dict', context_ids)

    # Temporary stream definition so the data product can create the stream
    streamdef_id = pubsub.create_stream_definition(
        name="netcdf", description="netcdf", parameter_dictionary_id=pdict_id
    )

    temporal_domain, spatial_domain = time_series_domain()
    temporal_domain = temporal_domain.dump()
    spatial_domain = spatial_domain.dump()

    data_product = IonObject(
        RT.DataProduct,
        name='usgs_parsed_product',
        description='parsed usgs product',
        temporal_domain=temporal_domain,
        spatial_domain=spatial_domain,
    )

    # Generate the data product and associate it to the ExternalDataset
    data_product_id = data_product_mgmt.create_data_product(
        data_product=data_product, stream_definition_id=streamdef_id
    )

    dams.assign_data_product(input_resource_id=ds_id, data_product_id=data_product_id)

    # The product's stream is found through its hasStream association
    stream_ids, _ = registry.find_objects(
        subject=data_product_id, predicate=PRED.hasStream, object_type=RT.Stream, id_only=True
    )
    stream_id = stream_ids[0]

    created = {
        'ExternalDataset': ds_id,
        'ExternalDataProvider': provider_id,
        'DataSource': source_id,
        'DataSourceModel': source_model_id,
        'DataProducer': producer_id,
        'DataProduct': data_product_id,
        'Stream': stream_id,
    }
    log.info('Created resources: {0}'.format(created))

    # Create the logger for receiving publications
    _, stream_route, _ = self.create_stream_and_logger(name='usgs', stream_id=stream_id)

    self.EDA_RESOURCE_ID = ds_id
    self.EDA_NAME = ds_name
    self.DVR_CONFIG['dh_cfg'] = {
        'TESTING': True,
        'stream_id': stream_id,
        'stream_route': stream_route,
        'stream_def': streamdef_id,
        # CBM: Should this be put in the main body of the config - with mod & cls?
        'data_producer_id': producer_id,
        'max_records': 1,
    }
class DatasetManagementIntTest(IonIntegrationTestCase):
    """Integration tests that drive the dataset management service client."""

    def setUp(self):
        """Start the container, deploy r2deploy.yml and build the clients."""
        self._start_container()
        self.container.start_rel_from_url('res/deploy/r2deploy.yml')

        self.resource_registry = ResourceRegistryServiceClient()
        self.dataset_management = DatasetManagementServiceClient()

    def test_dataset_crud(self):
        """Create a dataset, rename it and read the new name back."""
        dms = self.dataset_management
        param_dict_id = dms.read_parameter_dictionary_by_name(
            'ctd_parsed_param_dict', id_only=True)
        new_id = dms.create_dataset(
            Dataset(name='ctd_dataset'), parameter_dictionary_id=param_dict_id)

        stored = dms.read_dataset(new_id)
        self.assertEqual(stored.name, 'ctd_dataset')

        stored.name = 'something different'
        dms.update_dataset(stored)

        reread = dms.read_dataset(new_id)
        self.assertEqual(stored.name, reread.name)

    def test_context_crud(self):
        """Read one created context as a coverage object, then delete it."""
        dms = self.dataset_management
        created_ids = self.create_contexts()
        target_id = created_ids.pop()

        resource = dms.read_parameter_context(target_id)
        coverage_ctx = DatasetManagementService.get_coverage_parameter(resource)
        self.assertIsInstance(coverage_ctx, CoverageParameterContext)

        dms.delete_parameter_context(target_id)
        # Reading a deleted context must fail.
        with self.assertRaises(NotFound):
            dms.read_parameter_context(target_id)

    def test_pfunc_crud(self):
        """Build a dictionary from the function contexts and load one function."""
        dms = self.dataset_management
        contexts, funcs = self.create_pfuncs()

        dictionary_id = dms.create_parameter_dictionary(
            name='functional_pdict',
            parameter_context_ids=list(contexts.itervalues()),
            temporal_context='time')
        self.addCleanup(dms.delete_parameter_dictionary, dictionary_id)

        stored_func = dms.read_parameter_function(funcs['CONDWAT_L1'])
        coverage_func = DatasetManagementService.get_coverage_function(stored_func)
        self.assertIsInstance(coverage_func, NumexprFunction)

    def test_pdict_crud(self):
        """Create, inspect and delete a parameter dictionary."""
        dms = self.dataset_management
        created_ids = self.create_contexts()

        dictionary_id = dms.create_parameter_dictionary(
            name='pdict1',
            parameter_context_ids=created_ids,
            temporal_context='time')
        member_ids = dms.read_parameter_contexts(
            parameter_dictionary_id=dictionary_id, id_only=True)

        loaded = DatasetManagementService.get_parameter_dictionary(dictionary_id)
        self.assertIsInstance(loaded, ParameterDictionary)
        self.assertIn('time_test', loaded)
        self.assertEqual(loaded.identifier, dictionary_id)
        self.assertEqual(set(member_ids), set(created_ids))

        dms.delete_parameter_dictionary(parameter_dictionary_id=dictionary_id)
        # Reading a deleted dictionary must fail.
        with self.assertRaises(NotFound):
            dms.read_parameter_dictionary(parameter_dictionary_id=dictionary_id)

    def create_contexts(self):
        """Create five float32 quantity contexts and return their ids in order."""
        # (name, units) pairs; every other field is shared by all five.
        specs = (
            ('condictivity_test', '1'),
            ('pressure_test', 'Pa'),
            ('salinity_test', 'psu'),
            ('temp_test', 'degree_C'),
            ('time_test', 'seconds since 1970-01-01'),
        )
        created_ids = []
        for ctx_name, ctx_units in specs:
            ctx = ParameterContext(
                name=ctx_name,
                parameter_type='quantity',
                value_encoding='float32',
                units=ctx_units,
                fill_value=0)
            created_ids.append(self.dataset_management.create_parameter(ctx))
        return created_ids

    def create_pfuncs(self):
        """Create the L0/L1 contexts and their parameter functions.

        Returns a ``(contexts, funcs)`` pair of dicts mapping a key to the
        id returned by the service for each context / function.
        """
        contexts = {}
        funcs = {}
        dms = self.dataset_management

        def store_context(key, ctx):
            # Register the context and remember the id under ``key``.
            contexts[key] = dms.create_parameter(ctx)

        def store_function(pfunc, key=None):
            # Register the function; it is only recorded in ``funcs``
            # when a key is supplied.
            func_id = dms.create_parameter_function(pfunc)
            if key is not None:
                funcs[key] = func_id
            return func_id

        store_context('TIME', ParameterContext(
            name='TIME', parameter_type='quantity', value_encoding='float32',
            units='seconds since 1900-01-01', fill_value=0))
        store_context('LAT', ParameterContext(
            name='LAT', parameter_type='sparse', value_encoding='float32',
            units='degrees_north', fill_value=-9999.))
        store_context('LON', ParameterContext(
            name='LON', parameter_type="sparse", value_encoding='float32',
            units='degrees_east', fill_value=-9999))

        # Independent parameters - values expected to be the decimal
        # results of conversion from hex (temperature, conductivity,
        # pressure).
        for l0_name, l0_units in (('TEMPWAT_L0', 'deg_C'),
                                  ('CONDWAT_L0', 'S m-1'),
                                  ('PRESWAT_L0', 'dbar')):
            store_context(l0_name, ParameterContext(
                name=l0_name, parameter_type='quantity',
                value_encoding='float32', units=l0_units))

        # Dependent parameters
        # TEMPWAT_L1 = (TEMPWAT_L0 / 10000) - 10
        func_id = store_function(
            ParameterFunction(name='TEMPWAT_L1', function_type=PFT.NUMEXPR,
                              function='(T / 10000) - 10', args=['T']),
            key='TEMPWAT_L1')
        store_context('TEMPWAT_L1', ParameterContext(
            name='TEMPWAT_L1', parameter_type='function',
            parameter_function_id=func_id,
            parameter_function_map={'T': 'TEMPWAT_L0'},
            value_encoding='float32', units='deg_C'))

        # CONDWAT_L1 = (CONDWAT_L0 / 100000) - 0.5
        func_id = store_function(
            ParameterFunction(name='CONDWAT_L1', function_type=PFT.NUMEXPR,
                              function='(C / 100000) - 0.5', args=['C']),
            key='CONDWAT_L1')
        store_context('CONDWAT_L1', ParameterContext(
            name='CONDWAT_L1', parameter_type='function',
            parameter_function_id=func_id,
            parameter_function_map={'C': 'CONDWAT_L0'},
            value_encoding='float32', units='S m-1'))

        # Equation uses p_range, a calibration coefficient fixed here to
        # 679.34040721:
        # PRESWAT_L1 = (PRESWAT_L0 * p_range / (0.85 * 65536)) - (0.05 * p_range)
        func_id = store_function(
            ParameterFunction(
                name='PRESWAT_L1', function_type=PFT.NUMEXPR,
                function='(P * p_range / (0.85 * 65536)) - (0.05 * p_range)',
                args=['P', 'p_range']),
            key='PRESWAT_L1')
        store_context('PRESWAT_L1', ParameterContext(
            name='PRESWAT_L1', parameter_type='function',
            parameter_function_id=func_id,
            parameter_function_map={'P': 'PRESWAT_L0', 'p_range': 679.34040721},
            value_encoding='float32', units='dbar'))

        # Intermediate "conductivity times ten" function; it is registered
        # with the service but intentionally not recorded in ``funcs``.
        func_id = store_function(
            ParameterFunction(name='condwat10', function_type=PFT.NUMEXPR,
                              function='C*10', args=['C']))
        store_context('C10', ParameterContext(
            name='c10', parameter_type='function',
            parameter_function_id=func_id,
            parameter_function_map={'C': 'CONDWAT_L1'},
            value_encoding='float32', units='1'))

        # Density & practical salinity are calculated using the Gibbs
        # Seawater library - available via the python-gsw project:
        #   https://code.google.com/p/python-gsw/ & http://pypi.python.org/pypi/gsw/3.0.1
        # PRACSAL = gsw.SP_from_C((CONDWAT_L1 * 10), TEMPWAT_L1, PRESWAT_L1)
        func_id = store_function(
            ParameterFunction(name='PRACSAL', function_type=PFT.PYTHON,
                              owner='gsw', function='SP_from_C',
                              args=['C', 't', 'p']),
            key='PRACSAL')
        store_context('PRACSAL', ParameterContext(
            name='PRACSAL', parameter_type='function',
            parameter_function_id=func_id,
            parameter_function_map={'C': 'c10', 't': 'TEMPWAT_L1', 'p': 'PRESWAT_L1'},
            value_encoding='float32', units='g kg-1'))

        # Not created here:
        # absolute_salinity = gsw.SA_from_SP(PRACSAL, PRESWAT_L1, longitude, latitude)
        # conservative_temperature = gsw.CT_from_t(absolute_salinity, TEMPWAT_L1, PRESWAT_L1)
        # DENSITY = gsw.rho(absolute_salinity, conservative_temperature, PRESWAT_L1)
        return contexts, funcs

    def test_verify_contexts(self):
        """Every context of 'ctd_parsed_param_dict' exposes the expected fields."""
        dictionary_id = self.dataset_management.read_parameter_dictionary_by_name(
            name='ctd_parsed_param_dict', id_only=True)
        stored_contexts = self.dataset_management.read_parameter_contexts(
            parameter_dictionary_id=dictionary_id)

        expected_fields = (
            'fill_value',
            'reference_urls',
            'internal_name',
            'display_name',
            'standard_name',
            'ooi_short_name',
            'description',
            'precision',
        )
        for stored in stored_contexts:
            for field_name in expected_fields:
                self.assertIn(field_name, stored)
class TestStreamIngestionWorker(IonIntegrationTestCase):
    """Integration test for the stream ingestion worker process."""

    def setUp(self):
        """Start the container and create the stream, subscription and publisher.

        Every created pubsub resource gets a matching cleanup; cleanups run
        in reverse registration order, so the subscription is deactivated
        before it is deleted.
        """
        self._start_container()
        self.container.start_rel_from_url('res/deploy/r2deploy.yml')

        self.dataset_management_client = DatasetManagementServiceClient(node=self.container.node)
        self.pubsub_client = PubsubManagementServiceClient(node=self.container.node)

        self.time_dom, self.spatial_dom = time_series_domain()

        self.parameter_dict_id = self.dataset_management_client.read_parameter_dictionary_by_name(
            name='ctd_parsed_param_dict', id_only=True)

        self.stream_def_id = self.pubsub_client.create_stream_definition(
            name='stream_def', parameter_dictionary_id=self.parameter_dict_id)
        self.addCleanup(self.pubsub_client.delete_stream_definition, self.stream_def_id)

        self.stream_id, self.route_id = self.pubsub_client.create_stream(
            name='parsed_stream',
            stream_definition_id=self.stream_def_id,
            exchange_point='science_data')
        self.addCleanup(self.pubsub_client.delete_stream, self.stream_id)

        self.subscription_id = self.pubsub_client.create_subscription(
            name='parsed_subscription',
            stream_ids=[self.stream_id],
            exchange_name='parsed_subscription')
        self.addCleanup(self.pubsub_client.delete_subscription, self.subscription_id)

        self.pubsub_client.activate_subscription(self.subscription_id)
        self.addCleanup(self.pubsub_client.deactivate_subscription, self.subscription_id)

        self.publisher = StandaloneStreamPublisher(self.stream_id, self.route_id)

    @attr('LOCOINT')
    @unittest.skipIf(os.getenv('CEI_LAUNCH_TEST', False), 'Skip test while in CEI LAUNCH mode')
    def test_stream_ingestion_worker(self):
        """Publish one granule and check it can be read back from the 'raw' parameter."""
        self.start_ingestion_worker()

        context_ids, _ = self._create_param_contexts()
        pdict_id = self.dataset_management_client.create_parameter_dictionary(
            name='stream_ingestion_pdict',
            parameter_context_ids=context_ids,
            temporal_context='ingestion_timestamp')
        self.addCleanup(self.dataset_management_client.delete_parameter_dictionary, pdict_id)

        dataset_id = self.dataset_management_client.create_dataset(
            name='fake_dataset',
            description='fake_dataset',
            stream_id=self.stream_id,
            parameter_dictionary_id=pdict_id)
        self.addCleanup(self.dataset_management_client.delete_dataset, dataset_id)

        self.cov = self._create_coverage(
            dataset_id=dataset_id,
            parameter_dict_id=pdict_id,
            time_dom=self.time_dom,
            spatial_dom=self.spatial_dom)
        self.addCleanup(self.cov.close)

        rdt = RecordDictionaryTool(stream_definition_id=self.stream_def_id)
        rdt['conductivity'] = 1
        rdt['pressure'] = 2
        rdt['salinity'] = 3

        # The event must exist before the listener is started and the granule
        # is published: the subscriber callback sets self.data_modified, so
        # creating it afterwards either breaks the callback or replaces an
        # already-set event and forces the full timeout.
        self.data_modified = Event()
        self.start_listener(dataset_id)

        self.publisher.publish(rdt.to_granule())
        # Best-effort wait (as before, a timeout is not treated as a failure
        # here; the assertions below decide the outcome).
        self.data_modified.wait(30)

        cov = self.get_coverage(dataset_id)
        # Close the reopened coverage even when an assertion below fails.
        self.addCleanup(cov.close)
        self.assertIsNotNone(cov.get_parameter_values('raw'))

        deserializer = IonObjectDeserializer(obj_registry=get_obj_registry())

        granule = retrieve_stream(dataset_id)
        rdt_complex = RecordDictionaryTool.load_from_granule(granule)
        rdt_complex['raw'] = [deserializer.deserialize(i) for i in rdt_complex['raw']]
        for gran in rdt_complex['raw']:
            rdt_new = RecordDictionaryTool.load_from_granule(gran)
            self.assertIn(1, rdt_new['conductivity'])
            self.assertIn(2, rdt_new['pressure'])
            self.assertIn(3, rdt_new['salinity'])

    def start_ingestion_worker(self):
        """Spawn a StreamIngestionWorker process on the 'parsed_subscription' queue."""
        config = DotDict()
        config.process.queue_name = 'parsed_subscription'

        self.container.spawn_process(
            name='stream_ingestion_worker',
            module='ion.processes.data.ingestion.stream_ingestion_worker',
            cls='StreamIngestionWorker',
            config=config
        )

    def start_listener(self, dataset_id):
        """Subscribe to DatasetModified events originating from ``dataset_id``.

        The callback sets ``self.data_modified``, so that event must be
        created before this method is called.
        """
        def cb(*args, **kwargs):
            self.data_modified.set()

        es = EventSubscriber(event_type=OT.DatasetModified, callback=cb, origin=dataset_id)
        es.start()
        self.addCleanup(es.stop)

    def _create_param_contexts(self):
        """Create the 'ingestion_timestamp' and 'raw' parameter contexts.

        Returns ``(context_ids, timestamp_context_id)``.
        """
        context_ids = []

        t_ctxt = ParameterContext('ingestion_timestamp', param_type=QuantityType(value_encoding=numpy.dtype('float64')))
        t_ctxt.uom = 'seconds since 1900-01-01'
        t_ctxt.fill_value = -9999
        t_ctxt_id = self.dataset_management_client.create_parameter_context(
            name='ingestion_timestamp', parameter_context=t_ctxt.dump())
        context_ids.append(t_ctxt_id)

        raw_ctxt = ParameterContext('raw', param_type=ArrayType())
        raw_ctxt.uom = ''
        context_ids.append(self.dataset_management_client.create_parameter_context(
            name='raw', parameter_context=raw_ctxt.dump()))

        return context_ids, t_ctxt_id

    def _create_coverage(self, dataset_id, parameter_dict_id, time_dom, spatial_dom):
        """Build a SimplexCoverage for ``dataset_id`` under the dataset cache directory."""
        pd = self.dataset_management_client.read_parameter_dictionary(parameter_dict_id)
        pdict = ParameterDictionary.load(pd)
        sdom = GridDomain.load(spatial_dom.dump())
        tdom = GridDomain.load(time_dom.dump())
        file_root = FileSystem.get_url(FS.CACHE, 'datasets')
        scov = SimplexCoverage(file_root, dataset_id, dataset_id,
                               parameter_dictionary=pdict,
                               temporal_domain=tdom,
                               spatial_domain=sdom)
        return scov

    def get_coverage(self, dataset_id, mode='w'):
        """Load the coverage stored for ``dataset_id``; the caller must close it."""
        file_root = FileSystem.get_url(FS.CACHE, 'datasets')
        coverage = AbstractCoverage.load(file_root, dataset_id, mode=mode)
        return coverage
class TestTransformPrime(IonIntegrationTestCase):
    """Integration tests for the TransformPrime stream process (L0 -> L1)."""

    def setUp(self):
        """Start the container, deploy r2deploy.yml and create the service clients."""
        self._start_container()
        self.container.start_rel_from_url('res/deploy/r2deploy.yml')  # Because hey why not?!

        self.dataset_management = DatasetManagementServiceClient()
        self.data_process_management = DataProcessManagementServiceClient()
        self.pubsub_management = PubsubManagementServiceClient()
        self.data_product_management = DataProductManagementServiceClient()

    def _create_proc_def(self):
        """Register the TransformPrime data process definition and return its id."""
        dpd_obj = DataProcessDefinition(
            name='Optimus',
            description='It\'s a transformer',
            module='ion.processes.data.transforms.transform_prime',
            class_name='TransformPrime')
        return self.data_process_management.create_data_process_definition(dpd_obj)

    def _L0_pdict(self):
        """Create the 'L0 SBE37' parameter dictionary and return its id."""
        t_ctxt = ParameterContext('time', param_type=QuantityType(value_encoding=np.dtype('int64')))
        t_ctxt.uom = 'seconds since 01-01-1900'
        t_ctxt.fill_value = -9999
        t_ctxt_id = self.dataset_management.create_parameter_context(
            name='time', parameter_context=t_ctxt.dump(),
            parameter_type='quantity<int64>', unit_of_measure=t_ctxt.uom)

        lat_ctxt = ParameterContext('lat', param_type=ConstantType(QuantityType(value_encoding=np.dtype('float32'))))
        lat_ctxt.axis = AxisTypeEnum.LAT
        lat_ctxt.uom = 'degree_north'
        lat_ctxt.fill_value = -9999
        lat_ctxt_id = self.dataset_management.create_parameter_context(
            name='lat', parameter_context=lat_ctxt.dump(),
            parameter_type='quantity<float32>', unit_of_measure=lat_ctxt.uom)

        lon_ctxt = ParameterContext('lon', param_type=ConstantType(QuantityType(value_encoding=np.dtype('float32'))))
        lon_ctxt.axis = AxisTypeEnum.LON
        lon_ctxt.uom = 'degree_east'
        lon_ctxt.fill_value = -9999
        lon_ctxt_id = self.dataset_management.create_parameter_context(
            name='lon', parameter_context=lon_ctxt.dump(),
            parameter_type='quantity<float32>', unit_of_measure=lon_ctxt.uom)

        # Temperature - values expected to be the decimal results of conversion from hex
        temp_ctxt = ParameterContext('TEMPWAT_L0', param_type=QuantityType(value_encoding=np.dtype('float32')))
        temp_ctxt.uom = 'deg_C'
        temp_ctxt.fill_value = -9999
        temp_ctxt_id = self.dataset_management.create_parameter_context(
            name='TEMPWAT_L0', parameter_context=temp_ctxt.dump(),
            parameter_type='quantity<float32>', unit_of_measure=temp_ctxt.uom)

        # Conductivity - values expected to be the decimal results of conversion from hex
        cond_ctxt = ParameterContext('CONDWAT_L0', param_type=QuantityType(value_encoding=np.dtype('float32')))
        cond_ctxt.uom = 'S m-1'
        cond_ctxt.fill_value = -9999
        cond_ctxt_id = self.dataset_management.create_parameter_context(
            name='CONDWAT_L0', parameter_context=cond_ctxt.dump(),
            parameter_type='quantity<float32>', unit_of_measure=cond_ctxt.uom)

        # Pressure - values expected to be the decimal results of conversion from hex
        press_ctxt = ParameterContext('PRESWAT_L0', param_type=QuantityType(value_encoding=np.dtype('float32')))
        press_ctxt.uom = 'dbar'
        press_ctxt.fill_value = -9999
        press_ctxt_id = self.dataset_management.create_parameter_context(
            name='PRESWAT_L0', parameter_context=press_ctxt.dump(),
            parameter_type='quantity<float32>', unit_of_measure=press_ctxt.uom)

        context_ids = [t_ctxt_id, lat_ctxt_id, lon_ctxt_id, temp_ctxt_id, cond_ctxt_id, press_ctxt_id]

        pdict_id = self.dataset_management.create_parameter_dictionary(
            'L0 SBE37', parameter_context_ids=context_ids, temporal_context='time')

        return pdict_id

    def _L1_pdict(self):
        """Create the 'L1_SBE37' parameter dictionary and return its id.

        A fresh L0 dictionary is created first; its contexts are reused and
        the derived L1 function contexts are appended to them.
        """
        pdict_id = self._L0_pdict()
        param_context_ids = self.dataset_management.read_parameter_contexts(pdict_id, id_only=True)

        # TEMPWAT_L1 = (TEMPWAT_L0 / 10000) - 10
        tl1_func = '(TEMPWAT_L0 / 10000) - 10'
        tl1_pmap = {'TEMPWAT_L0': 'TEMPWAT_L0'}
        func = NumexprFunction('TEMPWAT_L1', tl1_func, tl1_pmap)
        tempL1_ctxt = ParameterContext('TEMPWAT_L1', param_type=ParameterFunctionType(function=func), variability=VariabilityEnum.TEMPORAL)
        tempL1_ctxt.uom = 'deg_C'
        tempL1_ctxt_id = self.dataset_management.create_parameter_context(
            name=tempL1_ctxt.name, parameter_context=tempL1_ctxt.dump(),
            parameter_type='pfunc', unit_of_measure=tempL1_ctxt.uom)
        param_context_ids.append(tempL1_ctxt_id)

        # CONDWAT_L1 = (CONDWAT_L0 / 100000) - 0.5
        cl1_func = '(CONDWAT_L0 / 100000) - 0.5'
        cl1_pmap = {'CONDWAT_L0': 'CONDWAT_L0'}
        func = NumexprFunction('CONDWAT_L1', cl1_func, cl1_pmap)
        condL1_ctxt = ParameterContext('CONDWAT_L1', param_type=ParameterFunctionType(function=func), variability=VariabilityEnum.TEMPORAL)
        condL1_ctxt.uom = 'S m-1'
        condL1_ctxt_id = self.dataset_management.create_parameter_context(
            name=condL1_ctxt.name, parameter_context=condL1_ctxt.dump(),
            parameter_type='pfunc', unit_of_measure=condL1_ctxt.uom)
        param_context_ids.append(condL1_ctxt_id)

        # Equation uses p_range, which is a calibration coefficient - Fixing to 679.34040721
        # PRESWAT_L1 = (PRESWAT_L0 * p_range / (0.85 * 65536)) - (0.05 * p_range)
        pl1_func = '(PRESWAT_L0 * 679.34040721 / (0.85 * 65536)) - (0.05 * 679.34040721)'
        pl1_pmap = {'PRESWAT_L0': 'PRESWAT_L0'}
        func = NumexprFunction('PRESWAT_L1', pl1_func, pl1_pmap)
        presL1_ctxt = ParameterContext('PRESWAT_L1', param_type=ParameterFunctionType(function=func), variability=VariabilityEnum.TEMPORAL)
        # Pressure unit; previously 'S m-1', copied from the conductivity
        # context above. PRESWAT_L0 is declared in 'dbar'.
        presL1_ctxt.uom = 'dbar'
        presL1_ctxt_id = self.dataset_management.create_parameter_context(
            name=presL1_ctxt.name, parameter_context=presL1_ctxt.dump(),
            parameter_type='pfunc', unit_of_measure=presL1_ctxt.uom)
        param_context_ids.append(presL1_ctxt_id)

        # Density & practical salinity calculated using the Gibbs Seawater library - available via python-gsw project:
        #       https://code.google.com/p/python-gsw/ & http://pypi.python.org/pypi/gsw/3.0.1
        # PRACSAL = gsw.SP_from_C((CONDWAT_L1 * 10), TEMPWAT_L1, PRESWAT_L1)
        owner = 'gsw'
        sal_func = 'SP_from_C'
        sal_arglist = [NumexprFunction('CONDWAT_L1*10', 'C*10', {'C': 'CONDWAT_L1'}), 'TEMPWAT_L1', 'PRESWAT_L1']
        sal_kwargmap = None
        func = PythonFunction('PRACSAL', owner, sal_func, sal_arglist, sal_kwargmap)
        sal_ctxt = ParameterContext('PRACSAL', param_type=ParameterFunctionType(func), variability=VariabilityEnum.TEMPORAL)
        sal_ctxt.uom = 'g kg-1'
        sal_ctxt_id = self.dataset_management.create_parameter_context(
            name=sal_ctxt.name, parameter_context=sal_ctxt.dump(),
            parameter_type='pfunc', unit_of_measure=sal_ctxt.uom)
        param_context_ids.append(sal_ctxt_id)

        # absolute_salinity = gsw.SA_from_SP(PRACSAL, PRESWAT_L1, longitude, latitude)
        # conservative_temperature = gsw.CT_from_t(absolute_salinity, TEMPWAT_L1, PRESWAT_L1)
        # DENSITY = gsw.rho(absolute_salinity, conservative_temperature, PRESWAT_L1)
        owner = 'gsw'
        abs_sal_func = PythonFunction('abs_sal', owner, 'SA_from_SP', ['PRACSAL', 'PRESWAT_L1', 'lon', 'lat'], None)
        cons_temp_func = PythonFunction('cons_temp', owner, 'CT_from_t', [abs_sal_func, 'TEMPWAT_L1', 'PRESWAT_L1'], None)
        dens_func = PythonFunction('DENSITY', owner, 'rho', [abs_sal_func, cons_temp_func, 'PRESWAT_L1'], None)
        dens_ctxt = ParameterContext('DENSITY', param_type=ParameterFunctionType(dens_func), variability=VariabilityEnum.TEMPORAL)
        dens_ctxt.uom = 'kg m-3'
        dens_ctxt_id = self.dataset_management.create_parameter_context(
            name=dens_ctxt.name, parameter_context=dens_ctxt.dump(),
            parameter_type='pfunc', unit_of_measure=dens_ctxt.uom)
        param_context_ids.append(dens_ctxt_id)

        pdict_id = self.dataset_management.create_parameter_dictionary(
            'L1_SBE37', parameter_context_ids=param_context_ids, temporal_context='time')
        return pdict_id

    def _data_product(self, name, stream_def, exchange_pt):
        """Create a time-series data product and return its id."""
        tdom, sdom = time_series_domain()
        dp_obj = DataProduct(name=name, description='blah', spatial_domain=sdom.dump(), temporal_domain=tdom.dump())
        dp_id = self.data_product_management.create_data_product(dp_obj, stream_def, exchange_pt)
        return dp_id

    def _data_process(self, proc_def_id, input_products, output_product, stream_def):
        """Create and activate a data process wired to a fake DataProducer."""
        fake_producer = DataProducer(name='fake_producer')
        fake_producer_id, _ = self.container.resource_registry.create(fake_producer)
        self.data_process_management.assign_stream_definition_to_data_process_definition(
            stream_def, proc_def_id, binding='output')
        data_process_id = self.data_process_management.create_data_process(
            proc_def_id, input_products, {'output': output_product})
        self.container.resource_registry.create_association(
            subject=data_process_id, predicate=PRED.hasDataProducer, object=fake_producer_id)
        self.data_process_management.activate_data_process(data_process_id)

    def _fake_producer(self):
        """Return the id of a DataProducer resource, creating it on first use."""
        # Check the attribute that is actually assigned below; the previous
        # check for 'producer' was never true, so every call created a new
        # resource.
        if not hasattr(self, 'fake_producer_id'):
            self.fake_producer_id, _ = self.container.resource_registry.create(DataProducer(name='fake_producer'))
        return self.fake_producer_id

    def _publisher(self, data_product_id):
        """Return a StandaloneStreamPublisher for the first stream found for the product."""
        # NOTE(review): the other hasStream lookups in this class go through
        # find_objects(); confirm find_resources() accepts subject/predicate.
        stream_ids, _ = self.container.resource_registry.find_resources(
            subject=data_product_id, predicate=PRED.hasStream, id_only=True)
        stream_id = stream_ids[0]
        route = self.pubsub_management.read_stream_route(stream_id)
        publisher = StandaloneStreamPublisher(stream_id, route)
        return publisher

    def _get_param_vals(self, name, slice_, dims):
        """Return synthetic values for parameter ``name``.

        The array shape is ``utils.slice_shape(slice_, dims)``. Unknown
        names that are not in ``self.value_classes`` yield zeros.
        """
        shp = utils.slice_shape(slice_, dims)

        def _getarr(vmin, shp, vmax=None,):
            if vmax is None:
                # ndarray.fill() works in place and returns None, so build
                # the array first and return the array itself.
                filled = np.empty(shp)
                filled.fill(vmin)
                return filled
            return np.arange(vmin, vmax, (vmax - vmin) / int(utils.prod(shp)), dtype='float32').reshape(shp)

        if name == 'LAT':
            ret = np.empty(shp)
            ret.fill(45)
        elif name == 'LON':
            ret = np.empty(shp)
            ret.fill(-71)
        elif name == 'TEMPWAT_L0':
            ret = _getarr(280000, shp, 350000)
        elif name == 'CONDWAT_L0':
            ret = _getarr(100000, shp, 750000)
        elif name == 'PRESWAT_L0':
            ret = _getarr(3000, shp, 10000)
        elif name in self.value_classes:  # Non-L0 parameters
            ret = self.value_classes[name][:]
        else:
            return np.zeros(shp)

        return ret

    def _setup_streams(self, exchange_pt1, exchange_pt2, available_fields_in=None, available_fields_out=None):
        """Create the L0/L1 stream definitions, data products and data process.

        ``available_fields_in`` / ``available_fields_out`` default to an
        empty list. Returns the tuple ``(stream_id_in, stream_id_out,
        stream_route_in, stream_route_out, incoming_stream_def_id,
        outgoing_stream_def_id)``.
        """
        # None stands in for "no fields" so no list object is shared
        # between calls.
        if available_fields_in is None:
            available_fields_in = []
        if available_fields_out is None:
            available_fields_out = []

        proc_def_id = self._create_proc_def()

        incoming_pdict_id = self._L0_pdict()
        outgoing_pdict_id = self._L1_pdict()

        incoming_stream_def_id = self.pubsub_management.create_stream_definition(
            'L0_stream_def', parameter_dictionary_id=incoming_pdict_id, available_fields=available_fields_in)
        outgoing_stream_def_id = self.pubsub_management.create_stream_definition(
            'L1_stream_def', parameter_dictionary_id=outgoing_pdict_id, available_fields=available_fields_out)

        L0_data_product_id = self._data_product('L0_SBE37', incoming_stream_def_id, exchange_pt1)
        L1_data_product_id = self._data_product('L1_SBE37', outgoing_stream_def_id, exchange_pt2)

        self._data_process(proc_def_id, [L0_data_product_id], L1_data_product_id, outgoing_stream_def_id)

        stream_ids, _ = self.container.resource_registry.find_objects(L0_data_product_id, PRED.hasStream, None, True)
        stream_id_in = stream_ids[0]
        stream_ids, _ = self.container.resource_registry.find_objects(L1_data_product_id, PRED.hasStream, None, True)
        stream_id_out = stream_ids[0]

        stream_route_in = self.pubsub_management.read_stream_route(stream_id_in)
        stream_route_out = self.pubsub_management.read_stream_route(stream_id_out)

        return (stream_id_in, stream_id_out, stream_route_in, stream_route_out, incoming_stream_def_id, outgoing_stream_def_id)

    def _transform_config(self, stream_id_in, stream_id_out, queue_name):
        """Build the spawn configuration used for the TransformPrime process."""
        return {
            'process': {
                'routes': {(stream_id_in, stream_id_out): None},
                'queue_name': queue_name,
                'publish_streams': {str(stream_id_out): stream_id_out},
                'process_type': 'stream_process',
            }
        }

    def _make_input_rdt(self, stream_def_id, count):
        """Return an input record dictionary holding ``count`` synthetic L0 records."""
        rdt_in = RecordDictionaryTool(stream_definition_id=stream_def_id)
        rdt_in['time'] = np.arange(count)
        rdt_in['lat'] = [40.992469] * count
        rdt_in['lon'] = [-71.727069] * count
        for l0_name in ('TEMPWAT_L0', 'CONDWAT_L0', 'PRESWAT_L0'):
            rdt_in[l0_name] = self._get_param_vals(l0_name, slice(None), (count,))
        return rdt_in

    def _validate_transforms(self, rdt_in, rdt_out):
        """Assert that ``rdt_out`` holds the pass-through and derived L1 values."""
        # passthrus
        self.assertTrue(np.allclose(rdt_in['time'], rdt_out['time']))
        self.assertTrue(np.allclose(rdt_in['lat'], rdt_out['lat']))
        self.assertTrue(np.allclose(rdt_in['lon'], rdt_out['lon']))
        self.assertTrue(np.allclose(rdt_in['TEMPWAT_L0'], rdt_out['TEMPWAT_L0']))
        self.assertTrue(np.allclose(rdt_in['CONDWAT_L0'], rdt_out['CONDWAT_L0']))
        self.assertTrue(np.allclose(rdt_in['PRESWAT_L0'], rdt_out['PRESWAT_L0']))

        # TEMPWAT_L1 = (TEMPWAT_L0 / 10000) - 10
        t1 = (rdt_out['TEMPWAT_L0'] / 10000) - 10
        self.assertTrue(np.allclose(rdt_out['TEMPWAT_L1'], t1))

        # CONDWAT_L1 = (CONDWAT_L0 / 100000) - 0.5
        c1 = (rdt_out['CONDWAT_L0'] / 100000) - 0.5
        self.assertTrue(np.allclose(rdt_out['CONDWAT_L1'], c1))

        # Equation uses p_range, which is a calibration coefficient - Fixing to 679.34040721
        # PRESWAT_L1 = (PRESWAT_L0 * p_range / (0.85 * 65536)) - (0.05 * p_range)
        p1 = (rdt_out['PRESWAT_L0'] * 679.34040721 / (0.85 * 65536)) - (0.05 * 679.34040721)
        self.assertTrue(np.allclose(rdt_out['PRESWAT_L1'], p1))

        # PRACSAL = gsw.SP_from_C((CONDWAT_L1 * 10), TEMPWAT_L1, PRESWAT_L1)
        ps = gsw.SP_from_C((rdt_out['CONDWAT_L1'] * 10.), rdt_out['TEMPWAT_L1'], rdt_out['PRESWAT_L1'])
        self.assertTrue(np.allclose(rdt_out['PRACSAL'], ps))

        # absolute_salinity = gsw.SA_from_SP(PRACSAL, PRESWAT_L1, longitude, latitude)
        # conservative_temperature = gsw.CT_from_t(absolute_salinity, TEMPWAT_L1, PRESWAT_L1)
        # DENSITY = gsw.rho(absolute_salinity, conservative_temperature, PRESWAT_L1)
        abs_sal = gsw.SA_from_SP(rdt_out['PRACSAL'], rdt_out['PRESWAT_L1'], rdt_out['lon'], rdt_out['lat'])
        cons_temp = gsw.CT_from_t(abs_sal, rdt_out['TEMPWAT_L1'], rdt_out['PRESWAT_L1'])
        rho = gsw.rho(abs_sal, cons_temp, rdt_out['PRESWAT_L1'])
        self.assertTrue(np.allclose(rdt_out['DENSITY'], rho))

    def test_execute_transform(self):
        """Call _execute_transform directly on the spawned process and validate the output."""
        available_fields_in = ['time', 'lat', 'lon', 'TEMPWAT_L0', 'CONDWAT_L0', 'PRESWAT_L0']
        available_fields_out = ['time', 'lat', 'lon', 'TEMPWAT_L0', 'CONDWAT_L0', 'PRESWAT_L0',
                                'TEMPWAT_L1', 'CONDWAT_L1', 'PRESWAT_L1', 'PRACSAL', 'DENSITY']
        exchange_pt1 = 'xp1'
        exchange_pt2 = 'xp2'
        stream_id_in, stream_id_out, stream_route_in, stream_route_out, stream_def_in_id, stream_def_out_id = self._setup_streams(
            exchange_pt1, exchange_pt2, available_fields_in, available_fields_out)

        dt = 20
        rdt_in = self._make_input_rdt(stream_def_in_id, dt)
        msg = rdt_in.to_granule()

        config = self._transform_config(stream_id_in, stream_id_out, exchange_pt1)
        pid = self.container.spawn_process('transform_stream', 'ion.processes.data.transforms.transform_prime', 'TransformPrime', config)
        rdt_out = self.container.proc_manager.procs[pid]._execute_transform(msg, (stream_id_in, stream_id_out))
        # need below to wrap result in a param val object
        rdt_out = RecordDictionaryTool.load_from_granule(rdt_out.to_granule())
        for k, v in rdt_out.iteritems():
            self.assertEqual(len(v), dt)

        self._validate_transforms(rdt_in, rdt_out)
        self.container.proc_manager.terminate_process(pid)

    def test_transform_prime_no_available_fields(self):
        """With no available fields on either side, the output granule carries no values."""
        available_fields_in = []
        available_fields_out = []
        exchange_pt1 = 'xp1'
        exchange_pt2 = 'xp2'
        stream_id_in, stream_id_out, stream_route_in, stream_route_out, stream_def_in_id, stream_def_out_id = self._setup_streams(
            exchange_pt1, exchange_pt2, available_fields_in, available_fields_out)

        # launch transform
        config = self._transform_config(stream_id_in, stream_id_out, exchange_pt1)
        pid = self.container.spawn_process('transform_stream', 'ion.processes.data.transforms.transform_prime', 'TransformPrime', config)

        # create publish
        publisher = StandaloneStreamPublisher(stream_id_in, stream_route_in)
        self.container.proc_manager.procs[pid].subscriber.xn.bind(stream_route_in.routing_key, publisher.xp)

        # data
        dt = 20
        rdt_in = self._make_input_rdt(stream_def_in_id, dt)
        msg = rdt_in.to_granule()

        # publish granule to transform and have transform publish it to subscriber

        # validate transformed data
        e = gevent.event.Event()

        def cb(msg, sr, sid):
            self.assertEqual(sid, stream_id_out)
            rdt_out = RecordDictionaryTool.load_from_granule(msg)
            self.assertEquals(set([k for k, v in rdt_out.iteritems()]), set(available_fields_out))
            for k, v in rdt_out.iteritems():
                self.assertEquals(rdt_out[k], None)
            # Only reached when every assertion above passed.
            e.set()

        sub = StandaloneStreamSubscriber('stream_subscriber', cb)
        sub.xn.bind(stream_route_out.routing_key, getattr(self.container.proc_manager.procs[pid], stream_id_out).xp)
        self.addCleanup(sub.stop)
        sub.start()

        # publish msg to transform
        publisher.publish(msg)

        # wait to receive msg
        self.assertTrue(e.wait(4))

    def test_transform_prime(self):
        """Publish L0 data through the transform and validate the received L1 granule."""
        available_fields_in = ['time', 'lat', 'lon', 'TEMPWAT_L0', 'CONDWAT_L0', 'PRESWAT_L0']
        available_fields_out = ['time', 'lat', 'lon', 'TEMPWAT_L0', 'CONDWAT_L0', 'PRESWAT_L0',
                                'TEMPWAT_L1', 'CONDWAT_L1', 'PRESWAT_L1', 'PRACSAL', 'DENSITY']
        exchange_pt1 = 'xp1'
        exchange_pt2 = 'xp2'
        stream_id_in, stream_id_out, stream_route_in, stream_route_out, stream_def_in_id, stream_def_out_id = self._setup_streams(
            exchange_pt1, exchange_pt2, available_fields_in, available_fields_out)

        # launch transform
        config = self._transform_config(stream_id_in, stream_id_out, exchange_pt1)
        pid = self.container.spawn_process('transform_stream', 'ion.processes.data.transforms.transform_prime', 'TransformPrime', config)

        # create publish
        publisher = StandaloneStreamPublisher(stream_id_in, stream_route_in)
        self.container.proc_manager.procs[pid].subscriber.xn.bind(stream_route_in.routing_key, publisher.xp)

        # data
        dt = 20
        rdt_in = self._make_input_rdt(stream_def_in_id, dt)
        msg = rdt_in.to_granule()

        # publish granule to transform and have transform publish it to subscriber

        # validate transformed data
        e = gevent.event.Event()

        def cb(msg, sr, sid):
            self.assertEqual(sid, stream_id_out)
            rdt_out = RecordDictionaryTool.load_from_granule(msg)
            self.assertEquals(set([k for k, v in rdt_out.iteritems()]), set(available_fields_out))
            self._validate_transforms(rdt_in, rdt_out)
            # Only reached when every assertion above passed.
            e.set()

        sub = StandaloneStreamSubscriber('stream_subscriber', cb)
        sub.xn.bind(stream_route_out.routing_key, getattr(self.container.proc_manager.procs[pid], stream_id_out).xp)
        self.addCleanup(sub.stop)
        sub.start()

        # publish msg to transform
        publisher.publish(msg)

        # wait to receive msg
        self.assertTrue(e.wait(4))
def _setup_resources(self):
    """Register the RUV test dataset resources and build the handler config.

    Creates the external dataset agent, agent instance, data provider, data
    source, external dataset and data source model through the data
    acquisition management service and links them together.  A parameter
    dictionary, stream definition and data product are then created for the
    dataset.  On completion ``self.EDA_RESOURCE_ID``, ``self.EDA_NAME`` and
    ``self.DVR_CONFIG['dh_cfg']`` are set.
    """
    # TODO: some or all of this (or some variation) should move to DAMS

    # Service clients used to build the test resources for the dataset
    dataset_mgmt = DatasetManagementServiceClient()
    acquisition = DataAcquisitionManagementServiceClient()
    product_mgmt = DataProductManagementServiceClient()
    registry = ResourceRegistryServiceClient()
    pubsub = PubsubManagementServiceClient()

    # Agent definition and agent instance
    agent = ExternalDatasetAgent(
        name='example data agent',
        handler_module=self.DVR_CONFIG['dvr_mod'],
        handler_class=self.DVR_CONFIG['dvr_cls'])
    agent_id = acquisition.create_external_dataset_agent(agent)

    agent_instance = ExternalDatasetAgentInstance(
        name='example dataset agent instance')
    agent_instance_id = acquisition.create_external_dataset_agent_instance(
        agent_instance, external_dataset_agent_id=agent_id)

    # Data provider
    provider = ExternalDataProvider(
        name='example data provider',
        institution=Institution(),
        contact=ContactInformation())
    provider.contact.individual_names_given = 'Christopher Mueller'
    provider.contact.email = '*****@*****.**'

    # Data source
    source = DataSource(
        name='example datasource',
        protocol_type='FILE',
        institution=Institution(),
        contact=ContactInformation())
    source.connection_params['base_data_url'] = ''
    source.contact.individual_names_given = 'Tim Giguere'
    source.contact.email = '*****@*****.**'

    # External dataset and the parameters describing where its files live
    ds_name = 'ruv_test_dataset'
    dataset = ExternalDataset(
        name=ds_name,
        dataset_description=DatasetDescription(),
        update_description=UpdateDescription(),
        contact=ContactInformation())
    params = dataset.dataset_description.parameters
    params['base_url'] = 'test_data/ruv/'
    params['list_pattern'] = 'RDLi_SEAB_2011_08_24_1600.ruv'
    params['date_pattern'] = '%Y %m %d %H %M'
    params['date_extraction_pattern'] = 'RDLi_SEAB_([\d]{4})_([\d]{2})_([\d]{2})_([\d]{2})([\d]{2}).ruv'
    # None of the dimensions are named for this dataset
    for dimension_key in ('temporal_dimension',
                          'zonal_dimension',
                          'meridional_dimension',
                          'vertical_dimension'):
        params[dimension_key] = None
    params['variables'] = []

    # Data source model
    model = DataSourceModel(name='ruv_model')
    model.data_handler_module = 'N/A'
    model.data_handler_class = 'N/A'

    # Run everything through DAMS
    dataset_id = acquisition.create_external_dataset(external_dataset=dataset)
    provider_id = acquisition.create_external_data_provider(
        external_data_provider=provider)
    source_id = acquisition.create_data_source(data_source=source)
    model_id = acquisition.create_data_source_model(model)

    # Register the ExternalDataset
    producer_id = acquisition.register_external_data_set(
        external_dataset_id=dataset_id)

    # Associate the resources with one another
    acquisition.assign_data_source_to_external_data_provider(
        data_source_id=source_id, external_data_provider_id=provider_id)
    acquisition.assign_data_source_to_data_model(
        data_source_id=source_id, data_source_model_id=model_id)
    acquisition.assign_external_dataset_to_data_source(
        external_dataset_id=dataset_id, data_source_id=source_id)
    acquisition.assign_external_dataset_to_agent_instance(
        external_dataset_id=dataset_id, agent_instance_id=agent_instance_id)

    # Single-context parameter dictionary: one int64 context on the time axis
    pdict = ParameterDictionary()
    time_context = ParameterContext(
        'data',
        param_type=QuantityType(value_encoding=numpy.dtype('int64')))
    time_context.axis = AxisTypeEnum.TIME
    time_context.uom = 'seconds since 01-01-1970'
    pdict.add_context(time_context)

    # Create a temporary stream definition so the data product can create
    # the stream
    context_ids = [
        dataset_mgmt.create_parameter_context(key, entry[1].dump())
        for key, entry in pdict.iteritems()
    ]
    pdict_id = dataset_mgmt.create_parameter_dictionary(
        'ruv_param_dict', context_ids)
    stream_def_id = pubsub.create_stream_definition(
        name="ruv",
        description="stream def for ruv testing",
        parameter_dictionary_id=pdict_id)

    temporal, spatial = time_series_domain()
    temporal_dump = temporal.dump()
    spatial_dump = spatial.dump()

    product = IonObject(
        RT.DataProduct,
        name='ruv_parsed_product',
        description='parsed ruv product',
        temporal_domain=temporal_dump,
        spatial_domain=spatial_dump)

    # Generate the data product and associate it to the ExternalDataset
    product_id = product_mgmt.create_data_product(
        data_product=product, stream_definition_id=stream_def_id)
    acquisition.assign_data_product(
        input_resource_id=dataset_id, data_product_id=product_id)

    # The data product owns the stream; look it up through the registry
    stream_ids, _ = registry.find_objects(
        subject=product_id,
        predicate=PRED.hasStream,
        object_type=RT.Stream,
        id_only=True)
    stream_id = stream_ids[0]

    log.info('Created resources: {0}'.format({'ExternalDataset': dataset_id, 'ExternalDataProvider': provider_id, 'DataSource': source_id, 'DataSourceModel': model_id, 'DataProducer': producer_id, 'DataProduct': product_id, 'Stream': stream_id}))

    # CBM: Eventually, probably want to group this somehow - not sure how yet...

    # Create the logger for receiving publications
    _, stream_route, _ = self.create_stream_and_logger(
        name='ruv', stream_id=stream_id)

    self.EDA_RESOURCE_ID = dataset_id
    self.EDA_NAME = ds_name
    self.DVR_CONFIG['dh_cfg'] = {
        'TESTING': True,
        'stream_id': stream_id,
        'stream_route': stream_route,
        'external_dataset_res': dataset,
        'param_dictionary': pdict.dump(),
        # CBM: Should this be put in the main body of the config - with mod & cls?
        'data_producer_id': producer_id,
        'max_records': 20,
    }
class TestStreamIngestionWorker(IonIntegrationTestCase):
    """Integration test for the ``StreamIngestionWorker`` process.

    ``setUp`` creates a stream, an activated subscription on the
    ``parsed_subscription`` exchange name and a publisher for the stream; the
    test spawns the worker on that queue, publishes one granule and checks
    the values that come back from the dataset.
    """

    def setUp(self):
        """Start the container and create the stream, subscription and publisher."""
        self._start_container()
        self.container.start_rel_from_url('res/deploy/r2deploy.yml')

        self.dataset_management_client = DatasetManagementServiceClient(
            node=self.container.node)
        self.pubsub_client = PubsubManagementServiceClient(
            node=self.container.node)

        self.time_dom, self.spatial_dom = time_series_domain()

        self.parameter_dict_id = self.dataset_management_client.read_parameter_dictionary_by_name(
            name='ctd_parsed_param_dict', id_only=True)

        self.stream_def_id = self.pubsub_client.create_stream_definition(
            name='stream_def', parameter_dictionary_id=self.parameter_dict_id)
        self.addCleanup(self.pubsub_client.delete_stream_definition,
                        self.stream_def_id)

        self.stream_id, self.route_id = self.pubsub_client.create_stream(
            name='parsed_stream',
            stream_definition_id=self.stream_def_id,
            exchange_point='science_data')
        self.addCleanup(self.pubsub_client.delete_stream, self.stream_id)

        self.subscription_id = self.pubsub_client.create_subscription(
            name='parsed_subscription',
            stream_ids=[self.stream_id],
            exchange_name='parsed_subscription')
        self.addCleanup(self.pubsub_client.delete_subscription,
                        self.subscription_id)

        # Cleanups run last-in first-out, so the subscription is deactivated
        # before it is deleted.
        self.pubsub_client.activate_subscription(self.subscription_id)
        self.addCleanup(self.pubsub_client.deactivate_subscription,
                        self.subscription_id)

        self.publisher = StandaloneStreamPublisher(self.stream_id,
                                                   self.route_id)

    @attr('LOCOINT')
    @unittest.skipIf(os.getenv('CEI_LAUNCH_TEST', False), 'Skip test while in CEI LAUNCH mode')
    def test_stream_ingestion_worker(self):
        """Publish one granule and verify it is stored in the dataset's 'raw' parameter."""
        self.start_ingestion_worker()

        context_ids, _ = self._create_param_contexts()
        pdict_id = self.dataset_management_client.create_parameter_dictionary(
            name='stream_ingestion_pdict',
            parameter_context_ids=context_ids,
            temporal_context='ingestion_timestamp')
        self.addCleanup(
            self.dataset_management_client.delete_parameter_dictionary,
            pdict_id)

        dataset_id = self.dataset_management_client.create_dataset(
            name='fake_dataset',
            description='fake_dataset',
            stream_id=self.stream_id,
            spatial_domain=self.spatial_dom.dump(),
            temporal_domain=self.time_dom.dump(),
            parameter_dictionary_id=pdict_id)
        self.addCleanup(self.dataset_management_client.delete_dataset,
                        dataset_id)

        self.cov = self._create_coverage(dataset_id=dataset_id,
                                         parameter_dict_id=pdict_id,
                                         time_dom=self.time_dom,
                                         spatial_dom=self.spatial_dom)
        self.addCleanup(self.cov.close)

        rdt = RecordDictionaryTool(stream_definition_id=self.stream_def_id)
        rdt['conductivity'] = 1
        rdt['pressure'] = 2
        rdt['salinity'] = 3

        # The listener callback sets self.data_modified, so the event has to
        # exist before the listener starts and before anything is published;
        # otherwise an early notification is lost (or hits a missing
        # attribute) and the wait below can only time out.
        self.data_modified = Event()
        self.start_listener(dataset_id)

        self.publisher.publish(rdt.to_granule())

        # Fail loudly on timeout instead of validating a dataset that may
        # never have been written to.
        self.assertTrue(self.data_modified.wait(30),
                        'Timed out waiting for the DatasetModified event')

        cov = self.get_coverage(dataset_id)
        # Close the coverage even when one of the assertions below fails.
        self.addCleanup(cov.close)
        self.assertIsNotNone(cov.get_parameter_values('raw'))

        deserializer = IonObjectDeserializer(obj_registry=get_obj_registry())

        granule = retrieve_stream(dataset_id)
        rdt_complex = RecordDictionaryTool.load_from_granule(granule)
        rdt_complex['raw'] = [
            deserializer.deserialize(i) for i in rdt_complex['raw']
        ]
        for gran in rdt_complex['raw']:
            rdt_new = RecordDictionaryTool.load_from_granule(gran)
            self.assertIn(1, rdt_new['conductivity'])
            self.assertIn(2, rdt_new['pressure'])
            self.assertIn(3, rdt_new['salinity'])

    def start_ingestion_worker(self):
        """Spawn a ``StreamIngestionWorker`` process on the 'parsed_subscription' queue."""
        config = DotDict()
        config.process.queue_name = 'parsed_subscription'

        self.container.spawn_process(
            name='stream_ingestion_worker',
            module='ion.processes.data.ingestion.stream_ingestion_worker',
            cls='StreamIngestionWorker',
            config=config)

    def start_listener(self, dataset_id):
        """Subscribe to ``DatasetModified`` events originating from *dataset_id*.

        The callback sets ``self.data_modified``, which therefore must be
        assigned before events can arrive.  The subscriber is stopped during
        test cleanup.
        """
        def cb(*args, **kwargs):
            self.data_modified.set()

        es = EventSubscriber(event_type=OT.DatasetModified,
                             callback=cb,
                             origin=dataset_id)
        es.start()
        self.addCleanup(es.stop)

    def _create_param_contexts(self):
        """Create the 'ingestion_timestamp' and 'raw' parameter contexts.

        Returns a tuple ``(context_ids, t_ctxt_id)``: the ids of both created
        contexts, and the id of the timestamp context on its own.
        """
        context_ids = []

        t_ctxt = ParameterContext(
            'ingestion_timestamp',
            param_type=QuantityType(value_encoding=numpy.dtype('float64')))
        t_ctxt.uom = 'seconds since 1900-01-01'
        t_ctxt.fill_value = -9999
        t_ctxt_id = self.dataset_management_client.create_parameter_context(
            name='ingestion_timestamp', parameter_context=t_ctxt.dump())
        context_ids.append(t_ctxt_id)

        raw_ctxt = ParameterContext('raw', param_type=ArrayType())
        raw_ctxt.uom = ''
        context_ids.append(
            self.dataset_management_client.create_parameter_context(
                name='raw', parameter_context=raw_ctxt.dump()))

        return context_ids, t_ctxt_id

    def _create_coverage(self, dataset_id, parameter_dict_id, time_dom,
                         spatial_dom):
        """Build a ``SimplexCoverage`` for *dataset_id* under the dataset cache root.

        The parameter dictionary is read from dataset management and the
        domains are reloaded from the dumps of *time_dom* and *spatial_dom*.
        """
        pd = self.dataset_management_client.read_parameter_dictionary(
            parameter_dict_id)
        pdict = ParameterDictionary.load(pd)
        sdom = GridDomain.load(spatial_dom.dump())
        tdom = GridDomain.load(time_dom.dump())
        file_root = FileSystem.get_url(FS.CACHE, 'datasets')
        scov = SimplexCoverage(file_root,
                               dataset_id,
                               dataset_id,
                               parameter_dictionary=pdict,
                               temporal_domain=tdom,
                               spatial_domain=sdom)
        return scov

    def get_coverage(self, dataset_id, mode='w'):
        """Load the coverage stored for *dataset_id* from the dataset cache root."""
        file_root = FileSystem.get_url(FS.CACHE, 'datasets')
        coverage = AbstractCoverage.load(file_root, dataset_id, mode=mode)
        return coverage
def _setup_resources(self):
    """Register the dummy test dataset resources and build the handler config.

    Creates the external dataset agent, agent instance, data provider, data
    source, external dataset and data source model through the data
    acquisition management service and links them together.  A parameter
    dictionary, stream definition and data product are then created for the
    dataset.  On completion ``self.EDA_RESOURCE_ID``, ``self.EDA_NAME`` and
    ``self.DVR_CONFIG['dh_cfg']`` are set, the ``test_data/dummy`` folder is
    emptied (or created) and four dummy files are added to it.
    """
    # TODO: some or all of this (or some variation) should move to DAMS

    # Service clients used to build the test resources for the dataset
    dataset_mgmt = DatasetManagementServiceClient()
    acquisition = DataAcquisitionManagementServiceClient()
    product_mgmt = DataProductManagementServiceClient()
    registry = ResourceRegistryServiceClient()
    pubsub = PubsubManagementServiceClient()

    # Agent definition and agent instance
    agent = ExternalDatasetAgent(
        handler_module=self.DVR_CONFIG['dvr_mod'],
        handler_class=self.DVR_CONFIG['dvr_cls'])
    agent_id = acquisition.create_external_dataset_agent(agent)

    agent_instance = ExternalDatasetAgentInstance()
    agent_instance_id = acquisition.create_external_dataset_agent_instance(
        agent_instance, external_dataset_agent_id=agent_id)

    # Data provider
    provider = ExternalDataProvider(
        institution=Institution(), contact=ContactInformation())
    provider.contact.individual_names_given = 'Christopher Mueller'
    provider.contact.email = '*****@*****.**'

    # Data source
    source = DataSource(
        protocol_type='DAP',
        institution=Institution(),
        contact=ContactInformation())
    source.connection_params['base_data_url'] = ''
    source.contact.individual_names_given = 'Tim Giguere'
    source.contact.email = '*****@*****.**'

    # External dataset and the parameters describing where its files live
    ds_name = 'dummy_dataset'
    dataset = ExternalDataset(
        name=ds_name,
        dataset_description=DatasetDescription(),
        update_description=UpdateDescription(),
        contact=ContactInformation())
    params = dataset.dataset_description.parameters
    params['base_url'] = 'test_data/dummy'
    params['list_pattern'] = 'test*.dum'
    params['date_pattern'] = '%Y %m %d %H'
    params['date_extraction_pattern'] = 'test([\d]{4})-([\d]{2})-([\d]{2})-([\d]{2}).dum'
    params['temporal_dimension'] = 'time'
    params['zonal_dimension'] = 'lon'
    params['meridional_dimension'] = 'lat'
    params['variables'] = ['dummy']

    # Data source model
    model = DataSourceModel(name='dap_model')
    model.data_handler_module = 'N/A'
    model.data_handler_class = 'N/A'

    # Run everything through DAMS
    dataset_id = acquisition.create_external_dataset(external_dataset=dataset)
    provider_id = acquisition.create_external_data_provider(
        external_data_provider=provider)
    source_id = acquisition.create_data_source(data_source=source)
    model_id = acquisition.create_data_source_model(model)

    # Register the ExternalDataset
    producer_id = acquisition.register_external_data_set(
        external_dataset_id=dataset_id)

    # Associate the resources with one another
    acquisition.assign_data_source_to_external_data_provider(
        data_source_id=source_id, external_data_provider_id=provider_id)
    acquisition.assign_data_source_to_data_model(
        data_source_id=source_id, data_source_model_id=model_id)
    acquisition.assign_external_dataset_to_data_source(
        external_dataset_id=dataset_id, data_source_id=source_id)
    acquisition.assign_external_dataset_to_agent_instance(
        external_dataset_id=dataset_id, agent_instance_id=agent_instance_id)

    # Create a temporary stream definition so the data product can create
    # the stream.  The 'time' and 'dummy' contexts are looked up by name; the
    # rest come from the locally built parameter dictionary.
    context_ids = [
        dataset_mgmt.read_parameter_context_by_name('time', id_only=True),
        dataset_mgmt.read_parameter_context_by_name('dummy', id_only=True),
    ]
    context_ids.extend(
        dataset_mgmt.create_parameter_context(key, entry[1].dump())
        for key, entry in self._create_parameter_dictionary().iteritems())

    pdict_id = dataset_mgmt.create_parameter_dictionary(
        'dummy_param_dict', context_ids)
    stream_def_id = pubsub.create_stream_definition(
        name="dummy",
        parameter_dictionary_id=pdict_id,
        description="dummy")

    temporal, spatial = time_series_domain()
    temporal_dump = temporal.dump()
    spatial_dump = spatial.dump()

    product = DataProduct(
        name='dummy_dataset',
        description='dummy data product',
        temporal_domain=temporal_dump,
        spatial_domain=spatial_dump)

    # Generate the data product and associate it to the ExternalDataset
    product_id = product_mgmt.create_data_product(
        data_product=product, stream_definition_id=stream_def_id)
    acquisition.assign_data_product(
        input_resource_id=dataset_id, data_product_id=product_id)

    # The data product owns the stream; look it up through the registry
    stream_ids, _ = registry.find_objects(
        subject=product_id,
        predicate=PRED.hasStream,
        object_type=RT.Stream,
        id_only=True)
    stream_id = stream_ids[0]

    log.info('Created resources: {0}'.format({'ExternalDataset': dataset_id, 'ExternalDataProvider': provider_id, 'DataSource': source_id, 'DataSourceModel': model_id, 'DataProducer': producer_id, 'DataProduct': product_id, 'Stream': stream_id}))

    # Create the logger for receiving publications
    _, stream_route, stream_def = self.create_stream_and_logger(
        name='dummy', stream_id=stream_id)

    self.EDA_RESOURCE_ID = dataset_id
    self.EDA_NAME = ds_name
    self.DVR_CONFIG['dh_cfg'] = {
        'TESTING': True,
        'stream_id': stream_id,
        'stream_route': stream_route,
        'stream_def': stream_def,
        # CBM: Should this be put in the main body of the config - with mod & cls?
        'data_producer_id': producer_id,
        'max_records': 4,
    }

    # Start from an empty data folder: remove leftover files (best effort,
    # failures are only logged) and create the folder when it is missing.
    folder = 'test_data/dummy'
    if os.path.isdir(folder):
        for entry in os.listdir(folder):
            target = os.path.join(folder, entry)
            try:
                if os.path.isfile(target):
                    os.unlink(target)
            except Exception as err:
                log.debug('_setup_resources error: {0}'.format(err))

    if not os.path.exists(folder):
        os.makedirs(folder)

    for dummy_path in ('test_data/dummy/test2012-02-01-12.dum',
                       'test_data/dummy/test2012-02-01-13.dum',
                       'test_data/dummy/test2012-02-01-14.dum',
                       'test_data/dummy/test2012-02-01-15.dum'):
        self.add_dummy_file(dummy_path)
class DatasetManagementIntTest(IonIntegrationTestCase):
    """Integration tests for dataset, parameter context, parameter function
    and parameter dictionary operations of the dataset management service."""

    def setUp(self):
        """Start the container and create the service clients."""
        self._start_container()
        self.container.start_rel_from_url('res/deploy/r2deploy.yml')

        self.resource_registry = ResourceRegistryServiceClient()
        self.dataset_management = DatasetManagementServiceClient()

    def test_dataset_crud(self):
        """Create, read, update and register a dataset."""
        pdict_id = self.dataset_management.read_parameter_dictionary_by_name(
            'ctd_parsed_param_dict', id_only=True)
        tdom, sdom = time_series_domain()
        dataset_id = self.dataset_management.create_dataset(
            name='ctd_dataset',
            parameter_dictionary_id=pdict_id,
            spatial_domain=sdom.dump(),
            temporal_domain=tdom.dump())

        ds_obj = self.dataset_management.read_dataset(dataset_id)
        self.assertEquals(ds_obj.name, 'ctd_dataset')

        ds_obj.name = 'something different'
        self.dataset_management.update_dataset(ds_obj)
        self.dataset_management.register_dataset(dataset_id)

        ds_obj2 = self.dataset_management.read_dataset(dataset_id)
        self.assertEquals(ds_obj.name, ds_obj2.name)
        self.assertTrue(ds_obj2.registered)

    def test_context_crud(self):
        """Read back one created parameter context, delete it, and expect NotFound."""
        context_ids = self.create_contexts()
        context_id = context_ids.pop()

        context = DatasetManagementService.get_parameter_context(context_id)
        self.assertIsInstance(context, ParameterContext)
        self.assertEquals(context.identifier, context_id)

        self.dataset_management.delete_parameter_context(context_id)

        with self.assertRaises(NotFound):
            self.dataset_management.read_parameter_context(context_id)

    def test_pfunc_crud(self):
        """Build a dictionary from the functional contexts and load one function back."""
        contexts, funcs = self.create_pfuncs()
        context_ids = [
            context_id for ctxt, context_id in contexts.itervalues()
        ]

        pdict_id = self.dataset_management.create_parameter_dictionary(
            name='functional_pdict',
            parameter_context_ids=context_ids,
            temporal_context='time')
        self.addCleanup(self.dataset_management.delete_parameter_dictionary,
                        pdict_id)

        expr, expr_id = funcs['CONDWAT_L1']
        func_class = DatasetManagementService.get_parameter_function(expr_id)
        self.assertIsInstance(func_class, NumexprFunction)

    def test_pdict_crud(self):
        """Create a parameter dictionary, verify its contents, delete it, and expect NotFound."""
        context_ids = self.create_contexts()
        pdict_res_id = self.dataset_management.create_parameter_dictionary(
            name='pdict1',
            parameter_context_ids=context_ids,
            temporal_context='time')

        pdict_contexts = self.dataset_management.read_parameter_contexts(
            parameter_dictionary_id=pdict_res_id, id_only=True)

        pdict = DatasetManagementService.get_parameter_dictionary(pdict_res_id)
        self.assertIsInstance(pdict, ParameterDictionary)
        self.assertIn('time_test', pdict)
        self.assertEquals(pdict.identifier, pdict_res_id)

        self.assertEquals(set(pdict_contexts), set(context_ids))

        self.dataset_management.delete_parameter_dictionary(
            parameter_dictionary_id=pdict_res_id)
        with self.assertRaises(NotFound):
            self.dataset_management.read_parameter_dictionary(
                parameter_dictionary_id=pdict_res_id)

    def create_contexts(self):
        """Create five simple quantity contexts and return the list of their ids.

        The contexts are conductivity_test, pressure_test, salinity_test,
        temp_test (all float32) and time_test (int64), in that order.
        """
        context_ids = []

        cond_ctxt = ParameterContext(
            'conductivity_test',
            param_type=QuantityType(value_encoding=np.float32))
        cond_ctxt.uom = 'unknown'
        cond_ctxt.fill_value = 0.0
        context_ids.append(
            self.dataset_management.create_parameter_context(
                name='conductivity_test',
                parameter_context=cond_ctxt.dump()))

        pres_ctxt = ParameterContext(
            'pressure_test',
            param_type=QuantityType(value_encoding=np.float32))
        pres_ctxt.uom = 'Pascal'
        pres_ctxt.fill_value = 0
        context_ids.append(
            self.dataset_management.create_parameter_context(
                name='pressure_test', parameter_context=pres_ctxt.dump()))

        sal_ctxt = ParameterContext(
            'salinity_test',
            param_type=QuantityType(value_encoding=np.float32))
        sal_ctxt.uom = 'PSU'
        sal_ctxt.fill_value = 0
        context_ids.append(
            self.dataset_management.create_parameter_context(
                name='salinity_test', parameter_context=sal_ctxt.dump()))

        temp_ctxt = ParameterContext(
            'temp_test', param_type=QuantityType(value_encoding=np.float32))
        temp_ctxt.uom = 'degree_Celsius'
        temp_ctxt.fill_value = 0.0
        context_ids.append(
            self.dataset_management.create_parameter_context(
                name='temp_test', parameter_context=temp_ctxt.dump()))

        t_ctxt = ParameterContext(
            'time_test', param_type=QuantityType(value_encoding=np.int64))
        t_ctxt.uom = 'seconds since 1970-01-01'
        t_ctxt.fill_value = 0
        context_ids.append(
            self.dataset_management.create_parameter_context(
                name='time_test', parameter_context=t_ctxt.dump()))

        return context_ids

    def create_pfuncs(self):
        """Create the L0 contexts plus the derived (function-based) contexts.

        Returns a tuple ``(contexts, funcs)``:

        * ``contexts`` maps a parameter name to ``(ParameterContext, context_id)``
        * ``funcs`` maps a derived parameter name to ``(function, function_id)``
          for every function registered through ``create_parameter_function``
        """
        contexts = {}
        funcs = {}

        t_ctxt = ParameterContext(
            'TIME',
            param_type=QuantityType(value_encoding=np.dtype('int64')))
        t_ctxt.uom = 'seconds since 01-01-1900'
        t_ctxt_id = self.dataset_management.create_parameter_context(
            name='test_TIME', parameter_context=t_ctxt.dump())
        contexts['TIME'] = (t_ctxt, t_ctxt_id)

        lat_ctxt = ParameterContext(
            'LAT',
            param_type=ConstantType(
                QuantityType(value_encoding=np.dtype('float32'))),
            fill_value=-9999)
        lat_ctxt.axis = AxisTypeEnum.LAT
        lat_ctxt.uom = 'degree_north'
        lat_ctxt_id = self.dataset_management.create_parameter_context(
            name='test_LAT', parameter_context=lat_ctxt.dump())
        contexts['LAT'] = lat_ctxt, lat_ctxt_id

        lon_ctxt = ParameterContext(
            'LON',
            param_type=ConstantType(
                QuantityType(value_encoding=np.dtype('float32'))),
            fill_value=-9999)
        lon_ctxt.axis = AxisTypeEnum.LON
        lon_ctxt.uom = 'degree_east'
        lon_ctxt_id = self.dataset_management.create_parameter_context(
            name='test_LON', parameter_context=lon_ctxt.dump())
        contexts['LON'] = lon_ctxt, lon_ctxt_id

        # Independent Parameters

        # Temperature - values expected to be the decimal results of conversion from hex
        temp_ctxt = ParameterContext(
            'TEMPWAT_L0',
            param_type=QuantityType(value_encoding=np.dtype('float32')),
            fill_value=-9999)
        temp_ctxt.uom = 'deg_C'
        temp_ctxt_id = self.dataset_management.create_parameter_context(
            name='test_TEMPWAT_L0', parameter_context=temp_ctxt.dump())
        contexts['TEMPWAT_L0'] = temp_ctxt, temp_ctxt_id

        # Conductivity - values expected to be the decimal results of conversion from hex
        cond_ctxt = ParameterContext(
            'CONDWAT_L0',
            param_type=QuantityType(value_encoding=np.dtype('float32')),
            fill_value=-9999)
        cond_ctxt.uom = 'S m-1'
        cond_ctxt_id = self.dataset_management.create_parameter_context(
            name='test_CONDWAT_L0', parameter_context=cond_ctxt.dump())
        contexts['CONDWAT_L0'] = cond_ctxt, cond_ctxt_id

        # Pressure - values expected to be the decimal results of conversion from hex
        press_ctxt = ParameterContext(
            'PRESWAT_L0',
            param_type=QuantityType(value_encoding=np.dtype('float32')),
            fill_value=-9999)
        press_ctxt.uom = 'dbar'
        press_ctxt_id = self.dataset_management.create_parameter_context(
            name='test_PRESWAT_L0', parameter_context=press_ctxt.dump())
        contexts['PRESWAT_L0'] = press_ctxt, press_ctxt_id

        # Dependent Parameters
        # Note: each function is registered (dumped) before its param_map is
        # assigned; the map is only present on the object held by the context.

        # TEMPWAT_L1 = (TEMPWAT_L0 / 10000) - 10
        tl1_func = '(T / 10000) - 10'
        expr = NumexprFunction('TEMPWAT_L1', tl1_func, ['T'])
        expr_id = self.dataset_management.create_parameter_function(
            name='test_TEMPWAT_L1', parameter_function=expr.dump())
        funcs['TEMPWAT_L1'] = expr, expr_id

        tl1_pmap = {'T': 'TEMPWAT_L0'}
        expr.param_map = tl1_pmap
        tempL1_ctxt = ParameterContext(
            'TEMPWAT_L1',
            param_type=ParameterFunctionType(function=expr),
            variability=VariabilityEnum.TEMPORAL)
        tempL1_ctxt.uom = 'deg_C'
        tempL1_ctxt_id = self.dataset_management.create_parameter_context(
            name='test_TEMPWAT_L1',
            parameter_context=tempL1_ctxt.dump(),
            parameter_function_id=expr_id)
        contexts['TEMPWAT_L1'] = tempL1_ctxt, tempL1_ctxt_id

        # CONDWAT_L1 = (CONDWAT_L0 / 100000) - 0.5
        cl1_func = '(C / 100000) - 0.5'
        expr = NumexprFunction('CONDWAT_L1', cl1_func, ['C'])
        expr_id = self.dataset_management.create_parameter_function(
            name='test_CONDWAT_L1', parameter_function=expr.dump())
        funcs['CONDWAT_L1'] = expr, expr_id

        cl1_pmap = {'C': 'CONDWAT_L0'}
        expr.param_map = cl1_pmap
        condL1_ctxt = ParameterContext(
            'CONDWAT_L1',
            param_type=ParameterFunctionType(function=expr),
            variability=VariabilityEnum.TEMPORAL)
        condL1_ctxt.uom = 'S m-1'
        condL1_ctxt_id = self.dataset_management.create_parameter_context(
            name='test_CONDWAT_L1',
            parameter_context=condL1_ctxt.dump(),
            parameter_function_id=expr_id)
        contexts['CONDWAT_L1'] = condL1_ctxt, condL1_ctxt_id

        # Equation uses p_range, which is a calibration coefficient - Fixing to 679.34040721
        #   PRESWAT_L1 = (PRESWAT_L0 * p_range / (0.85 * 65536)) - (0.05 * p_range)
        pl1_func = '(P * p_range / (0.85 * 65536)) - (0.05 * p_range)'
        expr = NumexprFunction('PRESWAT_L1', pl1_func, ['P', 'p_range'])
        expr_id = self.dataset_management.create_parameter_function(
            name='test_PRESWAT_L1', parameter_function=expr.dump())
        funcs['PRESWAT_L1'] = expr, expr_id

        pl1_pmap = {'P': 'PRESWAT_L0', 'p_range': 679.34040721}
        expr.param_map = pl1_pmap
        presL1_ctxt = ParameterContext(
            'PRESWAT_L1',
            param_type=ParameterFunctionType(function=expr),
            variability=VariabilityEnum.TEMPORAL)
        # Pressure keeps the unit of PRESWAT_L0 and gets its own resource
        # name; it previously reused the conductivity unit and the
        # 'test_CONDWAT_L1' name.
        presL1_ctxt.uom = 'dbar'
        presL1_ctxt_id = self.dataset_management.create_parameter_context(
            name='test_PRESWAT_L1',
            parameter_context=presL1_ctxt.dump(),
            parameter_function_id=expr_id)
        contexts['PRESWAT_L1'] = presL1_ctxt, presL1_ctxt_id

        # Density & practical salinity calculated using the Gibbs Seawater library - available via python-gsw project:
        #       https://code.google.com/p/python-gsw/ & http://pypi.python.org/pypi/gsw/3.0.1

        # PRACSAL = gsw.SP_from_C((CONDWAT_L1 * 10), TEMPWAT_L1, PRESWAT_L1)
        owner = 'gsw'
        sal_func = 'SP_from_C'
        sal_arglist = ['C', 't', 'p']
        expr = PythonFunction('PRACSAL', owner, sal_func, sal_arglist)
        expr_id = self.dataset_management.create_parameter_function(
            name='test_PRACSAL', parameter_function=expr.dump())
        funcs['PRACSAL'] = expr, expr_id

        # A magic function that may or may not exist actually forms the line below at runtime.
        sal_pmap = {
            'C': NumexprFunction('CONDWAT_L1*10', 'C*10', ['C'],
                                 param_map={'C': 'CONDWAT_L1'}),
            't': 'TEMPWAT_L1',
            'p': 'PRESWAT_L1'
        }
        expr.param_map = sal_pmap
        sal_ctxt = ParameterContext('PRACSAL',
                                    param_type=ParameterFunctionType(expr),
                                    variability=VariabilityEnum.TEMPORAL)
        sal_ctxt.uom = 'g kg-1'
        sal_ctxt_id = self.dataset_management.create_parameter_context(
            name='test_PRACSAL',
            parameter_context=sal_ctxt.dump(),
            parameter_function_id=expr_id)
        contexts['PRACSAL'] = sal_ctxt, sal_ctxt_id

        # absolute_salinity = gsw.SA_from_SP(PRACSAL, PRESWAT_L1, longitude, latitude)
        # conservative_temperature = gsw.CT_from_t(absolute_salinity, TEMPWAT_L1, PRESWAT_L1)
        # DENSITY = gsw.rho(absolute_salinity, conservative_temperature, PRESWAT_L1)
        owner = 'gsw'
        abs_sal_expr = PythonFunction('abs_sal', owner, 'SA_from_SP',
                                      ['PRACSAL', 'PRESWAT_L1', 'LON', 'LAT'])
        cons_temp_expr = PythonFunction(
            'cons_temp', owner, 'CT_from_t',
            [abs_sal_expr, 'TEMPWAT_L1', 'PRESWAT_L1'])
        dens_expr = PythonFunction(
            'DENSITY', owner, 'rho',
            [abs_sal_expr, cons_temp_expr, 'PRESWAT_L1'])
        dens_ctxt = ParameterContext(
            'DENSITY',
            param_type=ParameterFunctionType(dens_expr),
            variability=VariabilityEnum.TEMPORAL)
        dens_ctxt.uom = 'kg m-3'
        # The density function is not registered separately, so no
        # parameter_function_id is passed here.
        dens_ctxt_id = self.dataset_management.create_parameter_context(
            name='test_DENSITY', parameter_context=dens_ctxt.dump())
        contexts['DENSITY'] = dens_ctxt, dens_ctxt_id

        return contexts, funcs

    def test_verify_contexts(self):
        """Every context of 'ctd_parsed_param_dict' exposes the expected fields."""
        pdict_id = self.dataset_management.read_parameter_dictionary_by_name(
            name='ctd_parsed_param_dict', id_only=True)
        pcontexts = self.dataset_management.read_parameter_contexts(
            parameter_dictionary_id=pdict_id)

        required_fields = (
            'fill_value',
            'reference_urls',
            'internal_name',
            'display_name',
            'standard_name',
            'ooi_short_name',
            'description',
            'precision',
        )
        for pcontext in pcontexts:
            for field in required_fields:
                self.assertIn(field, pcontext)
class RecordDictionaryIntegrationTest(IonIntegrationTestCase):
    '''
    Integration tests for RecordDictionaryTool.

    Covers granule round-tripping over a pubsub stream, stream definitions
    restricted with available_fields, fill-value handling, and parameter
    functions (L1 conversions, practical salinity, density).
    '''

    # Kept as class attributes for backward compatibility. setUp() rebinds
    # per-instance lists so names recorded by one test are not shared with
    # (and torn down again by) every other test of this class.
    xps = []
    xns = []

    def setUp(self):
        '''
        Start the container, deploy r2deploy.yml and reset per-test state
        '''
        self._start_container()
        self.container.start_rel_from_url('res/deploy/r2deploy.yml')
        self.dataset_management = DatasetManagementServiceClient()
        self.pubsub_management = PubsubManagementServiceClient()

        # Expected values checked by verify_incoming()
        self.rdt = None
        self.data_producer_id = None
        self.provider_metadata_update = None
        self.event = Event()

        # Exchange points / queues created by the current test only
        self.xps = []
        self.xns = []

    def tearDown(self):
        '''
        Delete every exchange queue and exchange point recorded by the test
        '''
        for xn in self.xns:
            xni = self.container.ex_manager.create_xn_queue(xn)
            xni.delete()
        for xp in self.xps:
            xpi = self.container.ex_manager.create_xp(xp)
            xpi.delete()

    def verify_incoming(self, m, r, s):
        '''
        Subscriber callback: compare the received granule with the values
        stored on the test case, then signal self.event

        m is the received granule; r and s are supplied by the subscriber
        and are not used here.
        '''
        rdt = RecordDictionaryTool.load_from_granule(m)
        self.assertEqual(rdt, self.rdt)
        self.assertEqual(m.data_producer_id, self.data_producer_id)
        self.assertEqual(m.provider_metadata_update, self.provider_metadata_update)
        self.assertNotEqual(m.creation_timestamp, None)
        self.event.set()

    def test_granule(self):
        '''
        Publish a granule, verify it on the subscriber side, then exercise
        available_fields filtering and fill-value substitution
        '''
        pdict_id = self.dataset_management.read_parameter_dictionary_by_name(
            'ctd_parsed_param_dict', id_only=True)
        stream_def_id = self.pubsub_management.create_stream_definition(
            'ctd', parameter_dictionary_id=pdict_id)
        pdict = DatasetManagementService.get_parameter_dictionary_by_name(
            'ctd_parsed_param_dict')
        self.addCleanup(self.pubsub_management.delete_stream_definition, stream_def_id)

        stream_id, route = self.pubsub_management.create_stream(
            'ctd_stream', 'xp1', stream_definition_id=stream_def_id)
        self.addCleanup(self.pubsub_management.delete_stream, stream_id)
        self.xps.append('xp1')

        publisher = StandaloneStreamPublisher(stream_id, route)

        subscriber = StandaloneStreamSubscriber('sub', self.verify_incoming)
        subscriber.start()

        subscription_id = self.pubsub_management.create_subscription(
            'sub', stream_ids=[stream_id])
        self.xns.append('sub')
        self.pubsub_management.activate_subscription(subscription_id)

        rdt = RecordDictionaryTool(stream_definition_id=stream_def_id)
        rdt['time'] = np.arange(10)
        rdt['temp'] = np.random.randn(10) * 10 + 30
        rdt['pressure'] = [20] * 10

        self.assertEqual(set(pdict.keys()), set(rdt.fields))
        self.assertEqual(pdict.temporal_parameter_name, rdt.temporal_parameter)

        # Expected values for verify_incoming()
        self.rdt = rdt
        self.data_producer_id = 'data_producer'
        self.provider_metadata_update = {1: 1}

        publisher.publish(rdt.to_granule(data_producer_id='data_producer',
                                         provider_metadata_update={1: 1}))

        self.assertTrue(self.event.wait(10))

        self.pubsub_management.deactivate_subscription(subscription_id)
        self.pubsub_management.delete_subscription(subscription_id)

        # A stream definition restricted to a subset of the fields
        filtered_stream_def_id = self.pubsub_management.create_stream_definition(
            'filtered', parameter_dictionary_id=pdict_id,
            available_fields=['time', 'temp'])
        self.addCleanup(self.pubsub_management.delete_stream_definition,
                        filtered_stream_def_id)

        rdt = RecordDictionaryTool(stream_definition_id=filtered_stream_def_id)
        self.assertEqual(rdt._available_fields, ['time', 'temp'])
        rdt['time'] = np.arange(20)
        rdt['temp'] = np.arange(20)
        # 'pressure' is not among the available fields
        with self.assertRaises(KeyError):
            rdt['pressure'] = np.arange(20)

        granule = rdt.to_granule()
        rdt2 = RecordDictionaryTool.load_from_granule(granule)
        self.assertEqual(rdt._available_fields, rdt2._available_fields)
        self.assertEqual(rdt.fields, rdt2.fields)
        for k, _ in rdt.iteritems():
            self.assertTrue(np.array_equal(rdt[k], rdt2[k]))

        # None entries: an all-None array reads back as None, a partially
        # None array reads back with the fill value substituted
        rdt = RecordDictionaryTool(stream_definition_id=stream_def_id)
        rdt['time'] = np.array([None, None, None])
        self.assertIsNone(rdt['time'])

        rdt['time'] = np.array([None, 1, 2])
        self.assertEqual(rdt['time'][0], rdt.fill_value('time'))

    def test_rdt_param_funcs(self):
        '''
        Set the L0 inputs and check the derived DENSITY value
        '''
        rdt = self.create_rdt()
        rdt['TIME'] = [0]
        rdt['TEMPWAT_L0'] = [280000]
        rdt['CONDWAT_L0'] = [100000]
        rdt['PRESWAT_L0'] = [2789]

        rdt['LAT'] = [45]
        rdt['LON'] = [-71]

        np.testing.assert_array_almost_equal(rdt['DENSITY'], np.array([1001.76506258]))

    def create_rdt(self):
        '''
        Build a RecordDictionaryTool backed by the contexts from create_pfuncs()

        Returns the RecordDictionaryTool; the parameter dictionary and the
        stream definition created on the way are registered for cleanup.
        '''
        contexts, pfuncs = self.create_pfuncs()
        context_ids = [_id for ct, _id in contexts.itervalues()]

        pdict_id = self.dataset_management.create_parameter_dictionary(
            name='functional_pdict', parameter_context_ids=context_ids,
            temporal_context='test_TIME')
        self.addCleanup(self.dataset_management.delete_parameter_dictionary, pdict_id)

        stream_def_id = self.pubsub_management.create_stream_definition(
            'functional', parameter_dictionary_id=pdict_id)
        self.addCleanup(self.pubsub_management.delete_stream_definition, stream_def_id)

        return RecordDictionaryTool(stream_definition_id=stream_def_id)

    def _register_context(self, contexts, key, name, ctxt, **kwargs):
        '''
        Create the parameter context resource, schedule its deletion and
        record (ctxt, id) under contexts[key]; returns the new id

        Extra keyword arguments are forwarded to create_parameter_context.
        '''
        ctxt_id = self.dataset_management.create_parameter_context(
            name=name, parameter_context=ctxt.dump(), **kwargs)
        self.addCleanup(self.dataset_management.delete_parameter_context, ctxt_id)
        contexts[key] = (ctxt, ctxt_id)
        return ctxt_id

    def _register_function(self, funcs, key, name, expr):
        '''
        Create the parameter function resource, schedule its deletion and
        record (expr, id) under funcs[key]; returns the new id

        The function is dumped as passed in, so callers that assign
        expr.param_map afterwards keep the map out of the stored resource.
        '''
        expr_id = self.dataset_management.create_parameter_function(
            name=name, parameter_function=expr.dump())
        self.addCleanup(self.dataset_management.delete_parameter_function, expr_id)
        funcs[key] = (expr, expr_id)
        return expr_id

    def create_pfuncs(self):
        '''
        Create the parameter contexts and functions for the functional tests

        Returns (contexts, funcs): dicts keyed by parameter name whose values
        are (object, resource_id) tuples.
        '''
        contexts = {}
        funcs = {}

        t_ctxt = ParameterContext(
            'TIME', param_type=QuantityType(value_encoding=np.dtype('int64')))
        t_ctxt.uom = 'seconds since 01-01-1900'
        self._register_context(contexts, 'TIME', 'test_TIME', t_ctxt)

        lat_ctxt = ParameterContext(
            'LAT',
            param_type=ConstantType(QuantityType(value_encoding=np.dtype('float32'))),
            fill_value=-9999)
        lat_ctxt.axis = AxisTypeEnum.LAT
        lat_ctxt.uom = 'degree_north'
        self._register_context(contexts, 'LAT', 'test_LAT', lat_ctxt)

        lon_ctxt = ParameterContext(
            'LON',
            param_type=ConstantType(QuantityType(value_encoding=np.dtype('float32'))),
            fill_value=-9999)
        lon_ctxt.axis = AxisTypeEnum.LON
        lon_ctxt.uom = 'degree_east'
        self._register_context(contexts, 'LON', 'test_LON', lon_ctxt)

        # Independent Parameters

        # Temperature - values expected to be the decimal results of conversion from hex
        temp_ctxt = ParameterContext(
            'TEMPWAT_L0', param_type=QuantityType(value_encoding=np.dtype('float32')),
            fill_value=-9999)
        temp_ctxt.uom = 'deg_C'
        self._register_context(contexts, 'TEMPWAT_L0', 'test_TEMPWAT_L0', temp_ctxt)

        # Conductivity - values expected to be the decimal results of conversion from hex
        cond_ctxt = ParameterContext(
            'CONDWAT_L0', param_type=QuantityType(value_encoding=np.dtype('float32')),
            fill_value=-9999)
        cond_ctxt.uom = 'S m-1'
        self._register_context(contexts, 'CONDWAT_L0', 'test_CONDWAT_L0', cond_ctxt)

        # Pressure - values expected to be the decimal results of conversion from hex
        press_ctxt = ParameterContext(
            'PRESWAT_L0', param_type=QuantityType(value_encoding=np.dtype('float32')),
            fill_value=-9999)
        press_ctxt.uom = 'dbar'
        self._register_context(contexts, 'PRESWAT_L0', 'test_PRESWAT_L0', press_ctxt)

        # Dependent Parameters

        # TEMPWAT_L1 = (TEMPWAT_L0 / 10000) - 10
        tl1_func = '(T / 10000) - 10'
        expr = NumexprFunction('TEMPWAT_L1', tl1_func, ['T'])
        expr_id = self._register_function(funcs, 'TEMPWAT_L1', 'test_TEMPWAT_L1', expr)
        expr.param_map = {'T': 'TEMPWAT_L0'}
        tempL1_ctxt = ParameterContext(
            'TEMPWAT_L1', param_type=ParameterFunctionType(function=expr),
            variability=VariabilityEnum.TEMPORAL)
        tempL1_ctxt.uom = 'deg_C'
        self._register_context(contexts, 'TEMPWAT_L1', 'test_TEMPWAT_L1', tempL1_ctxt,
                               parameter_function_ids=[expr_id])

        # CONDWAT_L1 = (CONDWAT_L0 / 100000) - 0.5
        cl1_func = '(C / 100000) - 0.5'
        expr = NumexprFunction('CONDWAT_L1', cl1_func, ['C'])
        expr_id = self._register_function(funcs, 'CONDWAT_L1', 'test_CONDWAT_L1', expr)
        expr.param_map = {'C': 'CONDWAT_L0'}
        condL1_ctxt = ParameterContext(
            'CONDWAT_L1', param_type=ParameterFunctionType(function=expr),
            variability=VariabilityEnum.TEMPORAL)
        condL1_ctxt.uom = 'S m-1'
        self._register_context(contexts, 'CONDWAT_L1', 'test_CONDWAT_L1', condL1_ctxt,
                               parameter_function_ids=[expr_id])

        # Equation uses p_range, which is a calibration coefficient - Fixing to 679.34040721
        # PRESWAT_L1 = (PRESWAT_L0 * p_range / (0.85 * 65536)) - (0.05 * p_range)
        pl1_func = '(P * p_range / (0.85 * 65536)) - (0.05 * p_range)'
        expr = NumexprFunction('PRESWAT_L1', pl1_func, ['P', 'p_range'])
        expr_id = self._register_function(funcs, 'PRESWAT_L1', 'test_PRESWAT_L1', expr)
        expr.param_map = {'P': 'PRESWAT_L0', 'p_range': 679.34040721}
        presL1_ctxt = ParameterContext(
            'PRESWAT_L1', param_type=ParameterFunctionType(function=expr),
            variability=VariabilityEnum.TEMPORAL)
        # Pressure is in dbar like PRESWAT_L0, and the resource gets its own
        # name (both were previously copy-pasted from the conductivity block).
        presL1_ctxt.uom = 'dbar'
        self._register_context(contexts, 'PRESWAT_L1', 'test_PRESWAT_L1', presL1_ctxt,
                               parameter_function_ids=[expr_id])

        # Density & practical salinity calculated using the Gibbs Seawater library -
        # available via python-gsw project:
        # https://code.google.com/p/python-gsw/ & http://pypi.python.org/pypi/gsw/3.0.1

        # PRACSAL = gsw.SP_from_C((CONDWAT_L1 * 10), TEMPWAT_L1, PRESWAT_L1)
        owner = 'gsw'
        sal_func = 'SP_from_C'
        sal_arglist = ['C', 't', 'p']
        expr = PythonFunction('PRACSAL', owner, sal_func, sal_arglist)
        expr_id = self._register_function(funcs, 'PRACSAL', 'test_PRACSAL', expr)

        # A magic function that may or may not exist actually forms the line below at runtime.
        sal_pmap = {
            'C': NumexprFunction('CONDWAT_L1*10', 'C*10', ['C'],
                                 param_map={'C': 'CONDWAT_L1'}),
            't': 'TEMPWAT_L1',
            'p': 'PRESWAT_L1',
        }
        expr.param_map = sal_pmap
        sal_ctxt = ParameterContext(
            'PRACSAL', param_type=ParameterFunctionType(expr),
            variability=VariabilityEnum.TEMPORAL)
        sal_ctxt.uom = 'g kg-1'
        self._register_context(contexts, 'PRACSAL', 'test_PRACSAL', sal_ctxt,
                               parameter_function_ids=[expr_id])

        # absolute_salinity = gsw.SA_from_SP(PRACSAL, PRESWAT_L1, longitude, latitude)
        # conservative_temperature = gsw.CT_from_t(absolute_salinity, TEMPWAT_L1, PRESWAT_L1)
        # DENSITY = gsw.rho(absolute_salinity, conservative_temperature, PRESWAT_L1)
        owner = 'gsw'
        abs_sal_expr = PythonFunction(
            'abs_sal', owner, 'SA_from_SP', ['PRACSAL', 'PRESWAT_L1', 'LON', 'LAT'])
        cons_temp_expr = PythonFunction(
            'cons_temp', owner, 'CT_from_t', [abs_sal_expr, 'TEMPWAT_L1', 'PRESWAT_L1'])
        dens_expr = PythonFunction(
            'DENSITY', owner, 'rho', [abs_sal_expr, cons_temp_expr, 'PRESWAT_L1'])
        dens_ctxt = ParameterContext(
            'DENSITY', param_type=ParameterFunctionType(dens_expr),
            variability=VariabilityEnum.TEMPORAL)
        dens_ctxt.uom = 'kg m-3'
        self._register_context(contexts, 'DENSITY', 'test_DENSITY', dens_ctxt)

        return contexts, funcs
class TestDMEnd2End(IonIntegrationTestCase):
    '''
    End-to-end data management tests: publish granules on a stream, persist
    them through ingestion into a dataset, and read them back through the
    data retriever (RPC retrieve, streamed replay, last data points, queries).
    '''

    def setUp(self):  # Love the non pep-8 convention
        '''
        Start the container, deploy r2deploy.yml and create service clients
        '''
        self._start_container()
        self.container.start_rel_from_url('res/deploy/r2deploy.yml')

        self.process_dispatcher = ProcessDispatcherServiceClient()
        self.pubsub_management = PubsubManagementServiceClient()
        self.resource_registry = ResourceRegistryServiceClient()
        self.dataset_management = DatasetManagementServiceClient()
        self.ingestion_management = IngestionManagementServiceClient()
        self.data_retriever = DataRetrieverServiceClient()
        self.event = Event()
        self.exchange_space_name = 'test_granules'
        self.exchange_point_name = 'science_data'
        # Counter used to build distinct resource names (see make_simple_dataset)
        self.i = 0
        self.cci = 0

    #--------------------------------------------------------------------------------
    # Helper/Utility methods
    #--------------------------------------------------------------------------------

    def create_dataset(self, parameter_dict_id=''):
        '''
        Creates a time-series dataset

        Falls back to the 'ctd_parsed_param_dict' dictionary when no
        parameter_dict_id is given. The dataset is deleted on cleanup.
        Returns the dataset id.
        '''
        if not parameter_dict_id:
            parameter_dict_id = self.dataset_management.read_parameter_dictionary_by_name(
                'ctd_parsed_param_dict', id_only=True)

        dataset = Dataset('test_dataset_%i' % self.i)
        dataset_id = self.dataset_management.create_dataset(
            dataset, parameter_dictionary_id=parameter_dict_id)
        self.addCleanup(self.dataset_management.delete_dataset, dataset_id)
        return dataset_id

    def get_datastore(self, dataset_id):
        '''
        Gets an instance of the datastore

        This method is primarily used to defeat a bug where integration tests
        in multiple containers may sometimes delete a CouchDB datastore and
        the other containers are unaware of the new state of the datastore.
        '''
        dataset = self.dataset_management.read_dataset(dataset_id)
        datastore_name = dataset.datastore_name
        return self.container.datastore_manager.get_datastore(
            datastore_name, DataStore.DS_PROFILE.SCIDATA)

    def get_ingestion_config(self):
        '''
        Grab the ingestion configuration from the resource registry
        '''
        # The ingestion configuration should have been created by the bootstrap service
        # which is configured through r2deploy.yml
        ingest_configs, _ = self.resource_registry.find_resources(
            restype=RT.IngestionConfiguration, id_only=True)
        return ingest_configs[0]

    def launch_producer(self, stream_id=''):
        '''
        Launch the producer

        The spawned process is terminated on cleanup.
        '''
        pid = self.container.spawn_process(
            'better_data_producer',
            'ion.processes.data.example_data_producer',
            'BetterDataProducer',
            {'process': {'stream_id': stream_id}})
        self.addCleanup(self.container.terminate_process, pid)

    def make_simple_dataset(self):
        '''
        Makes a stream, a stream definition and a dataset, the essentials
        for most of these tests

        Returns (stream_id, route, stream_def_id, dataset_id).
        '''
        pdict_id = self.dataset_management.read_parameter_dictionary_by_name(
            'ctd_parsed_param_dict', id_only=True)
        stream_def_id = self.pubsub_management.create_stream_definition(
            'ctd data %i' % self.i, parameter_dictionary_id=pdict_id)
        self.addCleanup(self.pubsub_management.delete_stream_definition, stream_def_id)
        stream_id, route = self.pubsub_management.create_stream(
            'ctd stream %i' % self.i, 'xp1', stream_definition_id=stream_def_id)
        self.addCleanup(self.pubsub_management.delete_stream, stream_id)

        dataset_id = self.create_dataset(pdict_id)

        # self.get_datastore(dataset_id)
        self.i += 1
        return stream_id, route, stream_def_id, dataset_id

    def publish_hifi(self, stream_id, stream_route, offset=0):
        '''
        Publish deterministic data

        Publishes one granule of ten records whose 'time' and 'temp' values
        are offset*10 .. offset*10 + 9.
        '''
        pub = StandaloneStreamPublisher(stream_id, stream_route)

        stream_def = self.pubsub_management.read_stream_definition(stream_id=stream_id)
        stream_def_id = stream_def._id
        rdt = RecordDictionaryTool(stream_definition_id=stream_def_id)
        rdt['time'] = np.arange(10) + (offset * 10)
        rdt['temp'] = np.arange(10) + (offset * 10)
        pub.publish(rdt.to_granule())

    def publish_fake_data(self, stream_id, route):
        '''
        Make four granules
        '''
        for i in xrange(4):
            self.publish_hifi(stream_id, route, i)

    def start_ingestion(self, stream_id, dataset_id):
        '''
        Starts ingestion/persistence for a given dataset
        '''
        ingest_config_id = self.get_ingestion_config()
        self.ingestion_management.persist_data_stream(
            stream_id=stream_id,
            ingestion_configuration_id=ingest_config_id,
            dataset_id=dataset_id)

    def stop_ingestion(self, stream_id):
        '''
        Stops ingestion/persistence for a given stream
        '''
        ingest_config_id = self.get_ingestion_config()
        self.ingestion_management.unpersist_data_stream(
            stream_id=stream_id, ingestion_configuration_id=ingest_config_id)

    def validate_granule_subscription(self, msg, route, stream_id):
        '''
        Validation for granule format

        Empty dict messages are ignored. For anything else the type is
        checked before parsing, so a malformed message is reported with the
        intended assertion message; self.event is set on success.
        '''
        if msg == {}:
            return
        self.assertIsInstance(msg, Granule,
                              'Message is improperly formatted. (%s)' % type(msg))
        rdt = RecordDictionaryTool.load_from_granule(msg)
        log.info('%s', rdt.pretty_print())
        self.event.set()

    def wait_until_we_have_enough_granules(self, dataset_id='', data_size=40):
        '''
        Loops until there is a sufficient amount of data in the dataset

        Polls every 0.2 seconds inside a 40 second gevent.Timeout until the
        last data point has a non-fill 'time' value and the 'time' extents
        reach data_size.
        '''
        with gevent.Timeout(40):
            while True:
                extents = self.dataset_management.dataset_extents(dataset_id, 'time')
                granule = self.data_retriever.retrieve_last_data_points(dataset_id, 1)
                rdt = RecordDictionaryTool.load_from_granule(granule)
                times = rdt['time']
                # Explicit None/length test instead of array truthiness, which
                # is false for a timestamp of 0 and ambiguous for arrays with
                # more than one element.
                has_point = times is not None and len(times) > 0
                if (has_point
                        and times[0] != rdt._pdict.get_context('time').fill_value
                        and extents >= data_size):
                    break
                gevent.sleep(0.2)

    def _create_replay_pdict(self):
        '''
        Create the 'replay_pdict' parameter dictionary: the basic ctd fields
        plus a 'binary' (ArrayType) and a 'records' (RecordType) context

        Returns the id of the new parameter dictionary.
        '''
        # Get a precompiled parameter dictionary with basic ctd fields
        pdict_id = self.dataset_management.read_parameter_dictionary_by_name(
            'ctd_parsed_param_dict', id_only=True)
        context_ids = self.dataset_management.read_parameter_contexts(pdict_id, id_only=True)

        # Add a field that supports binary data input.
        bin_context = ParameterContext('binary', param_type=ArrayType())
        context_ids.append(
            self.dataset_management.create_parameter_context('binary', bin_context.dump()))
        # Add another field that supports dictionary elements.
        rec_context = ParameterContext('records', param_type=RecordType())
        context_ids.append(
            self.dataset_management.create_parameter_context('records', rec_context.dump()))

        return self.dataset_management.create_parameter_dictionary(
            'replay_pdict', parameter_context_ids=context_ids, temporal_context='time')

    #--------------------------------------------------------------------------------
    # Test Methods
    #--------------------------------------------------------------------------------

    def test_dm_end_2_end(self):
        #--------------------------------------------------------------------------------
        # Set up a stream and have a mock instrument (producer) send data
        #--------------------------------------------------------------------------------
        self.event.clear()

        pdict_id = self._create_replay_pdict()

        stream_definition = self.pubsub_management.create_stream_definition(
            'ctd data', parameter_dictionary_id=pdict_id)

        stream_id, route = self.pubsub_management.create_stream(
            'producer', exchange_point=self.exchange_point_name,
            stream_definition_id=stream_definition)

        #--------------------------------------------------------------------------------
        # Start persisting the data on the stream
        # - Get the ingestion configuration from the resource registry
        # - Create the dataset
        # - call persist_data_stream to setup the subscription for the ingestion workers
        #   on the stream that you specify which causes the data to be persisted
        #--------------------------------------------------------------------------------
        ingest_config_id = self.get_ingestion_config()
        dataset_id = self.create_dataset(pdict_id)
        self.ingestion_management.persist_data_stream(
            stream_id=stream_id,
            ingestion_configuration_id=ingest_config_id,
            dataset_id=dataset_id)
        self.addCleanup(self.stop_ingestion, stream_id)

        #--------------------------------------------------------------------------------
        # Now the granules are ingesting and persisted
        #--------------------------------------------------------------------------------
        self.launch_producer(stream_id)
        self.wait_until_we_have_enough_granules(dataset_id, 40)

        #--------------------------------------------------------------------------------
        # Now get the data in one chunk using an RPC Call to start_retreive
        #--------------------------------------------------------------------------------
        replay_data = self.data_retriever.retrieve(dataset_id)
        self.assertIsInstance(replay_data, Granule)
        rdt = RecordDictionaryTool.load_from_granule(replay_data)
        self.assertTrue((rdt['time'][:10] == np.arange(10)).all(), '%s' % rdt['time'][:])
        self.assertTrue((rdt['binary'][:10] == np.array(['hi'] * 10, dtype='object')).all())

        #--------------------------------------------------------------------------------
        # Now to try the streamed approach
        #--------------------------------------------------------------------------------
        replay_stream_id, replay_route = self.pubsub_management.create_stream(
            'replay_out', exchange_point=self.exchange_point_name,
            stream_definition_id=stream_definition)
        self.replay_id, process_id = self.data_retriever.define_replay(
            dataset_id=dataset_id, stream_id=replay_stream_id)
        log.info('Process ID: %s', process_id)

        replay_client = ReplayClient(process_id)

        #--------------------------------------------------------------------------------
        # Create the listening endpoint for the retriever to talk to
        #--------------------------------------------------------------------------------
        sub_id = self.pubsub_management.create_subscription(
            self.exchange_space_name, stream_ids=[replay_stream_id])
        self.addCleanup(self.pubsub_management.delete_subscription, sub_id)
        self.pubsub_management.activate_subscription(sub_id)
        self.addCleanup(self.pubsub_management.deactivate_subscription, sub_id)
        subscriber = StandaloneStreamSubscriber(
            self.exchange_space_name, self.validate_granule_subscription)
        subscriber.start()
        self.addCleanup(subscriber.stop)

        self.data_retriever.start_replay_agent(self.replay_id)

        self.assertTrue(replay_client.await_agent_ready(5), 'The process never launched')
        replay_client.start_replay()

        self.assertTrue(self.event.wait(10))

        self.data_retriever.cancel_replay_agent(self.replay_id)

        #--------------------------------------------------------------------------------
        # Test the slicing capabilities
        #--------------------------------------------------------------------------------
        granule = self.data_retriever.retrieve(
            dataset_id=dataset_id, query={'tdoa': slice(0, 5)})
        rdt = RecordDictionaryTool.load_from_granule(granule)
        b = rdt['time'] == np.arange(5)
        # The comparison yields a plain bool when the shapes do not line up
        self.assertTrue(b.all() if not isinstance(b, bool) else b)

    def test_coverage_transform(self):
        '''
        Publish one parsed granule and check the derived L1, density and
        salinity values read back from the dataset
        '''
        ph = ParameterHelper(self.dataset_management, self.addCleanup)
        pdict_id = ph.create_parsed()
        stream_def_id = self.pubsub_management.create_stream_definition(
            'ctd parsed', parameter_dictionary_id=pdict_id)
        self.addCleanup(self.pubsub_management.delete_stream_definition, stream_def_id)

        stream_id, route = self.pubsub_management.create_stream(
            'example', exchange_point=self.exchange_point_name,
            stream_definition_id=stream_def_id)
        self.addCleanup(self.pubsub_management.delete_stream, stream_id)

        ingestion_config_id = self.get_ingestion_config()
        dataset_id = self.create_dataset(pdict_id)

        self.ingestion_management.persist_data_stream(
            stream_id=stream_id,
            ingestion_configuration_id=ingestion_config_id,
            dataset_id=dataset_id)
        self.addCleanup(self.ingestion_management.unpersist_data_stream,
                        stream_id, ingestion_config_id)
        publisher = StandaloneStreamPublisher(stream_id, route)

        rdt = ph.get_rdt(stream_def_id)
        ph.fill_parsed_rdt(rdt)

        dataset_monitor = DatasetMonitor(dataset_id)
        self.addCleanup(dataset_monitor.stop)

        publisher.publish(rdt.to_granule())
        self.assertTrue(dataset_monitor.wait())

        replay_granule = self.data_retriever.retrieve(dataset_id)
        rdt_out = RecordDictionaryTool.load_from_granule(replay_granule)

        np.testing.assert_array_almost_equal(rdt_out['time'], rdt['time'])
        np.testing.assert_array_almost_equal(rdt_out['temp'], rdt['temp'])

        np.testing.assert_allclose(rdt_out['conductivity_L1'], np.array([42.914]))
        np.testing.assert_allclose(rdt_out['temp_L1'], np.array([20.]))
        np.testing.assert_allclose(rdt_out['pressure_L1'], np.array([3.068]))
        np.testing.assert_allclose(
            rdt_out['density'], np.array([1021.7144739593881], dtype='float32'))
        np.testing.assert_allclose(
            rdt_out['salinity'], np.array([30.935132729668283], dtype='float32'))

    def test_ingestion_pause(self):
        '''
        Data published while the stream is paused must not reach the dataset
        until the stream is resumed
        '''
        ctd_stream_id, route, stream_def_id, dataset_id = self.make_simple_dataset()
        ingestion_config_id = self.get_ingestion_config()
        self.start_ingestion(ctd_stream_id, dataset_id)
        self.addCleanup(self.stop_ingestion, ctd_stream_id)

        rdt = RecordDictionaryTool(stream_definition_id=stream_def_id)
        rdt['time'] = np.arange(10)

        publisher = StandaloneStreamPublisher(ctd_stream_id, route)
        monitor = DatasetMonitor(dataset_id)
        self.addCleanup(monitor.stop)
        publisher.publish(rdt.to_granule())
        self.assertTrue(monitor.wait())
        # Retrieval is exercised here; the result itself is not inspected
        self.data_retriever.retrieve(dataset_id)

        self.ingestion_management.pause_data_stream(ctd_stream_id, ingestion_config_id)

        monitor.event.clear()
        rdt['time'] = np.arange(10, 20)
        publisher.publish(rdt.to_granule())
        # Nothing may arrive while paused
        self.assertFalse(monitor.event.wait(1))

        self.ingestion_management.resume_data_stream(ctd_stream_id, ingestion_config_id)

        self.assertTrue(monitor.wait())

        granule = self.data_retriever.retrieve(dataset_id)
        rdt2 = RecordDictionaryTool.load_from_granule(granule)
        np.testing.assert_array_almost_equal(rdt2['time'], np.arange(20))

    def test_last_granule(self):
        '''
        retrieve_last_data_points must return the newest N points
        '''
        stream_id, route, stream_def_id, dataset_id = self.make_simple_dataset()
        self.start_ingestion(stream_id, dataset_id)
        self.addCleanup(self.stop_ingestion, stream_id)

        self.publish_hifi(stream_id, route, 0)
        self.publish_hifi(stream_id, route, 1)

        self.wait_until_we_have_enough_granules(dataset_id, 20)  # I just need two

        def last_points_match(count, expected):
            # Build a poll() predicate: true once the last `count` time
            # values equal `expected`.
            def check():
                replay_granule = self.data_retriever.retrieve_last_data_points(
                    dataset_id, count)
                rdt = RecordDictionaryTool.load_from_granule(replay_granule)
                comp = rdt['time'] == expected
                if not isinstance(comp, bool):
                    return comp.all()
                return False
            return check

        self.assertTrue(poll(last_points_match(10, np.arange(10) + 10)))
        self.assertTrue(poll(last_points_match(5, np.arange(15, 20))))

    def test_replay_with_parameters(self):
        #--------------------------------------------------------------------------------
        # Create the configurations and the dataset
        #--------------------------------------------------------------------------------
        pdict_id = self._create_replay_pdict()

        stream_def_id = self.pubsub_management.create_stream_definition(
            'replay_stream', parameter_dictionary_id=pdict_id)

        stream_id, route = self.pubsub_management.create_stream(
            'replay_with_params', exchange_point=self.exchange_point_name,
            stream_definition_id=stream_def_id)
        config_id = self.get_ingestion_config()
        dataset_id = self.create_dataset(pdict_id)
        self.ingestion_management.persist_data_stream(
            stream_id=stream_id,
            ingestion_configuration_id=config_id,
            dataset_id=dataset_id)
        self.addCleanup(self.stop_ingestion, stream_id)

        dataset_monitor = DatasetMonitor(dataset_id)
        self.addCleanup(dataset_monitor.stop)

        self.publish_fake_data(stream_id, route)

        self.assertTrue(dataset_monitor.wait())

        query = {
            'start_time': 0 - 2208988800,
            'end_time': 19 - 2208988800,
            'stride_time': 2,
            'parameters': ['time', 'temp'],
        }
        retrieved_data = self.data_retriever.retrieve(dataset_id=dataset_id, query=query)

        rdt = RecordDictionaryTool.load_from_granule(retrieved_data)
        np.testing.assert_array_equal(rdt['time'], np.arange(0, 20, 2))
        self.assertEqual(set(rdt.iterkeys()), set(['time', 'temp']))

        extents = self.dataset_management.dataset_extents(
            dataset_id=dataset_id, parameters=['time', 'temp'])
        self.assertTrue(extents['time'] >= 20)
        self.assertTrue(extents['temp'] >= 20)

    def test_repersist_data(self):
        '''
        Unpersist and re-persist a stream; data published before and after
        must end up in the same dataset
        '''
        stream_id, route, stream_def_id, dataset_id = self.make_simple_dataset()
        self.start_ingestion(stream_id, dataset_id)
        self.publish_hifi(stream_id, route, 0)
        self.publish_hifi(stream_id, route, 1)
        self.wait_until_we_have_enough_granules(dataset_id, 20)
        config_id = self.get_ingestion_config()
        self.ingestion_management.unpersist_data_stream(
            stream_id=stream_id, ingestion_configuration_id=config_id)
        self.ingestion_management.persist_data_stream(
            stream_id=stream_id, ingestion_configuration_id=config_id,
            dataset_id=dataset_id)
        self.addCleanup(self.stop_ingestion, stream_id)
        self.publish_hifi(stream_id, route, 2)
        self.publish_hifi(stream_id, route, 3)
        self.wait_until_we_have_enough_granules(dataset_id, 40)

        success = False
        with gevent.timeout.Timeout(5):
            while not success:
                replay_granule = self.data_retriever.retrieve(dataset_id)
                rdt = RecordDictionaryTool.load_from_granule(replay_granule)
                comp = rdt['time'] == np.arange(0, 40)
                if not isinstance(comp, bool):
                    success = comp.all()
                gevent.sleep(1)

        self.assertTrue(success)

    @unittest.skip('deprecated')
    def test_correct_time(self):
        # There are 2208988800 seconds between Jan 1 1900 and Jan 1 1970, i.e.
        # the conversion factor between unix and NTP time
        unix_now = np.floor(time.time())
        ntp_now = unix_now + 2208988800

        unix_ago = unix_now - 20
        ntp_ago = unix_ago + 2208988800

        stream_id, route, stream_def_id, dataset_id = self.make_simple_dataset()
        coverage = DatasetManagementService._get_simplex_coverage(dataset_id, mode='a')
        coverage.insert_timesteps(20)
        coverage.set_parameter_values('time', np.arange(ntp_ago, ntp_now))

        temporal_bounds = self.dataset_management.dataset_temporal_bounds(dataset_id)

        self.assertTrue(np.abs(temporal_bounds[0] - unix_ago) < 2)
        self.assertTrue(np.abs(temporal_bounds[1] - unix_now) < 2)

    @attr('LOCOINT')
    @unittest.skipIf(os.getenv('CEI_LAUNCH_TEST', False), 'Host requires file-system access to coverage files, CEI mode does not support.')
    def test_out_of_band_retrieve(self):
        # Setup the environment
        stream_id, route, stream_def_id, dataset_id = self.make_simple_dataset()
        self.start_ingestion(stream_id, dataset_id)
        # Stop persisting once the test is over, like the other tests do
        self.addCleanup(self.stop_ingestion, stream_id)

        # Fill the dataset
        self.publish_fake_data(stream_id, route)
        self.wait_until_we_have_enough_granules(dataset_id, 40)

        # Retrieve the data
        granule = DataRetrieverService.retrieve_oob(dataset_id)
        rdt = RecordDictionaryTool.load_from_granule(granule)
        self.assertTrue((rdt['time'] == np.arange(40)).all())

    def publish_and_wait(self, dataset_id, granule):
        '''
        Publish the granule on the first stream associated with the dataset
        and wait for the dataset monitor to fire
        '''
        stream_ids, _ = self.resource_registry.find_objects(
            dataset_id, PRED.hasStream, id_only=True)
        stream_id = stream_ids[0]
        route = self.pubsub_management.read_stream_route(stream_id)
        publisher = StandaloneStreamPublisher(stream_id, route)
        dataset_monitor = DatasetMonitor(dataset_id)
        self.addCleanup(dataset_monitor.stop)
        publisher.publish(granule)
        self.assertTrue(dataset_monitor.wait())

    def test_sparse_values(self):
        '''
        lat/lon set in one granule must carry over to the records that follow
        '''
        ph = ParameterHelper(self.dataset_management, self.addCleanup)
        pdict_id = ph.create_sparse()
        stream_def_id = self.pubsub_management.create_stream_definition(
            'sparse', parameter_dictionary_id=pdict_id)
        self.addCleanup(self.pubsub_management.delete_stream_definition, stream_def_id)
        stream_id, route = self.pubsub_management.create_stream(
            'example', exchange_point=self.exchange_point_name,
            stream_definition_id=stream_def_id)
        self.addCleanup(self.pubsub_management.delete_stream, stream_id)
        dataset_id = self.create_dataset(pdict_id)
        self.start_ingestion(stream_id, dataset_id)
        self.addCleanup(self.stop_ingestion, stream_id)

        # Publish initial granule
        # the first one has the sparse value set inside it, sets lat to 45 and lon to -71
        ntp_now = time.time() + 2208988800
        rdt = ph.get_rdt(stream_def_id)
        rdt['time'] = [ntp_now]
        rdt['internal_timestamp'] = [ntp_now]
        rdt['temp'] = [300000]
        rdt['preferred_timestamp'] = ['driver_timestamp']
        rdt['port_timestamp'] = [ntp_now]
        rdt['quality_flag'] = ['']
        rdt['lat'] = [45]
        rdt['conductivity'] = [4341400]
        rdt['driver_timestamp'] = [ntp_now]
        rdt['lon'] = [-71]
        rdt['pressure'] = [256.8]

        publisher = StandaloneStreamPublisher(stream_id, route)
        dataset_monitor = DatasetMonitor(dataset_id)
        self.addCleanup(dataset_monitor.stop)
        publisher.publish(rdt.to_granule())
        self.assertTrue(dataset_monitor.wait())
        dataset_monitor.reset()

        replay_granule = self.data_retriever.retrieve(dataset_id)
        rdt_out = RecordDictionaryTool.load_from_granule(replay_granule)

        # Check the values and make sure they're correct
        np.testing.assert_allclose(rdt_out['time'], rdt['time'])
        np.testing.assert_allclose(rdt_out['temp'], rdt['temp'])
        np.testing.assert_allclose(rdt_out['lat'], np.array([45]))
        np.testing.assert_allclose(rdt_out['lon'], np.array([-71]))

        np.testing.assert_allclose(rdt_out['conductivity_L1'], np.array([42.914]))
        np.testing.assert_allclose(rdt_out['temp_L1'], np.array([20.]))
        np.testing.assert_allclose(rdt_out['pressure_L1'], np.array([3.068]))
        np.testing.assert_allclose(
            rdt_out['density'], np.array([1021.7144739593881], dtype='float32'))
        np.testing.assert_allclose(
            rdt_out['salinity'], np.array([30.935132729668283], dtype='float32'))

        # We're going to change the lat/lon
        rdt = ph.get_rdt(stream_def_id)
        rdt['time'] = time.time() + 2208988800
        rdt['lat'] = [46]
        rdt['lon'] = [-73]

        publisher.publish(rdt.to_granule())
        self.assertTrue(dataset_monitor.wait())
        dataset_monitor.reset()

        replay_granule = self.data_retriever.retrieve(dataset_id)
        rdt_out = RecordDictionaryTool.load_from_granule(replay_granule)

        np.testing.assert_allclose(rdt_out['time'], rdt['time'])

        for _ in xrange(9):
            ntp_now = time.time() + 2208988800
            rdt['time'] = [ntp_now]
            rdt['internal_timestamp'] = [ntp_now]
            rdt['temp'] = [300000]
            rdt['preferred_timestamp'] = ['driver_timestamp']
            rdt['port_timestamp'] = [ntp_now]
            rdt['quality_flag'] = [None]
            rdt['conductivity'] = [4341400]
            rdt['driver_timestamp'] = [ntp_now]
            rdt['pressure'] = [256.8]

            publisher.publish(rdt.to_granule())
            self.assertTrue(dataset_monitor.wait())
            dataset_monitor.reset()

        replay_granule = self.data_retriever.retrieve(dataset_id)
        rdt_out = RecordDictionaryTool.load_from_granule(replay_granule)

        np.testing.assert_allclose(rdt_out['pressure'], np.array([256.8] * 10))
        np.testing.assert_allclose(rdt_out['lat'], np.array([45] + [46] * 9))
        np.testing.assert_allclose(rdt_out['lon'], np.array([-71] + [-73] * 9))
class CtdTransformsIntTest(IonIntegrationTestCase):
    """
    Integration test for the CTDBP L1 transform: builds input/output data
    products, launches the data process and checks the granule it emits.
    """

    def setUp(self):
        """Start the container, deploy r2deploy.yml and create the service clients."""
        super(CtdTransformsIntTest, self).setUp()

        self._start_container()
        self.container.start_rel_from_url('res/deploy/r2deploy.yml')

        self.pubsub = PubsubManagementServiceClient()
        self.process_dispatcher = ProcessDispatcherServiceClient()
        self.dataset_management = DatasetManagementServiceClient()
        self.data_process_management = DataProcessManagementServiceClient()
        self.dataproduct_management = DataProductManagementServiceClient()
        self.resource_registry = ResourceRegistryServiceClient()

        # Running counter used for the time values of the packets fed to the transform
        self.i = 0

        # Names/ids consumed by clean_queues() and cleaning_operations()
        self.queue_cleanup = []
        self.data_process_cleanup = []

    def _create_input_param_dict_for_test(self, parameter_dict_name = ''):
        """
        Create a parameter dictionary named ``parameter_dict_name`` holding
        time, conductivity, pressure, temperature/temp, density and salinity
        contexts, and return its id.

        The temperature context is called 'temperature' when the name is
        'input_param_dict' and 'temp' otherwise.
        """
        is_input = parameter_dict_name == 'input_param_dict'
        is_output = parameter_dict_name == 'output_param_dict'

        pdict = ParameterDictionary()

        # The time axis is the only float64 context
        time_context = ParameterContext('time', param_type=QuantityType(value_encoding=numpy.dtype('float64')))
        time_context.axis = AxisTypeEnum.TIME
        time_context.uom = 'seconds since 01-01-1900'
        pdict.add_context(time_context)

        # Remaining contexts are float32 with an empty unit, added in this order
        temperature_name = 'temperature' if is_input else 'temp'
        for context_name in ('conductivity', 'pressure', temperature_name, 'density', 'salinity'):
            context = ParameterContext(context_name, param_type=QuantityType(value_encoding=numpy.dtype('float32')))
            context.uom = ''
            pdict.add_context(context)

        #create temp streamdef so the data product can create the stream
        created_ids = []
        for key, entry in pdict.iteritems():
            context_id = self.dataset_management.create_parameter_context(key, entry[1].dump())
            created_ids.append(context_id)
            # Every input context is deleted on cleanup; for the output dictionary only 'temp' is
            if is_input or (is_output and entry[1].name == 'temp'):
                self.addCleanup(self.dataset_management.delete_parameter_context,ctxt_id=None) if False else self.addCleanup(self.dataset_management.delete_parameter_context, context_id)

        pdict_id = self.dataset_management.create_parameter_dictionary(parameter_dict_name, created_ids)
        self.addCleanup(self.dataset_management.delete_parameter_dictionary, pdict_id)

        return pdict_id

    def _get_new_ctd_L0_packet(self, stream_definition_id, length):
        """
        Build a granule of ``length`` records for the given stream definition
        and advance the running time counter by ``length``.
        """
        packet = RecordDictionaryTool(stream_definition_id=stream_definition_id)
        packet['time'] = numpy.arange(self.i, self.i+length)

        # Every QuantityType field is filled with uniform random values in [0, 75]
        for field in packet:
            field_type = packet._pdict.get_context(field).param_type
            if isinstance(field_type, QuantityType):
                samples = [random.uniform(0.0,75.0) for _ in xrange(length)]
                packet[field] = numpy.array(samples)

        granule = packet.to_granule()
        self.i += length

        return granule

    def _create_calibration_coefficients_dict(self):
        """Return a DotDict whose process.calibration_coeffs holds the temp/cond/pres coefficients."""
        temperature_coeffs = {
            'TA0' : 1.561342e-03,
            'TA1' : 2.561486e-04,
            'TA2' : 1.896537e-07,
            'TA3' : 1.301189e-07,
            'TOFFSET' : 0.000000e+00
        }
        conductivity_coeffs = {
            'G' : -9.896568e-01,
            'H' : 1.316599e-01,
            'I' : -2.213854e-04,
            'J' : 3.292199e-05,
            'CPCOR' : -9.570000e-08,
            'CTCOR' : 3.250000e-06,
            'CSLOPE' : 1.000000e+00
        }
        pressure_coeffs = {
            'PA0' : 4.960417e-02,
            'PA1' : 4.883682e-04,
            'PA2' : -5.687309e-12,
            'PTCA0' : 5.249802e+05,
            'PTCA1' : 7.595719e+00,
            'PTCA2' : -1.322776e-01,
            'PTCB0' : 2.503125e+01,
            'PTCB1' : 5.000000e-05,
            'PTCB2' : 0.000000e+00,
            'PTEMPA0' : -6.431504e+01,
            'PTEMPA1' : 5.168177e+01,
            'PTEMPA2' : -2.847757e-01,
            'POFFSET' : 0.000000e+00
        }

        config = DotDict()
        config.process.calibration_coeffs = {
            'temp_calibration_coeffs': temperature_coeffs,
            'cond_calibration_coeffs': conductivity_coeffs,
            'pres_calibration_coeffs' : pressure_coeffs
        }

        return config

    def clean_queues(self):
        """Delete every queue named in ``self.queue_cleanup``."""
        for queue_name in self.queue_cleanup:
            self.container.ex_manager.create_xn_queue(queue_name).delete()

    def cleaning_operations(self):
        """Delete every data process listed in ``self.data_process_cleanup``."""
        for process_id in self.data_process_cleanup:
            self.data_process_management.delete_data_process(process_id)

    def test_ctd_L1_all(self):
        """
        Test that packets are processed by the ctd_L1_all transform
        """

        #----------- Data Process Definition --------------------------------

        definition = IonObject(RT.DataProcessDefinition,
            name='CTDBP_L1_Transform',
            description='Take granules on the L0 stream which have the C, T and P data and separately apply algorithms and output on the L1 stream.',
            module='ion.processes.data.transforms.ctdbp.ctdbp_L1',
            class_name='CTDBP_L1_Transform')

        definition_id = self.data_process_management.create_data_process_definition(definition)
        self.addCleanup(self.data_process_management.delete_data_process_definition, definition_id)

        log.debug("created data process definition: id = %s", definition_id)

        #----------- Data Products --------------------------------

        # Construct temporal and spatial Coordinate Reference System objects
        temporal, spatial = time_series_domain()
        spatial_dump = spatial.dump()
        temporal_dump = temporal.dump()

        # Get the stream definition for the stream using the parameter dictionary
        input_pdict_id = self._create_input_param_dict_for_test(parameter_dict_name = 'input_param_dict')
        input_def_id = self.pubsub.create_stream_definition(name='parsed', parameter_dictionary_id=input_pdict_id)
        self.addCleanup(self.pubsub.delete_stream_definition, input_def_id)

        output_pdict_id = self._create_input_param_dict_for_test(parameter_dict_name = 'output_param_dict')
        output_def_id = self.pubsub.create_stream_definition(name='L1_out', parameter_dictionary_id=output_pdict_id)
        self.addCleanup(self.pubsub.delete_stream_definition, output_def_id)

        log.debug("Got the parsed parameter dictionary: id: %s", input_pdict_id)
        log.debug("Got the stream def for parsed input: %s", input_def_id)
        log.debug("got the stream def for the output: %s", output_def_id)

        # Input data product
        input_product = IonObject(RT.DataProduct,
            name='L0_stream',
            description='L0 stream input to CTBP L1 transform',
            temporal_domain = temporal_dump,
            spatial_domain = spatial_dump)

        input_dp_id = self.dataproduct_management.create_data_product(data_product=input_product,
            stream_definition_id=input_def_id
        )
        self.addCleanup(self.dataproduct_management.delete_data_product, input_dp_id)

        # output data product
        output_product = IonObject(RT.DataProduct,
            name='L1_stream',
            description='L1_stream output of CTBP L1 transform',
            temporal_domain = temporal_dump,
            spatial_domain = spatial_dump)

        output_dp_id = self.dataproduct_management.create_data_product(data_product=output_product,
            stream_definition_id=output_def_id
        )
        self.addCleanup(self.dataproduct_management.delete_data_product, output_dp_id)

        # We need the key name here to be "L1_stream", since when the data process is launched, this name goes into
        # the config as in config.process.publish_streams.L1_stream when the config is used to launch the data process
        self.output_products = {'L1_stream' : output_dp_id}
        found_output_streams, _ = self.resource_registry.find_objects(output_dp_id, PRED.hasStream, RT.Stream, True)
        self.assertTrue(len(found_output_streams))
        output_stream_id = found_output_streams[0]

        calibration_config = self._create_calibration_coefficients_dict()
        process_id = self.data_process_management.create_data_process( definition_id, [input_dp_id], self.output_products, calibration_config)
        self.addCleanup(self.data_process_management.delete_data_process, process_id)
        log.debug("Created a data process for ctdbp_L1. id: %s", process_id)

        # Activate the data process
        # (cleanups run in reverse order, so deactivation happens before deletion)
        self.data_process_management.activate_data_process(process_id)
        self.addCleanup(self.data_process_management.deactivate_data_process, process_id)

        #----------- Find the stream that is associated with the input data product when it was created by create_data_product() --------------------------------

        found_input_streams, _ = self.resource_registry.find_objects(input_dp_id, PRED.hasStream, RT.Stream, True)
        input_stream_id = found_input_streams[0]
        input_route = self.resource_registry.read(input_stream_id).stream_route

        log.debug("The input stream for the L1 transform: %s", input_stream_id)

        #----------- Create a subscriber that will listen to the transform's output --------------------------------

        received = gevent.event.AsyncResult()

        def on_granule(message, route, stream):
            # Hand the first message over to the waiting test body
            received.set(message)

        listener = StandaloneStreamSubscriber(exchange_name='sub', callback=on_granule)

        subscription_id = self.pubsub.create_subscription('subscriber_to_transform',
            stream_ids=[output_stream_id],
            exchange_name='sub')
        self.addCleanup(self.pubsub.delete_subscription, subscription_id)

        self.pubsub.activate_subscription(subscription_id)
        self.addCleanup(self.pubsub.deactivate_subscription, subscription_id)

        listener.start()
        self.addCleanup(listener.stop)

        #----------- Publish on that stream so that the transform can receive it --------------------------------

        publisher = StandaloneStreamPublisher(input_stream_id, input_route)
        outgoing_granule = self._get_new_ctd_L0_packet(stream_definition_id=input_def_id, length = 5)

        publisher.publish(outgoing_granule)

        log.debug("Published the following granule: %s", outgoing_granule)

        transformed_granule = received.get(timeout=20)

        log.debug("Got the following granule from the transform: %s", transformed_granule)

        # Check that the granule published by the L1 transform has the right properties
        self._check_granule_from_transform(transformed_granule)

    def _check_granule_from_transform(self, granule):
        """
        An internal method to check if a granule has the right properties
        """
        rdt = RecordDictionaryTool.load_from_granule(granule)
        for required_field in ('pressure', 'temp', 'conductivity', 'time'):
            self.assertIn(required_field, rdt)
class DatasetManagementIntTest(IonIntegrationTestCase):
    """CRUD integration tests for datasets, parameter contexts and parameter dictionaries."""

    def setUp(self):
        """Start the container, deploy r2deploy.yml and create the service clients."""
        self._start_container()
        self.container.start_rel_from_url('res/deploy/r2deploy.yml')

        self.resource_registry = ResourceRegistryServiceClient()
        self.dataset_management = DatasetManagementServiceClient()

    def test_dataset_crud(self):
        """Create a dataset, rename and register it, then read the changes back."""
        ctd_pdict_id = self.dataset_management.read_parameter_dictionary_by_name('ctd_parsed_param_dict', id_only=True)
        temporal, spatial = time_series_domain()
        spatial_dump = spatial.dump()
        temporal_dump = temporal.dump()
        dataset_id = self.dataset_management.create_dataset(name='ctd_dataset', parameter_dictionary_id=ctd_pdict_id, spatial_domain=spatial_dump, temporal_domain=temporal_dump)

        original = self.dataset_management.read_dataset(dataset_id)
        self.assertEquals(original.name, 'ctd_dataset')

        # Rename, store, register, and compare against a fresh read
        original.name = 'something different'
        self.dataset_management.update_dataset(original)
        self.dataset_management.register_dataset(dataset_id)

        reread = self.dataset_management.read_dataset(dataset_id)
        self.assertEquals(original.name, reread.name)
        self.assertTrue(reread.registered)

    def test_context_crud(self):
        """Read one of the created contexts back, delete it, and expect NotFound afterwards."""
        created_ids = self.create_contexts()
        chosen_id = created_ids.pop()

        loaded = DatasetManagementService.get_parameter_context(chosen_id)
        self.assertIsInstance(loaded, ParameterContext)
        self.assertEquals(loaded.identifier, chosen_id)

        self.dataset_management.delete_parameter_context(chosen_id)

        with self.assertRaises(NotFound):
            self.dataset_management.read_parameter_context(chosen_id)

    def test_pdict_crud(self):
        """Create a dictionary from the test contexts, verify its content, delete it, and expect NotFound."""
        created_ids = self.create_contexts()
        dictionary_id = self.dataset_management.create_parameter_dictionary(name='pdict1', parameter_context_ids=created_ids, temporal_context='time')

        member_ids = self.dataset_management.read_parameter_contexts(parameter_dictionary_id=dictionary_id, id_only=True)

        loaded = DatasetManagementService.get_parameter_dictionary(dictionary_id)
        self.assertIsInstance(loaded, ParameterDictionary)
        self.assertTrue('time_test' in loaded)
        self.assertEquals(loaded.identifier, dictionary_id)

        # The dictionary must reference exactly the contexts it was built from
        self.assertEquals(set(member_ids), set(created_ids))

        self.dataset_management.delete_parameter_dictionary(parameter_dictionary_id=dictionary_id)
        with self.assertRaises(NotFound):
            self.dataset_management.read_parameter_dictionary(parameter_dictionary_id=dictionary_id)

    def create_contexts(self):
        """
        Register the five ``*_test`` parameter contexts and return their ids
        in creation order (conductivity, pressure, salinity, temp, time).
        """
        # (name, value encoding, unit of measure, fill value)
        context_specs = (
            ('conductivity_test', np.float32, 'unknown', 0.0),
            ('pressure_test', np.float32, 'Pascal', 0),
            ('salinity_test', np.float32, 'PSU', 0),
            ('temp_test', np.float32, 'degree_Celsius', 0.0),
            ('time_test', np.int64, 'seconds since 1970-01-01', 0),
        )

        context_ids = []
        for context_name, encoding, unit, fill in context_specs:
            context = ParameterContext(context_name, param_type=QuantityType(value_encoding=encoding))
            context.uom = unit
            context.fill_value = fill
            new_id = self.dataset_management.create_parameter_context(name=context_name, parameter_context=context.dump())
            context_ids.append(new_id)

        return context_ids
class TestDMEnd2End(IonIntegrationTestCase):
    """
    End-to-end data management tests: publish granules on a stream, let
    ingestion persist them into a dataset and read them back through the
    data retriever (single retrieve, replay, last granule, strided queries).
    """

    def setUp(self): # Love the non pep-8 convention
        """Start the container, deploy r2deploy.yml, create the service clients and purge the ingestion queue."""
        self._start_container()

        self.container.start_rel_from_url('res/deploy/r2deploy.yml')

        self.process_dispatcher = ProcessDispatcherServiceClient()
        self.pubsub_management = PubsubManagementServiceClient()
        self.resource_registry = ResourceRegistryServiceClient()
        self.dataset_management = DatasetManagementServiceClient()
        self.ingestion_management = IngestionManagementServiceClient()
        self.data_retriever = DataRetrieverServiceClient()
        # Process ids terminated in tearDown
        self.pids = []
        # Set by validate_granule_subscription when a granule arrives
        self.event = Event()
        self.exchange_space_name = 'test_granules'
        self.exchange_point_name = 'science_data'

        self.purge_queues()
        # Queues (objects or names) deleted in tearDown
        self.queue_buffer = []

    def purge_queues(self):
        """Purge the 'science_granule_ingestion' queue."""
        xn = self.container.ex_manager.create_xn_queue('science_granule_ingestion')
        xn.purge()

    def tearDown(self):
        """Purge the ingestion queue, terminate spawned processes, clean subscriptions and delete buffered queues."""
        self.purge_queues()
        for pid in self.pids:
            self.container.proc_manager.terminate_process(pid)
        IngestionManagementIntTest.clean_subscriptions()
        for queue in self.queue_buffer:
            # Entries are either queue objects or plain queue names
            if isinstance(queue, ExchangeNameQueue):
                queue.delete()
            elif isinstance(queue, str):
                xn = self.container.ex_manager.create_xn_queue(queue)
                xn.delete()

    def _create_replay_pdict(self):
        """
        Create the 'replay_pdict' parameter dictionary used by the replay
        tests and return its id.

        It holds the contexts of 'ctd_parsed_param_dict' plus a 'binary'
        (ArrayType) and a 'records' (RecordType) context.
        """
        # Get a precompiled parameter dictionary with basic ctd fields
        base_pdict_id = self.dataset_management.read_parameter_dictionary_by_name('ctd_parsed_param_dict',id_only=True)
        context_ids = self.dataset_management.read_parameter_contexts(base_pdict_id, id_only=True)

        # Add a field that supports binary data input.
        bin_context = ParameterContext('binary', param_type=ArrayType())
        context_ids.append(self.dataset_management.create_parameter_context('binary', bin_context.dump()))
        # Add another field that supports dictionary elements.
        rec_context = ParameterContext('records', param_type=RecordType())
        context_ids.append(self.dataset_management.create_parameter_context('records', rec_context.dump()))

        return self.dataset_management.create_parameter_dictionary('replay_pdict', parameter_context_ids=context_ids, temporal_context='time')

    def launch_producer(self, stream_id=''):
        """Spawn a BetterDataProducer process publishing on ``stream_id`` and remember its pid for tearDown."""
        #--------------------------------------------------------------------------------
        # Launch the producer
        #--------------------------------------------------------------------------------

        pid = self.container.spawn_process('better_data_producer', 'ion.processes.data.example_data_producer', 'BetterDataProducer', {'process':{'stream_id':stream_id}})

        self.pids.append(pid)

    def get_ingestion_config(self):
        """Return the id of the first IngestionConfiguration found in the resource registry."""
        #--------------------------------------------------------------------------------
        # Grab the ingestion configuration from the resource registry
        #--------------------------------------------------------------------------------
        # The ingestion configuration should have been created by the bootstrap service
        # which is configured through r2deploy.yml

        ingest_configs, _ = self.resource_registry.find_resources(restype=RT.IngestionConfiguration,id_only=True)
        return ingest_configs[0]

    def publish_hifi(self,stream_id,stream_route,offset=0):
        """
        Publish one granule of ten records on the stream; both 'time' and
        'temp' are np.arange(10) shifted by ``offset * 10``.
        """
        pub = StandaloneStreamPublisher(stream_id, stream_route)

        stream_def = self.pubsub_management.read_stream_definition(stream_id=stream_id)
        stream_def_id = stream_def._id
        rdt = RecordDictionaryTool(stream_definition_id=stream_def_id)
        rdt['time'] = np.arange(10) + (offset * 10)
        rdt['temp'] = np.arange(10) + (offset * 10)
        pub.publish(rdt.to_granule())

    def publish_fake_data(self,stream_id, route):
        """Publish four consecutive publish_hifi granules (offsets 0 to 3)."""
        for i in xrange(4):
            self.publish_hifi(stream_id,route,i)

    def get_datastore(self, dataset_id):
        """Return the SCIDATA datastore named by the dataset's ``datastore_name``."""
        dataset = self.dataset_management.read_dataset(dataset_id)
        datastore_name = dataset.datastore_name
        datastore = self.container.datastore_manager.get_datastore(datastore_name, DataStore.DS_PROFILE.SCIDATA)
        return datastore

    def validate_granule_subscription(self, msg, route, stream_id):
        """
        Subscriber callback: ignore empty-dict messages, require a Granule,
        log its content and set ``self.event``.
        """
        if msg == {}:
            return
        # Check the type before decoding so that a malformed message is reported
        # with the descriptive assertion message rather than a decoding error
        self.assertIsInstance(msg,Granule,'Message is improperly formatted. (%s)' % type(msg))
        rdt = RecordDictionaryTool.load_from_granule(msg)
        log.info('%s', rdt.pretty_print())
        self.event.set()

    def make_file_data(self):
        """Return a {'body', 'meta'} dict describing a small text file with a random name."""
        from interface.objects import File
        import uuid
        data = 'hello world\n'
        rand = str(uuid.uuid4())[:8]
        meta = File(name='/examples/' + rand + '.txt', group_id='example1')
        return {'body': data, 'meta':meta}

    def publish_file(self, stream_id, stream_route):
        """Publish the payload built by make_file_data on the given stream."""
        publisher = StandaloneStreamPublisher(stream_id,stream_route)
        publisher.publish(self.make_file_data())

    def wait_until_we_have_enough_granules(self, dataset_id='',granules=4):
        """
        Poll the dataset's view every 0.1 s until it holds at least
        ``granules`` entries; the wait runs inside a 40 s gevent timeout.
        """
        datastore = self.get_datastore(dataset_id)
        dataset = self.dataset_management.read_dataset(dataset_id)

        with gevent.timeout.Timeout(40):
            success = False
            while not success:
                success = len(datastore.query_view(dataset.view_name)) >= granules
                gevent.sleep(0.1)

        log.info(datastore.query_view(dataset.view_name))

    def wait_until_we_have_enough_files(self):
        """
        Poll the 'filesystem' datastore until the 'catalog/file_by_owner'
        view has at least one entry.

        Raises Timeout when nothing shows up within 10 seconds.
        """
        datastore = self.container.datastore_manager.get_datastore('filesystem', DataStore.DS_PROFILE.FILESYSTEM)

        deadline = time.time() + 10
        while len(datastore.query_view('catalog/file_by_owner')) < 1:
            if time.time() >= deadline:
                raise Timeout('Files are not populating in time.')
            # Yield between polls instead of spinning, so other greenlets
            # get a chance to run while we wait
            gevent.sleep(0.1)

    def create_dataset(self, parameter_dict_id=''):
        """
        Create 'test_dataset' on a time-series domain and return its id;
        falls back to 'ctd_parsed_param_dict' when no dictionary id is given.
        """
        tdom, sdom = time_series_domain()
        sdom = sdom.dump()
        tdom = tdom.dump()
        if not parameter_dict_id:
            parameter_dict_id = self.dataset_management.read_parameter_dictionary_by_name('ctd_parsed_param_dict', id_only=True)

        dataset_id = self.dataset_management.create_dataset('test_dataset', parameter_dictionary_id=parameter_dict_id, spatial_domain=sdom, temporal_domain=tdom)
        return dataset_id

    @unittest.skip('Doesnt work')
    @attr('LOCOINT')
    @unittest.skipIf(os.getenv('CEI_LAUNCH_TEST', False), 'Skip test while in CEI LAUNCH mode')
    def test_replay_pause(self):
        """Fill a coverage directly, replay it and check that pause, resume and stop control the granule flow."""
        pdict_id = self._create_replay_pdict()

        stream_def_id = self.pubsub_management.create_stream_definition('replay_stream', parameter_dictionary_id=pdict_id)
        replay_stream, replay_route = self.pubsub_management.create_stream('replay', 'xp1', stream_definition_id=stream_def_id)
        dataset_id = self.create_dataset(pdict_id)
        scov = DatasetManagementService._get_coverage(dataset_id)

        bb = CoverageCraft(scov)
        bb.rdt['time'] = np.arange(100)
        bb.rdt['temp'] = np.random.random(100) + 30
        bb.sync_with_granule()

        DatasetManagementService._persist_coverage(dataset_id, bb.coverage) # This invalidates it for multi-host configurations
        # Set up the subscriber to verify the data
        subscriber = StandaloneStreamSubscriber(self.exchange_space_name, self.validate_granule_subscription)
        xp = self.container.ex_manager.create_xp('xp1')
        self.queue_buffer.append(self.exchange_space_name)
        subscriber.start()
        subscriber.xn.bind(replay_route.routing_key, xp)

        # Set up the replay agent and the client wrapper

        # 1) Define the Replay (dataset and stream to publish on)
        self.replay_id, process_id = self.data_retriever.define_replay(dataset_id=dataset_id, stream_id=replay_stream)
        # 2) Make a client to interact with the process (optionally provide it a process to bind with)
        replay_client = ReplayClient(process_id)
        # 3) Start the agent (launch the process)
        self.data_retriever.start_replay_agent(self.replay_id)
        # 4) Start replaying...
        replay_client.start_replay()

        # Wait till we get some granules
        self.assertTrue(self.event.wait(5))

        # We got granules, pause the replay, clear the queue and allow the process to finish consuming
        replay_client.pause_replay()
        gevent.sleep(1)
        subscriber.xn.purge()
        self.event.clear()

        # Make sure there's no remaining messages being consumed
        self.assertFalse(self.event.wait(1))

        # Resume the replay and wait until we start getting granules again
        replay_client.resume_replay()
        self.assertTrue(self.event.wait(5))

        # Stop the replay, clear the queues
        replay_client.stop_replay()
        gevent.sleep(1)
        subscriber.xn.purge()
        self.event.clear()

        # Make sure that it did indeed stop
        self.assertFalse(self.event.wait(1))

        subscriber.stop()

    @attr('SMOKE')
    def test_dm_end_2_end(self):
        """Produce data, persist it, then read it back in one chunk, via a streamed replay, and via a sliced query."""
        #--------------------------------------------------------------------------------
        # Set up a stream and have a mock instrument (producer) send data
        #--------------------------------------------------------------------------------
        self.event.clear()

        pdict_id = self._create_replay_pdict()

        stream_definition = self.pubsub_management.create_stream_definition('ctd data', parameter_dictionary_id=pdict_id)

        stream_id, route = self.pubsub_management.create_stream('producer', exchange_point=self.exchange_point_name, stream_definition_id=stream_definition)

        #--------------------------------------------------------------------------------
        # Start persisting the data on the stream
        # - Get the ingestion configuration from the resource registry
        # - Create the dataset
        # - call persist_data_stream to setup the subscription for the ingestion workers
        #   on the stream that you specify which causes the data to be persisted
        #--------------------------------------------------------------------------------

        ingest_config_id = self.get_ingestion_config()
        dataset_id = self.create_dataset(pdict_id)
        self.ingestion_management.persist_data_stream(stream_id=stream_id, ingestion_configuration_id=ingest_config_id, dataset_id=dataset_id)

        #--------------------------------------------------------------------------------
        # Now the granules are ingesting and persisted
        #--------------------------------------------------------------------------------

        self.launch_producer(stream_id)
        self.wait_until_we_have_enough_granules(dataset_id,4)

        #--------------------------------------------------------------------------------
        # Now get the data in one chunk using an RPC Call to start_retreive
        #--------------------------------------------------------------------------------

        replay_data = self.data_retriever.retrieve(dataset_id)
        self.assertIsInstance(replay_data, Granule)
        rdt = RecordDictionaryTool.load_from_granule(replay_data)
        self.assertTrue((rdt['time'][:10] == np.arange(10)).all(),'%s' % rdt['time'][:])
        self.assertTrue((rdt['binary'][:10] == np.array(['hi']*10, dtype='object')).all())

        #--------------------------------------------------------------------------------
        # Now to try the streamed approach
        #--------------------------------------------------------------------------------
        replay_stream_id, replay_route = self.pubsub_management.create_stream('replay_out', exchange_point=self.exchange_point_name, stream_definition_id=stream_definition)
        self.replay_id, process_id = self.data_retriever.define_replay(dataset_id=dataset_id, stream_id=replay_stream_id)
        log.info('Process ID: %s', process_id)

        replay_client = ReplayClient(process_id)

        #--------------------------------------------------------------------------------
        # Create the listening endpoint for the the retriever to talk to
        #--------------------------------------------------------------------------------
        xp = self.container.ex_manager.create_xp(self.exchange_point_name)
        subscriber = StandaloneStreamSubscriber(self.exchange_space_name, self.validate_granule_subscription)
        self.queue_buffer.append(self.exchange_space_name)
        subscriber.start()
        subscriber.xn.bind(replay_route.routing_key, xp)

        self.data_retriever.start_replay_agent(self.replay_id)

        self.assertTrue(replay_client.await_agent_ready(5), 'The process never launched')
        replay_client.start_replay()

        self.assertTrue(self.event.wait(10))
        subscriber.stop()

        self.data_retriever.cancel_replay_agent(self.replay_id)

        #--------------------------------------------------------------------------------
        # Test the slicing capabilities
        #--------------------------------------------------------------------------------

        granule = self.data_retriever.retrieve(dataset_id=dataset_id, query={'tdoa':slice(0,5)})
        rdt = RecordDictionaryTool.load_from_granule(granule)
        b = rdt['time'] == np.arange(5)
        # The comparison may come back as a plain bool instead of an array
        self.assertTrue(b.all() if not isinstance(b,bool) else b)

    def test_retrieve_and_transform(self):
        """Persist two CTD granules and retrieve them through the L2 salinity transform; no salinity value may be 0."""
        # Stream definition for the CTD data
        pdict_id = self.dataset_management.read_parameter_dictionary_by_name('ctd_parsed_param_dict', id_only=True)
        stream_def_id = self.pubsub_management.create_stream_definition('ctd data', parameter_dictionary_id=pdict_id)
        ctd_stream_id, route = self.pubsub_management.create_stream('ctd stream', 'xp1', stream_definition_id=stream_def_id)

        # Stream definition for the salinity data
        salinity_pdict_id = self.dataset_management.read_parameter_dictionary_by_name('ctd_parsed_param_dict', id_only=True)
        sal_stream_def_id = self.pubsub_management.create_stream_definition('sal data', parameter_dictionary_id=salinity_pdict_id)

        ingest_config_id = self.get_ingestion_config()
        dataset_id = self.create_dataset(pdict_id)
        #--------------------------------------------------------------------------------
        # Touch the datastore before ingestion starts (see the race-condition
        # note in test_replay_with_parameters)
        #--------------------------------------------------------------------------------
        self.get_datastore(dataset_id)
        self.ingestion_management.persist_data_stream(stream_id=ctd_stream_id, ingestion_configuration_id=ingest_config_id, dataset_id=dataset_id)

        rdt = RecordDictionaryTool(stream_definition_id=stream_def_id)
        rdt['time'] = np.arange(10)
        rdt['temp'] = np.random.randn(10) * 10 + 30
        rdt['conductivity'] = np.random.randn(10) * 2 + 10

        publisher = StandaloneStreamPublisher(ctd_stream_id, route)
        publisher.publish(rdt.to_granule())

        # Second granule: same values, next ten timestamps
        rdt['time'] = np.arange(10,20)

        publisher.publish(rdt.to_granule())

        self.wait_until_we_have_enough_granules(dataset_id, 2)

        granule = self.data_retriever.retrieve(dataset_id,
                                               None,
                                               None,
                                               'ion.processes.data.transforms.ctd.ctd_L2_salinity',
                                               'CTDL2SalinityTransformAlgorithm',
                                               kwargs=dict(params=sal_stream_def_id))
        rdt = RecordDictionaryTool.load_from_granule(granule)
        for i in rdt['salinity']:
            self.assertNotEqual(i,0)

    def test_last_granule(self):
        """Persist two granules and check retrieve_last_granule and retrieve_last_data_points."""
        #--------------------------------------------------------------------------------
        # Create the necessary configurations for the test
        #--------------------------------------------------------------------------------
        pdict_id = self.dataset_management.read_parameter_dictionary_by_name('ctd_parsed_param_dict', id_only=True)
        stream_def_id = self.pubsub_management.create_stream_definition('ctd parsed', parameter_dictionary_id=pdict_id)
        stream_id, route = self.pubsub_management.create_stream('last_granule', exchange_point=self.exchange_point_name, stream_definition_id=stream_def_id)
        config_id = self.get_ingestion_config()
        dataset_id = self.create_dataset(pdict_id)

        self.ingestion_management.persist_data_stream(stream_id=stream_id, ingestion_configuration_id=config_id, dataset_id=dataset_id)
        #--------------------------------------------------------------------------------
        # Create the datastore first,
        #--------------------------------------------------------------------------------
        self.get_datastore(dataset_id)

        self.publish_hifi(stream_id,route, 0)
        self.publish_hifi(stream_id,route, 1)

        self.wait_until_we_have_enough_granules(dataset_id,2) # I just need two

        def verifier():
            # The last granule should carry the second batch: times 10..19
            replay_granule = self.data_retriever.retrieve_last_granule(dataset_id)

            rdt = RecordDictionaryTool.load_from_granule(replay_granule)

            comp = rdt['time'] == np.arange(10) + 10
            if not isinstance(comp,bool):
                return comp.all()
            return False

        self.assertTrue(poll(verifier))

        def verify_points():
            # The last five data points should be times 15..19
            replay_granule = self.data_retriever.retrieve_last_data_points(dataset_id,5)

            rdt = RecordDictionaryTool.load_from_granule(replay_granule)

            comp = rdt['time'] == np.arange(15,20)
            if not isinstance(comp,bool):
                return comp.all()
            return False

        self.assertTrue(poll(verify_points))

    def test_replay_with_parameters(self):
        """Retrieve with start/end/stride and a parameter subset, then check the result and the dataset extents."""
        #--------------------------------------------------------------------------------
        # Create the configurations and the dataset
        #--------------------------------------------------------------------------------
        pdict_id = self._create_replay_pdict()

        stream_def_id = self.pubsub_management.create_stream_definition('replay_stream', parameter_dictionary_id=pdict_id)

        stream_id, route = self.pubsub_management.create_stream('replay_with_params', exchange_point=self.exchange_point_name, stream_definition_id=stream_def_id)
        config_id = self.get_ingestion_config()
        dataset_id = self.create_dataset(pdict_id)
        self.ingestion_management.persist_data_stream(stream_id=stream_id, ingestion_configuration_id=config_id, dataset_id=dataset_id)

        #--------------------------------------------------------------------------------
        # Coerce the datastore into existence (beats race condition)
        #--------------------------------------------------------------------------------
        self.get_datastore(dataset_id)

        self.launch_producer(stream_id)

        self.wait_until_we_have_enough_granules(dataset_id,4)

        query = {
            'start_time': 0,
            'end_time': 20,
            'stride_time' : 2,
            'parameters': ['time','temp']
        }
        retrieved_data = self.data_retriever.retrieve(dataset_id=dataset_id,query=query)

        rdt = RecordDictionaryTool.load_from_granule(retrieved_data)
        comp = np.arange(0,20,2) == rdt['time']
        self.assertTrue(comp.all(),'%s' % rdt.pretty_print())
        # Only the two requested parameters may be present in the result
        self.assertEqual(set(rdt.iterkeys()), set(['time','temp']))

        extents = self.dataset_management.dataset_extents(dataset_id=dataset_id, parameters=['time','temp'])
        self.assertTrue(extents['time']>=20)
        self.assertTrue(extents['temp']>=20)

    def test_repersist_data(self):
        """
        Persist two granules, unpersist and re-persist the stream, persist
        two more, then poll until the replayed 'time' equals np.arange(0, 40).
        """
        pdict_id = self.dataset_management.read_parameter_dictionary_by_name('ctd_parsed_param_dict',id_only=True)
        stream_def_id = self.pubsub_management.create_stream_definition(name='ctd', parameter_dictionary_id=pdict_id)
        stream_id, route = self.pubsub_management.create_stream(name='repersist', exchange_point=self.exchange_point_name, stream_definition_id=stream_def_id)
        config_id = self.get_ingestion_config()
        dataset_id = self.create_dataset(pdict_id)
        self.ingestion_management.persist_data_stream(stream_id=stream_id, ingestion_configuration_id=config_id, dataset_id=dataset_id)
        self.get_datastore(dataset_id)
        self.publish_hifi(stream_id,route,0)
        self.publish_hifi(stream_id,route,1)
        self.wait_until_we_have_enough_granules(dataset_id,2)
        # Drop the persistence and set it up again on the same dataset
        self.ingestion_management.unpersist_data_stream(stream_id=stream_id,ingestion_configuration_id=config_id)
        self.ingestion_management.persist_data_stream(stream_id=stream_id,ingestion_configuration_id=config_id,dataset_id=dataset_id)
        self.publish_hifi(stream_id,route,2)
        self.publish_hifi(stream_id,route,3)
        self.wait_until_we_have_enough_granules(dataset_id,4)
        success = False
        # Retry the retrieval once per second inside a 5 second gevent timeout
        with gevent.timeout.Timeout(5):
            while not success:
                replay_granule = self.data_retriever.retrieve(dataset_id)

                rdt = RecordDictionaryTool.load_from_granule(replay_granule)

                comp = rdt['time'] == np.arange(0,40)
                # Only an array-valued comparison is trusted; a plain bool is
                # treated as "not there yet"
                if not isinstance(comp,bool):
                    success = comp.all()
                gevent.sleep(1)

        self.assertTrue(success)
class RecordDictionaryIntegrationTest(IonIntegrationTestCase):
    '''
    Integration tests for RecordDictionaryTool run against the services
    deployed from res/deploy/r2deploy.yml.
    '''

    def setUp(self):
        self._start_container()
        self.container.start_rel_from_url('res/deploy/r2deploy.yml')
        self.dataset_management = DatasetManagementServiceClient()
        self.pubsub_management = PubsubManagementServiceClient()

        # Expected values for verify_incoming; filled in by test_granule
        # right before it publishes.
        self.rdt = None
        self.data_producer_id = None
        self.provider_metadata_update = None
        self.event = Event()

    def verify_incoming(self, m, r, s):
        '''
        Subscriber callback: compare the received granule m against the
        expectations stored on the test case, then signal self.event.
        r and s are accepted to match the callback signature and are unused.
        '''
        rdt = RecordDictionaryTool.load_from_granule(m)
        self.assertEqual(rdt, self.rdt)
        self.assertEqual(m.data_producer_id, self.data_producer_id)
        self.assertEqual(m.provider_metadata_update, self.provider_metadata_update)
        self.assertIsNotNone(m.creation_timestamp)
        self.event.set()

    def test_serialize_compatability(self):
        ''' Every populated value of a published granule must arrive as a numpy array '''
        ph = ParameterHelper(self.dataset_management, self.addCleanup)
        pdict_id = ph.create_extended_parsed()

        stream_def_id = self.pubsub_management.create_stream_definition('ctd extended', parameter_dictionary_id=pdict_id)
        self.addCleanup(self.pubsub_management.delete_stream_definition, stream_def_id)

        stream_id, route = self.pubsub_management.create_stream('ctd1', 'xp1', stream_definition_id=stream_def_id)
        self.addCleanup(self.pubsub_management.delete_stream, stream_id)

        sub_id = self.pubsub_management.create_subscription('sub1', stream_ids=[stream_id])
        self.addCleanup(self.pubsub_management.delete_subscription, sub_id)
        self.pubsub_management.activate_subscription(sub_id)
        # Cleanups run last-in-first-out, so the subscription is deactivated
        # before it is deleted.
        self.addCleanup(self.pubsub_management.deactivate_subscription, sub_id)

        verified = Event()

        def verifier(msg, route, stream_id):
            for _name, value in msg.record_dictionary.iteritems():
                if value is not None:
                    self.assertIsInstance(value, np.ndarray)
            rdt = RecordDictionaryTool.load_from_granule(msg)
            for field in rdt.fields:
                self.assertIsInstance(rdt[field], np.ndarray)
            # Only reached when every assertion above passed.
            verified.set()

        subscriber = StandaloneStreamSubscriber('sub1', callback=verifier)
        subscriber.start()
        self.addCleanup(subscriber.stop)

        publisher = StandaloneStreamPublisher(stream_id, route)
        rdt = RecordDictionaryTool(stream_definition_id=stream_def_id)
        ph.fill_rdt(rdt, 10)
        publisher.publish(rdt.to_granule())
        self.assertTrue(verified.wait(10))

    def test_granule(self):
        ''' Round-trip a record dictionary through a granule, both over a stream and directly '''
        pdict_id = self.dataset_management.read_parameter_dictionary_by_name('ctd_parsed_param_dict', id_only=True)
        stream_def_id = self.pubsub_management.create_stream_definition('ctd', parameter_dictionary_id=pdict_id, stream_configuration={'reference_designator': "GA03FLMA-RI001-13-CTDMOG999"})
        pdict = DatasetManagementService.get_parameter_dictionary_by_name('ctd_parsed_param_dict')
        self.addCleanup(self.pubsub_management.delete_stream_definition, stream_def_id)

        stream_id, route = self.pubsub_management.create_stream('ctd_stream', 'xp1', stream_definition_id=stream_def_id)
        self.addCleanup(self.pubsub_management.delete_stream, stream_id)
        publisher = StandaloneStreamPublisher(stream_id, route)

        subscriber = StandaloneStreamSubscriber('sub', self.verify_incoming)
        subscriber.start()
        self.addCleanup(subscriber.stop)

        subscription_id = self.pubsub_management.create_subscription('sub', stream_ids=[stream_id])
        self.pubsub_management.activate_subscription(subscription_id)

        rdt = RecordDictionaryTool(stream_definition_id=stream_def_id)
        rdt['time'] = np.arange(10)
        rdt['temp'] = np.random.randn(10) * 10 + 30
        rdt['pressure'] = [20] * 10

        self.assertEqual(set(pdict.keys()), set(rdt.fields))
        self.assertEqual(pdict.temporal_parameter_name, rdt.temporal_parameter)

        self.assertEqual(rdt._stream_config['reference_designator'], "GA03FLMA-RI001-13-CTDMOG999")

        # Expectations consumed by verify_incoming in the subscriber.
        self.rdt = rdt
        self.data_producer_id = 'data_producer'
        self.provider_metadata_update = {1: 1}

        publisher.publish(rdt.to_granule(data_producer_id='data_producer', provider_metadata_update={1: 1}))

        self.assertTrue(self.event.wait(10))

        self.pubsub_management.deactivate_subscription(subscription_id)
        self.pubsub_management.delete_subscription(subscription_id)

        # An array holding nothing but None reads back as None.
        rdt = RecordDictionaryTool(stream_definition_id=stream_def_id)
        rdt['time'] = np.array([None, None, None])
        self.assertIsNone(rdt['time'])

        # A None inside otherwise valid data reads back as the fill value.
        rdt['time'] = np.array([None, 1, 2])
        self.assertEqual(rdt['time'][0], rdt.fill_value('time'))

        # Construct from a stream definition object instead of its id.
        stream_def_obj = self.pubsub_management.read_stream_definition(stream_def_id)
        rdt = RecordDictionaryTool(stream_definition=stream_def_obj)
        rdt['time'] = np.arange(20)
        rdt['temp'] = np.arange(20)

        granule = rdt.to_granule()
        rdt = RecordDictionaryTool.load_from_granule(granule)
        np.testing.assert_array_equal(rdt['time'], np.arange(20))
        np.testing.assert_array_equal(rdt['temp'], np.arange(20))

    def test_filter(self):
        ''' A stream definition with available_fields restricts the record dictionary to those fields '''
        pdict_id = self.dataset_management.read_parameter_dictionary_by_name('ctd_parsed_param_dict', id_only=True)
        filtered_stream_def_id = self.pubsub_management.create_stream_definition('filtered', parameter_dictionary_id=pdict_id, available_fields=['time', 'temp'])
        self.addCleanup(self.pubsub_management.delete_stream_definition, filtered_stream_def_id)
        rdt = RecordDictionaryTool(stream_definition_id=filtered_stream_def_id)
        self.assertEqual(rdt._available_fields, ['time', 'temp'])
        rdt['time'] = np.arange(20)
        rdt['temp'] = np.arange(20)
        with self.assertRaises(KeyError):
            rdt['pressure'] = np.arange(20)

        granule = rdt.to_granule(connection_id='c1', connection_index='0')
        rdt2 = RecordDictionaryTool.load_from_granule(granule)
        self.assertEqual(rdt._available_fields, rdt2._available_fields)
        self.assertEqual(rdt.fields, rdt2.fields)
        self.assertEqual(rdt2.connection_id, 'c1')
        self.assertEqual(rdt2.connection_index, '0')
        for k, _ in rdt.iteritems():
            self.assertTrue(np.array_equal(rdt[k], rdt2[k]))

    def test_rdt_param_funcs(self):
        ''' DENSITY is derived from the L0 inputs through the parameter function chain '''
        rdt = self.create_rdt()
        rdt['TIME'] = [0]
        rdt['TEMPWAT_L0'] = [280000]
        rdt['CONDWAT_L0'] = [100000]
        rdt['PRESWAT_L0'] = [2789]
        rdt['LAT'] = [45]
        rdt['LON'] = [-71]

        np.testing.assert_array_almost_equal(rdt['DENSITY'], np.array([1001.76506258], dtype='float32'))

    def test_rdt_lookup(self):
        ''' Lookup values can be set directly or fetched from stored value documents '''
        rdt = self.create_lookup_rdt()

        self.assertIn('offset_a', rdt.lookup_values())
        self.assertNotIn('offset_b', rdt.lookup_values())

        rdt['time'] = [0]
        rdt['temp'] = [10.0]
        rdt['offset_a'] = [2.0]
        self.assertIsNone(rdt['offset_b'])
        self.assertEqual(rdt.lookup_values(), ['offset_a'])
        np.testing.assert_array_almost_equal(rdt['calibrated'], np.array([12.0]))

        svm = StoredValueManager(self.container)
        svm.stored_value_cas('coefficient_document', {'offset_b': 2.0})
        svm.stored_value_cas("GA03FLMA-RI001-13-CTDMOG999_OFFSETC", {'offset_c': 3.0})
        rdt.fetch_lookup_values()
        np.testing.assert_array_equal(rdt['offset_b'], np.array([2.0]))
        np.testing.assert_array_equal(rdt['calibrated_b'], np.array([14.0]))
        np.testing.assert_array_equal(rdt['offset_c'], np.array([3.0]))

    def create_rdt(self):
        ''' Returns a RecordDictionaryTool backed by the contexts from create_pfuncs '''
        contexts, _ = self.create_pfuncs()
        context_ids = [_id for ct, _id in contexts.itervalues()]

        pdict_id = self.dataset_management.create_parameter_dictionary(name='functional_pdict', parameter_context_ids=context_ids, temporal_context='test_TIME')
        self.addCleanup(self.dataset_management.delete_parameter_dictionary, pdict_id)
        stream_def_id = self.pubsub_management.create_stream_definition('functional', parameter_dictionary_id=pdict_id)
        self.addCleanup(self.pubsub_management.delete_stream_definition, stream_def_id)
        rdt = RecordDictionaryTool(stream_definition_id=stream_def_id)
        return rdt

    def create_lookup_rdt(self):
        ''' Returns a RecordDictionaryTool backed by ParameterHelper.create_lookups '''
        ph = ParameterHelper(self.dataset_management, self.addCleanup)
        pdict_id = ph.create_lookups()

        stream_def_id = self.pubsub_management.create_stream_definition('lookup', parameter_dictionary_id=pdict_id, stream_configuration={'reference_designator': "GA03FLMA-RI001-13-CTDMOG999"})
        self.addCleanup(self.pubsub_management.delete_stream_definition, stream_def_id)
        rdt = RecordDictionaryTool(stream_definition_id=stream_def_id)
        return rdt

    def _register_function(self, funcs, key, expr):
        ''' Creates the parameter function 'test_<key>', schedules its deletion and records (expr, id) in funcs '''
        expr_id = self.dataset_management.create_parameter_function(name='test_%s' % key, parameter_function=expr.dump())
        self.addCleanup(self.dataset_management.delete_parameter_function, expr_id)
        funcs[key] = expr, expr_id
        return expr_id

    def _register_context(self, contexts, key, ctxt, **create_kwargs):
        ''' Creates the parameter context 'test_<key>', schedules its deletion and records (ctxt, id) in contexts '''
        ctxt_id = self.dataset_management.create_parameter_context(name='test_%s' % key, parameter_context=ctxt.dump(), **create_kwargs)
        self.addCleanup(self.dataset_management.delete_parameter_context, ctxt_id)
        contexts[key] = ctxt, ctxt_id
        return ctxt_id

    def create_pfuncs(self):
        '''
        Registers the independent (L0) and derived (L1, PRACSAL, DENSITY)
        parameter contexts and their parameter functions.

        Returns (contexts, funcs): dictionaries keyed by parameter name whose
        values are (object, resource id) pairs. Every created resource is
        scheduled for deletion through addCleanup.
        '''
        contexts = {}
        funcs = {}

        t_ctxt = ParameterContext('TIME', param_type=QuantityType(value_encoding=np.dtype('int64')))
        t_ctxt.uom = 'seconds since 1900-01-01'
        self._register_context(contexts, 'TIME', t_ctxt)

        lat_ctxt = ParameterContext('LAT', param_type=ConstantType(QuantityType(value_encoding=np.dtype('float32'))), fill_value=-9999)
        lat_ctxt.axis = AxisTypeEnum.LAT
        lat_ctxt.uom = 'degree_north'
        self._register_context(contexts, 'LAT', lat_ctxt)

        lon_ctxt = ParameterContext('LON', param_type=ConstantType(QuantityType(value_encoding=np.dtype('float32'))), fill_value=-9999)
        lon_ctxt.axis = AxisTypeEnum.LON
        lon_ctxt.uom = 'degree_east'
        self._register_context(contexts, 'LON', lon_ctxt)

        # Independent Parameters

        # Temperature - values expected to be the decimal results of conversion from hex
        temp_ctxt = ParameterContext('TEMPWAT_L0', param_type=QuantityType(value_encoding=np.dtype('float32')), fill_value=-9999)
        temp_ctxt.uom = 'deg_C'
        self._register_context(contexts, 'TEMPWAT_L0', temp_ctxt)

        # Conductivity - values expected to be the decimal results of conversion from hex
        cond_ctxt = ParameterContext('CONDWAT_L0', param_type=QuantityType(value_encoding=np.dtype('float32')), fill_value=-9999)
        cond_ctxt.uom = 'S m-1'
        self._register_context(contexts, 'CONDWAT_L0', cond_ctxt)

        # Pressure - values expected to be the decimal results of conversion from hex
        press_ctxt = ParameterContext('PRESWAT_L0', param_type=QuantityType(value_encoding=np.dtype('float32')), fill_value=-9999)
        press_ctxt.uom = 'dbar'
        self._register_context(contexts, 'PRESWAT_L0', press_ctxt)

        # Dependent Parameters
        # NOTE: each function is registered (dumped) before its param_map is
        # assigned; that ordering is kept from the original code.

        # TEMPWAT_L1 = (TEMPWAT_L0 / 10000) - 10
        tl1_func = '(T / 10000) - 10'
        expr = NumexprFunction('TEMPWAT_L1', tl1_func, ['T'])
        expr_id = self._register_function(funcs, 'TEMPWAT_L1', expr)
        expr.param_map = {'T': 'TEMPWAT_L0'}
        tempL1_ctxt = ParameterContext('TEMPWAT_L1', param_type=ParameterFunctionType(function=expr), variability=VariabilityEnum.TEMPORAL)
        tempL1_ctxt.uom = 'deg_C'
        self._register_context(contexts, 'TEMPWAT_L1', tempL1_ctxt, parameter_function_id=expr_id)

        # CONDWAT_L1 = (CONDWAT_L0 / 100000) - 0.5
        cl1_func = '(C / 100000) - 0.5'
        expr = NumexprFunction('CONDWAT_L1', cl1_func, ['C'])
        expr_id = self._register_function(funcs, 'CONDWAT_L1', expr)
        expr.param_map = {'C': 'CONDWAT_L0'}
        condL1_ctxt = ParameterContext('CONDWAT_L1', param_type=ParameterFunctionType(function=expr), variability=VariabilityEnum.TEMPORAL)
        condL1_ctxt.uom = 'S m-1'
        self._register_context(contexts, 'CONDWAT_L1', condL1_ctxt, parameter_function_id=expr_id)

        # Equation uses p_range, which is a calibration coefficient - Fixing to 679.34040721
        #   PRESWAT_L1 = (PRESWAT_L0 * p_range / (0.85 * 65536)) - (0.05 * p_range)
        pl1_func = '(P * p_range / (0.85 * 65536)) - (0.05 * p_range)'
        expr = NumexprFunction('PRESWAT_L1', pl1_func, ['P', 'p_range'])
        expr_id = self._register_function(funcs, 'PRESWAT_L1', expr)
        expr.param_map = {'P': 'PRESWAT_L0', 'p_range': 679.34040721}
        presL1_ctxt = ParameterContext('PRESWAT_L1', param_type=ParameterFunctionType(function=expr), variability=VariabilityEnum.TEMPORAL)
        # Pressure is in dbar (same unit as PRESWAT_L0) and is registered
        # under its own name rather than the conductivity one.
        presL1_ctxt.uom = 'dbar'
        self._register_context(contexts, 'PRESWAT_L1', presL1_ctxt, parameter_function_id=expr_id)

        # Density & practical salinity calculated using the Gibbs Seawater library - available via python-gsw project:
        #       https://code.google.com/p/python-gsw/ & http://pypi.python.org/pypi/gsw/3.0.1

        # PRACSAL = gsw.SP_from_C((CONDWAT_L1 * 10), TEMPWAT_L1, PRESWAT_L1)
        owner = 'gsw'
        sal_func = 'SP_from_C'
        sal_arglist = ['C', 't', 'p']
        expr = PythonFunction('PRACSAL', owner, sal_func, sal_arglist)
        expr_id = self._register_function(funcs, 'PRACSAL', expr)

        # A magic function that may or may not exist actually forms the line below at runtime.
        sal_pmap = {
            'C': NumexprFunction('CONDWAT_L1*10', 'C*10', ['C'], param_map={'C': 'CONDWAT_L1'}),
            't': 'TEMPWAT_L1',
            'p': 'PRESWAT_L1',
        }
        expr.param_map = sal_pmap
        sal_ctxt = ParameterContext('PRACSAL', param_type=ParameterFunctionType(expr), variability=VariabilityEnum.TEMPORAL)
        sal_ctxt.uom = 'g kg-1'
        self._register_context(contexts, 'PRACSAL', sal_ctxt, parameter_function_id=expr_id)

        # absolute_salinity = gsw.SA_from_SP(PRACSAL, PRESWAT_L1, longitude, latitude)
        # conservative_temperature = gsw.CT_from_t(absolute_salinity, TEMPWAT_L1, PRESWAT_L1)
        # DENSITY = gsw.rho(absolute_salinity, conservative_temperature, PRESWAT_L1)
        owner = 'gsw'
        abs_sal_expr = PythonFunction('abs_sal', owner, 'SA_from_SP', ['PRACSAL', 'PRESWAT_L1', 'LON', 'LAT'])
        cons_temp_expr = PythonFunction('cons_temp', owner, 'CT_from_t', [abs_sal_expr, 'TEMPWAT_L1', 'PRESWAT_L1'])
        dens_expr = PythonFunction('DENSITY', owner, 'rho', [abs_sal_expr, cons_temp_expr, 'PRESWAT_L1'])
        dens_ctxt = ParameterContext('DENSITY', param_type=ParameterFunctionType(dens_expr), variability=VariabilityEnum.TEMPORAL)
        dens_ctxt.uom = 'kg m-3'
        # The DENSITY function chain is embedded in the context itself; no
        # separate parameter function resource is created for it.
        self._register_context(contexts, 'DENSITY', dens_ctxt)

        return contexts, funcs
class TestDMEnd2End(IonIntegrationTestCase): def setUp(self): # Love the non pep-8 convention self._start_container() self.container.start_rel_from_url('res/deploy/r2deploy.yml') self.process_dispatcher = ProcessDispatcherServiceClient() self.pubsub_management = PubsubManagementServiceClient() self.resource_registry = ResourceRegistryServiceClient() self.dataset_management = DatasetManagementServiceClient() self.ingestion_management = IngestionManagementServiceClient() self.data_retriever = DataRetrieverServiceClient() self.pids = [] self.event = Event() self.exchange_space_name = 'test_granules' self.exchange_point_name = 'science_data' self.i = 0 self.purge_queues() self.queue_buffer = [] self.streams = [] self.addCleanup(self.stop_all_ingestion) def purge_queues(self): xn = self.container.ex_manager.create_xn_queue( 'science_granule_ingestion') xn.purge() def tearDown(self): self.purge_queues() for pid in self.pids: self.container.proc_manager.terminate_process(pid) IngestionManagementIntTest.clean_subscriptions() for queue in self.queue_buffer: if isinstance(queue, ExchangeNameQueue): queue.delete() elif isinstance(queue, str): xn = self.container.ex_manager.create_xn_queue(queue) xn.delete() #-------------------------------------------------------------------------------- # Helper/Utility methods #-------------------------------------------------------------------------------- def create_dataset(self, parameter_dict_id=''): ''' Creates a time-series dataset ''' tdom, sdom = time_series_domain() sdom = sdom.dump() tdom = tdom.dump() if not parameter_dict_id: parameter_dict_id = self.dataset_management.read_parameter_dictionary_by_name( 'ctd_parsed_param_dict', id_only=True) dataset_id = self.dataset_management.create_dataset( 'test_dataset_%i' % self.i, parameter_dictionary_id=parameter_dict_id, spatial_domain=sdom, temporal_domain=tdom) return dataset_id def get_datastore(self, dataset_id): ''' Gets an instance of the datastore This method is primarily used to 
defeat a bug where integration tests in multiple containers may sometimes delete a CouchDB datastore and the other containers are unaware of the new state of the datastore. ''' dataset = self.dataset_management.read_dataset(dataset_id) datastore_name = dataset.datastore_name datastore = self.container.datastore_manager.get_datastore( datastore_name, DataStore.DS_PROFILE.SCIDATA) return datastore def get_ingestion_config(self): ''' Grab the ingestion configuration from the resource registry ''' # The ingestion configuration should have been created by the bootstrap service # which is configured through r2deploy.yml ingest_configs, _ = self.resource_registry.find_resources( restype=RT.IngestionConfiguration, id_only=True) return ingest_configs[0] def launch_producer(self, stream_id=''): ''' Launch the producer ''' pid = self.container.spawn_process( 'better_data_producer', 'ion.processes.data.example_data_producer', 'BetterDataProducer', {'process': { 'stream_id': stream_id }}) self.pids.append(pid) def make_simple_dataset(self): ''' Makes a stream, a stream definition and a dataset, the essentials for most of these tests ''' pdict_id = self.dataset_management.read_parameter_dictionary_by_name( 'ctd_parsed_param_dict', id_only=True) stream_def_id = self.pubsub_management.create_stream_definition( 'ctd data', parameter_dictionary_id=pdict_id) stream_id, route = self.pubsub_management.create_stream( 'ctd stream %i' % self.i, 'xp1', stream_definition_id=stream_def_id) dataset_id = self.create_dataset(pdict_id) self.get_datastore(dataset_id) self.i += 1 return stream_id, route, stream_def_id, dataset_id def publish_hifi(self, stream_id, stream_route, offset=0): ''' Publish deterministic data ''' pub = StandaloneStreamPublisher(stream_id, stream_route) stream_def = self.pubsub_management.read_stream_definition( stream_id=stream_id) stream_def_id = stream_def._id rdt = RecordDictionaryTool(stream_definition_id=stream_def_id) rdt['time'] = np.arange(10) + (offset * 10) 
rdt['temp'] = np.arange(10) + (offset * 10) pub.publish(rdt.to_granule()) def publish_fake_data(self, stream_id, route): ''' Make four granules ''' for i in xrange(4): self.publish_hifi(stream_id, route, i) def start_ingestion(self, stream_id, dataset_id): ''' Starts ingestion/persistence for a given dataset ''' ingest_config_id = self.get_ingestion_config() self.ingestion_management.persist_data_stream( stream_id=stream_id, ingestion_configuration_id=ingest_config_id, dataset_id=dataset_id) def stop_ingestion(self, stream_id): ingest_config_id = self.get_ingestion_config() self.ingestion_management.unpersist_data_stream( stream_id=stream_id, ingestion_configuration_id=ingest_config_id) def stop_all_ingestion(self): try: [self.stop_ingestion(sid) for sid in self.streams] except: pass def validate_granule_subscription(self, msg, route, stream_id): ''' Validation for granule format ''' if msg == {}: return rdt = RecordDictionaryTool.load_from_granule(msg) log.info('%s', rdt.pretty_print()) self.assertIsInstance( msg, Granule, 'Message is improperly formatted. 
(%s)' % type(msg)) self.event.set() def wait_until_we_have_enough_granules(self, dataset_id='', data_size=40): ''' Loops until there is a sufficient amount of data in the dataset ''' done = False with gevent.Timeout(40): while not done: extents = self.dataset_management.dataset_extents( dataset_id, 'time')[0] granule = self.data_retriever.retrieve_last_data_points( dataset_id, 1) rdt = RecordDictionaryTool.load_from_granule(granule) if rdt['time'] and rdt['time'][0] != rdt._pdict.get_context( 'time').fill_value and extents >= data_size: done = True else: gevent.sleep(0.2) #-------------------------------------------------------------------------------- # Test Methods #-------------------------------------------------------------------------------- @attr('SMOKE') def test_dm_end_2_end(self): #-------------------------------------------------------------------------------- # Set up a stream and have a mock instrument (producer) send data #-------------------------------------------------------------------------------- self.event.clear() # Get a precompiled parameter dictionary with basic ctd fields pdict_id = self.dataset_management.read_parameter_dictionary_by_name( 'ctd_parsed_param_dict', id_only=True) context_ids = self.dataset_management.read_parameter_contexts( pdict_id, id_only=True) # Add a field that supports binary data input. bin_context = ParameterContext('binary', param_type=ArrayType()) context_ids.append( self.dataset_management.create_parameter_context( 'binary', bin_context.dump())) # Add another field that supports dictionary elements. 
rec_context = ParameterContext('records', param_type=RecordType()) context_ids.append( self.dataset_management.create_parameter_context( 'records', rec_context.dump())) pdict_id = self.dataset_management.create_parameter_dictionary( 'replay_pdict', parameter_context_ids=context_ids, temporal_context='time') stream_definition = self.pubsub_management.create_stream_definition( 'ctd data', parameter_dictionary_id=pdict_id) stream_id, route = self.pubsub_management.create_stream( 'producer', exchange_point=self.exchange_point_name, stream_definition_id=stream_definition) #-------------------------------------------------------------------------------- # Start persisting the data on the stream # - Get the ingestion configuration from the resource registry # - Create the dataset # - call persist_data_stream to setup the subscription for the ingestion workers # on the stream that you specify which causes the data to be persisted #-------------------------------------------------------------------------------- ingest_config_id = self.get_ingestion_config() dataset_id = self.create_dataset(pdict_id) self.ingestion_management.persist_data_stream( stream_id=stream_id, ingestion_configuration_id=ingest_config_id, dataset_id=dataset_id) #-------------------------------------------------------------------------------- # Now the granules are ingesting and persisted #-------------------------------------------------------------------------------- self.launch_producer(stream_id) self.wait_until_we_have_enough_granules(dataset_id, 40) #-------------------------------------------------------------------------------- # Now get the data in one chunk using an RPC Call to start_retreive #-------------------------------------------------------------------------------- replay_data = self.data_retriever.retrieve(dataset_id) self.assertIsInstance(replay_data, Granule) rdt = RecordDictionaryTool.load_from_granule(replay_data) self.assertTrue((rdt['time'][:10] == np.arange(10)).all(), '%s' % 
rdt['time'][:]) self.assertTrue((rdt['binary'][:10] == np.array(['hi'] * 10, dtype='object')).all()) #-------------------------------------------------------------------------------- # Now to try the streamed approach #-------------------------------------------------------------------------------- replay_stream_id, replay_route = self.pubsub_management.create_stream( 'replay_out', exchange_point=self.exchange_point_name, stream_definition_id=stream_definition) self.replay_id, process_id = self.data_retriever.define_replay( dataset_id=dataset_id, stream_id=replay_stream_id) log.info('Process ID: %s', process_id) replay_client = ReplayClient(process_id) #-------------------------------------------------------------------------------- # Create the listening endpoint for the the retriever to talk to #-------------------------------------------------------------------------------- xp = self.container.ex_manager.create_xp(self.exchange_point_name) subscriber = StandaloneStreamSubscriber( self.exchange_space_name, self.validate_granule_subscription) self.queue_buffer.append(self.exchange_space_name) subscriber.start() subscriber.xn.bind(replay_route.routing_key, xp) self.data_retriever.start_replay_agent(self.replay_id) self.assertTrue(replay_client.await_agent_ready(5), 'The process never launched') replay_client.start_replay() self.assertTrue(self.event.wait(10)) subscriber.stop() self.data_retriever.cancel_replay_agent(self.replay_id) #-------------------------------------------------------------------------------- # Test the slicing capabilities #-------------------------------------------------------------------------------- granule = self.data_retriever.retrieve(dataset_id=dataset_id, query={'tdoa': slice(0, 5)}) rdt = RecordDictionaryTool.load_from_granule(granule) b = rdt['time'] == np.arange(5) self.assertTrue(b.all() if not isinstance(b, bool) else b) self.streams.append(stream_id) self.stop_ingestion(stream_id) @unittest.skip('Doesnt work') @attr('LOCOINT') 
@unittest.skipIf(os.getenv('CEI_LAUNCH_TEST', False), 'Skip test while in CEI LAUNCH mode')
def test_replay_pause(self):
    """Exercise start / pause / resume / stop on a replay process.

    A coverage is filled directly with 100 time/temp values, a replay is
    defined over that dataset, and a standalone subscriber is bound to the
    replay route. The test asserts on ``self.event`` after each replay
    command (presumably set by ``validate_granule_subscription`` -- that
    callback is not visible here, confirm).
    """
    # Get a precompiled parameter dictionary with basic ctd fields
    pdict_id = self.dataset_management.read_parameter_dictionary_by_name(
        'ctd_parsed_param_dict', id_only=True)
    context_ids = self.dataset_management.read_parameter_contexts(
        pdict_id, id_only=True)

    # Add a field that supports binary data input.
    bin_context = ParameterContext('binary', param_type=ArrayType())
    context_ids.append(
        self.dataset_management.create_parameter_context(
            'binary', bin_context.dump()))
    # Add another field that supports dictionary elements.
    rec_context = ParameterContext('records', param_type=RecordType())
    context_ids.append(
        self.dataset_management.create_parameter_context(
            'records', rec_context.dump()))

    pdict_id = self.dataset_management.create_parameter_dictionary(
        'replay_pdict', parameter_context_ids=context_ids,
        temporal_context='time')

    stream_def_id = self.pubsub_management.create_stream_definition(
        'replay_stream', parameter_dictionary_id=pdict_id)
    replay_stream, replay_route = self.pubsub_management.create_stream(
        'replay', 'xp1', stream_definition_id=stream_def_id)
    dataset_id = self.create_dataset(pdict_id)
    scov = DatasetManagementService._get_coverage(dataset_id)

    # Write the records straight into the coverage (no ingestion involved).
    bb = CoverageCraft(scov)
    bb.rdt['time'] = np.arange(100)
    bb.rdt['temp'] = np.random.random(100) + 30
    bb.sync_with_granule()

    # This invalidates it for multi-host configurations
    DatasetManagementService._persist_coverage(dataset_id, bb.coverage)

    # Set up the subscriber to verify the data
    subscriber = StandaloneStreamSubscriber(
        self.exchange_space_name, self.validate_granule_subscription)
    xp = self.container.ex_manager.create_xp('xp1')
    self.queue_buffer.append(self.exchange_space_name)
    subscriber.start()
    subscriber.xn.bind(replay_route.routing_key, xp)

    # Set up the replay agent and the client wrapper

    # 1) Define the Replay (dataset and stream to publish on)
    self.replay_id, process_id = self.data_retriever.define_replay(
        dataset_id=dataset_id, stream_id=replay_stream)
    # 2) Make a client to interact with the process (optionally provide it
    #    a process to bind with)
    replay_client = ReplayClient(process_id)
    # 3) Start the agent (launch the process)
    self.data_retriever.start_replay_agent(self.replay_id)
    # 4) Start replaying...
    replay_client.start_replay()

    # Wait till we get some granules
    self.assertTrue(self.event.wait(5))

    # We got granules, pause the replay, clear the queue and allow the
    # process to finish consuming
    replay_client.pause_replay()
    gevent.sleep(1)
    subscriber.xn.purge()
    self.event.clear()

    # Make sure there's no remaining messages being consumed
    self.assertFalse(self.event.wait(1))

    # Resume the replay and wait until we start getting granules again
    replay_client.resume_replay()
    self.assertTrue(self.event.wait(5))

    # Stop the replay, clear the queues
    replay_client.stop_replay()
    gevent.sleep(1)
    subscriber.xn.purge()
    self.event.clear()

    # Make sure that it did indeed stop
    self.assertFalse(self.event.wait(1))

    subscriber.stop()


def test_retrieve_and_transform(self):
    """Retrieve a dataset through the L2 salinity transform.

    Publishes two 10-record granules, waits for them to be ingested, then
    calls ``retrieve`` with the salinity transform module/class and checks
    that no returned ``salinity`` value equals zero.
    """
    # Make a simple dataset and start ingestion, pretty standard stuff.
    ctd_stream_id, route, stream_def_id, dataset_id = self.make_simple_dataset()
    self.start_ingestion(ctd_stream_id, dataset_id)

    # Stream definition for the salinity data
    salinity_pdict_id = self.dataset_management.read_parameter_dictionary_by_name(
        'ctd_parsed_param_dict', id_only=True)
    sal_stream_def_id = self.pubsub_management.create_stream_definition(
        'sal data', parameter_dictionary_id=salinity_pdict_id)

    rdt = RecordDictionaryTool(stream_definition_id=stream_def_id)
    rdt['time'] = np.arange(10)
    rdt['temp'] = np.random.randn(10) * 10 + 30
    rdt['conductivity'] = np.random.randn(10) * 2 + 10
    rdt['pressure'] = np.random.randn(10) * 1 + 12

    publisher = StandaloneStreamPublisher(ctd_stream_id, route)
    publisher.publish(rdt.to_granule())

    # Second granule reuses the same values with the next ten timestamps.
    rdt['time'] = np.arange(10, 20)
    publisher.publish(rdt.to_granule())

    self.wait_until_we_have_enough_granules(dataset_id, 20)

    granule = self.data_retriever.retrieve(
        dataset_id, None, None,
        'ion.processes.data.transforms.ctd.ctd_L2_salinity',
        'CTDL2SalinityTransformAlgorithm',
        kwargs=dict(params=sal_stream_def_id))
    rdt = RecordDictionaryTool.load_from_granule(granule)
    for i in rdt['salinity']:
        self.assertNotEqual(i, 0)

    self.streams.append(ctd_stream_id)
    self.stop_ingestion(ctd_stream_id)


def test_last_granule(self):
    """Check ``retrieve_last_data_points`` for the last 10 and last 5 points.

    Both checks go through ``poll`` because the retrieved data may not yet
    contain the expected timestamps on the first attempt.
    """
    stream_id, route, stream_def_id, dataset_id = self.make_simple_dataset()
    self.start_ingestion(stream_id, dataset_id)

    self.publish_hifi(stream_id, route, 0)
    self.publish_hifi(stream_id, route, 1)

    self.wait_until_we_have_enough_granules(dataset_id, 20)  # I just need two

    def verifier():
        replay_granule = self.data_retriever.retrieve_last_data_points(
            dataset_id, 10)
        rdt = RecordDictionaryTool.load_from_granule(replay_granule)
        comp = rdt['time'] == np.arange(10) + 10
        # A plain bool means the comparison did not happen element-wise
        # (presumably a length mismatch -- confirm); treat it as "not yet".
        if not isinstance(comp, bool):
            return comp.all()
        return False

    self.assertTrue(poll(verifier))

    def verify_points():
        replay_granule = self.data_retriever.retrieve_last_data_points(
            dataset_id, 5)
        rdt = RecordDictionaryTool.load_from_granule(replay_granule)
        comp = rdt['time'] == np.arange(15, 20)
        if not isinstance(comp, bool):
            return comp.all()
        return False

    self.assertTrue(poll(verify_points))

    self.streams.append(stream_id)
    self.stop_ingestion(stream_id)


def test_replay_with_parameters(self):
    """Retrieve with a time window, a stride and a parameter subset.

    After fake data is published and a ``DatasetModified`` event has been
    seen, the query result must contain only ``time`` and ``temp`` with
    ``time`` equal to ``arange(0, 20, 2)``; dataset extents for both
    parameters must be at least 20.
    """
    # ----------------------------------------------------------------------
    # Create the configurations and the dataset
    # ----------------------------------------------------------------------
    # Get a precompiled parameter dictionary with basic ctd fields
    pdict_id = self.dataset_management.read_parameter_dictionary_by_name(
        'ctd_parsed_param_dict', id_only=True)
    context_ids = self.dataset_management.read_parameter_contexts(
        pdict_id, id_only=True)

    # Add a field that supports binary data input.
    bin_context = ParameterContext('binary', param_type=ArrayType())
    context_ids.append(
        self.dataset_management.create_parameter_context(
            'binary', bin_context.dump()))
    # Add another field that supports dictionary elements.
    rec_context = ParameterContext('records', param_type=RecordType())
    context_ids.append(
        self.dataset_management.create_parameter_context(
            'records', rec_context.dump()))

    pdict_id = self.dataset_management.create_parameter_dictionary(
        'replay_pdict', parameter_context_ids=context_ids,
        temporal_context='time')

    stream_def_id = self.pubsub_management.create_stream_definition(
        'replay_stream', parameter_dictionary_id=pdict_id)

    stream_id, route = self.pubsub_management.create_stream(
        'replay_with_params', exchange_point=self.exchange_point_name,
        stream_definition_id=stream_def_id)
    config_id = self.get_ingestion_config()
    dataset_id = self.create_dataset(pdict_id)
    self.ingestion_management.persist_data_stream(
        stream_id=stream_id, ingestion_configuration_id=config_id,
        dataset_id=dataset_id)

    dataset_modified = Event()

    def cb(*args, **kwargs):
        dataset_modified.set()

    es = EventSubscriber(event_type=OT.DatasetModified, callback=cb,
                         origin=dataset_id)
    es.start()
    self.addCleanup(es.stop)

    self.publish_fake_data(stream_id, route)

    self.assertTrue(dataset_modified.wait(30))

    # 2208988800 is subtracted from both bounds; the same constant is
    # described elsewhere in this file as the offset between unix and NTP
    # time.
    query = {
        'start_time': 0 - 2208988800,
        'end_time': 20 - 2208988800,
        'stride_time': 2,
        'parameters': ['time', 'temp']
    }
    retrieved_data = self.data_retriever.retrieve(dataset_id=dataset_id,
                                                  query=query)

    rdt = RecordDictionaryTool.load_from_granule(retrieved_data)
    comp = np.arange(0, 20, 2) == rdt['time']
    self.assertTrue(comp.all(), '%s' % rdt.pretty_print())
    self.assertEqual(set(rdt.iterkeys()), set(['time', 'temp']))

    extents = self.dataset_management.dataset_extents(
        dataset_id=dataset_id, parameters=['time', 'temp'])
    self.assertTrue(extents['time'] >= 20)
    self.assertTrue(extents['temp'] >= 20)

    self.streams.append(stream_id)
    self.stop_ingestion(stream_id)


def test_repersist_data(self):
    """Unpersist and re-persist a stream, then verify all data is present.

    Two granules are published before and two after the
    unpersist/persist cycle; the retrieved ``time`` values must eventually
    equal ``arange(0, 40)``.
    """
    stream_id, route, stream_def_id, dataset_id = self.make_simple_dataset()
    self.start_ingestion(stream_id, dataset_id)
    self.publish_hifi(stream_id, route, 0)
    self.publish_hifi(stream_id, route, 1)
    self.wait_until_we_have_enough_granules(dataset_id, 20)

    config_id = self.get_ingestion_config()
    self.ingestion_management.unpersist_data_stream(
        stream_id=stream_id, ingestion_configuration_id=config_id)
    self.ingestion_management.persist_data_stream(
        stream_id=stream_id, ingestion_configuration_id=config_id,
        dataset_id=dataset_id)

    self.publish_hifi(stream_id, route, 2)
    self.publish_hifi(stream_id, route, 3)
    self.wait_until_we_have_enough_granules(dataset_id, 40)

    success = False
    # exception=False makes the context manager swallow its own timeout, so
    # a timed-out wait is reported by the assertion below rather than as a
    # raw Timeout raised out of the loop.
    with gevent.timeout.Timeout(5, False):
        while not success:
            replay_granule = self.data_retriever.retrieve(dataset_id)
            rdt = RecordDictionaryTool.load_from_granule(replay_granule)

            comp = rdt['time'] == np.arange(0, 40)
            if not isinstance(comp, bool):
                success = comp.all()
            gevent.sleep(1)

    self.assertTrue(success, 'retrieved time values never matched arange(0, 40)')

    self.streams.append(stream_id)
    self.stop_ingestion(stream_id)


@attr('LOCOINT')
@unittest.skipIf(os.getenv('CEI_LAUNCH_TEST', False), 'Host requires file-system access to coverage files, CEI mode does not support.')
def test_correct_time(self):
    """Check that temporal bounds are reported in unix time.

    The coverage is filled with 20 NTP timestamps ending "now"; the bounds
    returned by ``dataset_temporal_bounds`` must be within 2 seconds of
    the corresponding unix timestamps.
    """
    # There are 2208988800 seconds between Jan 1 1900 and Jan 1 1970, i.e.
    # the conversion factor between unix and NTP time
    unix_now = np.floor(time.time())
    ntp_now = unix_now + 2208988800

    unix_ago = unix_now - 20
    ntp_ago = unix_ago + 2208988800

    stream_id, route, stream_def_id, dataset_id = self.make_simple_dataset()
    coverage = DatasetManagementService._get_coverage(dataset_id)
    coverage.insert_timesteps(20)
    coverage.set_parameter_values('time', np.arange(ntp_ago, ntp_now))

    temporal_bounds = self.dataset_management.dataset_temporal_bounds(
        dataset_id)

    self.assertTrue(np.abs(temporal_bounds[0] - unix_ago) < 2)
    self.assertTrue(np.abs(temporal_bounds[1] - unix_now) < 2)


@attr('LOCOINT')
@unittest.skipIf(os.getenv('CEI_LAUNCH_TEST', False), 'Host requires file-system access to coverage files, CEI mode does not support.')
def test_empty_coverage_time(self):
    """An empty coverage reports the time fill value for both bounds."""
    stream_id, route, stream_def_id, dataset_id = self.make_simple_dataset()
    coverage = DatasetManagementService._get_coverage(dataset_id)
    temporal_bounds = self.dataset_management.dataset_temporal_bounds(
        dataset_id)
    self.assertEqual(
        [coverage.get_parameter_context('time').fill_value] * 2,
        temporal_bounds)


@attr('LOCOINT')
@unittest.skipIf(os.getenv('CEI_LAUNCH_TEST', False), 'Host requires file-system access to coverage files, CEI mode does not support.')
def test_out_of_band_retrieve(self):
    """Retrieve via ``DataRetrieverService.retrieve_oob`` after ingestion."""
    # Setup the environment
    stream_id, route, stream_def_id, dataset_id = self.make_simple_dataset()
    self.start_ingestion(stream_id, dataset_id)

    # Fill the dataset
    self.publish_fake_data(stream_id, route)
    self.wait_until_we_have_enough_granules(dataset_id, 40)

    # Retrieve the data
    granule = DataRetrieverService.retrieve_oob(dataset_id)
    rdt = RecordDictionaryTool.load_from_granule(granule)
    self.assertTrue((rdt['time'] == np.arange(40)).all())


@attr('LOCOINT')
@unittest.skipIf(os.getenv('CEI_LAUNCH_TEST', False), 'Host requires file-system access to coverage files, CEI mode does not support.')
def test_retrieve_cache(self):
    """Check population, refresh and eviction of the retrieve cache.

    Covers: a first ``_get_coverage`` call adds the dataset to
    ``_retrieve_cache``; a call after the refresh interval changes the
    stored age; touching all ten datasets removes the first one; and the
    entry disappears again after new data is ingested for that dataset.
    """
    # _refresh_interval is class-level state. Register the restore before
    # changing it so later tests never see this test's values.
    self.addCleanup(setattr, DataRetrieverService, '_refresh_interval',
                    DataRetrieverService._refresh_interval)
    DataRetrieverService._refresh_interval = 1

    datasets = [self.make_simple_dataset() for _ in xrange(10)]
    for stream_id, route, stream_def_id, dataset_id in datasets:
        coverage = DatasetManagementService._get_coverage(dataset_id)
        coverage.insert_timesteps(10)
        coverage.set_parameter_values('time', np.arange(10))
        coverage.set_parameter_values('temp', np.arange(10))

    # Verify cache hit and refresh
    dataset_ids = [i[3] for i in datasets]
    self.assertTrue(
        dataset_ids[0] not in DataRetrieverService._retrieve_cache)
    DataRetrieverService._get_coverage(dataset_ids[0])  # Hit the cache
    cov, age = DataRetrieverService._retrieve_cache[dataset_ids[0]]
    # Verify that it was hit and it's now in there
    self.assertTrue(dataset_ids[0] in DataRetrieverService._retrieve_cache)

    gevent.sleep(DataRetrieverService._refresh_interval + 0.2)

    DataRetrieverService._get_coverage(dataset_ids[0])  # Hit the cache
    cov, age2 = DataRetrieverService._retrieve_cache[dataset_ids[0]]
    self.assertTrue(age2 != age)

    # Touch every dataset; the first one is expected to be gone afterwards.
    for dataset_id in dataset_ids:
        DataRetrieverService._get_coverage(dataset_id)

    self.assertTrue(
        dataset_ids[0] not in DataRetrieverService._retrieve_cache)

    stream_id, route, stream_def, dataset_id = datasets[0]
    self.start_ingestion(stream_id, dataset_id)
    DataRetrieverService._get_coverage(dataset_id)

    self.assertTrue(dataset_id in DataRetrieverService._retrieve_cache)

    DataRetrieverService._refresh_interval = 100
    self.publish_hifi(stream_id, route, 1)
    self.wait_until_we_have_enough_granules(dataset_id, data_size=20)

    event = gevent.event.Event()
    # exception=False: on timeout fall through to the assertion instead of
    # raising Timeout out of the loop.
    with gevent.Timeout(20, False):
        while not event.wait(0.1):
            if dataset_id not in DataRetrieverService._retrieve_cache:
                event.set()

    self.assertTrue(event.is_set())


@unittest.skip('Outdated due to ingestion retry')
@attr('LOCOINT')
@unittest.skipIf(os.getenv('CEI_LAUNCH_TEST', False), 'Host requires file-system access to coverage files, CEI mode does not support.')
def test_ingestion_failover(self):
    """Corrupt the coverage master file and expect an ``ExceptionEvent``."""
    stream_id, route, stream_def_id, dataset_id = self.make_simple_dataset()
    self.start_ingestion(stream_id, dataset_id)

    event = Event()

    def cb(*args, **kwargs):
        event.set()

    sub = EventSubscriber(event_type="ExceptionEvent", callback=cb,
                          origin="stream_exception")
    sub.start()
    # Stop the subscriber even when an assertion below fails.
    self.addCleanup(sub.stop)

    self.publish_fake_data(stream_id, route)
    self.wait_until_we_have_enough_granules(dataset_id, 40)

    file_path = DatasetManagementService._get_coverage_path(dataset_id)
    master_file = os.path.join(file_path, '%s_master.hdf5' % dataset_id)

    # Overwrite the master file with text so it is no longer valid HDF5.
    with open(master_file, 'w') as f:
        f.write('this will crash HDF')

    self.publish_hifi(stream_id, route, 5)

    self.assertTrue(event.wait(10))
class DatasetManagementIntTest(IonIntegrationTestCase):
    """Integration tests for dataset, parameter context, parameter function
    and parameter dictionary operations of the dataset management service.
    """

    def setUp(self):
        """Start the container, deploy r2deploy.yml and create the clients."""
        self._start_container()
        self.container.start_rel_from_url('res/deploy/r2deploy.yml')

        self.resource_registry = ResourceRegistryServiceClient()
        self.dataset_management = DatasetManagementServiceClient()

    def test_dataset_crud(self):
        """Create a dataset, rename it, and read the new name back."""
        dms = self.dataset_management
        pdict_id = dms.read_parameter_dictionary_by_name(
            'ctd_parsed_param_dict', id_only=True)
        tdom, sdom = time_series_domain()
        spatial = sdom.dump()
        temporal = tdom.dump()
        dataset_id = dms.create_dataset(
            name='ctd_dataset',
            parameter_dictionary_id=pdict_id,
            spatial_domain=spatial,
            temporal_domain=temporal)

        original = dms.read_dataset(dataset_id)
        self.assertEqual(original.name, 'ctd_dataset')

        original.name = 'something different'
        dms.update_dataset(original)

        reread = dms.read_dataset(dataset_id)
        self.assertEqual(original.name, reread.name)

    def test_context_crud(self):
        """Read one created context, convert it, delete it, expect NotFound."""
        dms = self.dataset_management
        context_id = self.create_contexts().pop()

        resource = dms.read_parameter_context(context_id)
        coverage_context = DatasetManagementService.get_coverage_parameter(resource)
        self.assertIsInstance(coverage_context, CoverageParameterContext)

        dms.delete_parameter_context(context_id)

        with self.assertRaises(NotFound):
            dms.read_parameter_context(context_id)

    def test_pfunc_crud(self):
        """Build a dictionary from the functional contexts and check that the
        CONDWAT_L1 function resource converts to a NumexprFunction.
        """
        dms = self.dataset_management
        contexts, funcs = self.create_pfuncs()
        context_ids = list(contexts.itervalues())

        pdict_id = dms.create_parameter_dictionary(
            name='functional_pdict',
            parameter_context_ids=context_ids,
            temporal_context='time')
        self.addCleanup(dms.delete_parameter_dictionary, pdict_id)

        function_resource = dms.read_parameter_function(funcs['CONDWAT_L1'])
        coverage_function = DatasetManagementService.get_coverage_function(function_resource)
        self.assertIsInstance(coverage_function, NumexprFunction)

    def test_pdict_crud(self):
        """Create a parameter dictionary, inspect it, delete it, expect NotFound."""
        dms = self.dataset_management
        context_ids = self.create_contexts()
        pdict_res_id = dms.create_parameter_dictionary(
            name='pdict1',
            parameter_context_ids=context_ids,
            temporal_context='time')

        stored_context_ids = dms.read_parameter_contexts(
            parameter_dictionary_id=pdict_res_id, id_only=True)

        pdict = DatasetManagementService.get_parameter_dictionary(pdict_res_id)
        self.assertIsInstance(pdict, ParameterDictionary)
        self.assertTrue('time_test' in pdict)
        self.assertEqual(pdict.identifier, pdict_res_id)

        self.assertEqual(set(stored_context_ids), set(context_ids))

        dms.delete_parameter_dictionary(parameter_dictionary_id=pdict_res_id)
        with self.assertRaises(NotFound):
            dms.read_parameter_dictionary(parameter_dictionary_id=pdict_res_id)

    def create_contexts(self):
        """Create five float32 'quantity' test parameters.

        Returns the list of created ids, in creation order.
        """
        # (name, units) pairs; every other attribute is shared.
        specs = (
            ('condictivity_test', '1'),
            ('pressure_test', 'Pa'),
            ('salinity_test', 'psu'),
            ('temp_test', 'degree_C'),
            ('time_test', 'seconds since 1970-01-01'),
        )
        created = []
        for ctx_name, ctx_units in specs:
            ctx = ParameterContext(
                name=ctx_name,
                parameter_type='quantity',
                value_encoding='float32',
                units=ctx_units,
                fill_value=0)
            created.append(self.dataset_management.create_parameter(ctx))
        return created

    def create_pfuncs(self):
        """Create the parameters and parameter functions for a functional
        parameter dictionary.

        Returns ``(contexts, funcs)``: two dicts mapping a key to the id of
        the created parameter context / parameter function.
        """
        contexts = {}
        funcs = {}
        dms = self.dataset_management

        def add_context(key, **attrs):
            # Create a float32 parameter and record its id under *key*.
            ctx = ParameterContext(value_encoding='float32', **attrs)
            contexts[key] = dms.create_parameter(ctx)

        def add_function(**attrs):
            # Create a parameter function and hand back its id.
            return dms.create_parameter_function(ParameterFunction(**attrs))

        add_context('TIME', name='TIME', parameter_type='quantity',
                    units='seconds since 1900-01-01', fill_value=0)
        add_context('LAT', name='LAT', parameter_type='sparse',
                    units='degrees_north', fill_value=-9999.)
        add_context('LON', name='LON', parameter_type="sparse",
                    units='degrees_east', fill_value=-9999)

        # Independent Parameters

        # Temperature - values expected to be the decimal results of conversion from hex
        add_context('TEMPWAT_L0', name='TEMPWAT_L0', parameter_type='quantity',
                    units='deg_C')
        # Conductivity - values expected to be the decimal results of conversion from hex
        add_context('CONDWAT_L0', name='CONDWAT_L0', parameter_type='quantity',
                    units='S m-1')
        # Pressure - values expected to be the decimal results of conversion from hex
        add_context('PRESWAT_L0', name='PRESWAT_L0', parameter_type='quantity',
                    units='dbar')

        # Dependent Parameters

        # TEMPWAT_L1 = (TEMPWAT_L0 / 10000) - 10
        expr_id = add_function(name='TEMPWAT_L1', function_type=PFT.NUMEXPR,
                               function='(T / 10000) - 10', args=['T'])
        funcs['TEMPWAT_L1'] = expr_id
        add_context('TEMPWAT_L1', name='TEMPWAT_L1', parameter_type='function',
                    parameter_function_id=expr_id,
                    parameter_function_map={'T': 'TEMPWAT_L0'},
                    units='deg_C')

        # CONDWAT_L1 = (CONDWAT_L0 / 100000) - 0.5
        expr_id = add_function(name='CONDWAT_L1', function_type=PFT.NUMEXPR,
                               function='(C / 100000) - 0.5', args=['C'])
        funcs['CONDWAT_L1'] = expr_id
        add_context('CONDWAT_L1', name='CONDWAT_L1', parameter_type='function',
                    parameter_function_id=expr_id,
                    parameter_function_map={'C': 'CONDWAT_L0'},
                    units='S m-1')

        # Equation uses p_range, which is a calibration coefficient - Fixing to 679.34040721
        # PRESWAT_L1 = (PRESWAT_L0 * p_range / (0.85 * 65536)) - (0.05 * p_range)
        expr_id = add_function(name='PRESWAT_L1', function_type=PFT.NUMEXPR,
                               function='(P * p_range / (0.85 * 65536)) - (0.05 * p_range)',
                               args=['P', 'p_range'])
        funcs['PRESWAT_L1'] = expr_id
        add_context('PRESWAT_L1', name='PRESWAT_L1', parameter_type='function',
                    parameter_function_id=expr_id,
                    parameter_function_map={'P': 'PRESWAT_L0', 'p_range': 679.34040721},
                    units='dbar')

        # Intermediate C*10 parameter consumed by PRACSAL below. Its function
        # id is not recorded in funcs, and its context is stored under the
        # key 'C10' although the parameter itself is named 'c10'.
        expr_id = add_function(name='condwat10', function_type=PFT.NUMEXPR,
                               function='C*10', args=['C'])
        add_context('C10', name='c10', parameter_type='function',
                    parameter_function_id=expr_id,
                    parameter_function_map={'C': 'CONDWAT_L1'},
                    units='1')

        # Density & practical salinity calculated using the Gibbs Seawater library - available via python-gsw project:
        #       https://code.google.com/p/python-gsw/ & http://pypi.python.org/pypi/gsw/3.0.1

        # PRACSAL = gsw.SP_from_C((CONDWAT_L1 * 10), TEMPWAT_L1, PRESWAT_L1)
        expr_id = add_function(name='PRACSAL', function_type=PFT.PYTHON,
                               owner='gsw', function='SP_from_C',
                               args=['C', 't', 'p'])
        funcs['PRACSAL'] = expr_id
        add_context('PRACSAL', name='PRACSAL', parameter_type='function',
                    parameter_function_id=expr_id,
                    parameter_function_map={'C': 'c10', 't': 'TEMPWAT_L1', 'p': 'PRESWAT_L1'},
                    units='g kg-1')

        # absolute_salinity = gsw.SA_from_SP(PRACSAL, PRESWAT_L1, longitude, latitude)
        # conservative_temperature = gsw.CT_from_t(absolute_salinity, TEMPWAT_L1, PRESWAT_L1)
        # DENSITY = gsw.rho(absolute_salinity, conservative_temperature, PRESWAT_L1)

        return contexts, funcs

    def test_verify_contexts(self):
        """Every context of ctd_parsed_param_dict exposes the expected keys."""
        pdict_id = self.dataset_management.read_parameter_dictionary_by_name(
            name='ctd_parsed_param_dict', id_only=True)
        pcontexts = self.dataset_management.read_parameter_contexts(
            parameter_dictionary_id=pdict_id)

        required = (
            'fill_value',
            'reference_urls',
            'internal_name',
            'display_name',
            'standard_name',
            'ooi_short_name',
            'description',
            'precision',
        )
        for pcontext in pcontexts:
            for key in required:
                self.assertTrue(key in pcontext)
class BulkIngestBase(object):
    """Shared skeleton for bulk-ingest tests (awkward and non-obvious!).

    Subclasses supply the data-specific hooks; this class drives the flow:

    * ``test_data_ingest`` creates the resources and calls ``start_agent``
    * ``start_agent`` spawns and starts the agent, then calls ``start_listener``
    * ``start_listener`` subscribes to events; each ``DatasetModified`` event
      calls ``get_retrieve_client``, where the subclass asserts on the data

    See replacement TestPreloadThenLoadDataset: a little more declarative and
    straight-forward, but much slower (requires preload).

    Besides the hooks below, the code also reads ``self.name``,
    ``self.description``, ``self.EDA_NAME``, ``self.EDA_MOD`` and
    ``self.EDA_CLS`` and calls ``self.setup_resources()``, none of which are
    defined here.
    """

    def setUp(self):
        """Start the container, create the service clients, build contexts."""
        self._start_container()
        self.container.start_rel_from_url("res/deploy/r2deploy.yml")

        self.pubsub_management = PubsubManagementServiceClient()
        self.dataset_management = DatasetManagementServiceClient()
        self.data_product_management = DataProductManagementServiceClient()
        self.data_acquisition_management = DataAcquisitionManagementServiceClient()
        self.data_retriever = DataRetrieverServiceClient()
        self.process_dispatch_client = ProcessDispatcherServiceClient(node=self.container.node)
        self.resource_registry = self.container.resource_registry

        self.context_ids = self.build_param_contexts()
        self.setup_resources()

    # ---- hooks that subclasses must provide --------------------------------

    def build_param_contexts(self):
        """Hook: return the parameter context ids for the dictionary."""
        raise NotImplementedError("build_param_contexts must be implemented in child classes")

    def create_external_dataset(self):
        """Hook: create the external dataset and return its id."""
        raise NotImplementedError("create_external_dataset must be implemented in child classes")

    def get_dvr_config(self):
        """Hook: return the driver configuration used by ``start_agent``."""
        raise NotImplementedError("get_dvr_config must be implemented in child classes")

    def get_retrieve_client(self, dataset_id=""):
        """Hook: called for each DatasetModified event of ``dataset_id``."""
        raise NotImplementedError("get_retrieve_client must be implemented in child classes")

    # ---- test flow ---------------------------------------------------------

    def test_data_ingest(self):
        """Create every resource the agent needs, then start the agent."""
        self.pdict_id = self.create_parameter_dict(self.name)
        self.stream_def_id = self.create_stream_def(self.name, self.pdict_id)
        self.data_product_id = self.create_data_product(
            self.name, self.description, self.stream_def_id)
        self.dataset_id = self.get_dataset_id(self.data_product_id)
        self.stream_id, self.route = self.get_stream_id_and_route(self.data_product_id)
        self.external_dataset_id = self.create_external_dataset()
        self.data_producer_id = self.register_external_dataset(self.external_dataset_id)
        self.start_agent()

    def create_parameter_dict(self, name=""):
        """Create a parameter dictionary over ``self.context_ids``; return its id."""
        dictionary_args = dict(
            name=name,
            parameter_context_ids=self.context_ids,
            temporal_context="time",
        )
        return self.dataset_management.create_parameter_dictionary(**dictionary_args)

    def create_stream_def(self, name="", pdict_id=""):
        """Create a stream definition for ``pdict_id``; return its id."""
        return self.pubsub_management.create_stream_definition(
            name=name, parameter_dictionary_id=pdict_id)

    def create_data_product(self, name="", description="", stream_def_id=""):
        """Create a data product, activate its persistence, return its id."""
        tdom, sdom = time_series_domain()
        temporal = tdom.dump()
        spatial = sdom.dump()
        product = DataProduct(
            name=name,
            description=description,
            processing_level_code="Parsed_Canonical",
            temporal_domain=temporal,
            spatial_domain=spatial,
        )

        product_id = self.data_product_management.create_data_product(
            data_product=product, stream_definition_id=stream_def_id
        )
        self.data_product_management.activate_data_product_persistence(product_id)
        return product_id

    def register_external_dataset(self, external_dataset_id=""):
        """Register the external dataset and return the service's result."""
        dams = self.data_acquisition_management
        return dams.register_external_data_set(external_dataset_id=external_dataset_id)

    def get_dataset_id(self, data_product_id=""):
        """Return the first object linked to the product via ``hasDataset``."""
        found = self.resource_registry.find_objects(
            subject=data_product_id, predicate="hasDataset", id_only=True
        )
        dataset_ids = found[0]
        return dataset_ids[0]

    def get_stream_id_and_route(self, data_product_id):
        """Return ``(stream_id, route)`` for the product's first stream."""
        found = self.resource_registry.find_objects(
            data_product_id, PRED.hasStream, RT.Stream, id_only=True)
        stream_id = found[0][0]
        route = self.pubsub_management.read_stream_route(stream_id)
        # Disabled: self.create_logger(self.name, stream_id)
        return stream_id, route

    def start_agent(self):
        """Spawn the agent process, drive it into autosample, then listen."""
        agent_config = {
            "driver_config": self.get_dvr_config(),
            "stream_config": {},
            "agent": {"resource_id": self.external_dataset_id},
            "test_mode": True,
        }

        self._ia_pid = self.container.spawn_process(
            name=self.EDA_NAME, module=self.EDA_MOD, cls=self.EDA_CLS, config=agent_config
        )

        self._ia_client = ResourceAgentClient(self.external_dataset_id, process=FakeProcess())

        # Agent-level commands, sent in this order.
        for agent_event in (
            ResourceAgentEvent.INITIALIZE,
            ResourceAgentEvent.GO_ACTIVE,
            ResourceAgentEvent.RUN,
        ):
            self._ia_client.execute_agent(AgentCommand(command=agent_event))

        autosample = AgentCommand(command=DriverEvent.START_AUTOSAMPLE)
        self._ia_client.execute_resource(command=autosample)

        self.start_listener(self.dataset_id)

    def stop_agent(self):
        """Stop autosampling, reset the agent and terminate its process."""
        self._ia_client.execute_resource(AgentCommand(command=DriverEvent.STOP_AUTOSAMPLE))
        self._ia_client.execute_agent(AgentCommand(command=ResourceAgentEvent.RESET))
        self.container.terminate_process(self._ia_pid)

    def start_listener(self, dataset_id=""):
        """Subscribe to dataset events, wait up to 120 s, then stop the agent."""
        ingest_done = Event()

        # callback to use retrieve to get data from the coverage
        def on_dataset_modified(*args, **kwargs):
            self.get_retrieve_client(dataset_id=dataset_id)

        # callback to keep execution going once dataset has been fully ingested
        def on_acquire_finished(*args, **kwargs):
            ingest_done.set()

        modified_sub = EventSubscriber(
            event_type=OT.DatasetModified, callback=on_dataset_modified, origin=dataset_id)
        modified_sub.start()

        lifecycle_sub = EventSubscriber(
            event_type=OT.DeviceCommonLifecycleEvent,
            callback=on_acquire_finished,
            origin="BaseDataHandler._acquire_sample",
        )
        lifecycle_sub.start()

        self.addCleanup(modified_sub.stop)
        self.addCleanup(lifecycle_sub.stop)

        # let it go for up to 120 seconds, then stop the agent and reset it
        ingest_done.wait(120)
        self.stop_agent()

    def create_logger(self, name, stream_id=""):
        """Schedule a StreamGranuleLogger process for ``stream_id``; return its pid."""
        # logger process
        logger_definition = ProcessDefinition(name=name + "_logger")
        logger_definition.executable = {
            "module": "ion.processes.data.stream_granule_logger",
            "class": "StreamGranuleLogger",
        }

        dispatcher = self.process_dispatch_client
        procdef_id = dispatcher.create_process_definition(
            process_definition=logger_definition
        )
        process_config = {"process": {"stream_id": stream_id}}
        return dispatcher.schedule_process(
            process_definition_id=procdef_id, configuration=process_config
        )
class RecordDictionaryIntegrationTest(IonIntegrationTestCase):
    """Integration tests for RecordDictionaryTool against a running container.

    Covers granule round-trips over a stream, serialization types, field
    filtering through ``available_fields``, parameter functions and lookup
    values.  The ``create_*`` methods are helpers that build parameter
    dictionaries / stream definitions and return a RecordDictionaryTool.
    """

    def setUp(self):
        """Start the container, deploy ``r2deploy.yml`` and create the clients."""
        self._start_container()
        self.container.start_rel_from_url('res/deploy/r2deploy.yml')
        self.dataset_management = DatasetManagementServiceClient()
        self.pubsub_management = PubsubManagementServiceClient()

        # Expectations read by verify_incoming; test_granule fills them in
        # before publishing.
        self.rdt = None
        self.data_producer_id = None
        self.provider_metadata_update = None
        self.event = Event()

    def verify_incoming(self, m, r, s):
        """Subscriber callback: check a received granule against ``self`` state.

        ``m`` is the received granule.  ``r`` and ``s`` are accepted but not
        used (presumably route and stream id, as the verifier in
        test_serialize_compatability names them).  Sets ``self.event`` once
        every check has passed.
        """
        rdt = RecordDictionaryTool.load_from_granule(m)
        for k, v in rdt.iteritems():
            np.testing.assert_array_equal(v, self.rdt[k])
        self.assertEqual(m.data_producer_id, self.data_producer_id)
        self.assertEqual(m.provider_metadata_update, self.provider_metadata_update)
        self.assertIsNotNone(m.creation_timestamp)
        self.event.set()

    def test_serialize_compatability(self):
        """Every populated field of a published granule is a numpy array."""
        ph = ParameterHelper(self.dataset_management, self.addCleanup)
        pdict_id = ph.create_extended_parsed()

        stream_def_id = self.pubsub_management.create_stream_definition(
            'ctd extended', parameter_dictionary_id=pdict_id)
        self.addCleanup(self.pubsub_management.delete_stream_definition, stream_def_id)

        stream_id, route = self.pubsub_management.create_stream(
            'ctd1', 'xp1', stream_definition_id=stream_def_id)
        self.addCleanup(self.pubsub_management.delete_stream, stream_id)

        # Cleanups run last-in-first-out: deactivate, then delete.
        sub_id = self.pubsub_management.create_subscription('sub1', stream_ids=[stream_id])
        self.addCleanup(self.pubsub_management.delete_subscription, sub_id)
        self.pubsub_management.activate_subscription(sub_id)
        self.addCleanup(self.pubsub_management.deactivate_subscription, sub_id)

        verified = Event()

        def verifier(msg, route, stream_id):
            # Raw granule payload: anything that is set must be an ndarray.
            for v in msg.record_dictionary.itervalues():
                if v is not None:
                    self.assertIsInstance(v, np.ndarray)
            # Same through the RecordDictionaryTool view, both by iteration
            # and by item access.
            rdt = RecordDictionaryTool.load_from_granule(msg)
            for k, v in rdt.iteritems():
                self.assertIsInstance(rdt[k], np.ndarray)
                self.assertIsInstance(v, np.ndarray)
            verified.set()

        subscriber = StandaloneStreamSubscriber('sub1', callback=verifier)
        subscriber.start()
        self.addCleanup(subscriber.stop)

        publisher = StandaloneStreamPublisher(stream_id, route)
        rdt = RecordDictionaryTool(stream_definition_id=stream_def_id)
        ph.fill_rdt(rdt, 10)
        publisher.publish(rdt.to_granule())
        self.assertTrue(verified.wait(60))

    def test_granule(self):
        """Publish a granule and verify it on receipt; check fill values and reload."""
        pdict_id = self.dataset_management.read_parameter_dictionary_by_name(
            'ctd_parsed_param_dict', id_only=True)
        stream_def_id = self.pubsub_management.create_stream_definition(
            'ctd', parameter_dictionary_id=pdict_id,
            stream_configuration={'reference_designator': "GA03FLMA-RI001-13-CTDMOG999"})
        pdict = DatasetManagementService.get_parameter_dictionary_by_name('ctd_parsed_param_dict')
        self.addCleanup(self.pubsub_management.delete_stream_definition, stream_def_id)

        stream_id, route = self.pubsub_management.create_stream(
            'ctd_stream', 'xp1', stream_definition_id=stream_def_id)
        self.addCleanup(self.pubsub_management.delete_stream, stream_id)
        publisher = StandaloneStreamPublisher(stream_id, route)

        subscriber = StandaloneStreamSubscriber('sub', self.verify_incoming)
        subscriber.start()
        self.addCleanup(subscriber.stop)

        # Register the subscription teardown as cleanups so a failing
        # assertion below cannot leak an active subscription.  LIFO order
        # gives deactivate -> delete -> subscriber.stop, the same order the
        # calls used to run in.
        subscription_id = self.pubsub_management.create_subscription('sub', stream_ids=[stream_id])
        self.addCleanup(self.pubsub_management.delete_subscription, subscription_id)
        self.pubsub_management.activate_subscription(subscription_id)
        self.addCleanup(self.pubsub_management.deactivate_subscription, subscription_id)

        rdt = RecordDictionaryTool(stream_definition_id=stream_def_id)
        rdt['time'] = np.arange(10)
        rdt['temp'] = np.random.randn(10) * 10 + 30
        rdt['pressure'] = [20] * 10

        self.assertEqual(set(pdict.keys()), set(rdt.fields))
        self.assertEqual(pdict.temporal_parameter_name, rdt.temporal_parameter)
        self.assertEqual(rdt._stream_config['reference_designator'], "GA03FLMA-RI001-13-CTDMOG999")

        # Expectations for verify_incoming must be in place before publishing.
        self.rdt = rdt
        self.data_producer_id = 'data_producer'
        self.provider_metadata_update = {1: 1}

        publisher.publish(rdt.to_granule(data_producer_id='data_producer',
                                         provider_metadata_update={1: 1}))
        self.assertTrue(self.event.wait(10))

        # An all-None array reads back as None; a partially-None array has
        # its None entries replaced by the field's fill value.
        rdt = RecordDictionaryTool(stream_definition_id=stream_def_id)
        rdt['time'] = np.array([None, None, None])
        self.assertIsNone(rdt['time'])
        rdt['time'] = np.array([None, 1, 2])
        self.assertEqual(rdt['time'][0], rdt.fill_value('time'))

        # Round-trip through a granule when built from a stream definition object.
        stream_def_obj = self.pubsub_management.read_stream_definition(stream_def_id)
        rdt = RecordDictionaryTool(stream_definition=stream_def_obj)
        rdt['time'] = np.arange(20)
        rdt['temp'] = np.arange(20)
        granule = rdt.to_granule()
        rdt = RecordDictionaryTool.load_from_granule(granule)
        np.testing.assert_array_equal(rdt['time'], np.arange(20))
        np.testing.assert_array_equal(rdt['temp'], np.arange(20))

    def test_filter(self):
        """``available_fields`` restricts the RDT and survives a granule round-trip."""
        pdict_id = self.dataset_management.read_parameter_dictionary_by_name(
            'ctd_parsed_param_dict', id_only=True)
        filtered_stream_def_id = self.pubsub_management.create_stream_definition(
            'filtered', parameter_dictionary_id=pdict_id, available_fields=['time', 'temp'])
        self.addCleanup(self.pubsub_management.delete_stream_definition, filtered_stream_def_id)

        rdt = RecordDictionaryTool(stream_definition_id=filtered_stream_def_id)
        self.assertEqual(rdt._available_fields, ['time', 'temp'])
        rdt['time'] = np.arange(20)
        rdt['temp'] = np.arange(20)
        # 'pressure' is not among the available fields, so it must be rejected.
        with self.assertRaises(KeyError):
            rdt['pressure'] = np.arange(20)

        granule = rdt.to_granule()
        rdt2 = RecordDictionaryTool.load_from_granule(granule)
        self.assertEqual(rdt._available_fields, rdt2._available_fields)
        self.assertEqual(rdt.fields, rdt2.fields)
        for k, _ in rdt.iteritems():
            self.assertTrue(np.array_equal(rdt[k], rdt2[k]))

    def test_rdt_param_funcs(self):
        """Derived CTD parameters are evaluated through chained parameter functions."""
        param_funcs = {
            'identity': {
                'function_type': PFT.PYTHON,
                'owner': 'ion_functions.data.interpolation',
                'function': 'identity',
                'args': ['x'],
            },
            'ctd_tempwat': {
                'function_type': PFT.PYTHON,
                'owner': 'ion_functions.data.ctd_functions',
                'function': 'ctd_sbe37im_tempwat',
                'args': ['t0'],
            },
            'ctd_preswat': {
                'function_type': PFT.PYTHON,
                'owner': 'ion_functions.data.ctd_functions',
                'function': 'ctd_sbe37im_preswat',
                'args': ["p0", "p_range_psia"],
            },
            'ctd_condwat': {
                'function_type': PFT.PYTHON,
                'owner': 'ion_functions.data.ctd_functions',
                'function': 'ctd_sbe37im_condwat',
                'args': ['c0'],
            },
            'ctd_pracsal': {
                'function_type': PFT.PYTHON,
                'owner': 'ion_functions.data.ctd_functions',
                'function': 'ctd_pracsal',
                'args': ['c', 't', 'p'],
            },
            'ctd_density': {
                'function_type': PFT.PYTHON,
                'owner': 'ion_functions.data.ctd_functions',
                'function': 'ctd_density',
                'args': ['SP', 't', 'p', 'lat', 'lon'],
            },
        }

        # All functions are registered first so the contexts can refer to them.
        pfunc_ids = {}
        for name, param_def in param_funcs.items():
            paramfunc = ParameterFunction(name, **param_def)
            pfunc_ids[name] = self.dataset_management.create_parameter_function(paramfunc)

        params = {
            'time': {
                'parameter_type': 'quantity',
                'value_encoding': 'float64',
                'units': 'seconds since 1900-01-01',
            },
            'temperature_counts': {
                'parameter_type': 'quantity',
                'value_encoding': 'float32',
                'units': '1',
            },
            'pressure_counts': {
                'parameter_type': 'quantity',
                'value_encoding': 'float32',
                'units': '1',
            },
            'conductivity_counts': {
                'parameter_type': 'quantity',
                'value_encoding': 'float32',
                'units': '1',
            },
            'temperature': {
                'parameter_type': 'function',
                'parameter_function_id': pfunc_ids['ctd_tempwat'],
                'parameter_function_map': {'t0': 'temperature_counts'},
                'value_encoding': 'float32',
                'units': 'deg_C',
            },
            'pressure': {
                'parameter_type': 'function',
                'parameter_function_id': pfunc_ids['ctd_preswat'],
                'parameter_function_map': {'p0': 'pressure_counts', 'p_range_psia': 679.34040721},
                'value_encoding': 'float32',
                'units': 'dbar',
            },
            'conductivity': {
                'parameter_type': 'function',
                'parameter_function_id': pfunc_ids['ctd_condwat'],
                'parameter_function_map': {'c0': 'conductivity_counts'},
                'value_encoding': 'float32',
                'units': 'Sm-1',
            },
            'salinity': {
                'parameter_type': 'function',
                'parameter_function_id': pfunc_ids['ctd_pracsal'],
                'parameter_function_map': {'c': 'conductivity', 't': 'temperature', 'p': 'pressure'},
                'value_encoding': 'float32',
                'units': '1',
            },
            'density': {
                'parameter_type': 'function',
                'parameter_function_id': pfunc_ids['ctd_density'],
                'parameter_function_map': {
                    'SP': 'salinity',
                    't': 'temperature',
                    'p': 'pressure',
                    'lat': 'lat',
                    'lon': 'lon',
                },
                'value_encoding': 'float32',
                # NOTE(review): 'kg m-1' looks like a typo for 'kg m-3' (the
                # expected value below is ~1001); left untouched in case the
                # unit string is compared service-side -- confirm and fix.
                'units': 'kg m-1',
            },
            'lat': {
                'parameter_type': 'sparse',
                'value_encoding': 'float32',
                'units': 'degrees_north',
            },
            'lon': {
                'parameter_type': 'sparse',
                'value_encoding': 'float32',
                'units': 'degrees_east',
            },
        }

        param_dict = {}
        for name, param in params.items():
            pcontext = ParameterContext(name, **param)
            param_dict[name] = self.dataset_management.create_parameter(pcontext)

        pdict_id = self.dataset_management.create_parameter_dictionary(
            'ctd_test', list(param_dict.values()), 'time')
        stream_def_id = self.pubsub_management.create_stream_definition(
            'ctd_test', parameter_dictionary_id=pdict_id)

        rdt = RecordDictionaryTool(stream_definition_id=stream_def_id)
        rdt['time'] = [0]
        rdt['temperature_counts'] = [280000]
        rdt['conductivity_counts'] = [100000]
        rdt['pressure_counts'] = [2789]
        rdt['lat'] = [45]
        rdt['lon'] = [-71]

        np.testing.assert_allclose(rdt['density'], np.array([1001.00543606]))

    def test_rdt_lookup(self):
        """Lookup values stay unset until stored values exist and are fetched."""
        rdt = self.create_lookup_rdt()

        self.assertIn('offset_a', rdt.lookup_values())
        self.assertNotIn('offset_b', rdt.lookup_values())

        rdt['time'] = [0]
        rdt['temp'] = [10.0]
        rdt['offset_a'] = [2.0]
        self.assertIsNone(rdt['offset_b'])
        self.assertEqual(rdt.lookup_values(), ['offset_a'])
        np.testing.assert_array_almost_equal(rdt['calibrated'], np.array([12.0]))

        # Store the remaining coefficients, then pull them into the RDT.
        svm = StoredValueManager(self.container)
        svm.stored_value_cas('coefficient_document', {'offset_b': 2.0})
        svm.stored_value_cas("GA03FLMA-RI001-13-CTDMOG999_OFFSETC", {'offset_c': 3.0})
        rdt.fetch_lookup_values()

        np.testing.assert_array_equal(rdt['offset_b'], np.array([2.0]))
        np.testing.assert_array_equal(rdt['calibrated_b'], np.array([14.0]))
        np.testing.assert_array_equal(rdt['offset_c'], np.array([3.0]))

    def create_rdt(self):
        """Return an RDT backed by the contexts built in ``create_pfuncs``."""
        contexts, _ = self.create_pfuncs()
        context_ids = list(contexts.values())

        pdict_id = self.dataset_management.create_parameter_dictionary(
            name='functional_pdict', parameter_context_ids=context_ids, temporal_context='test_TIME')
        self.addCleanup(self.dataset_management.delete_parameter_dictionary, pdict_id)

        stream_def_id = self.pubsub_management.create_stream_definition(
            'functional', parameter_dictionary_id=pdict_id)
        self.addCleanup(self.pubsub_management.delete_stream_definition, stream_def_id)

        return RecordDictionaryTool(stream_definition_id=stream_def_id)

    def create_lookup_rdt(self):
        """Return an RDT over ParameterHelper's lookups dictionary."""
        ph = ParameterHelper(self.dataset_management, self.addCleanup)
        pdict_id = ph.create_lookups()

        stream_def_id = self.pubsub_management.create_stream_definition(
            'lookup', parameter_dictionary_id=pdict_id,
            stream_configuration={'reference_designator': "GA03FLMA-RI001-13-CTDMOG999"})
        self.addCleanup(self.pubsub_management.delete_stream_definition, stream_def_id)

        return RecordDictionaryTool(stream_definition_id=stream_def_id)

    def create_pfuncs(self):
        """Register L0/L1/L2 CTD parameter contexts and functions.

        Returns a ``(contexts, funcs)`` pair of dicts mapping names to the
        ids returned by the dataset management service.  ``funcs`` holds the
        TEMPWAT_L1, CONDWAT_L1, PRESWAT_L1 and PRACSAL functions; the ``c10``
        helper function is registered but not reported there.
        """
        contexts = {}
        funcs = {}

        def add_context(name, **spec):
            # Register one ParameterContext and record its id under its name.
            ctxt_id = self.dataset_management.create_parameter(ParameterContext(name=name, **spec))
            contexts[name] = ctxt_id
            return ctxt_id

        def add_function(name, **spec):
            # Register one ParameterFunction and hand back its id.
            return self.dataset_management.create_parameter_function(
                ParameterFunction(name=name, **spec))

        add_context('TIME', parameter_type='quantity', value_encoding='float64',
                    units='seconds since 1900-01-01')
        add_context('LAT', parameter_type="sparse", value_encoding='float32',
                    units='degrees_north')
        add_context('LON', parameter_type='sparse', value_encoding='float32',
                    units='degrees_east')

        # Independent Parameters

        # Temperature - values expected to be the decimal results of conversion from hex
        add_context('TEMPWAT_L0', parameter_type='quantity', value_encoding='float32',
                    units='deg_C')
        # Conductivity - values expected to be the decimal results of conversion from hex
        add_context('CONDWAT_L0', parameter_type='quantity', value_encoding='float32',
                    units='S m-1')
        # Pressure - values expected to be the decimal results of conversion from hex
        add_context('PRESWAT_L0', parameter_type='quantity', value_encoding='float32',
                    units='dbar')

        # Dependent Parameters

        # TEMPWAT_L1 = (TEMPWAT_L0 / 10000) - 10
        funcs['TEMPWAT_L1'] = add_function(
            'TEMPWAT_L1', function_type=PFT.NUMEXPR, function='(T / 10000) - 10', args=['T'])
        add_context('TEMPWAT_L1', parameter_type='function',
                    parameter_function_id=funcs['TEMPWAT_L1'],
                    parameter_function_map={'T': 'TEMPWAT_L0'},
                    value_encoding='float32', units='deg_C')

        # CONDWAT_L1 = (CONDWAT_L0 / 100000) - 0.5
        funcs['CONDWAT_L1'] = add_function(
            'CONDWAT_L1', function_type=PFT.NUMEXPR, function='(C / 100000) - 0.5', args=['C'])
        add_context('CONDWAT_L1', parameter_type='function',
                    parameter_function_id=funcs['CONDWAT_L1'],
                    parameter_function_map={'C': 'CONDWAT_L0'},
                    value_encoding='float32', units='S m-1')

        # Equation uses p_range, which is a calibration coefficient - Fixing to 679.34040721
        # PRESWAT_L1 = (PRESWAT_L0 * p_range / (0.85 * 65536)) - (0.05 * p_range)
        funcs['PRESWAT_L1'] = add_function(
            'PRESWAT_L1', function='(P * p_range / (0.85 * 65536)) - (0.05 * p_range)',
            function_type=PFT.NUMEXPR, args=['P', 'p_range'])
        # NOTE(review): 'S m-1' is a conductivity unit; PRESWAT_L0 uses 'dbar'.
        # Looks like a copy/paste slip -- left as-is pending confirmation.
        add_context('PRESWAT_L1', parameter_type='function',
                    parameter_function_id=funcs['PRESWAT_L1'],
                    parameter_function_map={'P': 'PRESWAT_L0', 'p_range': 679.34040721},
                    value_encoding='float32', units='S m-1')

        # Density & practical salinity calculated using the Gibbs Seawater library -
        # available via python-gsw project:
        #   https://code.google.com/p/python-gsw/ & http://pypi.python.org/pypi/gsw/3.0.1

        # PRACSAL = gsw.SP_from_C((CONDWAT_L1 * 10), TEMPWAT_L1, PRESWAT_L1)
        funcs['PRACSAL'] = add_function(
            'PRACSAL', function_type=PFT.PYTHON, function='SP_from_C', owner='gsw',
            args=['C', 't', 'p'])

        # c10 supplies the (CONDWAT_L1 * 10) input.  It gets its own id
        # variable: previously it overwrote the PRACSAL function id, which
        # bound the PRACSAL context below to 'C*10' instead of SP_from_C.
        c10_func_id = add_function('c10', function_type=PFT.NUMEXPR, function='C*10', args=['C'])
        add_context('c10', parameter_type='function',
                    parameter_function_id=c10_func_id,
                    parameter_function_map={'C': 'CONDWAT_L1'},
                    value_encoding='float32', units='1')

        # A magic function that may or may not exist actually forms the line below at runtime.
        add_context('PRACSAL', parameter_type='function',
                    parameter_function_id=funcs['PRACSAL'],
                    parameter_function_map={'C': 'c10', 't': 'TEMPWAT_L1', 'p': 'PRESWAT_L1'},
                    value_encoding='float32', units='g kg-1')

        # absolute_salinity = gsw.SA_from_SP(PRACSAL, PRESWAT_L1, longitude, latitude)
        # conservative_temperature = gsw.CT_from_t(absolute_salinity, TEMPWAT_L1, PRESWAT_L1)
        # DENSITY = gsw.rho(absolute_salinity, conservative_temperature, PRESWAT_L1)
        owner = 'gsw'
        abs_sal_expr = PythonFunction('abs_sal', owner, 'SA_from_SP',
                                      ['PRACSAL', 'PRESWAT_L1', 'LON', 'LAT'])
        cons_temp_expr = PythonFunction('cons_temp', owner, 'CT_from_t',
                                        [abs_sal_expr, 'TEMPWAT_L1', 'PRESWAT_L1'])
        dens_expr = PythonFunction('DENSITY', owner, 'rho',
                                   [abs_sal_expr, cons_temp_expr, 'PRESWAT_L1'])
        dens_ctxt = CoverageParameterContext('DENSITY', param_type=ParameterFunctionType(dens_expr),
                                             variability=VariabilityEnum.TEMPORAL)
        dens_ctxt.uom = 'kg m-3'
        dens_ctxt_id = self.dataset_management.create_parameter_context(
            name='DENSITY', parameter_context=dens_ctxt.dump())
        self.addCleanup(self.dataset_management.delete_parameter_context, dens_ctxt_id)
        contexts['DENSITY'] = dens_ctxt_id

        return contexts, funcs
class PubsubManagementIntTest(IonIntegrationTestCase): def setUp(self): self._start_container() self.container.start_rel_from_url("res/deploy/r2deploy.yml") self.pubsub_management = PubsubManagementServiceClient() self.resource_registry = ResourceRegistryServiceClient() self.dataset_management = DatasetManagementServiceClient() self.queue_cleanup = list() self.exchange_cleanup = list() def tearDown(self): for queue in self.queue_cleanup: xn = self.container.ex_manager.create_xn_queue(queue) xn.delete() for exchange in self.exchange_cleanup: xp = self.container.ex_manager.create_xp(exchange) xp.delete() def test_stream_def_crud(self): # Test Creation pdict = DatasetManagementService.get_parameter_dictionary_by_name("ctd_parsed_param_dict") stream_definition_id = self.pubsub_management.create_stream_definition( "ctd parsed", parameter_dictionary_id=pdict.identifier ) # Make sure there is an assoc self.assertTrue( self.resource_registry.find_associations( subject=stream_definition_id, predicate=PRED.hasParameterDictionary, object=pdict.identifier, id_only=True, ) ) # Test Reading stream_definition = self.pubsub_management.read_stream_definition(stream_definition_id) self.assertTrue(PubsubManagementService._compare_pdicts(pdict.dump(), stream_definition.parameter_dictionary)) # Test Deleting self.pubsub_management.delete_stream_definition(stream_definition_id) self.assertFalse( self.resource_registry.find_associations( subject=stream_definition_id, predicate=PRED.hasParameterDictionary, object=pdict.identifier, id_only=True, ) ) # Test comparisons in_stream_definition_id = self.pubsub_management.create_stream_definition( "L0 products", parameter_dictionary=pdict.identifier, available_fields=["time", "temp", "conductivity", "pressure"], ) self.addCleanup(self.pubsub_management.delete_stream_definition, in_stream_definition_id) out_stream_definition_id = in_stream_definition_id self.assertTrue( self.pubsub_management.compare_stream_definition(in_stream_definition_id, 
out_stream_definition_id) ) self.assertTrue( self.pubsub_management.compatible_stream_definitions(in_stream_definition_id, out_stream_definition_id) ) out_stream_definition_id = self.pubsub_management.create_stream_definition( "L2 Products", parameter_dictionary=pdict.identifier, available_fields=["time", "salinity", "density"] ) self.addCleanup(self.pubsub_management.delete_stream_definition, out_stream_definition_id) self.assertFalse( self.pubsub_management.compare_stream_definition(in_stream_definition_id, out_stream_definition_id) ) self.assertTrue( self.pubsub_management.compatible_stream_definitions(in_stream_definition_id, out_stream_definition_id) ) def test_validate_stream_defs(self): # test no input incoming_pdict_id = self._get_pdict(["time", "lat", "lon", "TEMPWAT_L0", "CONDWAT_L0", "PRESWAT_L0"]) outgoing_pdict_id = self._get_pdict(["DENSITY", "PRACSAL", "TEMPWAT_L1", "CONDWAT_L1", "PRESWAT_L1"]) available_fields_in = [] available_fields_out = [] incoming_stream_def_id = self.pubsub_management.create_stream_definition( "in_sd_0", parameter_dictionary_id=incoming_pdict_id, available_fields=available_fields_in ) outgoing_stream_def_id = self.pubsub_management.create_stream_definition( "out_sd_0", parameter_dictionary_id=outgoing_pdict_id, available_fields=available_fields_out ) result = self.pubsub_management.validate_stream_defs(incoming_stream_def_id, outgoing_stream_def_id) self.assertFalse(result) # test input with no output incoming_pdict_id = self._get_pdict(["time", "lat", "lon", "TEMPWAT_L0", "CONDWAT_L0", "PRESWAT_L0"]) outgoing_pdict_id = self._get_pdict(["DENSITY", "PRACSAL", "TEMPWAT_L1", "CONDWAT_L1", "PRESWAT_L1"]) available_fields_in = ["time", "lat", "lon", "TEMPWAT_L0", "CONDWAT_L0", "PRESWAT_L0"] available_fields_out = [] incoming_stream_def_id = self.pubsub_management.create_stream_definition( "in_sd_1", parameter_dictionary_id=incoming_pdict_id, available_fields=available_fields_in ) outgoing_stream_def_id = 
self.pubsub_management.create_stream_definition( "out_sd_1", parameter_dictionary_id=outgoing_pdict_id, available_fields=available_fields_out ) result = self.pubsub_management.validate_stream_defs(incoming_stream_def_id, outgoing_stream_def_id) self.assertTrue(result) # test available field missing parameter context definition -- missing PRESWAT_L0 incoming_pdict_id = self._get_pdict(["time", "lat", "lon", "TEMPWAT_L0", "CONDWAT_L0"]) outgoing_pdict_id = self._get_pdict(["DENSITY", "PRACSAL", "TEMPWAT_L1", "CONDWAT_L1", "PRESWAT_L1"]) available_fields_in = ["time", "lat", "lon", "TEMPWAT_L0", "CONDWAT_L0", "PRESWAT_L0"] available_fields_out = ["DENSITY"] incoming_stream_def_id = self.pubsub_management.create_stream_definition( "in_sd_2", parameter_dictionary_id=incoming_pdict_id, available_fields=available_fields_in ) outgoing_stream_def_id = self.pubsub_management.create_stream_definition( "out_sd_2", parameter_dictionary_id=outgoing_pdict_id, available_fields=available_fields_out ) result = self.pubsub_management.validate_stream_defs(incoming_stream_def_id, outgoing_stream_def_id) self.assertFalse(result) # test l1 from l0 incoming_pdict_id = self._get_pdict(["time", "lat", "lon", "TEMPWAT_L0", "CONDWAT_L0", "PRESWAT_L0"]) outgoing_pdict_id = self._get_pdict(["TEMPWAT_L1", "CONDWAT_L1", "PRESWAT_L1"]) available_fields_in = ["time", "lat", "lon", "TEMPWAT_L0", "CONDWAT_L0", "PRESWAT_L0"] available_fields_out = ["TEMPWAT_L1", "CONDWAT_L1", "PRESWAT_L1"] incoming_stream_def_id = self.pubsub_management.create_stream_definition( "in_sd_3", parameter_dictionary_id=incoming_pdict_id, available_fields=available_fields_in ) outgoing_stream_def_id = self.pubsub_management.create_stream_definition( "out_sd_3", parameter_dictionary_id=outgoing_pdict_id, available_fields=available_fields_out ) result = self.pubsub_management.validate_stream_defs(incoming_stream_def_id, outgoing_stream_def_id) self.assertTrue(result) # test l2 from l0 incoming_pdict_id = 
self._get_pdict(["time", "lat", "lon", "TEMPWAT_L0", "CONDWAT_L0", "PRESWAT_L0"]) outgoing_pdict_id = self._get_pdict(["TEMPWAT_L1", "CONDWAT_L1", "PRESWAT_L1", "DENSITY", "PRACSAL"]) available_fields_in = ["time", "lat", "lon", "TEMPWAT_L0", "CONDWAT_L0", "PRESWAT_L0"] available_fields_out = ["DENSITY", "PRACSAL"] incoming_stream_def_id = self.pubsub_management.create_stream_definition( "in_sd_4", parameter_dictionary_id=incoming_pdict_id, available_fields=available_fields_in ) outgoing_stream_def_id = self.pubsub_management.create_stream_definition( "out_sd_4", parameter_dictionary_id=outgoing_pdict_id, available_fields=available_fields_out ) result = self.pubsub_management.validate_stream_defs(incoming_stream_def_id, outgoing_stream_def_id) self.assertTrue(result) # test Ln from L0 incoming_pdict_id = self._get_pdict(["time", "lat", "lon", "TEMPWAT_L0", "CONDWAT_L0", "PRESWAT_L0"]) outgoing_pdict_id = self._get_pdict(["DENSITY", "PRACSAL", "TEMPWAT_L1", "CONDWAT_L1", "PRESWAT_L1"]) available_fields_in = ["time", "lat", "lon", "TEMPWAT_L0", "CONDWAT_L0", "PRESWAT_L0"] available_fields_out = ["DENSITY", "PRACSAL", "TEMPWAT_L1", "CONDWAT_L1", "PRESWAT_L1"] incoming_stream_def_id = self.pubsub_management.create_stream_definition( "in_sd_5", parameter_dictionary_id=incoming_pdict_id, available_fields=available_fields_in ) outgoing_stream_def_id = self.pubsub_management.create_stream_definition( "out_sd_5", parameter_dictionary_id=outgoing_pdict_id, available_fields=available_fields_out ) result = self.pubsub_management.validate_stream_defs(incoming_stream_def_id, outgoing_stream_def_id) self.assertTrue(result) # test L2 from L1 incoming_pdict_id = self._get_pdict(["time", "lat", "lon", "TEMPWAT_L1", "CONDWAT_L1", "PRESWAT_L1"]) outgoing_pdict_id = self._get_pdict(["DENSITY", "PRACSAL", "TEMPWAT_L1", "CONDWAT_L1", "PRESWAT_L1"]) available_fields_in = ["time", "lat", "lon", "TEMPWAT_L1", "CONDWAT_L1", "PRESWAT_L1"] available_fields_out = ["DENSITY", "PRACSAL"] 
incoming_stream_def_id = self.pubsub_management.create_stream_definition( "in_sd_6", parameter_dictionary_id=incoming_pdict_id, available_fields=available_fields_in ) outgoing_stream_def_id = self.pubsub_management.create_stream_definition( "out_sd_6", parameter_dictionary_id=outgoing_pdict_id, available_fields=available_fields_out ) result = self.pubsub_management.validate_stream_defs(incoming_stream_def_id, outgoing_stream_def_id) self.assertTrue(result) # test L1 from L0 missing L0 incoming_pdict_id = self._get_pdict(["time", "lat", "lon"]) outgoing_pdict_id = self._get_pdict(["TEMPWAT_L1", "CONDWAT_L1", "PRESWAT_L1"]) available_fields_in = ["time", "lat", "lon"] available_fields_out = ["DENSITY", "PRACSAL"] incoming_stream_def_id = self.pubsub_management.create_stream_definition( "in_sd_7", parameter_dictionary_id=incoming_pdict_id, available_fields=available_fields_in ) outgoing_stream_def_id = self.pubsub_management.create_stream_definition( "out_sd_7", parameter_dictionary_id=outgoing_pdict_id, available_fields=available_fields_out ) result = self.pubsub_management.validate_stream_defs(incoming_stream_def_id, outgoing_stream_def_id) self.assertFalse(result) # test L2 from L0 missing L0 incoming_pdict_id = self._get_pdict(["time", "lat", "lon"]) outgoing_pdict_id = self._get_pdict(["DENSITY", "PRACSAL", "TEMPWAT_L1", "CONDWAT_L1", "PRESWAT_L1"]) available_fields_in = ["time", "lat", "lon"] available_fields_out = ["DENSITY", "PRACSAL"] incoming_stream_def_id = self.pubsub_management.create_stream_definition( "in_sd_8", parameter_dictionary_id=incoming_pdict_id, available_fields=available_fields_in ) outgoing_stream_def_id = self.pubsub_management.create_stream_definition( "out_sd_8", parameter_dictionary_id=outgoing_pdict_id, available_fields=available_fields_out ) result = self.pubsub_management.validate_stream_defs(incoming_stream_def_id, outgoing_stream_def_id) self.assertFalse(result) # test L2 from L0 missing L1 incoming_pdict_id = 
self._get_pdict(["time", "lat", "lon", "TEMPWAT_L0", "CONDWAT_L0", "PRESWAT_L0"]) outgoing_pdict_id = self._get_pdict(["DENSITY", "PRACSAL"]) available_fields_in = ["time", "lat", "lon", "TEMPWAT_L0", "CONDWAT_L0", "PRESWAT_L0"] available_fields_out = ["DENSITY", "PRACSAL"] incoming_stream_def_id = self.pubsub_management.create_stream_definition( "in_sd_9", parameter_dictionary_id=incoming_pdict_id, available_fields=available_fields_in ) outgoing_stream_def_id = self.pubsub_management.create_stream_definition( "out_sd_9", parameter_dictionary_id=outgoing_pdict_id, available_fields=available_fields_out ) result = self.pubsub_management.validate_stream_defs(incoming_stream_def_id, outgoing_stream_def_id) self.assertFalse(result) def publish_on_stream(self, stream_id, msg): stream = self.pubsub_management.read_stream(stream_id) stream_route = stream.stream_route publisher = StandaloneStreamPublisher(stream_id=stream_id, stream_route=stream_route) publisher.publish(msg) def test_stream_crud(self): stream_def_id = self.pubsub_management.create_stream_definition("test_definition", stream_type="stream") topic_id = self.pubsub_management.create_topic(name="test_topic", exchange_point="test_exchange") self.exchange_cleanup.append("test_exchange") topic2_id = self.pubsub_management.create_topic(name="another_topic", exchange_point="outside") stream_id, route = self.pubsub_management.create_stream( name="test_stream", topic_ids=[topic_id, topic2_id], exchange_point="test_exchange", stream_definition_id=stream_def_id, ) topics, assocs = self.resource_registry.find_objects(subject=stream_id, predicate=PRED.hasTopic, id_only=True) self.assertEquals(topics, [topic_id]) defs, assocs = self.resource_registry.find_objects( subject=stream_id, predicate=PRED.hasStreamDefinition, id_only=True ) self.assertTrue(len(defs)) stream = self.pubsub_management.read_stream(stream_id) self.assertEquals(stream.name, "test_stream") self.pubsub_management.delete_stream(stream_id) with 
self.assertRaises(NotFound): self.pubsub_management.read_stream(stream_id) defs, assocs = self.resource_registry.find_objects( subject=stream_id, predicate=PRED.hasStreamDefinition, id_only=True ) self.assertFalse(len(defs)) topics, assocs = self.resource_registry.find_objects(subject=stream_id, predicate=PRED.hasTopic, id_only=True) self.assertFalse(len(topics)) self.pubsub_management.delete_topic(topic_id) self.pubsub_management.delete_topic(topic2_id) self.pubsub_management.delete_stream_definition(stream_def_id) def test_subscription_crud(self): stream_def_id = self.pubsub_management.create_stream_definition("test_definition", stream_type="stream") stream_id, route = self.pubsub_management.create_stream( name="test_stream", exchange_point="test_exchange", stream_definition_id=stream_def_id ) subscription_id = self.pubsub_management.create_subscription( name="test subscription", stream_ids=[stream_id], exchange_name="test_queue" ) self.exchange_cleanup.append("test_exchange") subs, assocs = self.resource_registry.find_objects( subject=subscription_id, predicate=PRED.hasStream, id_only=True ) self.assertEquals(subs, [stream_id]) res, _ = self.resource_registry.find_resources(restype=RT.ExchangeName, name="test_queue", id_only=True) self.assertEquals(len(res), 1) subs, assocs = self.resource_registry.find_subjects( object=subscription_id, predicate=PRED.hasSubscription, id_only=True ) self.assertEquals(subs[0], res[0]) subscription = self.pubsub_management.read_subscription(subscription_id) self.assertEquals(subscription.exchange_name, "test_queue") self.pubsub_management.delete_subscription(subscription_id) subs, assocs = self.resource_registry.find_objects( subject=subscription_id, predicate=PRED.hasStream, id_only=True ) self.assertFalse(len(subs)) subs, assocs = self.resource_registry.find_subjects( object=subscription_id, predicate=PRED.hasSubscription, id_only=True ) self.assertFalse(len(subs)) self.pubsub_management.delete_stream(stream_id) 
self.pubsub_management.delete_stream_definition(stream_def_id)
# NOTE: the statement above is the last line of a test method whose
# definition begins before this section; it is reproduced unchanged.


def test_move_before_activate(self):
    """Move a subscription to another queue before it is ever activated.

    Uses the resource registry to check that the subject associated with
    the subscription through ``PRED.hasSubscription`` is the ExchangeName
    resource "first_queue" after creation, and that after
    ``move_subscription`` there is exactly one such subject and it is
    "second_queue".
    """
    stream_id, _ = self.pubsub_management.create_stream(name="test_stream", exchange_point="test_xp")

    # --------------------------------------------------------------------------------
    # Test moving before activate
    # --------------------------------------------------------------------------------

    subscription_id = self.pubsub_management.create_subscription("first_queue", stream_ids=[stream_id])

    xn_ids, _ = self.resource_registry.find_resources(restype=RT.ExchangeName, name="first_queue", id_only=True)
    subjects, _ = self.resource_registry.find_subjects(
        object=subscription_id, predicate=PRED.hasSubscription, id_only=True
    )
    self.assertEqual(xn_ids[0], subjects[0])

    self.pubsub_management.move_subscription(subscription_id, exchange_name="second_queue")

    xn_ids, _ = self.resource_registry.find_resources(restype=RT.ExchangeName, name="second_queue", id_only=True)
    subjects, _ = self.resource_registry.find_subjects(
        object=subscription_id, predicate=PRED.hasSubscription, id_only=True
    )

    self.assertEqual(len(subjects), 1)
    self.assertEqual(subjects[0], xn_ids[0])

    self.pubsub_management.delete_subscription(subscription_id)
    self.pubsub_management.delete_stream(stream_id)


def test_move_activated_subscription(self):
    """Move an already-activated subscription and check delivery afterwards.

    Same registry checks as ``test_move_before_activate``; in addition a
    subscriber listening on "second_queue" must receive the message
    "verified" published on the stream after the move.
    """
    stream_id, route = self.pubsub_management.create_stream(name="test_stream", exchange_point="test_xp")

    # --------------------------------------------------------------------------------
    # Test moving after activate
    # --------------------------------------------------------------------------------

    subscription_id = self.pubsub_management.create_subscription("first_queue", stream_ids=[stream_id])
    self.pubsub_management.activate_subscription(subscription_id)

    xn_ids, _ = self.resource_registry.find_resources(restype=RT.ExchangeName, name="first_queue", id_only=True)
    subjects, _ = self.resource_registry.find_subjects(
        object=subscription_id, predicate=PRED.hasSubscription, id_only=True
    )
    self.assertEqual(xn_ids[0], subjects[0])

    self.verified = Event()

    def verify(m, r, s):
        # Subscriber callback: flag success once the expected payload arrives.
        self.assertEqual(m, "verified")
        self.verified.set()

    subscriber = StandaloneStreamSubscriber("second_queue", verify)
    subscriber.start()

    self.pubsub_management.move_subscription(subscription_id, exchange_name="second_queue")

    xn_ids, _ = self.resource_registry.find_resources(restype=RT.ExchangeName, name="second_queue", id_only=True)
    subjects, _ = self.resource_registry.find_subjects(
        object=subscription_id, predicate=PRED.hasSubscription, id_only=True
    )

    self.assertEqual(len(subjects), 1)
    self.assertEqual(subjects[0], xn_ids[0])

    publisher = StandaloneStreamPublisher(stream_id, route)
    publisher.publish("verified")

    self.assertTrue(self.verified.wait(2))

    # Stop the listener before tearing the subscription down, as the other
    # tests in this section do; previously it was left running.
    subscriber.stop()

    self.pubsub_management.deactivate_subscription(subscription_id)
    self.pubsub_management.delete_subscription(subscription_id)
    self.pubsub_management.delete_stream(stream_id)


def test_queue_cleanup(self):
    """Check that deleting a subscription removes its ExchangeName resource.

    Queues of any ExchangeName resources already named "queue1" are
    deleted first; then exactly one "queue1" resource must exist after
    ``create_subscription`` and none after ``delete_subscription``.
    """
    stream_id, _ = self.pubsub_management.create_stream("test_stream", "xp1")

    stale_xns, _ = self.resource_registry.find_resources(restype=RT.ExchangeName, name="queue1")
    for xn_obj in stale_xns:
        xn = self.container.ex_manager.create_xn_queue(xn_obj.name)
        xn.delete()

    subscription_id = self.pubsub_management.create_subscription("queue1", stream_ids=[stream_id])
    xn_ids, _ = self.resource_registry.find_resources(restype=RT.ExchangeName, name="queue1")
    self.assertEqual(len(xn_ids), 1)

    self.pubsub_management.delete_subscription(subscription_id)

    xn_ids, _ = self.resource_registry.find_resources(restype=RT.ExchangeName, name="queue1")
    self.assertEqual(len(xn_ids), 0)


def test_activation_and_deactivation(self):
    """Check that messages are delivered only while a subscription is active.

    A subscriber on "sub1" must not be called before activation, must be
    called after activation, must not be called after deactivation, and
    must be called again after re-activation.
    """
    stream_id, route = self.pubsub_management.create_stream("stream1", "xp1")
    subscription_id = self.pubsub_management.create_subscription("sub1", stream_ids=[stream_id])

    self.check1 = Event()

    def verifier(m, r, s):
        # Subscriber callback: record that a message was delivered.
        self.check1.set()

    subscriber = StandaloneStreamSubscriber("sub1", verifier)
    subscriber.start()

    publisher = StandaloneStreamPublisher(stream_id, route)
    publisher.publish("should not receive")
    self.assertFalse(self.check1.wait(0.25))

    self.pubsub_management.activate_subscription(subscription_id)

    publisher.publish("should receive")
    self.assertTrue(self.check1.wait(2))

    # Reset the flag so the next phase starts from a clean state.
    self.check1.clear()
    self.assertFalse(self.check1.is_set())

    self.pubsub_management.deactivate_subscription(subscription_id)

    publisher.publish("should not receive")
    self.assertFalse(self.check1.wait(0.5))

    self.pubsub_management.activate_subscription(subscription_id)

    publisher.publish("should receive")
    self.assertTrue(self.check1.wait(2))

    subscriber.stop()

    self.pubsub_management.deactivate_subscription(subscription_id)
    self.pubsub_management.delete_subscription(subscription_id)
    self.pubsub_management.delete_stream(stream_id)


def test_topic_crud(self):
    """Create, read and delete a topic; reading it afterwards raises NotFound."""
    topic_id = self.pubsub_management.create_topic(name="test_topic", exchange_point="test_xp")
    self.exchange_cleanup.append("test_xp")

    topic = self.pubsub_management.read_topic(topic_id)

    self.assertEqual(topic.name, "test_topic")
    self.assertEqual(topic.exchange_point, "test_xp")

    self.pubsub_management.delete_topic(topic_id)
    with self.assertRaises(NotFound):
        self.pubsub_management.read_topic(topic_id)


def test_full_pubsub(self):
    """Route messages from several streams to two subscriber queues.

    "subscription1" (queue "sub1") listens to the log and event streams;
    "subscription2" (queue "sub2") listens to the CTD stream and to the
    "global.data" exchange point.  A message on the log stream must reach
    sub1 and must not reach sub2.
    """
    self.sub1_sat = Event()
    self.sub2_sat = Event()

    def subscriber1(m, r, s):
        self.sub1_sat.set()

    def subscriber2(m, r, s):
        self.sub2_sat.set()

    sub1 = StandaloneStreamSubscriber("sub1", subscriber1)
    self.queue_cleanup.append(sub1.xn.queue)
    sub1.start()

    sub2 = StandaloneStreamSubscriber("sub2", subscriber2)
    self.queue_cleanup.append(sub2.xn.queue)
    sub2.start()

    log_topic = self.pubsub_management.create_topic("instrument_logs", exchange_point="instruments")
    science_topic = self.pubsub_management.create_topic("science_data", exchange_point="instruments")
    events_topic = self.pubsub_management.create_topic("notifications", exchange_point="events")

    log_stream, _ = self.pubsub_management.create_stream(
        "instrument1-logs", topic_ids=[log_topic], exchange_point="instruments"
    )
    ctd_stream, _ = self.pubsub_management.create_stream(
        "instrument1-ctd", topic_ids=[science_topic], exchange_point="instruments"
    )
    event_stream, _ = self.pubsub_management.create_stream(
        "notifications", topic_ids=[events_topic], exchange_point="events"
    )
    raw_stream, _ = self.pubsub_management.create_stream("temp", exchange_point="global.data")
    self.exchange_cleanup.extend(["instruments", "events", "global.data"])

    subscription1 = self.pubsub_management.create_subscription(
        "subscription1", stream_ids=[log_stream, event_stream], exchange_name="sub1"
    )
    subscription2 = self.pubsub_management.create_subscription(
        "subscription2", exchange_points=["global.data"], stream_ids=[ctd_stream], exchange_name="sub2"
    )

    self.pubsub_management.activate_subscription(subscription1)
    self.pubsub_management.activate_subscription(subscription2)

    self.publish_on_stream(log_stream, 1)
    self.assertTrue(self.sub1_sat.wait(4))
    self.assertFalse(self.sub2_sat.is_set())

    self.publish_on_stream(raw_stream, 1)
    # NOTE(review): sub1_sat was already set by the log-stream publish above
    # and is never cleared, so this wait cannot fail.  Presumably sub2_sat
    # (the queue bound to "global.data") was meant -- confirm before changing.
    self.assertTrue(self.sub1_sat.wait(4))

    sub1.stop()
    sub2.stop()


def test_topic_craziness(self):
    """Subscribe by topic on two topic trees and check which streams deliver.

    Every subscription delivers to the "sub1" queue, whose callback puts
    the message on ``self.msg_queue``.  The assertions establish that a
    subscription on a topic receives messages from streams attached to
    topics below it in the tree (e.g. topic1 <- stream1 on topics 7/4/5)
    and does not receive messages from a stream attached to a topic above
    it (e.g. topic9 does not see stream2 on topic8).
    """
    self.msg_queue = Queue()

    def subscriber1(m, r, s):
        self.msg_queue.put(m)

    sub1 = StandaloneStreamSubscriber("sub1", subscriber1)
    self.queue_cleanup.append(sub1.xn.queue)
    sub1.start()

    # Tree 1 (exchange point "xp1")
    topic1 = self.pubsub_management.create_topic("topic1", exchange_point="xp1")
    topic2 = self.pubsub_management.create_topic("topic2", exchange_point="xp1", parent_topic_id=topic1)
    topic3 = self.pubsub_management.create_topic("topic3", exchange_point="xp1", parent_topic_id=topic1)
    topic4 = self.pubsub_management.create_topic("topic4", exchange_point="xp1", parent_topic_id=topic2)
    topic5 = self.pubsub_management.create_topic("topic5", exchange_point="xp1", parent_topic_id=topic2)
    topic6 = self.pubsub_management.create_topic("topic6", exchange_point="xp1", parent_topic_id=topic3)
    topic7 = self.pubsub_management.create_topic("topic7", exchange_point="xp1", parent_topic_id=topic3)

    # Tree 2 (exchange point "xp2")
    topic8 = self.pubsub_management.create_topic("topic8", exchange_point="xp2")
    topic9 = self.pubsub_management.create_topic("topic9", exchange_point="xp2", parent_topic_id=topic8)
    topic10 = self.pubsub_management.create_topic("topic10", exchange_point="xp2", parent_topic_id=topic9)
    topic11 = self.pubsub_management.create_topic("topic11", exchange_point="xp2", parent_topic_id=topic9)
    topic12 = self.pubsub_management.create_topic("topic12", exchange_point="xp2", parent_topic_id=topic11)
    topic13 = self.pubsub_management.create_topic("topic13", exchange_point="xp2", parent_topic_id=topic11)
    self.exchange_cleanup.extend(["xp1", "xp2"])

    stream1_id, _ = self.pubsub_management.create_stream(
        "stream1", topic_ids=[topic7, topic4, topic5], exchange_point="xp1"
    )
    stream2_id, _ = self.pubsub_management.create_stream("stream2", topic_ids=[topic8], exchange_point="xp2")
    stream3_id, _ = self.pubsub_management.create_stream(
        "stream3", topic_ids=[topic10, topic13], exchange_point="xp2"
    )
    stream4_id, _ = self.pubsub_management.create_stream("stream4", topic_ids=[topic9], exchange_point="xp2")
    stream5_id, _ = self.pubsub_management.create_stream("stream5", topic_ids=[topic11], exchange_point="xp2")

    subscription1 = self.pubsub_management.create_subscription("sub1", topic_ids=[topic1])
    subscription2 = self.pubsub_management.create_subscription("sub2", topic_ids=[topic8], exchange_name="sub1")
    subscription3 = self.pubsub_management.create_subscription("sub3", topic_ids=[topic9], exchange_name="sub1")
    subscription4 = self.pubsub_management.create_subscription(
        "sub4", topic_ids=[topic10, topic13, topic11], exchange_name="sub1"
    )

    # --------------------------------------------------------------------------------
    # Subscription on topic1: stream1 must arrive exactly once.
    self.pubsub_management.activate_subscription(subscription1)

    self.publish_on_stream(stream1_id, 1)

    self.assertEqual(self.msg_queue.get(timeout=10), 1)
    with self.assertRaises(Empty):
        self.msg_queue.get(timeout=0.1)

    self.pubsub_management.deactivate_subscription(subscription1)
    self.pubsub_management.delete_subscription(subscription1)

    # --------------------------------------------------------------------------------
    # Subscription on topic8: stream2 must arrive exactly once.
    self.pubsub_management.activate_subscription(subscription2)

    self.publish_on_stream(stream2_id, 2)

    self.assertEqual(self.msg_queue.get(timeout=10), 2)
    with self.assertRaises(Empty):
        self.msg_queue.get(timeout=0.1)

    self.pubsub_management.deactivate_subscription(subscription2)
    self.pubsub_management.delete_subscription(subscription2)

    # --------------------------------------------------------------------------------
    # Subscription on topic9: stream2 (topic8) must not arrive, stream3 must.
    self.pubsub_management.activate_subscription(subscription3)

    self.publish_on_stream(stream2_id, 3)
    with self.assertRaises(Empty):
        self.msg_queue.get(timeout=0.3)

    self.publish_on_stream(stream3_id, 4)
    self.assertEqual(self.msg_queue.get(timeout=10), 4)

    self.pubsub_management.deactivate_subscription(subscription3)
    self.pubsub_management.delete_subscription(subscription3)

    # --------------------------------------------------------------------------------
    # Subscription on topics 10/13/11: stream4 (topic9) must not arrive,
    # stream5 (topic11) must arrive exactly once.
    self.pubsub_management.activate_subscription(subscription4)

    self.publish_on_stream(stream4_id, 5)
    with self.assertRaises(Empty):
        self.msg_queue.get(timeout=0.3)

    self.publish_on_stream(stream5_id, 6)
    self.assertEqual(self.msg_queue.get(timeout=10), 6)
    with self.assertRaises(Empty):
        self.msg_queue.get(timeout=0.3)

    self.pubsub_management.deactivate_subscription(subscription4)
    self.pubsub_management.delete_subscription(subscription4)

    # --------------------------------------------------------------------------------
    sub1.stop()

    # Topics are deleted in reverse order of creation, then the streams.
    for topic_id in (
        topic13, topic12, topic11, topic10, topic9, topic8,
        topic7, topic6, topic5, topic4, topic3, topic2, topic1,
    ):
        self.pubsub_management.delete_topic(topic_id)

    for stream_id in (stream1_id, stream2_id, stream3_id, stream4_id, stream5_id):
        self.pubsub_management.delete_stream(stream_id)


def _get_pdict(self, filter_values):
    """Create CTD parameter contexts and a dictionary of the selected ones.

    All eleven contexts (time, lat, lon, the three L0 quantities, the three
    L1 functions, PRACSAL and DENSITY) are always created through
    ``self.dataset_management``; only those whose ``name`` appears in
    *filter_values* go into the parameter dictionary.  Matching is by exact
    name, so e.g. the time context is selected by "time".

    :param filter_values: container of parameter-context names to include.
    :return: id of the created parameter dictionary, whose name is the
        selected context names joined with "_".
    """
    t_ctxt = ParameterContext("time", param_type=QuantityType(value_encoding=np.dtype("int64")))
    t_ctxt.uom = "seconds since 01-01-1900"
    t_ctxt.fill_value = -9999
    t_ctxt_id = self.dataset_management.create_parameter_context(
        name="time", parameter_context=t_ctxt.dump(), parameter_type="quantity<int64>", unit_of_measure=t_ctxt.uom
    )

    lat_ctxt = ParameterContext("lat", param_type=ConstantType(QuantityType(value_encoding=np.dtype("float32"))))
    lat_ctxt.axis = AxisTypeEnum.LAT
    lat_ctxt.uom = "degree_north"
    lat_ctxt.fill_value = -9999
    lat_ctxt_id = self.dataset_management.create_parameter_context(
        name="lat",
        parameter_context=lat_ctxt.dump(),
        parameter_type="quantity<float32>",
        unit_of_measure=lat_ctxt.uom,
    )

    lon_ctxt = ParameterContext("lon", param_type=ConstantType(QuantityType(value_encoding=np.dtype("float32"))))
    lon_ctxt.axis = AxisTypeEnum.LON
    lon_ctxt.uom = "degree_east"
    lon_ctxt.fill_value = -9999
    lon_ctxt_id = self.dataset_management.create_parameter_context(
        name="lon",
        parameter_context=lon_ctxt.dump(),
        parameter_type="quantity<float32>",
        unit_of_measure=lon_ctxt.uom,
    )

    temp_ctxt = ParameterContext("TEMPWAT_L0", param_type=QuantityType(value_encoding=np.dtype("float32")))
    temp_ctxt.uom = "deg_C"
    temp_ctxt.fill_value = -9999
    temp_ctxt_id = self.dataset_management.create_parameter_context(
        name="TEMPWAT_L0",
        parameter_context=temp_ctxt.dump(),
        parameter_type="quantity<float32>",
        unit_of_measure=temp_ctxt.uom,
    )

    # Conductivity - values expected to be the decimal results of conversion from hex
    cond_ctxt = ParameterContext("CONDWAT_L0", param_type=QuantityType(value_encoding=np.dtype("float32")))
    cond_ctxt.uom = "S m-1"
    cond_ctxt.fill_value = -9999
    cond_ctxt_id = self.dataset_management.create_parameter_context(
        name="CONDWAT_L0",
        parameter_context=cond_ctxt.dump(),
        parameter_type="quantity<float32>",
        unit_of_measure=cond_ctxt.uom,
    )

    # Pressure - values expected to be the decimal results of conversion from hex
    press_ctxt = ParameterContext("PRESWAT_L0", param_type=QuantityType(value_encoding=np.dtype("float32")))
    press_ctxt.uom = "dbar"
    press_ctxt.fill_value = -9999
    press_ctxt_id = self.dataset_management.create_parameter_context(
        name="PRESWAT_L0",
        parameter_context=press_ctxt.dump(),
        parameter_type="quantity<float32>",
        unit_of_measure=press_ctxt.uom,
    )

    # TEMPWAT_L1 = (TEMPWAT_L0 / 10000) - 10
    tl1_func = "(TEMPWAT_L0 / 10000) - 10"
    tl1_pmap = {"TEMPWAT_L0": "TEMPWAT_L0"}
    func = NumexprFunction("TEMPWAT_L1", tl1_func, tl1_pmap)
    tempL1_ctxt = ParameterContext(
        "TEMPWAT_L1", param_type=ParameterFunctionType(function=func), variability=VariabilityEnum.TEMPORAL
    )
    tempL1_ctxt.uom = "deg_C"
    tempL1_ctxt_id = self.dataset_management.create_parameter_context(
        name=tempL1_ctxt.name,
        parameter_context=tempL1_ctxt.dump(),
        parameter_type="pfunc",
        unit_of_measure=tempL1_ctxt.uom,
    )

    # CONDWAT_L1 = (CONDWAT_L0 / 100000) - 0.5
    cl1_func = "(CONDWAT_L0 / 100000) - 0.5"
    cl1_pmap = {"CONDWAT_L0": "CONDWAT_L0"}
    func = NumexprFunction("CONDWAT_L1", cl1_func, cl1_pmap)
    condL1_ctxt = ParameterContext(
        "CONDWAT_L1", param_type=ParameterFunctionType(function=func), variability=VariabilityEnum.TEMPORAL
    )
    condL1_ctxt.uom = "S m-1"
    condL1_ctxt_id = self.dataset_management.create_parameter_context(
        name=condL1_ctxt.name,
        parameter_context=condL1_ctxt.dump(),
        parameter_type="pfunc",
        unit_of_measure=condL1_ctxt.uom,
    )

    # Equation uses p_range, which is a calibration coefficient - Fixing to 679.34040721
    # PRESWAT_L1 = (PRESWAT_L0 * p_range / (0.85 * 65536)) - (0.05 * p_range)
    pl1_func = "(PRESWAT_L0 * 679.34040721 / (0.85 * 65536)) - (0.05 * 679.34040721)"
    pl1_pmap = {"PRESWAT_L0": "PRESWAT_L0"}
    func = NumexprFunction("PRESWAT_L1", pl1_func, pl1_pmap)
    presL1_ctxt = ParameterContext(
        "PRESWAT_L1", param_type=ParameterFunctionType(function=func), variability=VariabilityEnum.TEMPORAL
    )
    # Pressure unit, matching PRESWAT_L0 above; this previously carried the
    # conductivity unit "S m-1" copied from the CONDWAT_L1 stanza.
    presL1_ctxt.uom = "dbar"
    presL1_ctxt_id = self.dataset_management.create_parameter_context(
        name=presL1_ctxt.name,
        parameter_context=presL1_ctxt.dump(),
        parameter_type="pfunc",
        unit_of_measure=presL1_ctxt.uom,
    )

    # Density & practical salinity calculated using the Gibbs Seawater library - available via python-gsw project:
    #       https://code.google.com/p/python-gsw/ & http://pypi.python.org/pypi/gsw/3.0.1

    # PRACSAL = gsw.SP_from_C((CONDWAT_L1 * 10), TEMPWAT_L1, PRESWAT_L1)
    owner = "gsw"
    sal_func = "SP_from_C"
    sal_arglist = [NumexprFunction("CONDWAT_L1*10", "C*10", {"C": "CONDWAT_L1"}), "TEMPWAT_L1", "PRESWAT_L1"]
    sal_kwargmap = None
    func = PythonFunction("PRACSAL", owner, sal_func, sal_arglist, sal_kwargmap)
    sal_ctxt = ParameterContext(
        "PRACSAL", param_type=ParameterFunctionType(func), variability=VariabilityEnum.TEMPORAL
    )
    sal_ctxt.uom = "g kg-1"
    sal_ctxt_id = self.dataset_management.create_parameter_context(
        name=sal_ctxt.name, parameter_context=sal_ctxt.dump(), parameter_type="pfunc", unit_of_measure=sal_ctxt.uom
    )

    # absolute_salinity = gsw.SA_from_SP(PRACSAL, PRESWAT_L1, longitude, latitude)
    # conservative_temperature = gsw.CT_from_t(absolute_salinity, TEMPWAT_L1, PRESWAT_L1)
    # DENSITY = gsw.rho(absolute_salinity, conservative_temperature, PRESWAT_L1)
    owner = "gsw"
    abs_sal_func = PythonFunction("abs_sal", owner, "SA_from_SP", ["PRACSAL", "PRESWAT_L1", "lon", "lat"], None)
    cons_temp_func = PythonFunction(
        "cons_temp", owner, "CT_from_t", [abs_sal_func, "TEMPWAT_L1", "PRESWAT_L1"], None
    )
    dens_func = PythonFunction("DENSITY", owner, "rho", [abs_sal_func, cons_temp_func, "PRESWAT_L1"], None)
    dens_ctxt = ParameterContext(
        "DENSITY", param_type=ParameterFunctionType(dens_func), variability=VariabilityEnum.TEMPORAL
    )
    dens_ctxt.uom = "kg m-3"
    dens_ctxt_id = self.dataset_management.create_parameter_context(
        name=dens_ctxt.name,
        parameter_context=dens_ctxt.dump(),
        parameter_type="pfunc",
        unit_of_measure=dens_ctxt.uom,
    )

    # Parallel lists: ids[k] is the registered id of contexts[k].
    ids = [
        t_ctxt_id,
        lat_ctxt_id,
        lon_ctxt_id,
        temp_ctxt_id,
        cond_ctxt_id,
        press_ctxt_id,
        tempL1_ctxt_id,
        condL1_ctxt_id,
        presL1_ctxt_id,
        sal_ctxt_id,
        dens_ctxt_id,
    ]
    contexts = [
        t_ctxt,
        lat_ctxt,
        lon_ctxt,
        temp_ctxt,
        cond_ctxt,
        press_ctxt,
        tempL1_ctxt,
        condL1_ctxt,
        presL1_ctxt,
        sal_ctxt,
        dens_ctxt,
    ]

    # Keep only the requested contexts, preserving creation order.
    selected = [(ctxt_id, ctxt.name) for ctxt_id, ctxt in zip(ids, contexts) if ctxt.name in filter_values]
    context_ids = [ctxt_id for ctxt_id, _ in selected]
    pdict_name = "_".join([name for _, name in selected])

    pdict_id = self.dataset_management.create_parameter_dictionary(
        pdict_name, parameter_context_ids=context_ids, temporal_context="time"
    )
    return pdict_id
class CtdbpTransformsIntTest(IonIntegrationTestCase):
    """Integration test that pushes a granule through the CTDBP_L0_all transform."""

    def setUp(self):
        """Start the container, deploy r2deploy.yml and create the service clients."""
        super(CtdbpTransformsIntTest, self).setUp()

        self._start_container()
        self.container.start_rel_from_url('res/deploy/r2deploy.yml')

        self.pubsub = PubsubManagementServiceClient()
        self.process_dispatcher = ProcessDispatcherServiceClient()
        self.dataset_management = DatasetManagementServiceClient()
        self.data_process_management = DataProcessManagementServiceClient()
        self.dataproduct_management = DataProductManagementServiceClient()
        self.resource_registry = ResourceRegistryServiceClient()

        # Running start offset for the 'time' values of generated packets.
        self.i = 0

    def _get_new_ctd_packet(self, stream_definition_id, length):
        """Build a granule of *length* records for the given stream definition.

        'time' is first set to ``arange(self.i, self.i + length)``; then every
        field whose parameter type is a QuantityType is filled with values
        drawn uniformly from [0.0, 75.0].  The loop does not skip 'time', so a
        QuantityType 'time' field is overwritten as well.  ``self.i`` is
        advanced by *length* before returning.
        """
        record = RecordDictionaryTool(stream_definition_id=stream_definition_id)
        record['time'] = numpy.arange(self.i, self.i + length)

        for field_name in record:
            param_type = record._pdict.get_context(field_name).param_type
            if not isinstance(param_type, QuantityType):
                continue
            samples = [random.uniform(0.0, 75.0) for _ in xrange(length)]
            record[field_name] = numpy.array(samples)

        granule = record.to_granule()
        self.i += length

        return granule

    def _create_input_param_dict_for_test(self, parameter_dict_name=''):
        """Register a parameter dictionary with time plus five CTD fields.

        Each context and the dictionary itself get a cleanup that deletes them.

        :param parameter_dict_name: name given to the parameter dictionary.
        :return: id of the created parameter dictionary.
        """
        pdict = ParameterDictionary()

        # Time axis: the only float64 context and the only one with a unit.
        time_ctxt = ParameterContext('time', param_type=QuantityType(value_encoding=numpy.dtype('float64')))
        time_ctxt.axis = AxisTypeEnum.TIME
        time_ctxt.uom = 'seconds since 01-01-1900'
        pdict.add_context(time_ctxt)

        # The remaining contexts are identical apart from their names.
        for field_name in ('conductivity', 'pressure', 'temperature', 'density', 'salinity'):
            field_ctxt = ParameterContext(field_name, param_type=QuantityType(value_encoding=numpy.dtype('float32')))
            field_ctxt.uom = ''
            pdict.add_context(field_ctxt)

        # Register every context so the data product can create the stream.
        context_ids = []
        for ctxt_name, entry in pdict.iteritems():
            # entry[1] is the object that gets dumped for registration.
            registered_id = self.dataset_management.create_parameter_context(ctxt_name, entry[1].dump())
            context_ids.append(registered_id)
            self.addCleanup(self.dataset_management.delete_parameter_context, registered_id)

        pdict_id = self.dataset_management.create_parameter_dictionary(parameter_dict_name, context_ids)
        self.addCleanup(self.dataset_management.delete_parameter_dictionary, pdict_id)

        return pdict_id

    def test_ctdbp_L0_all(self):
        """Publish one granule to the ctdbp_L0_all transform and wait for its output."""

        #----------- Data Process Definition --------------------------------

        definition = IonObject(RT.DataProcessDefinition,
                               name='CTDBP_L0_all',
                               description='Take parsed stream and put the C, T and P into three separate L0 streams.',
                               module='ion.processes.data.transforms.ctdbp.ctdbp_L0',
                               class_name='CTDBP_L0_all')

        dprocdef_id = self.data_process_management.create_data_process_definition(definition)
        self.addCleanup(self.data_process_management.delete_data_process_definition, dprocdef_id)

        log.debug("created data process definition: id = %s", dprocdef_id)

        #----------- Data Products --------------------------------

        # Temporal and spatial coordinate reference systems, in dumped form.
        temporal, spatial = time_series_domain()
        spatial_dump = spatial.dump()
        temporal_dump = temporal.dump()

        pdict_id = self._create_input_param_dict_for_test(parameter_dict_name='fictitious_ctdp_param_dict')

        # One stream definition is shared by the input and the output product.
        stream_def_id = self.pubsub.create_stream_definition(name='parsed', parameter_dictionary_id=pdict_id)
        self.addCleanup(self.pubsub.delete_stream_definition, stream_def_id)

        log.debug("Got the parsed parameter dictionary: id: %s", pdict_id)
        log.debug("Got the stream def for parsed input: %s", stream_def_id)

        def make_data_product(name, description):
            # Create a data product on the shared stream definition and
            # schedule its deletion.
            product = IonObject(RT.DataProduct,
                                name=name,
                                description=description,
                                temporal_domain=temporal_dump,
                                spatial_domain=spatial_dump)
            product_id = self.dataproduct_management.create_data_product(
                data_product=product,
                stream_definition_id=stream_def_id)
            self.addCleanup(self.dataproduct_management.delete_data_product, product_id)
            return product_id

        # Input data product
        input_dp_id = make_data_product('parsed_stream', 'Parsed stream input to CTBP L0 transform')

        # Output data product
        L0_stream_dp_id = make_data_product('L0_stream', 'L0_stream output of CTBP L0 transform')

        # The key must be "L0_stream": when the data process is launched this
        # name ends up in the config as config.process.publish_streams.L0_stream.
        self.output_products = {'L0_stream': L0_stream_dp_id}
        out_stream_ids, _ = self.resource_registry.find_objects(L0_stream_dp_id, PRED.hasStream, RT.Stream, True)
        self.assertTrue(len(out_stream_ids))
        output_stream_id = out_stream_ids[0]

        dproc_id = self.data_process_management.create_data_process(
            dprocdef_id, [input_dp_id], self.output_products)
        self.addCleanup(self.data_process_management.delete_data_process, dproc_id)

        log.debug("Created a data process for ctdbp_L0. id: %s", dproc_id)

        # Activate the data process; cleanups run in reverse order, so it is
        # deactivated before it is deleted.
        self.data_process_management.activate_data_process(dproc_id)
        self.addCleanup(self.data_process_management.deactivate_data_process, dproc_id)

        #----------- Stream created for the input data product by create_data_product() --------------------------------
        in_stream_ids, _ = self.resource_registry.find_objects(input_dp_id, PRED.hasStream, RT.Stream, True)
        self.assertTrue(len(in_stream_ids))

        input_stream_id = in_stream_ids[0]
        stream_route = self.pubsub.read_stream_route(input_stream_id)

        log.debug("The input stream for the L0 transform: %s", input_stream_id)

        #----------- Subscriber listening to the transform's output --------------------------------

        received = gevent.event.AsyncResult()

        def subscriber(m, r, s):
            # Hand the received message over to the waiting test.
            received.set(m)

        listener = StandaloneStreamSubscriber(exchange_name='sub', callback=subscriber)

        sub_id = self.pubsub.create_subscription('subscriber_to_transform',
                                                 stream_ids=[output_stream_id],
                                                 exchange_name='sub')
        self.addCleanup(self.pubsub.delete_subscription, sub_id)

        self.pubsub.activate_subscription(sub_id)
        self.addCleanup(self.pubsub.deactivate_subscription, sub_id)

        listener.start()
        self.addCleanup(listener.stop)

        #----------- Publish on the input stream so the transform receives it --------------------------------

        publisher = StandaloneStreamPublisher(input_stream_id, stream_route)
        publish_granule = self._get_new_ctd_packet(stream_definition_id=stream_def_id, length=5)

        publisher.publish(publish_granule)

        log.debug("Published the following granule: %s", publish_granule)

        granule_from_transform = received.get(timeout=20)

        log.debug("Got the following granule from the transform: %s", granule_from_transform)

        # Check that the granule published by the L0 transform has the right properties
        self._check_granule_from_transform(granule_from_transform)

    def _check_granule_from_transform(self, granule):
        """Hook for checking the transform's output granule; currently does nothing."""
        pass
class BulkIngestBase(object):
    """
    Base for bulk-ingest tests; subclasses supply the data-specific pieces.

    Call chain:

    test_data_ingest: creates the resources, then calls...
        start_agent: spawns and starts the agent, then calls...
            start_listener: subscribes to events; on a DatasetModified event it calls...
                get_retrieve_client: implemented by the subclass

    See replacement TestPreloadThenLoadDataset.  A little more declarative and
    straight-forward, but much slower (requires preload).
    """

    def setUp(self):
        """Start the container, create service clients and build the contexts."""
        self._start_container()
        self.container.start_rel_from_url('res/deploy/r2deploy.yml')

        self.pubsub_management = PubsubManagementServiceClient()
        self.dataset_management = DatasetManagementServiceClient()
        self.data_product_management = DataProductManagementServiceClient()
        self.data_acquisition_management = DataAcquisitionManagementServiceClient()
        self.data_retriever = DataRetrieverServiceClient()
        self.process_dispatch_client = ProcessDispatcherServiceClient(node=self.container.node)
        self.resource_registry = self.container.resource_registry

        self.context_ids = self.build_param_contexts()
        self.setup_resources()

    # ---- hooks that subclasses must provide ---------------------------------

    def build_param_contexts(self):
        """Return the parameter context ids to use (subclass hook)."""
        raise NotImplementedError('build_param_contexts must be implemented in child classes')

    def create_external_dataset(self):
        """Create the external dataset and return its id (subclass hook)."""
        raise NotImplementedError('create_external_dataset must be implemented in child classes')

    def get_dvr_config(self):
        """Return the driver configuration for the agent (subclass hook)."""
        raise NotImplementedError('get_dvr_config must be implemented in child classes')

    def get_retrieve_client(self, dataset_id=''):
        """Called when the dataset is reported modified (subclass hook)."""
        raise NotImplementedError('get_retrieve_client must be implemented in child classes')

    # ---- the test itself ----------------------------------------------------

    def test_data_ingest(self):
        """Create every resource the ingest needs, then start the agent."""
        name = self.name

        self.pdict_id = self.create_parameter_dict(name)
        self.stream_def_id = self.create_stream_def(name, self.pdict_id)
        self.data_product_id = self.create_data_product(name, self.description, self.stream_def_id)
        self.dataset_id = self.get_dataset_id(self.data_product_id)
        self.stream_id, self.route = self.get_stream_id_and_route(self.data_product_id)
        self.external_dataset_id = self.create_external_dataset()
        self.data_producer_id = self.register_external_dataset(self.external_dataset_id)

        self.start_agent()

    # ---- resource helpers ---------------------------------------------------

    def create_parameter_dict(self, name=''):
        """Create a parameter dictionary from ``self.context_ids``; return its id."""
        dictionary_id = self.dataset_management.create_parameter_dictionary(
            name=name,
            parameter_context_ids=self.context_ids,
            temporal_context='time')
        return dictionary_id

    def create_stream_def(self, name='', pdict_id=''):
        """Create a stream definition on the given parameter dictionary."""
        definition_id = self.pubsub_management.create_stream_definition(
            name=name,
            parameter_dictionary_id=pdict_id)
        return definition_id

    def create_data_product(self, name='', description='', stream_def_id=''):
        """Create a data product and activate its persistence.

        A cleanup that suspends the persistence again is registered.
        """
        temporal, spatial = time_series_domain()
        temporal_dump = temporal.dump()
        spatial_dump = spatial.dump()

        product = DataProduct(
            name=name,
            description=description,
            processing_level_code='Parsed_Canonical',
            temporal_domain=temporal_dump,
            spatial_domain=spatial_dump)

        product_id = self.data_product_management.create_data_product(
            data_product=product,
            stream_definition_id=stream_def_id)
        self.data_product_management.activate_data_product_persistence(product_id)
        self.addCleanup(self.data_product_management.suspend_data_product_persistence, product_id)

        return product_id

    def register_external_dataset(self, external_dataset_id=''):
        """Register the external dataset with data acquisition management."""
        producer_id = self.data_acquisition_management.register_external_data_set(
            external_dataset_id=external_dataset_id)
        return producer_id

    def get_dataset_id(self, data_product_id=''):
        """Return the first dataset linked to the product by 'hasDataset'."""
        dataset_ids, _ = self.resource_registry.find_objects(
            subject=data_product_id,
            predicate='hasDataset',
            id_only=True)
        return dataset_ids[0]

    def get_stream_id_and_route(self, data_product_id):
        """Return the product's first stream id together with its route."""
        stream_ids, _ = self.resource_registry.find_objects(data_product_id, PRED.hasStream, RT.Stream, id_only=True)
        first_stream_id = stream_ids[0]
        stream_route = self.pubsub_management.read_stream_route(first_stream_id)
        return first_stream_id, stream_route

    # ---- agent life cycle ---------------------------------------------------

    def start_agent(self):
        """Spawn the agent, drive it into autosample and start listening."""
        driver_config = self.get_dvr_config()
        agent_config = {
            'driver_config': driver_config,
            'stream_config': {},
            'agent': {'resource_id': self.external_dataset_id},
            'test_mode': True
        }

        self._ia_pid = self.container.spawn_process(
            name=self.EDA_NAME,
            module=self.EDA_MOD,
            cls=self.EDA_CLS,
            config=agent_config)

        self._ia_client = ResourceAgentClient(self.external_dataset_id, process=FakeProcess())

        # Agent-level commands, issued in this order.
        for agent_event in (ResourceAgentEvent.INITIALIZE,
                            ResourceAgentEvent.GO_ACTIVE,
                            ResourceAgentEvent.RUN):
            self._ia_client.execute_agent(AgentCommand(command=agent_event))

        # Resource-level command that starts sampling.
        autosample = AgentCommand(command=DriverEvent.START_AUTOSAMPLE)
        self._ia_client.execute_resource(command=autosample)

        self.start_listener(self.dataset_id)

    def stop_agent(self):
        """Stop autosampling, reset the agent and terminate its process."""
        stop_sampling = AgentCommand(command=DriverEvent.STOP_AUTOSAMPLE)
        self._ia_client.execute_resource(stop_sampling)

        reset = AgentCommand(command=ResourceAgentEvent.RESET)
        self._ia_client.execute_agent(reset)

        self.container.terminate_process(self._ia_pid)

    def start_listener(self, dataset_id=''):
        """Subscribe to ingest events, wait up to 120 seconds, then stop the agent."""
        ingest_done = Event()

        def on_dataset_modified(*args, **kwargs):
            # Delegate to the subclass hook for this dataset.
            self.get_retrieve_client(dataset_id=dataset_id)

        def on_acquire_finished(*args, **kwargs):
            # Release the wait below so execution can continue.
            ingest_done.set()

        modified_sub = EventSubscriber(event_type=OT.DatasetModified, callback=on_dataset_modified, origin=dataset_id)
        modified_sub.start()

        lifecycle_sub = EventSubscriber(event_type=OT.DeviceCommonLifecycleEvent,
                                        callback=on_acquire_finished,
                                        origin='BaseDataHandler._acquire_sample')
        lifecycle_sub.start()

        self.addCleanup(modified_sub.stop)
        self.addCleanup(lifecycle_sub.stop)

        # Let it go for up to 120 seconds, then stop the agent and reset it.
        ingest_done.wait(120)
        self.stop_agent()

    def create_logger(self, name, stream_id=''):
        """Schedule a StreamGranuleLogger process for the stream; return its pid."""
        logger_definition = ProcessDefinition(name=name + '_logger')
        logger_definition.executable = {
            'module': 'ion.processes.data.stream_granule_logger',
            'class': 'StreamGranuleLogger'
        }

        logger_procdef_id = self.process_dispatch_client.create_process_definition(
            process_definition=logger_definition)

        configuration = {'process': {'stream_id': stream_id}}

        pid = self.process_dispatch_client.schedule_process(
            process_definition_id=logger_procdef_id,
            configuration=configuration)

        return pid
class PubsubManagementIntTest(IonIntegrationTestCase):
    """Integration tests for the Pubsub Management service.

    Each test runs against a freshly started container deployed from
    ``res/deploy/r2deploy.yml`` and exercises stream definitions, streams,
    topics and subscriptions through ``PubsubManagementServiceClient``.

    Tests register the queues / exchange points they want removed in
    ``self.queue_cleanup`` / ``self.exchange_cleanup``; ``tearDown`` deletes
    everything listed there.
    """

    def setUp(self):
        """Start the container and create the service clients used by the tests."""
        self._start_container()
        self.container.start_rel_from_url('res/deploy/r2deploy.yml')
        self.pubsub_management = PubsubManagementServiceClient()
        self.resource_registry = ResourceRegistryServiceClient()
        self.dataset_management = DatasetManagementServiceClient()

        # Cache of parameter dictionary ids built by _get_pdict, keyed by name
        self.pdicts = {}
        # Names of queues / exchange points to delete in tearDown
        self.queue_cleanup = list()
        self.exchange_cleanup = list()

    def tearDown(self):
        """Delete every queue and exchange point registered for cleanup."""
        for queue in self.queue_cleanup:
            xn = self.container.ex_manager.create_xn_queue(queue)
            xn.delete()
        for exchange in self.exchange_cleanup:
            xp = self.container.ex_manager.create_xp(exchange)
            xp.delete()

    def test_stream_def_crud(self):
        """Create, read, delete and compare stream definitions."""
        # Test Creation
        pdict = DatasetManagementService.get_parameter_dictionary_by_name(
            'ctd_parsed_param_dict')
        stream_definition_id = self.pubsub_management.create_stream_definition(
            'ctd parsed', parameter_dictionary_id=pdict.identifier)

        # Make sure there is an assoc
        self.assertTrue(
            self.resource_registry.find_associations(
                subject=stream_definition_id,
                predicate=PRED.hasParameterDictionary,
                object=pdict.identifier,
                id_only=True))

        # Test Reading
        stream_definition = self.pubsub_management.read_stream_definition(
            stream_definition_id)
        self.assertTrue(
            PubsubManagementService._compare_pdicts(
                pdict.dump(), stream_definition.parameter_dictionary))

        # Test Deleting
        self.pubsub_management.delete_stream_definition(stream_definition_id)
        self.assertFalse(
            self.resource_registry.find_associations(
                subject=stream_definition_id,
                predicate=PRED.hasParameterDictionary,
                object=pdict.identifier,
                id_only=True))

        # Test comparisons
        in_stream_definition_id = self.pubsub_management.create_stream_definition(
            'L0 products',
            parameter_dictionary_id=pdict.identifier,
            available_fields=['time', 'temp', 'conductivity', 'pressure'])
        self.addCleanup(self.pubsub_management.delete_stream_definition,
                        in_stream_definition_id)

        # A definition compared against itself is both equal and compatible
        out_stream_definition_id = in_stream_definition_id
        self.assertTrue(
            self.pubsub_management.compare_stream_definition(
                in_stream_definition_id, out_stream_definition_id))
        self.assertTrue(
            self.pubsub_management.compatible_stream_definitions(
                in_stream_definition_id, out_stream_definition_id))

        # Same parameter dictionary, different available fields:
        # expected to be unequal but still compatible
        out_stream_definition_id = self.pubsub_management.create_stream_definition(
            'L2 Products',
            parameter_dictionary_id=pdict.identifier,
            available_fields=['time', 'salinity', 'density'])
        self.addCleanup(self.pubsub_management.delete_stream_definition,
                        out_stream_definition_id)
        self.assertFalse(
            self.pubsub_management.compare_stream_definition(
                in_stream_definition_id, out_stream_definition_id))
        self.assertTrue(
            self.pubsub_management.compatible_stream_definitions(
                in_stream_definition_id, out_stream_definition_id))

    def _validate_stream_def_pair(self, index, pdict_in, pdict_out,
                                  fields_in, fields_out):
        """Build an incoming/outgoing stream definition pair and validate it.

        :param index: integer suffix for the definition names
                      (``in_sd_<index>`` / ``out_sd_<index>``)
        :param pdict_in: parameter names for the incoming parameter dictionary
        :param pdict_out: parameter names for the outgoing parameter dictionary
        :param fields_in: available fields of the incoming stream definition
        :param fields_out: available fields of the outgoing stream definition
        :return: whatever ``validate_stream_defs`` returns for the pair
        """
        incoming_pdict_id = self._get_pdict(pdict_in)
        outgoing_pdict_id = self._get_pdict(pdict_out)
        incoming_stream_def_id = self.pubsub_management.create_stream_definition(
            'in_sd_%d' % index,
            parameter_dictionary_id=incoming_pdict_id,
            available_fields=fields_in)
        outgoing_stream_def_id = self.pubsub_management.create_stream_definition(
            'out_sd_%d' % index,
            parameter_dictionary_id=outgoing_pdict_id,
            available_fields=fields_out)
        return self.pubsub_management.validate_stream_defs(
            incoming_stream_def_id, outgoing_stream_def_id)

    def test_validate_stream_defs(self):
        """Check validate_stream_defs over a table of in/out definition pairs."""
        coords = ['TIME', 'LAT', 'LON']
        l0 = coords + ['TEMPWAT_L0', 'CONDWAT_L0', 'PRESWAT_L0']
        l1 = ['TEMPWAT_L1', 'CONDWAT_L1', 'PRESWAT_L1']
        l2 = ['DENSITY', 'PRACSAL']

        # (label, incoming pdict, outgoing pdict,
        #  incoming available fields, outgoing available fields, expected)
        # The position in this table is the numeric suffix of the stream
        # definition names, so the order must not change.
        scenarios = [
            ('no input',
             l0, l2 + l1, [], [], False),
            ('input with no output',
             l0, l2 + l1, l0, [], True),
            ('available field missing parameter context definition '
             '-- missing PRESWAT_L0',
             coords + ['TEMPWAT_L0', 'CONDWAT_L0'], l2 + l1,
             l0, ['DENSITY'], False),
            ('l1 from l0',
             l0, l1, l0, l1, True),
            ('l2 from l0',
             l0, l1 + l2, l0, l2, True),
            ('Ln from L0',
             l0, l2 + l1, l0, l2 + l1, True),
            ('L2 from L1',
             coords + l1, l2 + l1, coords + l1, l2, True),
            ('L1 from L0 missing L0',
             coords, l1, coords, l2, False),
            ('L2 from L0 missing L0',
             coords, l2 + l1, coords, l2, False),
            ('L2 from L0 missing L1',
             l0, l2, l0, l2, False),
        ]

        for index, scenario in enumerate(scenarios):
            label, pdict_in, pdict_out, fields_in, fields_out, expected = scenario
            # Pass copies so every service call gets its own list object
            result = self._validate_stream_def_pair(
                index, list(pdict_in), list(pdict_out),
                list(fields_in), list(fields_out))
            msg = 'scenario %d (%s)' % (index, label)
            if expected:
                self.assertTrue(result, msg)
            else:
                self.assertFalse(result, msg)

    def publish_on_stream(self, stream_id, msg):
        """Publish ``msg`` on the stream ``stream_id`` using its stored route."""
        stream = self.pubsub_management.read_stream(stream_id)
        stream_route = stream.stream_route
        publisher = StandaloneStreamPublisher(stream_id=stream_id,
                                              stream_route=stream_route)
        publisher.publish(msg)

    def test_stream_crud(self):
        """Create, read and delete a stream and check its associations."""
        stream_def_id = self.pubsub_management.create_stream_definition(
            'test_definition', stream_type='stream')
        topic_id = self.pubsub_management.create_topic(
            name='test_topic', exchange_point='test_exchange')
        self.exchange_cleanup.append('test_exchange')
        topic2_id = self.pubsub_management.create_topic(
            name='another_topic', exchange_point='outside')
        stream_id, route = self.pubsub_management.create_stream(
            name='test_stream',
            topic_ids=[topic_id, topic2_id],
            exchange_point='test_exchange',
            stream_definition_id=stream_def_id)

        # Only the first topic is expected to end up associated
        topics, assocs = self.resource_registry.find_objects(
            subject=stream_id, predicate=PRED.hasTopic, id_only=True)
        self.assertEqual(topics, [topic_id])

        defs, assocs = self.resource_registry.find_objects(
            subject=stream_id, predicate=PRED.hasStreamDefinition, id_only=True)
        self.assertTrue(defs)

        stream = self.pubsub_management.read_stream(stream_id)
        self.assertEqual(stream.name, 'test_stream')

        self.pubsub_management.delete_stream(stream_id)
        with self.assertRaises(NotFound):
            self.pubsub_management.read_stream(stream_id)

        # Deleting the stream must also remove its associations
        defs, assocs = self.resource_registry.find_objects(
            subject=stream_id, predicate=PRED.hasStreamDefinition, id_only=True)
        self.assertFalse(defs)

        topics, assocs = self.resource_registry.find_objects(
            subject=stream_id, predicate=PRED.hasTopic, id_only=True)
        self.assertFalse(topics)

        self.pubsub_management.delete_topic(topic_id)
        self.pubsub_management.delete_topic(topic2_id)
        self.pubsub_management.delete_stream_definition(stream_def_id)

    def test_subscription_crud(self):
        """Create, read and delete a subscription and check its associations."""
        stream_def_id = self.pubsub_management.create_stream_definition(
            'test_definition', stream_type='stream')
        stream_id, route = self.pubsub_management.create_stream(
            name='test_stream',
            exchange_point='test_exchange',
            stream_definition_id=stream_def_id)
        subscription_id = self.pubsub_management.create_subscription(
            name='test subscription',
            stream_ids=[stream_id],
            exchange_name='test_queue')
        self.exchange_cleanup.append('test_exchange')

        subs, assocs = self.resource_registry.find_objects(
            subject=subscription_id, predicate=PRED.hasStream, id_only=True)
        self.assertEqual(subs, [stream_id])

        # Exactly one ExchangeName resource should exist for the queue, and
        # it should be the subject of the hasSubscription association
        res, _ = self.resource_registry.find_resources(
            restype=RT.ExchangeName, name='test_queue', id_only=True)
        self.assertEqual(len(res), 1)

        subs, assocs = self.resource_registry.find_subjects(
            object=subscription_id, predicate=PRED.hasSubscription, id_only=True)
        self.assertEqual(subs[0], res[0])

        subscription = self.pubsub_management.read_subscription(subscription_id)
        self.assertEqual(subscription.exchange_name, 'test_queue')

        self.pubsub_management.delete_subscription(subscription_id)

        subs, assocs = self.resource_registry.find_objects(
            subject=subscription_id, predicate=PRED.hasStream, id_only=True)
        self.assertFalse(subs)

        subs, assocs = self.resource_registry.find_subjects(
            object=subscription_id, predicate=PRED.hasSubscription, id_only=True)
        self.assertFalse(subs)

        self.pubsub_management.delete_stream(stream_id)
        self.pubsub_management.delete_stream_definition(stream_def_id)

    def test_move_before_activate(self):
        """Move a not-yet-activated subscription to another queue."""
        stream_id, route = self.pubsub_management.create_stream(
            name='test_stream', exchange_point='test_xp')

        #--------------------------------------------------------------------------------
        # Test moving before activate
        #--------------------------------------------------------------------------------

        subscription_id = self.pubsub_management.create_subscription(
            'first_queue', stream_ids=[stream_id])

        xn_ids, _ = self.resource_registry.find_resources(
            restype=RT.ExchangeName, name='first_queue', id_only=True)
        subjects, _ = self.resource_registry.find_subjects(
            object=subscription_id, predicate=PRED.hasSubscription, id_only=True)
        self.assertEqual(xn_ids[0], subjects[0])

        self.pubsub_management.move_subscription(subscription_id,
                                                 exchange_name='second_queue')

        # After the move the only hasSubscription subject is the new queue
        xn_ids, _ = self.resource_registry.find_resources(
            restype=RT.ExchangeName, name='second_queue', id_only=True)
        subjects, _ = self.resource_registry.find_subjects(
            object=subscription_id, predicate=PRED.hasSubscription, id_only=True)
        self.assertEqual(len(subjects), 1)
        self.assertEqual(subjects[0], xn_ids[0])

        self.pubsub_management.delete_subscription(subscription_id)
        self.pubsub_management.delete_stream(stream_id)

    def test_move_activated_subscription(self):
        """Move an activated subscription and verify delivery on the new queue."""
        stream_id, route = self.pubsub_management.create_stream(
            name='test_stream', exchange_point='test_xp')

        #--------------------------------------------------------------------------------
        # Test moving after activate
        #--------------------------------------------------------------------------------

        subscription_id = self.pubsub_management.create_subscription(
            'first_queue', stream_ids=[stream_id])
        self.pubsub_management.activate_subscription(subscription_id)

        xn_ids, _ = self.resource_registry.find_resources(
            restype=RT.ExchangeName, name='first_queue', id_only=True)
        subjects, _ = self.resource_registry.find_subjects(
            object=subscription_id, predicate=PRED.hasSubscription, id_only=True)
        self.assertEqual(xn_ids[0], subjects[0])

        self.verified = Event()

        def verify(m, r, s):
            self.assertEqual(m, 'verified')
            self.verified.set()

        subscriber = StandaloneStreamSubscriber('second_queue', verify)
        subscriber.start()

        self.pubsub_management.move_subscription(subscription_id,
                                                 exchange_name='second_queue')

        xn_ids, _ = self.resource_registry.find_resources(
            restype=RT.ExchangeName, name='second_queue', id_only=True)
        subjects, _ = self.resource_registry.find_subjects(
            object=subscription_id, predicate=PRED.hasSubscription, id_only=True)
        self.assertEqual(len(subjects), 1)
        self.assertEqual(subjects[0], xn_ids[0])

        publisher = StandaloneStreamPublisher(stream_id, route)
        publisher.publish('verified')

        self.assertTrue(self.verified.wait(2))

        # Stop the listener before tearing the subscription down, as
        # test_activation_and_deactivation does; previously it was left running.
        subscriber.stop()

        self.pubsub_management.deactivate_subscription(subscription_id)
        self.pubsub_management.delete_subscription(subscription_id)
        self.pubsub_management.delete_stream(stream_id)

    def test_queue_cleanup(self):
        """Deleting a subscription removes its ExchangeName resource."""
        stream_id, route = self.pubsub_management.create_stream(
            'test_stream', 'xp1')

        # Remove any queue named 'queue1' already known to the registry
        xn_objs, _ = self.resource_registry.find_resources(
            restype=RT.ExchangeName, name='queue1')
        for xn_obj in xn_objs:
            xn = self.container.ex_manager.create_xn_queue(xn_obj.name)
            xn.delete()

        subscription_id = self.pubsub_management.create_subscription(
            'queue1', stream_ids=[stream_id])
        xn_ids, _ = self.resource_registry.find_resources(
            restype=RT.ExchangeName, name='queue1')
        self.assertEqual(len(xn_ids), 1)

        self.pubsub_management.delete_subscription(subscription_id)

        xn_ids, _ = self.resource_registry.find_resources(
            restype=RT.ExchangeName, name='queue1')
        self.assertEqual(len(xn_ids), 0)

    def test_activation_and_deactivation(self):
        """Messages are delivered only while the subscription is active."""
        stream_id, route = self.pubsub_management.create_stream(
            'stream1', 'xp1')
        subscription_id = self.pubsub_management.create_subscription(
            'sub1', stream_ids=[stream_id])

        self.check1 = Event()

        def verifier(m, r, s):
            self.check1.set()

        subscriber = StandaloneStreamSubscriber('sub1', verifier)
        subscriber.start()

        publisher = StandaloneStreamPublisher(stream_id, route)

        # Not yet activated: nothing should arrive
        publisher.publish('should not receive')
        self.assertFalse(self.check1.wait(0.25))

        self.pubsub_management.activate_subscription(subscription_id)
        publisher.publish('should receive')
        self.assertTrue(self.check1.wait(2))

        self.check1.clear()
        self.assertFalse(self.check1.is_set())

        # Deactivated again: nothing should arrive
        self.pubsub_management.deactivate_subscription(subscription_id)
        publisher.publish('should not receive')
        self.assertFalse(self.check1.wait(0.5))

        self.pubsub_management.activate_subscription(subscription_id)
        publisher.publish('should receive')
        self.assertTrue(self.check1.wait(2))

        subscriber.stop()

        self.pubsub_management.deactivate_subscription(subscription_id)
        self.pubsub_management.delete_subscription(subscription_id)
        self.pubsub_management.delete_stream(stream_id)

    def test_topic_crud(self):
        """Create, read and delete a topic."""
        topic_id = self.pubsub_management.create_topic(
            name='test_topic', exchange_point='test_xp')
        self.exchange_cleanup.append('test_xp')

        topic = self.pubsub_management.read_topic(topic_id)
        self.assertEqual(topic.name, 'test_topic')
        self.assertEqual(topic.exchange_point, 'test_xp')

        self.pubsub_management.delete_topic(topic_id)
        with self.assertRaises(NotFound):
            self.pubsub_management.read_topic(topic_id)

    def test_full_pubsub(self):
        """Two subscribers on separate queues with stream and exchange subscriptions."""
        self.sub1_sat = Event()
        self.sub2_sat = Event()

        def subscriber1(m, r, s):
            self.sub1_sat.set()

        def subscriber2(m, r, s):
            self.sub2_sat.set()

        sub1 = StandaloneStreamSubscriber('sub1', subscriber1)
        self.queue_cleanup.append(sub1.xn.queue)
        sub1.start()

        sub2 = StandaloneStreamSubscriber('sub2', subscriber2)
        self.queue_cleanup.append(sub2.xn.queue)
        sub2.start()

        log_topic = self.pubsub_management.create_topic(
            'instrument_logs', exchange_point='instruments')
        science_topic = self.pubsub_management.create_topic(
            'science_data', exchange_point='instruments')
        events_topic = self.pubsub_management.create_topic(
            'notifications', exchange_point='events')

        log_stream, route = self.pubsub_management.create_stream(
            'instrument1-logs', topic_ids=[log_topic],
            exchange_point='instruments')
        ctd_stream, route = self.pubsub_management.create_stream(
            'instrument1-ctd', topic_ids=[science_topic],
            exchange_point='instruments')
        event_stream, route = self.pubsub_management.create_stream(
            'notifications', topic_ids=[events_topic],
            exchange_point='events')
        raw_stream, route = self.pubsub_management.create_stream(
            'temp', exchange_point='global.data')
        self.exchange_cleanup.extend(['instruments', 'events', 'global.data'])

        subscription1 = self.pubsub_management.create_subscription(
            'subscription1',
            stream_ids=[log_stream, event_stream],
            exchange_name='sub1')
        subscription2 = self.pubsub_management.create_subscription(
            'subscription2',
            exchange_points=['global.data'],
            stream_ids=[ctd_stream],
            exchange_name='sub2')

        self.pubsub_management.activate_subscription(subscription1)
        self.pubsub_management.activate_subscription(subscription2)

        self.publish_on_stream(log_stream, 1)
        self.assertTrue(self.sub1_sat.wait(4))
        self.assertFalse(self.sub2_sat.is_set())

        self.publish_on_stream(raw_stream, 1)
        # NOTE(review): sub1_sat was already set above and never cleared, so
        # this wait cannot fail. raw_stream lives on 'global.data', which only
        # subscription2 (queue 'sub2') subscribes to -- possibly sub2_sat was
        # intended. Left unchanged; confirm against the service's binding
        # behaviour before tightening the assertion.
        self.assertTrue(self.sub1_sat.wait(4))

        sub1.stop()
        sub2.stop()

    def test_topic_craziness(self):
        """Topic-tree subscriptions deliver only messages from streams in their subtree."""
        self.msg_queue = Queue()

        def subscriber1(m, r, s):
            self.msg_queue.put(m)

        sub1 = StandaloneStreamSubscriber('sub1', subscriber1)
        self.queue_cleanup.append(sub1.xn.queue)
        sub1.start()

        # Tree 1 (exchange point xp1): topic1 -> {topic2 -> {4, 5}, topic3 -> {6, 7}}
        topic1 = self.pubsub_management.create_topic(
            'topic1', exchange_point='xp1')
        topic2 = self.pubsub_management.create_topic(
            'topic2', exchange_point='xp1', parent_topic_id=topic1)
        topic3 = self.pubsub_management.create_topic(
            'topic3', exchange_point='xp1', parent_topic_id=topic1)
        topic4 = self.pubsub_management.create_topic(
            'topic4', exchange_point='xp1', parent_topic_id=topic2)
        topic5 = self.pubsub_management.create_topic(
            'topic5', exchange_point='xp1', parent_topic_id=topic2)
        topic6 = self.pubsub_management.create_topic(
            'topic6', exchange_point='xp1', parent_topic_id=topic3)
        topic7 = self.pubsub_management.create_topic(
            'topic7', exchange_point='xp1', parent_topic_id=topic3)

        # Tree 2 (exchange point xp2): topic8 -> topic9 -> {10, topic11 -> {12, 13}}
        topic8 = self.pubsub_management.create_topic(
            'topic8', exchange_point='xp2')
        topic9 = self.pubsub_management.create_topic(
            'topic9', exchange_point='xp2', parent_topic_id=topic8)
        topic10 = self.pubsub_management.create_topic(
            'topic10', exchange_point='xp2', parent_topic_id=topic9)
        topic11 = self.pubsub_management.create_topic(
            'topic11', exchange_point='xp2', parent_topic_id=topic9)
        topic12 = self.pubsub_management.create_topic(
            'topic12', exchange_point='xp2', parent_topic_id=topic11)
        topic13 = self.pubsub_management.create_topic(
            'topic13', exchange_point='xp2', parent_topic_id=topic11)
        self.exchange_cleanup.extend(['xp1', 'xp2'])

        stream1_id, route = self.pubsub_management.create_stream(
            'stream1', topic_ids=[topic7, topic4, topic5], exchange_point='xp1')
        stream2_id, route = self.pubsub_management.create_stream(
            'stream2', topic_ids=[topic8], exchange_point='xp2')
        stream3_id, route = self.pubsub_management.create_stream(
            'stream3', topic_ids=[topic10, topic13], exchange_point='xp2')
        stream4_id, route = self.pubsub_management.create_stream(
            'stream4', topic_ids=[topic9], exchange_point='xp2')
        stream5_id, route = self.pubsub_management.create_stream(
            'stream5', topic_ids=[topic11], exchange_point='xp2')

        # All four subscriptions deliver into the single 'sub1' queue
        subscription1 = self.pubsub_management.create_subscription(
            'sub1', topic_ids=[topic1])
        subscription2 = self.pubsub_management.create_subscription(
            'sub2', topic_ids=[topic8], exchange_name='sub1')
        subscription3 = self.pubsub_management.create_subscription(
            'sub3', topic_ids=[topic9], exchange_name='sub1')
        subscription4 = self.pubsub_management.create_subscription(
            'sub4', topic_ids=[topic10, topic13, topic11], exchange_name='sub1')

        #--------------------------------------------------------------------------------
        self.pubsub_management.activate_subscription(subscription1)

        self.publish_on_stream(stream1_id, 1)

        # Exactly one copy is expected even though stream1 carries three topics
        self.assertEqual(self.msg_queue.get(timeout=10), 1)
        with self.assertRaises(Empty):
            self.msg_queue.get(timeout=0.1)

        self.pubsub_management.deactivate_subscription(subscription1)
        self.pubsub_management.delete_subscription(subscription1)

        #--------------------------------------------------------------------------------
        self.pubsub_management.activate_subscription(subscription2)

        self.publish_on_stream(stream2_id, 2)

        self.assertEqual(self.msg_queue.get(timeout=10), 2)
        with self.assertRaises(Empty):
            self.msg_queue.get(timeout=0.1)

        self.pubsub_management.deactivate_subscription(subscription2)
        self.pubsub_management.delete_subscription(subscription2)

        #--------------------------------------------------------------------------------
        self.pubsub_management.activate_subscription(subscription3)

        # stream2 is on topic8, the parent of topic9: must not be delivered
        self.publish_on_stream(stream2_id, 3)
        with self.assertRaises(Empty):
            self.msg_queue.get(timeout=0.3)

        self.publish_on_stream(stream3_id, 4)
        self.assertEqual(self.msg_queue.get(timeout=10), 4)

        self.pubsub_management.deactivate_subscription(subscription3)
        self.pubsub_management.delete_subscription(subscription3)

        #--------------------------------------------------------------------------------
        self.pubsub_management.activate_subscription(subscription4)

        # stream4 is on topic9, the parent of the subscribed topics:
        # must not be delivered
        self.publish_on_stream(stream4_id, 5)
        with self.assertRaises(Empty):
            self.msg_queue.get(timeout=0.3)

        self.publish_on_stream(stream5_id, 6)
        self.assertEqual(self.msg_queue.get(timeout=10), 6)
        with self.assertRaises(Empty):
            self.msg_queue.get(timeout=0.3)

        self.pubsub_management.deactivate_subscription(subscription4)
        self.pubsub_management.delete_subscription(subscription4)

        #--------------------------------------------------------------------------------
        sub1.stop()

        # Delete topics from the highest number down to topic1, then the streams
        for topic in (topic13, topic12, topic11, topic10, topic9, topic8,
                      topic7, topic6, topic5, topic4, topic3, topic2, topic1):
            self.pubsub_management.delete_topic(topic)
        for stream in (stream1_id, stream2_id, stream3_id, stream4_id,
                       stream5_id):
            self.pubsub_management.delete_stream(stream)

    def _get_pdict(self, filter_values):
        """Return the id of a parameter dictionary holding the named contexts.

        All eleven parameter contexts (TIME, LAT, LON, the three ``*_L0``
        inputs, the three ``*_L1`` functions, PRACSAL and DENSITY) are
        registered with dataset management on every call. Those whose name is
        in ``filter_values`` are bundled into a parameter dictionary named by
        joining the selected names with ``_`` in registration order.
        Dictionaries are cached in ``self.pdicts`` by that name, so a repeated
        selection reuses the existing id.

        :param filter_values: collection of parameter context names to include
        :return: id of the matching parameter dictionary
        """
        # (ParameterContext, registered id) pairs, in registration order
        registered = []

        def register(ctxt, parameter_type):
            # Register one context and remember it together with its id
            ctxt_id = self.dataset_management.create_parameter_context(
                name=ctxt.name,
                parameter_context=ctxt.dump(),
                parameter_type=parameter_type,
                unit_of_measure=ctxt.uom)
            registered.append((ctxt, ctxt_id))

        t_ctxt = ParameterContext(
            'TIME', param_type=QuantityType(value_encoding=np.dtype('int64')))
        t_ctxt.uom = 'seconds since 01-01-1900'
        register(t_ctxt, 'quantity<int64>')

        lat_ctxt = ParameterContext(
            'LAT',
            param_type=ConstantType(
                QuantityType(value_encoding=np.dtype('float32'))),
            fill_value=-9999)
        lat_ctxt.axis = AxisTypeEnum.LAT
        lat_ctxt.uom = 'degree_north'
        register(lat_ctxt, 'quantity<float32>')

        lon_ctxt = ParameterContext(
            'LON',
            param_type=ConstantType(
                QuantityType(value_encoding=np.dtype('float32'))),
            fill_value=-9999)
        lon_ctxt.axis = AxisTypeEnum.LON
        lon_ctxt.uom = 'degree_east'
        register(lon_ctxt, 'quantity<float32>')

        # Independent Parameters

        # Temperature - values expected to be the decimal results of conversion from hex
        temp_ctxt = ParameterContext(
            'TEMPWAT_L0',
            param_type=QuantityType(value_encoding=np.dtype('float32')),
            fill_value=-9999)
        temp_ctxt.uom = 'deg_C'
        register(temp_ctxt, 'quantity<float32>')

        # Conductivity - values expected to be the decimal results of conversion from hex
        cond_ctxt = ParameterContext(
            'CONDWAT_L0',
            param_type=QuantityType(value_encoding=np.dtype('float32')),
            fill_value=-9999)
        cond_ctxt.uom = 'S m-1'
        register(cond_ctxt, 'quantity<float32>')

        # Pressure - values expected to be the decimal results of conversion from hex
        press_ctxt = ParameterContext(
            'PRESWAT_L0',
            param_type=QuantityType(value_encoding=np.dtype('float32')),
            fill_value=-9999)
        press_ctxt.uom = 'dbar'
        register(press_ctxt, 'quantity<float32>')

        # Dependent Parameters

        # TEMPWAT_L1 = (TEMPWAT_L0 / 10000) - 10
        tl1_func = '(T / 10000) - 10'
        tl1_pmap = {'T': 'TEMPWAT_L0'}
        expr = NumexprFunction('TEMPWAT_L1', tl1_func, ['T'], param_map=tl1_pmap)
        tempL1_ctxt = ParameterContext(
            'TEMPWAT_L1',
            param_type=ParameterFunctionType(function=expr),
            variability=VariabilityEnum.TEMPORAL)
        tempL1_ctxt.uom = 'deg_C'
        register(tempL1_ctxt, 'pfunc')

        # CONDWAT_L1 = (CONDWAT_L0 / 100000) - 0.5
        cl1_func = '(C / 100000) - 0.5'
        cl1_pmap = {'C': 'CONDWAT_L0'}
        expr = NumexprFunction('CONDWAT_L1', cl1_func, ['C'], param_map=cl1_pmap)
        condL1_ctxt = ParameterContext(
            'CONDWAT_L1',
            param_type=ParameterFunctionType(function=expr),
            variability=VariabilityEnum.TEMPORAL)
        condL1_ctxt.uom = 'S m-1'
        register(condL1_ctxt, 'pfunc')

        # Equation uses p_range, which is a calibration coefficient - Fixing to 679.34040721
        #   PRESWAT_L1 = (PRESWAT_L0 * p_range / (0.85 * 65536)) - (0.05 * p_range)
        pl1_func = '(P * p_range / (0.85 * 65536)) - (0.05 * p_range)'
        pl1_pmap = {'P': 'PRESWAT_L0', 'p_range': 679.34040721}
        expr = NumexprFunction('PRESWAT_L1', pl1_func, ['P', 'p_range'],
                               param_map=pl1_pmap)
        presL1_ctxt = ParameterContext(
            'PRESWAT_L1',
            param_type=ParameterFunctionType(function=expr),
            variability=VariabilityEnum.TEMPORAL)
        # NOTE(review): 'S m-1' is the unit used for conductivity above, while
        # PRESWAT_L0 uses 'dbar' -- this looks like a copy/paste slip. Left
        # unchanged; confirm before editing the registered definition.
        presL1_ctxt.uom = 'S m-1'
        register(presL1_ctxt, 'pfunc')

        # Density & practical salinity calculated using the Gibbs Seawater library - available via python-gsw project:
        #       https://code.google.com/p/python-gsw/ & http://pypi.python.org/pypi/gsw/3.0.1

        # PRACSAL = gsw.SP_from_C((CONDWAT_L1 * 10), TEMPWAT_L1, PRESWAT_L1)
        owner = 'gsw'
        sal_func = 'SP_from_C'
        sal_arglist = ['C', 't', 'p']
        sal_pmap = {
            'C': NumexprFunction('CONDWAT_L1*10', 'C*10', ['C'],
                                 param_map={'C': 'CONDWAT_L1'}),
            't': 'TEMPWAT_L1',
            'p': 'PRESWAT_L1',
        }
        sal_kwargmap = None
        expr = PythonFunction('PRACSAL', owner, sal_func, sal_arglist,
                              sal_kwargmap, sal_pmap)
        sal_ctxt = ParameterContext('PRACSAL',
                                    param_type=ParameterFunctionType(expr),
                                    variability=VariabilityEnum.TEMPORAL)
        sal_ctxt.uom = 'g kg-1'
        register(sal_ctxt, 'pfunc')

        # absolute_salinity = gsw.SA_from_SP(PRACSAL, PRESWAT_L1, longitude, latitude)
        # conservative_temperature = gsw.CT_from_t(absolute_salinity, TEMPWAT_L1, PRESWAT_L1)
        # DENSITY = gsw.rho(absolute_salinity, conservative_temperature, PRESWAT_L1)
        owner = 'gsw'
        abs_sal_expr = PythonFunction('abs_sal', owner, 'SA_from_SP',
                                      ['PRACSAL', 'PRESWAT_L1', 'LON', 'LAT'])
        cons_temp_expr = PythonFunction(
            'cons_temp', owner, 'CT_from_t',
            [abs_sal_expr, 'TEMPWAT_L1', 'PRESWAT_L1'])
        dens_expr = PythonFunction(
            'DENSITY', owner, 'rho',
            [abs_sal_expr, cons_temp_expr, 'PRESWAT_L1'])
        dens_ctxt = ParameterContext(
            'DENSITY',
            param_type=ParameterFunctionType(dens_expr),
            variability=VariabilityEnum.TEMPORAL)
        dens_ctxt.uom = 'kg m-3'
        register(dens_ctxt, 'pfunc')

        # Keep only the requested contexts, preserving registration order so
        # the same selection always yields the same dictionary name.
        selected = [(ctxt, ctxt_id) for ctxt, ctxt_id in registered
                    if ctxt.name in filter_values]
        pdict_name = '_'.join([ctxt.name for ctxt, _ in selected])

        if pdict_name not in self.pdicts:
            self.pdicts[pdict_name] = self.dataset_management.create_parameter_dictionary(
                pdict_name,
                parameter_context_ids=[ctxt_id for _, ctxt_id in selected],
                temporal_context='time')
        return self.pdicts[pdict_name]
class TestDMEnd2End(IonIntegrationTestCase):
    '''
    Integration tests that publish granules on a stream, persist them through
    ingestion into a dataset and read them back through the data retriever.
    '''

    def setUp(self):  # Love the non pep-8 convention
        '''
        Start the container, deploy r2deploy.yml and create the service clients
        and bookkeeping attributes used by the helpers and tests.
        '''
        self._start_container()

        self.container.start_rel_from_url('res/deploy/r2deploy.yml')

        self.process_dispatcher = ProcessDispatcherServiceClient()
        self.pubsub_management = PubsubManagementServiceClient()
        self.resource_registry = ResourceRegistryServiceClient()
        self.dataset_management = DatasetManagementServiceClient()
        self.ingestion_management = IngestionManagementServiceClient()
        self.data_retriever = DataRetrieverServiceClient()
        self.pids = []  # processes spawned by launch_producer, terminated in tearDown
        self.event = Event()  # set by validate_granule_subscription
        self.exchange_space_name = 'test_granules'
        self.exchange_point_name = 'science_data'
        self.i = 0  # counter used to build unique stream/dataset names

        self.purge_queues()
        self.queue_buffer = []  # queues (objects or names) deleted in tearDown
        self.streams = []  # stream ids handed to stop_all_ingestion
        self.addCleanup(self.stop_all_ingestion)

    def purge_queues(self):
        '''
        Purge the 'science_granule_ingestion' queue.
        '''
        xn = self.container.ex_manager.create_xn_queue('science_granule_ingestion')
        xn.purge()

    def tearDown(self):
        '''
        Purge the ingestion queue, terminate spawned processes, clean the
        ingestion subscriptions and delete every queue in self.queue_buffer.
        '''
        self.purge_queues()
        for pid in self.pids:
            self.container.proc_manager.terminate_process(pid)
        IngestionManagementIntTest.clean_subscriptions()
        for queue in self.queue_buffer:
            if isinstance(queue, ExchangeNameQueue):
                queue.delete()
            elif isinstance(queue, str):
                # Only the queue name was recorded; get a handle to delete it
                xn = self.container.ex_manager.create_xn_queue(queue)
                xn.delete()

    #--------------------------------------------------------------------------------
    # Helper/Utility methods
    #--------------------------------------------------------------------------------

    def create_dataset(self, parameter_dict_id=''):
        '''
        Creates a time-series dataset

        When parameter_dict_id is empty, the 'ctd_parsed_param_dict' parameter
        dictionary is looked up by name and used instead.
        Returns the id of the created dataset.
        '''
        tdom, sdom = time_series_domain()
        sdom = sdom.dump()
        tdom = tdom.dump()
        if not parameter_dict_id:
            parameter_dict_id = self.dataset_management.read_parameter_dictionary_by_name(
                'ctd_parsed_param_dict', id_only=True)

        dataset_id = self.dataset_management.create_dataset(
            'test_dataset_%i' % self.i,
            parameter_dictionary_id=parameter_dict_id,
            spatial_domain=sdom,
            temporal_domain=tdom)
        return dataset_id

    def get_datastore(self, dataset_id):
        '''
        Gets an instance of the datastore
            This method is primarily used to defeat a bug where integration tests in multiple containers may sometimes
            delete a CouchDB datastore and the other containers are unaware of the new state of the datastore.
        '''
        dataset = self.dataset_management.read_dataset(dataset_id)
        datastore_name = dataset.datastore_name
        datastore = self.container.datastore_manager.get_datastore(
            datastore_name, DataStore.DS_PROFILE.SCIDATA)
        return datastore

    def get_ingestion_config(self):
        '''
        Grab the ingestion configuration from the resource registry

        Returns the first IngestionConfiguration id found.
        '''
        # The ingestion configuration should have been created by the bootstrap service
        # which is configured through r2deploy.yml

        ingest_configs, _ = self.resource_registry.find_resources(
            restype=RT.IngestionConfiguration, id_only=True)
        return ingest_configs[0]

    def launch_producer(self, stream_id=''):
        '''
        Launch the producer

        The spawned process id is recorded in self.pids so tearDown terminates it.
        '''
        pid = self.container.spawn_process(
            'better_data_producer',
            'ion.processes.data.example_data_producer',
            'BetterDataProducer',
            {'process': {'stream_id': stream_id}})
        self.pids.append(pid)

    def make_simple_dataset(self):
        '''
        Makes a stream, a stream definition and a dataset, the essentials for most of these tests

        Returns a (stream_id, route, stream_def_id, dataset_id) tuple.
        '''
        pdict_id = self.dataset_management.read_parameter_dictionary_by_name(
            'ctd_parsed_param_dict', id_only=True)
        stream_def_id = self.pubsub_management.create_stream_definition(
            'ctd data', parameter_dictionary_id=pdict_id)
        stream_id, route = self.pubsub_management.create_stream(
            'ctd stream %i' % self.i, 'xp1', stream_definition_id=stream_def_id)

        dataset_id = self.create_dataset(pdict_id)

        self.get_datastore(dataset_id)
        self.i += 1
        return stream_id, route, stream_def_id, dataset_id

    def publish_hifi(self, stream_id, stream_route, offset=0):
        '''
        Publish deterministic data

        One granule is published with 'time' and 'temp' both set to the ten
        values starting at offset * 10.
        '''
        pub = StandaloneStreamPublisher(stream_id, stream_route)

        stream_def = self.pubsub_management.read_stream_definition(stream_id=stream_id)
        stream_def_id = stream_def._id
        rdt = RecordDictionaryTool(stream_definition_id=stream_def_id)
        rdt['time'] = np.arange(10) + (offset * 10)
        rdt['temp'] = np.arange(10) + (offset * 10)
        pub.publish(rdt.to_granule())

    def publish_fake_data(self, stream_id, route):
        '''
        Make four granules
        '''
        for i in xrange(4):
            self.publish_hifi(stream_id, route, i)

    def start_ingestion(self, stream_id, dataset_id):
        '''
        Starts ingestion/persistence for a given dataset
        '''
        ingest_config_id = self.get_ingestion_config()
        self.ingestion_management.persist_data_stream(
            stream_id=stream_id,
            ingestion_configuration_id=ingest_config_id,
            dataset_id=dataset_id)

    def stop_ingestion(self, stream_id):
        '''
        Stops ingestion/persistence for a given stream
        '''
        ingest_config_id = self.get_ingestion_config()
        self.ingestion_management.unpersist_data_stream(
            stream_id=stream_id, ingestion_configuration_id=ingest_config_id)

    def stop_all_ingestion(self):
        '''
        Best-effort cleanup: stop ingestion for every stream in self.streams.

        Registered through addCleanup in setUp. Each stream is handled on its
        own so that one failure does not prevent the remaining streams from
        being stopped. Failures are logged instead of raised because most tests
        have already stopped ingestion themselves before this runs.
        '''
        for sid in self.streams:
            try:
                self.stop_ingestion(sid)
            except Exception as e:
                log.info('Could not stop ingestion for stream %s: %s', sid, e)

    def validate_granule_subscription(self, msg, route, stream_id):
        '''
        Validation for granule format

        Subscriber callback: empty messages are ignored; otherwise the message
        must be a Granule, and self.event is set once it has been validated.
        '''
        if msg == {}:
            return
        rdt = RecordDictionaryTool.load_from_granule(msg)
        log.info('%s', rdt.pretty_print())
        self.assertIsInstance(msg, Granule, 'Message is improperly formatted. (%s)' % type(msg))
        self.event.set()

    def wait_until_we_have_enough_granules(self, dataset_id='', data_size=40):
        '''
        Loops until there is a sufficient amount of data in the dataset

        Polls every 0.2 seconds under a 40 second gevent.Timeout until the last
        'time' value is not the fill value and the 'time' extent has reached
        data_size.
        '''
        done = False
        with gevent.Timeout(40):
            while not done:
                extents = self.dataset_management.dataset_extents(dataset_id, 'time')[0]
                granule = self.data_retriever.retrieve_last_data_points(dataset_id, 1)
                rdt = RecordDictionaryTool.load_from_granule(granule)
                if rdt['time'] and rdt['time'][0] != rdt._pdict.get_context('time').fill_value and extents >= data_size:
                    done = True
                else:
                    gevent.sleep(0.2)

    #--------------------------------------------------------------------------------
    # Test Methods
    #--------------------------------------------------------------------------------

    @attr('SMOKE')
    def test_dm_end_2_end(self):
        '''
        Produce data on a stream, persist it, then retrieve it in one chunk,
        through a streamed replay and through a sliced query.
        '''
        #--------------------------------------------------------------------------------
        # Set up a stream and have a mock instrument (producer) send data
        #--------------------------------------------------------------------------------
        self.event.clear()

        # Get a precompiled parameter dictionary with basic ctd fields
        pdict_id = self.dataset_management.read_parameter_dictionary_by_name(
            'ctd_parsed_param_dict', id_only=True)
        context_ids = self.dataset_management.read_parameter_contexts(pdict_id, id_only=True)

        # Add a field that supports binary data input.
        bin_context = ParameterContext('binary', param_type=ArrayType())
        context_ids.append(self.dataset_management.create_parameter_context('binary', bin_context.dump()))
        # Add another field that supports dictionary elements.
        rec_context = ParameterContext('records', param_type=RecordType())
        context_ids.append(self.dataset_management.create_parameter_context('records', rec_context.dump()))

        pdict_id = self.dataset_management.create_parameter_dictionary(
            'replay_pdict', parameter_context_ids=context_ids, temporal_context='time')

        stream_definition = self.pubsub_management.create_stream_definition(
            'ctd data', parameter_dictionary_id=pdict_id)

        stream_id, route = self.pubsub_management.create_stream(
            'producer',
            exchange_point=self.exchange_point_name,
            stream_definition_id=stream_definition)

        #--------------------------------------------------------------------------------
        # Start persisting the data on the stream
        # - Get the ingestion configuration from the resource registry
        # - Create the dataset
        # - call persist_data_stream to setup the subscription for the ingestion workers
        #   on the stream that you specify which causes the data to be persisted
        #--------------------------------------------------------------------------------
        ingest_config_id = self.get_ingestion_config()
        dataset_id = self.create_dataset(pdict_id)
        self.ingestion_management.persist_data_stream(
            stream_id=stream_id,
            ingestion_configuration_id=ingest_config_id,
            dataset_id=dataset_id)

        #--------------------------------------------------------------------------------
        # Now the granules are ingesting and persisted
        #--------------------------------------------------------------------------------
        self.launch_producer(stream_id)
        self.wait_until_we_have_enough_granules(dataset_id, 40)

        #--------------------------------------------------------------------------------
        # Now get the data in one chunk using an RPC call to retrieve
        #--------------------------------------------------------------------------------
        replay_data = self.data_retriever.retrieve(dataset_id)
        self.assertIsInstance(replay_data, Granule)
        rdt = RecordDictionaryTool.load_from_granule(replay_data)
        self.assertTrue((rdt['time'][:10] == np.arange(10)).all(), '%s' % rdt['time'][:])
        self.assertTrue((rdt['binary'][:10] == np.array(['hi'] * 10, dtype='object')).all())

        #--------------------------------------------------------------------------------
        # Now to try the streamed approach
        #--------------------------------------------------------------------------------
        replay_stream_id, replay_route = self.pubsub_management.create_stream(
            'replay_out',
            exchange_point=self.exchange_point_name,
            stream_definition_id=stream_definition)
        self.replay_id, process_id = self.data_retriever.define_replay(
            dataset_id=dataset_id, stream_id=replay_stream_id)
        log.info('Process ID: %s', process_id)

        replay_client = ReplayClient(process_id)

        #--------------------------------------------------------------------------------
        # Create the listening endpoint for the retriever to talk to
        #--------------------------------------------------------------------------------
        xp = self.container.ex_manager.create_xp(self.exchange_point_name)
        subscriber = StandaloneStreamSubscriber(self.exchange_space_name, self.validate_granule_subscription)
        self.queue_buffer.append(self.exchange_space_name)
        subscriber.start()
        subscriber.xn.bind(replay_route.routing_key, xp)

        self.data_retriever.start_replay_agent(self.replay_id)

        self.assertTrue(replay_client.await_agent_ready(5), 'The process never launched')
        replay_client.start_replay()

        self.assertTrue(self.event.wait(10))
        subscriber.stop()

        self.data_retriever.cancel_replay_agent(self.replay_id)

        #--------------------------------------------------------------------------------
        # Test the slicing capabilities
        #--------------------------------------------------------------------------------
        granule = self.data_retriever.retrieve(dataset_id=dataset_id, query={'tdoa': slice(0, 5)})
        rdt = RecordDictionaryTool.load_from_granule(granule)
        b = rdt['time'] == np.arange(5)
        # The comparison yields a plain bool instead of an array when shapes differ
        self.assertTrue(b.all() if not isinstance(b, bool) else b)
        self.streams.append(stream_id)
        self.stop_ingestion(stream_id)

    def test_coverage_transform(self):
        '''
        Publish one parsed granule and check the retrieved L1, density and
        salinity values against expected constants.
        '''
        ph = ParameterHelper(self.dataset_management, self.addCleanup)
        pdict_id = ph.create_parsed()
        stream_def_id = self.pubsub_management.create_stream_definition(
            'ctd parsed', parameter_dictionary_id=pdict_id)
        self.addCleanup(self.pubsub_management.delete_stream_definition, stream_def_id)

        stream_id, route = self.pubsub_management.create_stream(
            'example',
            exchange_point=self.exchange_point_name,
            stream_definition_id=stream_def_id)
        self.addCleanup(self.pubsub_management.delete_stream, stream_id)

        ingestion_config_id = self.get_ingestion_config()
        dataset_id = self.create_dataset(pdict_id)

        self.ingestion_management.persist_data_stream(
            stream_id=stream_id,
            ingestion_configuration_id=ingestion_config_id,
            dataset_id=dataset_id)
        self.addCleanup(self.ingestion_management.unpersist_data_stream, stream_id, ingestion_config_id)

        publisher = StandaloneStreamPublisher(stream_id, route)

        rdt = ph.get_rdt(stream_def_id)
        ph.fill_parsed_rdt(rdt)

        dataset_monitor = DatasetMonitor(dataset_id)
        self.addCleanup(dataset_monitor.stop)

        publisher.publish(rdt.to_granule())
        self.assertTrue(dataset_monitor.event.wait(30))

        replay_granule = self.data_retriever.retrieve(dataset_id)
        rdt_out = RecordDictionaryTool.load_from_granule(replay_granule)

        np.testing.assert_array_almost_equal(rdt_out['time'], rdt['time'])
        np.testing.assert_array_almost_equal(rdt_out['temp'], rdt['temp'])

        np.testing.assert_array_almost_equal(rdt_out['conductivity_L1'], np.array([42.914]))
        np.testing.assert_array_almost_equal(rdt_out['temp_L1'], np.array([20.]))
        np.testing.assert_array_almost_equal(rdt_out['pressure_L1'], np.array([3.068]))
        np.testing.assert_array_almost_equal(rdt_out['density'], np.array([1021.7144739593881]))
        np.testing.assert_array_almost_equal(rdt_out['salinity'], np.array([30.935132729668283]))

    def test_qc_events(self):
        '''
        Publish a granule on a QC-enabled stream and wait for a
        ParameterQCEvent with qc_parameter 'temp_qc' and temporal_value 7.
        '''
        ph = ParameterHelper(self.dataset_management, self.addCleanup)
        pdict_id = ph.create_qc_pdict()
        stream_def_id = self.pubsub_management.create_stream_definition(
            'qc stream def', parameter_dictionary_id=pdict_id)
        self.addCleanup(self.pubsub_management.delete_stream_definition, stream_def_id)

        stream_id, route = self.pubsub_management.create_stream(
            'qc stream',
            exchange_point=self.exchange_point_name,
            stream_definition_id=stream_def_id)
        self.addCleanup(self.pubsub_management.delete_stream, stream_id)

        ingestion_config_id = self.get_ingestion_config()
        dataset_id = self.create_dataset(pdict_id)
        config = DotDict()

        self.ingestion_management.persist_data_stream(
            stream_id=stream_id,
            ingestion_configuration_id=ingestion_config_id,
            dataset_id=dataset_id,
            config=config)
        self.addCleanup(self.ingestion_management.unpersist_data_stream, stream_id, ingestion_config_id)

        publisher = StandaloneStreamPublisher(stream_id, route)
        rdt = RecordDictionaryTool(stream_definition_id=stream_def_id)
        rdt['time'] = np.arange(10)
        rdt['temp'] = np.arange(10) * 3

        verified = Event()

        def verification(event, *args, **kwargs):
            self.assertEquals(event.qc_parameter, 'temp_qc')
            self.assertEquals(event.temporal_value, 7)
            verified.set()

        es = EventSubscriber(
            event_type=OT.ParameterQCEvent,
            origin=dataset_id,
            callback=verification,
            auto_delete=True)
        es.start()
        self.addCleanup(es.stop)

        publisher.publish(rdt.to_granule())
        self.assertTrue(verified.wait(10))

    def test_lookup_values_ingest_replay(self):
        '''
        Ingest two granules around updates to stored lookup documents and check
        the retrieved 'calibrated', 'calibrated_b' and 'offset_b' values.
        '''
        ph = ParameterHelper(self.dataset_management, self.addCleanup)
        pdict_id = ph.create_lookups()
        stream_def_id = self.pubsub_management.create_stream_definition(
            'lookups', parameter_dictionary_id=pdict_id)
        self.addCleanup(self.pubsub_management.delete_stream_definition, stream_def_id)

        stream_id, route = self.pubsub_management.create_stream(
            'example',
            exchange_point=self.exchange_point_name,
            stream_definition_id=stream_def_id)
        self.addCleanup(self.pubsub_management.delete_stream, stream_id)

        ingestion_config_id = self.get_ingestion_config()
        dataset_id = self.create_dataset(pdict_id)
        config = DotDict()
        config.process.lookup_docs = ['test1', 'test2']
        self.ingestion_management.persist_data_stream(
            stream_id=stream_id,
            ingestion_configuration_id=ingestion_config_id,
            dataset_id=dataset_id,
            config=config)
        self.addCleanup(self.ingestion_management.unpersist_data_stream, stream_id, ingestion_config_id)

        stored_value_manager = StoredValueManager(self.container)
        stored_value_manager.stored_value_cas('test1', {'offset_a': 10.0, 'offset_b': 13.1})

        publisher = StandaloneStreamPublisher(stream_id, route)
        rdt = RecordDictionaryTool(stream_definition_id=stream_def_id)
        rdt['time'] = np.arange(20)
        rdt['temp'] = [20.0] * 20

        granule = rdt.to_granule()

        dataset_monitor = DatasetMonitor(dataset_id)
        self.addCleanup(dataset_monitor.stop)

        publisher.publish(granule)
        self.assertTrue(dataset_monitor.event.wait(30))

        replay_granule = self.data_retriever.retrieve(dataset_id)

        rdt_out = RecordDictionaryTool.load_from_granule(replay_granule)

        np.testing.assert_array_almost_equal(rdt_out['time'], np.arange(20))
        np.testing.assert_array_almost_equal(rdt_out['temp'], np.array([20.] * 20))
        np.testing.assert_array_almost_equal(rdt_out['calibrated'], np.array([30.] * 20))
        np.testing.assert_array_equal(
            rdt_out['offset_b'], np.array([rdt_out.fill_value('offset_b')] * 20))

        rdt = RecordDictionaryTool(stream_definition_id=stream_def_id)
        rdt['time'] = np.arange(20, 40)
        rdt['temp'] = [20.0] * 20
        granule = rdt.to_granule()

        dataset_monitor.event.clear()

        stored_value_manager.stored_value_cas('test1', {'offset_a': 20.0})
        stored_value_manager.stored_value_cas('coefficient_document', {'offset_b': 10.0})
        gevent.sleep(2)

        publisher.publish(granule)
        self.assertTrue(dataset_monitor.event.wait(30))

        replay_granule = self.data_retriever.retrieve(dataset_id)

        rdt_out = RecordDictionaryTool.load_from_granule(replay_granule)

        np.testing.assert_array_almost_equal(rdt_out['time'], np.arange(40))
        np.testing.assert_array_almost_equal(rdt_out['temp'], np.array([20.] * 20 + [20.] * 20))
        np.testing.assert_array_equal(rdt_out['offset_b'], np.array([10.] * 40))
        np.testing.assert_array_almost_equal(rdt_out['calibrated'], np.array([30.] * 20 + [40.] * 20))
        np.testing.assert_array_almost_equal(rdt_out['calibrated_b'], np.array([40.] * 20 + [50.] * 20))

    @unittest.skip('Doesnt work')
    @attr('LOCOINT')
    @unittest.skipIf(os.getenv('CEI_LAUNCH_TEST', False), 'Skip test while in CEI LAUNCH mode')
    def test_replay_pause(self):
        '''
        Start a replay, then pause, resume and stop it, checking after each
        step whether granules are still being delivered. Currently skipped.
        '''
        # Get a precompiled parameter dictionary with basic ctd fields
        pdict_id = self.dataset_management.read_parameter_dictionary_by_name(
            'ctd_parsed_param_dict', id_only=True)
        context_ids = self.dataset_management.read_parameter_contexts(pdict_id, id_only=True)

        # Add a field that supports binary data input.
        bin_context = ParameterContext('binary', param_type=ArrayType())
        context_ids.append(self.dataset_management.create_parameter_context('binary', bin_context.dump()))
        # Add another field that supports dictionary elements.
        rec_context = ParameterContext('records', param_type=RecordType())
        context_ids.append(self.dataset_management.create_parameter_context('records', rec_context.dump()))

        pdict_id = self.dataset_management.create_parameter_dictionary(
            'replay_pdict', parameter_context_ids=context_ids, temporal_context='time')

        stream_def_id = self.pubsub_management.create_stream_definition(
            'replay_stream', parameter_dictionary_id=pdict_id)
        replay_stream, replay_route = self.pubsub_management.create_stream(
            'replay', 'xp1', stream_definition_id=stream_def_id)
        dataset_id = self.create_dataset(pdict_id)
        scov = DatasetManagementService._get_simplex_coverage(dataset_id)

        bb = CoverageCraft(scov)
        bb.rdt['time'] = np.arange(100)
        bb.rdt['temp'] = np.random.random(100) + 30
        bb.sync_with_granule()

        DatasetManagementService._persist_coverage(dataset_id, bb.coverage)  # This invalidates it for multi-host configurations
        # Set up the subscriber to verify the data
        subscriber = StandaloneStreamSubscriber(self.exchange_space_name, self.validate_granule_subscription)
        xp = self.container.ex_manager.create_xp('xp1')
        self.queue_buffer.append(self.exchange_space_name)
        subscriber.start()
        subscriber.xn.bind(replay_route.routing_key, xp)

        # Set up the replay agent and the client wrapper

        # 1) Define the Replay (dataset and stream to publish on)
        self.replay_id, process_id = self.data_retriever.define_replay(
            dataset_id=dataset_id, stream_id=replay_stream)
        # 2) Make a client to interact with the process (optionally provide it a process to bind with)
        replay_client = ReplayClient(process_id)
        # 3) Start the agent (launch the process)
        self.data_retriever.start_replay_agent(self.replay_id)
        # 4) Start replaying...
        replay_client.start_replay()

        # Wait till we get some granules
        self.assertTrue(self.event.wait(5))

        # We got granules, pause the replay, clear the queue and allow the process to finish consuming
        replay_client.pause_replay()
        gevent.sleep(1)
        subscriber.xn.purge()
        self.event.clear()

        # Make sure there's no remaining messages being consumed
        self.assertFalse(self.event.wait(1))

        # Resume the replay and wait until we start getting granules again
        replay_client.resume_replay()
        self.assertTrue(self.event.wait(5))

        # Stop the replay, clear the queues
        replay_client.stop_replay()
        gevent.sleep(1)
        subscriber.xn.purge()
        self.event.clear()

        # Make sure that it did indeed stop
        self.assertFalse(self.event.wait(1))

        subscriber.stop()

    def test_retrieve_and_transform(self):
        '''
        Retrieve persisted CTD data through the L2 salinity transform and check
        that no returned salinity value is zero.
        '''
        # Make a simple dataset and start ingestion, pretty standard stuff.
        ctd_stream_id, route, stream_def_id, dataset_id = self.make_simple_dataset()
        self.start_ingestion(ctd_stream_id, dataset_id)

        # Stream definition for the salinity data
        salinity_pdict_id = self.dataset_management.read_parameter_dictionary_by_name(
            'ctd_parsed_param_dict', id_only=True)
        sal_stream_def_id = self.pubsub_management.create_stream_definition(
            'sal data', parameter_dictionary_id=salinity_pdict_id)

        rdt = RecordDictionaryTool(stream_definition_id=stream_def_id)
        rdt['time'] = np.arange(10)
        rdt['temp'] = np.random.randn(10) * 10 + 30
        rdt['conductivity'] = np.random.randn(10) * 2 + 10
        rdt['pressure'] = np.random.randn(10) * 1 + 12

        publisher = StandaloneStreamPublisher(ctd_stream_id, route)
        publisher.publish(rdt.to_granule())

        rdt['time'] = np.arange(10, 20)

        publisher.publish(rdt.to_granule())

        self.wait_until_we_have_enough_granules(dataset_id, 20)

        granule = self.data_retriever.retrieve(
            dataset_id,
            None,
            None,
            'ion.processes.data.transforms.ctd.ctd_L2_salinity',
            'CTDL2SalinityTransformAlgorithm',
            kwargs=dict(params=sal_stream_def_id))
        rdt = RecordDictionaryTool.load_from_granule(granule)
        for i in rdt['salinity']:
            self.assertNotEquals(i, 0)
        self.streams.append(ctd_stream_id)
        self.stop_ingestion(ctd_stream_id)

    def test_last_granule(self):
        '''
        Check that retrieve_last_data_points returns the newest 10 and the
        newest 5 time values after two granules have been ingested.
        '''
        stream_id, route, stream_def_id, dataset_id = self.make_simple_dataset()
        self.start_ingestion(stream_id, dataset_id)

        self.publish_hifi(stream_id, route, 0)
        self.publish_hifi(stream_id, route, 1)

        self.wait_until_we_have_enough_granules(dataset_id, 20)  # I just need two

        def verifier():
            replay_granule = self.data_retriever.retrieve_last_data_points(dataset_id, 10)

            rdt = RecordDictionaryTool.load_from_granule(replay_granule)

            comp = rdt['time'] == np.arange(10) + 10
            # A plain bool means the shapes did not match; treat as not ready yet
            if not isinstance(comp, bool):
                return comp.all()
            return False

        success = poll(verifier)

        self.assertTrue(success)

        def verify_points():
            replay_granule = self.data_retriever.retrieve_last_data_points(dataset_id, 5)

            rdt = RecordDictionaryTool.load_from_granule(replay_granule)

            comp = rdt['time'] == np.arange(15, 20)
            if not isinstance(comp, bool):
                return comp.all()
            return False

        success = poll(verify_points)

        self.assertTrue(success)
        self.streams.append(stream_id)
        self.stop_ingestion(stream_id)

    def test_replay_with_parameters(self):
        '''
        Retrieve with a query restricting time range, stride and parameters,
        then check the returned times, the returned keys and the extents.
        '''
        #--------------------------------------------------------------------------------
        # Create the configurations and the dataset
        #--------------------------------------------------------------------------------
        # Get a precompiled parameter dictionary with basic ctd fields
        pdict_id = self.dataset_management.read_parameter_dictionary_by_name(
            'ctd_parsed_param_dict', id_only=True)
        context_ids = self.dataset_management.read_parameter_contexts(pdict_id, id_only=True)

        # Add a field that supports binary data input.
        bin_context = ParameterContext('binary', param_type=ArrayType())
        context_ids.append(self.dataset_management.create_parameter_context('binary', bin_context.dump()))
        # Add another field that supports dictionary elements.
        rec_context = ParameterContext('records', param_type=RecordType())
        context_ids.append(self.dataset_management.create_parameter_context('records', rec_context.dump()))

        pdict_id = self.dataset_management.create_parameter_dictionary(
            'replay_pdict', parameter_context_ids=context_ids, temporal_context='time')

        stream_def_id = self.pubsub_management.create_stream_definition(
            'replay_stream', parameter_dictionary_id=pdict_id)

        stream_id, route = self.pubsub_management.create_stream(
            'replay_with_params',
            exchange_point=self.exchange_point_name,
            stream_definition_id=stream_def_id)
        config_id = self.get_ingestion_config()
        dataset_id = self.create_dataset(pdict_id)
        self.ingestion_management.persist_data_stream(
            stream_id=stream_id,
            ingestion_configuration_id=config_id,
            dataset_id=dataset_id)

        dataset_monitor = DatasetMonitor(dataset_id)
        self.addCleanup(dataset_monitor.stop)

        self.publish_fake_data(stream_id, route)

        self.assertTrue(dataset_monitor.event.wait(30))

        query = {
            'start_time': 0 - 2208988800,
            'end_time': 20 - 2208988800,
            'stride_time': 2,
            'parameters': ['time', 'temp']
        }
        retrieved_data = self.data_retriever.retrieve(dataset_id=dataset_id, query=query)

        rdt = RecordDictionaryTool.load_from_granule(retrieved_data)
        comp = np.arange(0, 20, 2) == rdt['time']
        self.assertTrue(comp.all(), '%s' % rdt.pretty_print())
        self.assertEquals(set(rdt.iterkeys()), set(['time', 'temp']))

        extents = self.dataset_management.dataset_extents(
            dataset_id=dataset_id, parameters=['time', 'temp'])
        self.assertTrue(extents['time'] >= 20)
        self.assertTrue(extents['temp'] >= 20)

        self.streams.append(stream_id)
        self.stop_ingestion(stream_id)

    def test_repersist_data(self):
        '''
        Unpersist and re-persist a stream between publishes and check that the
        dataset ends up with the 40 contiguous time values.
        '''
        stream_id, route, stream_def_id, dataset_id = self.make_simple_dataset()
        self.start_ingestion(stream_id, dataset_id)
        self.publish_hifi(stream_id, route, 0)
        self.publish_hifi(stream_id, route, 1)
        self.wait_until_we_have_enough_granules(dataset_id, 20)
        config_id = self.get_ingestion_config()
        self.ingestion_management.unpersist_data_stream(
            stream_id=stream_id, ingestion_configuration_id=config_id)
        self.ingestion_management.persist_data_stream(
            stream_id=stream_id,
            ingestion_configuration_id=config_id,
            dataset_id=dataset_id)
        self.publish_hifi(stream_id, route, 2)
        self.publish_hifi(stream_id, route, 3)
        self.wait_until_we_have_enough_granules(dataset_id, 40)
        success = False
        with gevent.timeout.Timeout(5):
            while not success:
                replay_granule = self.data_retriever.retrieve(dataset_id)

                rdt = RecordDictionaryTool.load_from_granule(replay_granule)

                comp = rdt['time'] == np.arange(0, 40)
                if not isinstance(comp, bool):
                    success = comp.all()
                gevent.sleep(1)

        self.assertTrue(success)
        self.streams.append(stream_id)
        self.stop_ingestion(stream_id)

    @attr('LOCOINT')
    @unittest.skipIf(os.getenv('CEI_LAUNCH_TEST', False), 'Host requires file-system access to coverage files, CEI mode does not support.')
    def test_correct_time(self):
        '''
        Write NTP timestamps into a coverage and check that the dataset's
        temporal bounds come back within 2 seconds of the unix-time equivalents.
        '''
        # There are 2208988800 seconds between Jan 1 1900 and Jan 1 1970, i.e.
        # the conversion factor between unix and NTP time
        unix_now = np.floor(time.time())
        ntp_now = unix_now + 2208988800

        unix_ago = unix_now - 20
        ntp_ago = unix_ago + 2208988800

        stream_id, route, stream_def_id, dataset_id = self.make_simple_dataset()
        coverage = DatasetManagementService._get_simplex_coverage(dataset_id)
        coverage.insert_timesteps(20)
        coverage.set_parameter_values('time', np.arange(ntp_ago, ntp_now))

        temporal_bounds = self.dataset_management.dataset_temporal_bounds(dataset_id)

        self.assertTrue(np.abs(temporal_bounds[0] - unix_ago) < 2)
        self.assertTrue(np.abs(temporal_bounds[1] - unix_now) < 2)

    @attr('LOCOINT')
    @unittest.skipIf(os.getenv('CEI_LAUNCH_TEST', False), 'Host requires file-system access to coverage files, CEI mode does not support.')
    def test_empty_coverage_time(self):
        '''
        Check that an empty dataset reports the 'time' fill value for both
        temporal bounds.
        '''
        stream_id, route, stream_def_id, dataset_id = self.make_simple_dataset()
        coverage = DatasetManagementService._get_coverage(dataset_id)
        temporal_bounds = self.dataset_management.dataset_temporal_bounds(dataset_id)
        self.assertEquals([coverage.get_parameter_context('time').fill_value] * 2, temporal_bounds)

    @attr('LOCOINT')
    @unittest.skipIf(os.getenv('CEI_LAUNCH_TEST', False), 'Host requires file-system access to coverage files, CEI mode does not support.')
    def test_out_of_band_retrieve(self):
        '''
        Check that DataRetrieverService.retrieve_oob returns all 40 ingested
        time values.
        '''
        # Setup the environment
        stream_id, route, stream_def_id, dataset_id = self.make_simple_dataset()
        self.start_ingestion(stream_id, dataset_id)

        # Fill the dataset
        self.publish_fake_data(stream_id, route)
        self.wait_until_we_have_enough_granules(dataset_id, 40)

        # Retrieve the data
        granule = DataRetrieverService.retrieve_oob(dataset_id)
        rdt = RecordDictionaryTool.load_from_granule(granule)
        self.assertTrue((rdt['time'] == np.arange(40)).all())

    @attr('LOCOINT')
    @unittest.skipIf(os.getenv('CEI_LAUNCH_TEST', False), 'Host requires file-system access to coverage files, CEI mode does not support.')
    def test_retrieve_cache(self):
        '''
        Exercise DataRetrieverService._retrieve_cache: insertion on first
        access, refresh after the refresh interval, eviction after other
        datasets are loaded, and removal after new data is ingested.
        '''
        # _refresh_interval is class-level state; restore it so the values set
        # below do not leak into other tests running in the same process.
        original_interval = DataRetrieverService._refresh_interval
        self.addCleanup(setattr, DataRetrieverService, '_refresh_interval', original_interval)

        DataRetrieverService._refresh_interval = 1
        datasets = [self.make_simple_dataset() for i in xrange(10)]
        for stream_id, route, stream_def_id, dataset_id in datasets:
            coverage = DatasetManagementService._get_simplex_coverage(dataset_id)
            coverage.insert_timesteps(10)
            coverage.set_parameter_values('time', np.arange(10))
            coverage.set_parameter_values('temp', np.arange(10))

        # Verify cache hit and refresh
        dataset_ids = [i[3] for i in datasets]
        self.assertTrue(dataset_ids[0] not in DataRetrieverService._retrieve_cache)
        DataRetrieverService._get_coverage(dataset_ids[0])  # Hit the cache
        _, age = DataRetrieverService._retrieve_cache[dataset_ids[0]]
        # Verify that it was hit and it's now in there
        self.assertTrue(dataset_ids[0] in DataRetrieverService._retrieve_cache)

        gevent.sleep(DataRetrieverService._refresh_interval + 0.2)

        DataRetrieverService._get_coverage(dataset_ids[0])  # Hit the cache
        _, age2 = DataRetrieverService._retrieve_cache[dataset_ids[0]]
        self.assertTrue(age2 != age)

        for dataset_id in dataset_ids:
            DataRetrieverService._get_coverage(dataset_id)

        # Loading every dataset is expected to push the first one out of the cache
        self.assertTrue(dataset_ids[0] not in DataRetrieverService._retrieve_cache)

        stream_id, route, stream_def, dataset_id = datasets[0]
        self.start_ingestion(stream_id, dataset_id)
        DataRetrieverService._get_coverage(dataset_id)

        self.assertTrue(dataset_id in DataRetrieverService._retrieve_cache)

        DataRetrieverService._refresh_interval = 100
        self.publish_hifi(stream_id, route, 1)
        self.wait_until_we_have_enough_granules(dataset_id, data_size=20)

        event = gevent.event.Event()
        with gevent.Timeout(20):
            while not event.wait(0.1):
                if dataset_id not in DataRetrieverService._retrieve_cache:
                    event.set()

        self.assertTrue(event.is_set())

    def publish_and_wait(self, dataset_id, granule):
        '''
        Publish granule on the first stream associated with dataset_id and
        wait up to 10 seconds for the dataset monitor's event to be set.
        '''
        stream_ids, _ = self.resource_registry.find_objects(dataset_id, PRED.hasStream, id_only=True)
        stream_id = stream_ids[0]
        route = self.pubsub_management.read_stream_route(stream_id)
        publisher = StandaloneStreamPublisher(stream_id, route)
        dataset_monitor = DatasetMonitor(dataset_id)
        # Stop the monitor at the end of the test, as the other tests do
        self.addCleanup(dataset_monitor.stop)
        publisher.publish(granule)
        self.assertTrue(dataset_monitor.event.wait(10))

    @attr('LOCOINT')
    @unittest.skipIf(os.getenv('CEI_LAUNCH_TEST', False), 'Host requires file-system access to coverage files, CEI mode does not support.')
    def test_thorough_gap_analysis(self):
        '''
        Run the gap analysis scenario and check that the dataset's coverage is
        a ViewCoverage over a ComplexCoverage with three reference coverages.
        '''
        dataset_id = self.test_ingestion_gap_analysis()
        vcov = DatasetManagementService._get_coverage(dataset_id)

        self.assertIsInstance(vcov, ViewCoverage)
        ccov = vcov.reference_coverage

        self.assertIsInstance(ccov, ComplexCoverage)
        self.assertEquals(len(ccov._reference_covs), 3)

    def test_ingestion_gap_analysis(self):
        '''
        Publish six single-point granules with a skipped connection index and
        a connection change, then check all six points are retrievable.

        Returns the dataset id so test_thorough_gap_analysis can reuse it.
        '''
        stream_id, route, stream_def_id, dataset_id = self.make_simple_dataset()
        self.start_ingestion(stream_id, dataset_id)
        self.addCleanup(self.stop_ingestion, stream_id)

        connection1 = uuid4().hex
        connection2 = uuid4().hex

        rdt = RecordDictionaryTool(stream_definition_id=stream_def_id)
        rdt['time'] = [0]
        rdt['temp'] = [0]
        self.publish_and_wait(dataset_id, rdt.to_granule(connection_id=connection1, connection_index='0'))
        rdt['time'] = [1]
        rdt['temp'] = [1]
        self.publish_and_wait(dataset_id, rdt.to_granule(connection_id=connection1, connection_index=1))
        rdt['time'] = [2]
        rdt['temp'] = [2]
        self.publish_and_wait(dataset_id, rdt.to_granule(connection_id=connection1, connection_index='3'))  # Gap, missed message
        rdt['time'] = [3]
        rdt['temp'] = [3]
        self.publish_and_wait(dataset_id, rdt.to_granule(connection_id=connection2, connection_index='3'))  # Gap, new connection
        rdt['time'] = [4]
        rdt['temp'] = [4]
        self.publish_and_wait(dataset_id, rdt.to_granule(connection_id=connection2, connection_index='4'))
        rdt['time'] = [5]
        rdt['temp'] = [5]
        self.publish_and_wait(dataset_id, rdt.to_granule(connection_id=connection2, connection_index=5))

        granule = self.data_retriever.retrieve(dataset_id)
        rdt = RecordDictionaryTool.load_from_granule(granule)
        np.testing.assert_array_equal(rdt['time'], np.arange(6))
        np.testing.assert_array_equal(rdt['temp'], np.arange(6))
        return dataset_id

    @unittest.skip('Outdated due to ingestion retry')
    @attr('LOCOINT')
    @unittest.skipIf(os.getenv('CEI_LAUNCH_TEST', False), 'Host requires file-system access to coverage files, CEI mode does not support.')
    def test_ingestion_failover(self):
        '''
        Overwrite the coverage master file, publish again and wait for an
        ExceptionEvent from origin 'stream_exception'. Currently skipped.
        '''
        stream_id, route, stream_def_id, dataset_id = self.make_simple_dataset()
        self.start_ingestion(stream_id, dataset_id)

        event = Event()

        def cb(*args, **kwargs):
            event.set()

        sub = EventSubscriber(event_type="ExceptionEvent", callback=cb, origin="stream_exception")
        sub.start()

        self.publish_fake_data(stream_id, route)
        self.wait_until_we_have_enough_granules(dataset_id, 40)

        file_path = DatasetManagementService._get_coverage_path(dataset_id)
        master_file = os.path.join(file_path, '%s_master.hdf5' % dataset_id)

        with open(master_file, 'w') as f:
            f.write('this will crash HDF')

        self.publish_hifi(stream_id, route, 5)

        self.assertTrue(event.wait(10))

        sub.stop()

    @attr('LOCOINT')
    @unittest.skipIf(os.getenv('CEI_LAUNCH_TEST', False), 'Host requires file-system access to coverage files, CEI mode does not support.')
    def test_coverage_types(self):
        '''
        Check that _get_coverage returns a ViewCoverage and
        _get_simplex_coverage returns a SimplexCoverage.
        '''
        # Make a simple dataset and start ingestion, pretty standard stuff.
        ctd_stream_id, route, stream_def_id, dataset_id = self.make_simple_dataset()
        cov = DatasetManagementService._get_coverage(dataset_id=dataset_id)
        self.assertIsInstance(cov, ViewCoverage)

        cov = DatasetManagementService._get_simplex_coverage(dataset_id=dataset_id)
        self.assertIsInstance(cov, SimplexCoverage)
def _setup_resources(self):
    """Register the RUV external-dataset test resources and build the handler config.

    Through the service clients this creates an external dataset agent and an
    instance of it, a data provider, a data source, a data source model and
    the ``ruv_test_dataset`` external dataset, links them together, and
    attaches a data product whose stream definition is built from a
    single-context parameter dictionary.

    On return the following are set on ``self``:
      * ``EDA_RESOURCE_ID`` -- id of the created external dataset
      * ``EDA_NAME`` -- name of the external dataset
      * ``DVR_CONFIG['dh_cfg']`` -- the data handler configuration dict
    """
    # TODO: some or all of this (or some variation) should move to DAMS

    # Service clients used to build the test resources for the dataset
    dataset_mgmt = DatasetManagementServiceClient()
    acquisition = DataAcquisitionManagementServiceClient()
    product_mgmt = DataProductManagementServiceClient()
    registry = ResourceRegistryServiceClient()
    pubsub = PubsubManagementServiceClient()

    # Agent definition plus one instance of it
    agent = ExternalDatasetAgent(
        name='example data agent',
        handler_module=self.DVR_CONFIG['dvr_mod'],
        handler_class=self.DVR_CONFIG['dvr_cls'])
    agent_id = acquisition.create_external_dataset_agent(agent)

    agent_instance = ExternalDatasetAgentInstance(
        name='example dataset agent instance')
    agent_instance_id = acquisition.create_external_dataset_agent_instance(
        agent_instance, external_dataset_agent_id=agent_id)

    # --- Build the resource objects (nothing is registered yet) ---

    # Data provider
    provider = ExternalDataProvider(
        name='example data provider',
        institution=Institution(),
        contact=ContactInformation())
    provider.contact.individual_names_given = 'Christopher Mueller'
    provider.contact.email = '*****@*****.**'

    # Data source
    source = DataSource(
        name='example datasource',
        protocol_type='FILE',
        institution=Institution(),
        contact=ContactInformation())
    source.connection_params['base_data_url'] = ''
    source.contact.individual_names_given = 'Tim Giguere'
    source.contact.email = '*****@*****.**'

    # External dataset
    dataset_name = 'ruv_test_dataset'
    dataset = ExternalDataset(
        name=dataset_name,
        dataset_description=DatasetDescription(),
        update_description=UpdateDescription(),
        contact=ContactInformation())
    parameters = dataset.dataset_description.parameters
    parameters['base_url'] = 'test_data/ruv/'
    parameters['list_pattern'] = 'RDLi_SEAB_2011_08_24_1600.ruv'
    parameters['date_pattern'] = '%Y %m %d %H %M'
    parameters['date_extraction_pattern'] = 'RDLi_SEAB_([\d]{4})_([\d]{2})_([\d]{2})_([\d]{2})([\d]{2}).ruv'
    for dimension_key in ('temporal_dimension',
                          'zonal_dimension',
                          'meridional_dimension',
                          'vertical_dimension'):
        parameters[dimension_key] = None
    parameters['variables'] = []

    # Data source model
    source_model = DataSourceModel(name='ruv_model')
    # source_model.model = 'RUV'
    source_model.data_handler_module = 'N/A'
    source_model.data_handler_class = 'N/A'

    # --- Run everything through DAMS ---
    dataset_id = acquisition.create_external_dataset(external_dataset=dataset)
    provider_id = acquisition.create_external_data_provider(
        external_data_provider=provider)
    source_id = acquisition.create_data_source(data_source=source)
    source_model_id = acquisition.create_data_source_model(source_model)

    # Register the ExternalDataset
    producer_id = acquisition.register_external_data_set(
        external_dataset_id=dataset_id)

    # Link the resources to one another
    acquisition.assign_data_source_to_external_data_provider(
        data_source_id=source_id, external_data_provider_id=provider_id)
    acquisition.assign_data_source_to_data_model(
        data_source_id=source_id, data_source_model_id=source_model_id)
    acquisition.assign_external_dataset_to_data_source(
        external_dataset_id=dataset_id, data_source_id=source_id)
    acquisition.assign_external_dataset_to_agent_instance(
        external_dataset_id=dataset_id, agent_instance_id=agent_instance_id)

    # Parameter dictionary holding a single int64 context on the TIME axis
    param_dict = ParameterDictionary()
    time_ctxt = ParameterContext(
        'data',
        param_type=QuantityType(value_encoding=numpy.dtype('int64')))
    time_ctxt.axis = AxisTypeEnum.TIME
    time_ctxt.uom = 'seconds since 01-01-1970'
    param_dict.add_context(time_ctxt)

    # Create a temp stream definition so the data product can create the stream
    context_ids = [
        dataset_mgmt.create_parameter_context(key, entry[1].dump())
        for key, entry in param_dict.iteritems()
    ]
    param_dict_id = dataset_mgmt.create_parameter_dictionary(
        'ruv_param_dict', context_ids)
    stream_def_id = pubsub.create_stream_definition(
        name="ruv",
        description="stream def for ruv testing",
        parameter_dictionary_id=param_dict_id)

    # Generate the data product and associate it to the ExternalDataset
    product = IonObject(
        RT.DataProduct,
        name='ruv_parsed_product',
        description='parsed ruv product')
    product_id = product_mgmt.create_data_product(
        data_product=product, stream_definition_id=stream_def_id)
    acquisition.assign_data_product(
        input_resource_id=dataset_id, data_product_id=product_id)

    # The data product's stream is looked up through its hasStream association
    stream_ids, _assocs = registry.find_objects(
        subject=product_id,
        predicate=PRED.hasStream,
        object_type=RT.Stream,
        id_only=True)
    stream_id = stream_ids[0]

    created = {
        'ExternalDataset': dataset_id,
        'ExternalDataProvider': provider_id,
        'DataSource': source_id,
        'DataSourceModel': source_model_id,
        'DataProducer': producer_id,
        'DataProduct': product_id,
        'Stream': stream_id
    }
    log.info('Created resources: {0}'.format(created))

    # CBM: Eventually, probably want to group this crap somehow - not sure how yet...

    # Create the logger for receiving publications
    _, stream_route, _ = self.create_stream_and_logger(
        name='ruv', stream_id=stream_id)

    self.EDA_RESOURCE_ID = dataset_id
    self.EDA_NAME = dataset_name
    self.DVR_CONFIG['dh_cfg'] = {
        'TESTING': True,
        'stream_id': stream_id,
        'stream_route': stream_route,
        'external_dataset_res': dataset,
        'param_dictionary': param_dict.dump(),
        'data_producer_id': producer_id,
        # CBM: Should this be put in the main body of the config - with mod & cls?
        'max_records': 20,
    }
class CtdbpTransformsIntTest(IonIntegrationTestCase):
    """Integration test that launches the ``CTDBP_L0_all`` data process in a container."""

    def setUp(self):
        """Start the container with the r2deploy services and create the service clients."""
        super(CtdbpTransformsIntTest, self).setUp()

        self._start_container()
        self.container.start_rel_from_url('res/deploy/r2deploy.yml')

        self.pubsub = PubsubManagementServiceClient()
        self.process_dispatcher = ProcessDispatcherServiceClient()
        self.dataset_management = DatasetManagementServiceClient()
        self.data_process_management = DataProcessManagementServiceClient()
        self.dataproduct_management = DataProductManagementServiceClient()
        self.resource_registry = ResourceRegistryServiceClient()

        # Running offset for the time values inside the packets going into the transform
        self.i = 0

    def _get_new_ctd_packet(self, stream_definition_id, length):
        """Build a granule holding ``length`` records for the given stream definition.

        ``time`` is first set to ``arange(self.i, self.i + length)``; afterwards
        every field whose parameter type is a ``QuantityType`` is assigned
        ``length`` values drawn with ``random.uniform(0.0, 75.0)``.
        ``self.i`` is advanced by ``length`` before returning.

        :param stream_definition_id: stream definition used to build the record dictionary
        :param length: number of records to generate
        :return: the granule produced by ``rdt.to_granule()``
        """
        rdt = RecordDictionaryTool(stream_definition_id=stream_definition_id)
        first = self.i
        rdt['time'] = numpy.arange(first, first + length)

        # NOTE(review): if iterating rdt also yields 'time' (a QuantityType in
        # the dictionary built by this test), the arange above is overwritten
        # with random values here -- confirm that is intended.
        for field in rdt:
            field_type = rdt._pdict.get_context(field).param_type
            if not isinstance(field_type, QuantityType):
                continue
            rdt[field] = numpy.array(
                [random.uniform(0.0, 75.0) for _ in xrange(length)])

        granule = rdt.to_granule()
        self.i += length
        return granule

    def _create_input_param_dict_for_test(self, parameter_dict_name=''):
        """Create a parameter dictionary resource for the test and return its id.

        The dictionary contains a float64 ``time`` context on the TIME axis and
        float32 contexts for conductivity, pressure, temperature, density and
        salinity.  Every created parameter context, and the dictionary itself,
        is registered for deletion via ``addCleanup``.

        :param parameter_dict_name: name given to the parameter dictionary resource
        :return: id of the created parameter dictionary
        """
        pdict = ParameterDictionary()

        time_ctxt = ParameterContext(
            'time',
            param_type=QuantityType(value_encoding=numpy.dtype('float64')))
        time_ctxt.axis = AxisTypeEnum.TIME
        time_ctxt.uom = 'seconds since 01-01-1900'
        pdict.add_context(time_ctxt)

        # The remaining contexts differ only by name
        for field_name in ('conductivity', 'pressure', 'temperature',
                           'density', 'salinity'):
            ctxt = ParameterContext(
                field_name,
                param_type=QuantityType(value_encoding=numpy.dtype('float32')))
            ctxt.uom = ''
            pdict.add_context(ctxt)

        # Create temp streamdef so the data product can create the stream
        context_ids = []
        for key, entry in pdict.iteritems():
            ctxt_id = self.dataset_management.create_parameter_context(
                key, entry[1].dump())
            context_ids.append(ctxt_id)
            self.addCleanup(self.dataset_management.delete_parameter_context,
                            ctxt_id)

        pdict_id = self.dataset_management.create_parameter_dictionary(
            parameter_dict_name, context_ids)
        self.addCleanup(self.dataset_management.delete_parameter_dictionary,
                        pdict_id)

        return pdict_id

    def test_ctdbp_L0_all(self):
        """Publish one packet into the ctdbp_L0_all transform and wait for its output granule."""
        #----------- Data Process Definition --------------------------------
        definition = IonObject(
            RT.DataProcessDefinition,
            name='CTDBP_L0_all',
            description='Take parsed stream and put the C, T and P into three separate L0 streams.',
            module='ion.processes.data.transforms.ctdbp.ctdbp_L0',
            class_name='CTDBP_L0_all')
        definition_id = self.data_process_management.create_data_process_definition(
            definition)
        self.addCleanup(
            self.data_process_management.delete_data_process_definition,
            definition_id)
        log.debug("created data process definition: id = %s", definition_id)

        #----------- Data Products --------------------------------
        # Construct temporal and spatial Coordinate Reference System objects
        temporal, spatial = time_series_domain()
        spatial_dump = spatial.dump()
        temporal_dump = temporal.dump()

        param_dict_id = self._create_input_param_dict_for_test(
            parameter_dict_name='fictitious_ctdp_param_dict')

        # Get the stream definition for the stream using the parameter dictionary
        stream_def_id = self.pubsub.create_stream_definition(
            name='parsed', parameter_dictionary_id=param_dict_id)
        self.addCleanup(self.pubsub.delete_stream_definition, stream_def_id)

        log.debug("Got the parsed parameter dictionary: id: %s", param_dict_id)
        log.debug("Got the stream def for parsed input: %s", stream_def_id)

        # Input data product
        input_product = IonObject(
            RT.DataProduct,
            name='parsed_stream',
            description='Parsed stream input to CTBP L0 transform',
            temporal_domain=temporal_dump,
            spatial_domain=spatial_dump)
        input_product_id = self.dataproduct_management.create_data_product(
            data_product=input_product, stream_definition_id=stream_def_id)
        self.addCleanup(self.dataproduct_management.delete_data_product,
                        input_product_id)

        # Output data product
        output_product = IonObject(
            RT.DataProduct,
            name='L0_stream',
            description='L0_stream output of CTBP L0 transform',
            temporal_domain=temporal_dump,
            spatial_domain=spatial_dump)
        output_product_id = self.dataproduct_management.create_data_product(
            data_product=output_product, stream_definition_id=stream_def_id)
        self.addCleanup(self.dataproduct_management.delete_data_product,
                        output_product_id)

        # We need the key name here to be "L0_stream", since when the data process is launched, this name goes into
        # the config as in config.process.publish_streams.L0_stream when the config is used to launch the data process
        output_stream_ids, _ = self.resource_registry.find_objects(
            output_product_id, PRED.hasStream, RT.Stream, True)
        self.assertTrue(len(output_stream_ids))
        output_stream_id = output_stream_ids[0]

        process_id = self.data_process_management.create_data_process(
            data_process_definition_id=definition_id,
            in_data_product_ids=[input_product_id],
            out_data_product_ids=[output_product_id],
            configuration=None)
        self.addCleanup(self.data_process_management.delete_data_process,
                        process_id)

        log.debug("Created a data process for ctdbp_L0. id: %s", process_id)

        # Activate the data process; the deactivate cleanup is registered after
        # the delete cleanup above, so it runs first on teardown.
        self.data_process_management.activate_data_process(process_id)
        self.addCleanup(self.data_process_management.deactivate_data_process,
                        process_id)

        #----------- Find the stream that is associated with the input data product when it was created by create_data_product() --------------------------------
        input_stream_ids, _ = self.resource_registry.find_objects(
            input_product_id, PRED.hasStream, RT.Stream, True)
        self.assertTrue(len(input_stream_ids))

        input_stream_id = input_stream_ids[0]
        stream_route = self.pubsub.read_stream_route(input_stream_id)

        log.debug("The input stream for the L0 transform: %s", input_stream_id)

        #----------- Create a subscriber that will listen to the transform's output --------------------------------
        ar = gevent.event.AsyncResult()

        def subscriber(m, r, s):
            # Hand the received message over to the waiting test body
            ar.set(m)

        sub = StandaloneStreamSubscriber(exchange_name='sub',
                                         callback=subscriber)

        subscription_id = self.pubsub.create_subscription(
            'subscriber_to_transform',
            stream_ids=[output_stream_id],
            exchange_name='sub')
        self.addCleanup(self.pubsub.delete_subscription, subscription_id)

        self.pubsub.activate_subscription(subscription_id)
        self.addCleanup(self.pubsub.deactivate_subscription, subscription_id)

        sub.start()
        self.addCleanup(sub.stop)

        #----------- Publish on that stream so that the transform can receive it --------------------------------
        publisher = StandaloneStreamPublisher(input_stream_id, stream_route)
        outgoing = self._get_new_ctd_packet(
            stream_definition_id=stream_def_id, length=5)

        publisher.publish(outgoing)
        log.debug("Published the following granule: %s", outgoing)

        incoming = ar.get(timeout=20)
        log.debug("Got the following granule from the transform: %s",
                  incoming)

        # Check that the granule published by the L0 transform has the right properties
        self._check_granule_from_transform(incoming)

    def _check_granule_from_transform(self, granule):
        """Placeholder for checks on the transform's output granule; currently does nothing."""
        pass
class DatasetManagementIntTest(IonIntegrationTestCase):
    """Integration tests for dataset, parameter context, function and dictionary CRUD."""

    def setUp(self):
        """Start the container with the r2deploy services and create the service clients."""
        self._start_container()
        self.container.start_rel_from_url('res/deploy/r2deploy.yml')

        self.resource_registry = ResourceRegistryServiceClient()
        self.dataset_management = DatasetManagementServiceClient()

    def test_dataset_crud(self):
        """Create, read, update and register a dataset, then check the stored state."""
        pdict_id = self.dataset_management.read_parameter_dictionary_by_name('ctd_parsed_param_dict', id_only=True)
        tdom, sdom = time_series_domain()
        dataset_id = self.dataset_management.create_dataset(
            name='ctd_dataset',
            parameter_dictionary_id=pdict_id,
            spatial_domain=sdom.dump(),
            temporal_domain=tdom.dump())

        ds_obj = self.dataset_management.read_dataset(dataset_id)
        self.assertEquals(ds_obj.name, 'ctd_dataset')

        ds_obj.name = 'something different'
        self.dataset_management.update_dataset(ds_obj)
        self.dataset_management.register_dataset(dataset_id)

        # Re-read to verify both the rename and the registration took effect
        ds_obj2 = self.dataset_management.read_dataset(dataset_id)
        self.assertEquals(ds_obj.name, ds_obj2.name)
        self.assertTrue(ds_obj2.registered)

    def test_context_crud(self):
        """Read back one created parameter context, delete it and expect NotFound afterwards."""
        context_ids = self.create_contexts()
        context_id = context_ids.pop()

        context = DatasetManagementService.get_parameter_context(context_id)
        self.assertIsInstance(context, ParameterContext)
        self.assertEquals(context.identifier, context_id)

        self.dataset_management.delete_parameter_context(context_id)

        with self.assertRaises(NotFound):
            self.dataset_management.read_parameter_context(context_id)

    def test_pfunc_crud(self):
        """Build a dictionary from the functional contexts and load one parameter function."""
        contexts, funcs = self.create_pfuncs()
        context_ids = [context_id for ctxt, context_id in contexts.itervalues()]

        pdict_id = self.dataset_management.create_parameter_dictionary(
            name='functional_pdict',
            parameter_context_ids=context_ids,
            temporal_context='time')
        self.addCleanup(self.dataset_management.delete_parameter_dictionary, pdict_id)

        expr, expr_id = funcs['CONDWAT_L1']
        func_class = DatasetManagementService.get_parameter_function(expr_id)
        self.assertIsInstance(func_class, NumexprFunction)

    def test_pdict_crud(self):
        """Create a parameter dictionary, verify its contents, delete it and expect NotFound."""
        context_ids = self.create_contexts()
        pdict_res_id = self.dataset_management.create_parameter_dictionary(
            name='pdict1',
            parameter_context_ids=context_ids,
            temporal_context='time')

        pdict_contexts = self.dataset_management.read_parameter_contexts(
            parameter_dictionary_id=pdict_res_id, id_only=True)

        pdict = DatasetManagementService.get_parameter_dictionary(pdict_res_id)
        self.assertIsInstance(pdict, ParameterDictionary)
        self.assertTrue('time_test' in pdict)
        self.assertEquals(pdict.identifier, pdict_res_id)

        self.assertEquals(set(pdict_contexts), set(context_ids))

        self.dataset_management.delete_parameter_dictionary(parameter_dictionary_id=pdict_res_id)
        with self.assertRaises(NotFound):
            self.dataset_management.read_parameter_dictionary(parameter_dictionary_id=pdict_res_id)

    def create_contexts(self):
        """Create five ``*_test`` parameter contexts and return their ids.

        The contexts are created in this order: conductivity, pressure,
        salinity and temp (float32), then time (int64).

        :return: list of the created parameter context ids, in creation order
        """
        context_ids = []

        cond_ctxt = ParameterContext('conductivity_test', param_type=QuantityType(value_encoding=np.float32))
        cond_ctxt.uom = 'unknown'
        cond_ctxt.fill_value = 0e0
        context_ids.append(self.dataset_management.create_parameter_context(
            name='conductivity_test', parameter_context=cond_ctxt.dump()))

        pres_ctxt = ParameterContext('pressure_test', param_type=QuantityType(value_encoding=np.float32))
        pres_ctxt.uom = 'Pascal'
        pres_ctxt.fill_value = 0x0
        context_ids.append(self.dataset_management.create_parameter_context(
            name='pressure_test', parameter_context=pres_ctxt.dump()))

        sal_ctxt = ParameterContext('salinity_test', param_type=QuantityType(value_encoding=np.float32))
        sal_ctxt.uom = 'PSU'
        sal_ctxt.fill_value = 0x0
        context_ids.append(self.dataset_management.create_parameter_context(
            name='salinity_test', parameter_context=sal_ctxt.dump()))

        temp_ctxt = ParameterContext('temp_test', param_type=QuantityType(value_encoding=np.float32))
        temp_ctxt.uom = 'degree_Celsius'
        temp_ctxt.fill_value = 0e0
        context_ids.append(self.dataset_management.create_parameter_context(
            name='temp_test', parameter_context=temp_ctxt.dump()))

        t_ctxt = ParameterContext('time_test', param_type=QuantityType(value_encoding=np.int64))
        t_ctxt.uom = 'seconds since 1970-01-01'
        t_ctxt.fill_value = 0x0
        context_ids.append(self.dataset_management.create_parameter_context(
            name='time_test', parameter_context=t_ctxt.dump()))

        return context_ids

    def create_pfuncs(self):
        """Create the independent and derived (function-based) parameter contexts.

        :return: ``(contexts, funcs)`` where ``contexts`` maps a parameter name
            to ``(ParameterContext, context_id)`` and ``funcs`` maps a function
            name to ``(function object, function_id)``
        """
        contexts = {}
        funcs = {}

        t_ctxt = ParameterContext('TIME', param_type=QuantityType(value_encoding=np.dtype('int64')))
        t_ctxt.uom = 'seconds since 01-01-1900'
        t_ctxt_id = self.dataset_management.create_parameter_context(
            name='test_TIME', parameter_context=t_ctxt.dump())
        contexts['TIME'] = (t_ctxt, t_ctxt_id)

        lat_ctxt = ParameterContext('LAT', param_type=ConstantType(QuantityType(value_encoding=np.dtype('float32'))), fill_value=-9999)
        lat_ctxt.axis = AxisTypeEnum.LAT
        lat_ctxt.uom = 'degree_north'
        lat_ctxt_id = self.dataset_management.create_parameter_context(
            name='test_LAT', parameter_context=lat_ctxt.dump())
        contexts['LAT'] = lat_ctxt, lat_ctxt_id

        lon_ctxt = ParameterContext('LON', param_type=ConstantType(QuantityType(value_encoding=np.dtype('float32'))), fill_value=-9999)
        lon_ctxt.axis = AxisTypeEnum.LON
        lon_ctxt.uom = 'degree_east'
        lon_ctxt_id = self.dataset_management.create_parameter_context(
            name='test_LON', parameter_context=lon_ctxt.dump())
        contexts['LON'] = lon_ctxt, lon_ctxt_id

        # Independent Parameters

        # Temperature - values expected to be the decimal results of conversion from hex
        temp_ctxt = ParameterContext('TEMPWAT_L0', param_type=QuantityType(value_encoding=np.dtype('float32')), fill_value=-9999)
        temp_ctxt.uom = 'deg_C'
        temp_ctxt_id = self.dataset_management.create_parameter_context(
            name='test_TEMPWAT_L0', parameter_context=temp_ctxt.dump())
        contexts['TEMPWAT_L0'] = temp_ctxt, temp_ctxt_id

        # Conductivity - values expected to be the decimal results of conversion from hex
        cond_ctxt = ParameterContext('CONDWAT_L0', param_type=QuantityType(value_encoding=np.dtype('float32')), fill_value=-9999)
        cond_ctxt.uom = 'S m-1'
        cond_ctxt_id = self.dataset_management.create_parameter_context(
            name='test_CONDWAT_L0', parameter_context=cond_ctxt.dump())
        contexts['CONDWAT_L0'] = cond_ctxt, cond_ctxt_id

        # Pressure - values expected to be the decimal results of conversion from hex
        press_ctxt = ParameterContext('PRESWAT_L0', param_type=QuantityType(value_encoding=np.dtype('float32')), fill_value=-9999)
        press_ctxt.uom = 'dbar'
        press_ctxt_id = self.dataset_management.create_parameter_context(
            name='test_PRESWAT_L0', parameter_context=press_ctxt.dump())
        contexts['PRESWAT_L0'] = press_ctxt, press_ctxt_id

        # Dependent Parameters
        # Note: each function is dumped and registered BEFORE its param_map is
        # assigned, so the map is only carried by the parameter context.

        # TEMPWAT_L1 = (TEMPWAT_L0 / 10000) - 10
        tl1_func = '(T / 10000) - 10'
        expr = NumexprFunction('TEMPWAT_L1', tl1_func, ['T'])
        expr_id = self.dataset_management.create_parameter_function(
            name='test_TEMPWAT_L1', parameter_function=expr.dump())
        funcs['TEMPWAT_L1'] = expr, expr_id

        tl1_pmap = {'T': 'TEMPWAT_L0'}
        expr.param_map = tl1_pmap
        tempL1_ctxt = ParameterContext('TEMPWAT_L1', param_type=ParameterFunctionType(function=expr), variability=VariabilityEnum.TEMPORAL)
        tempL1_ctxt.uom = 'deg_C'
        tempL1_ctxt_id = self.dataset_management.create_parameter_context(
            name='test_TEMPWAT_L1',
            parameter_context=tempL1_ctxt.dump(),
            parameter_function_id=expr_id)
        contexts['TEMPWAT_L1'] = tempL1_ctxt, tempL1_ctxt_id

        # CONDWAT_L1 = (CONDWAT_L0 / 100000) - 0.5
        cl1_func = '(C / 100000) - 0.5'
        expr = NumexprFunction('CONDWAT_L1', cl1_func, ['C'])
        expr_id = self.dataset_management.create_parameter_function(
            name='test_CONDWAT_L1', parameter_function=expr.dump())
        funcs['CONDWAT_L1'] = expr, expr_id

        cl1_pmap = {'C': 'CONDWAT_L0'}
        expr.param_map = cl1_pmap
        condL1_ctxt = ParameterContext('CONDWAT_L1', param_type=ParameterFunctionType(function=expr), variability=VariabilityEnum.TEMPORAL)
        condL1_ctxt.uom = 'S m-1'
        condL1_ctxt_id = self.dataset_management.create_parameter_context(
            name='test_CONDWAT_L1',
            parameter_context=condL1_ctxt.dump(),
            parameter_function_id=expr_id)
        contexts['CONDWAT_L1'] = condL1_ctxt, condL1_ctxt_id

        # Equation uses p_range, which is a calibration coefficient - Fixing to 679.34040721
        # PRESWAT_L1 = (PRESWAT_L0 * p_range / (0.85 * 65536)) - (0.05 * p_range)
        pl1_func = '(P * p_range / (0.85 * 65536)) - (0.05 * p_range)'
        expr = NumexprFunction('PRESWAT_L1', pl1_func, ['P', 'p_range'])
        expr_id = self.dataset_management.create_parameter_function(
            name='test_PRESWAT_L1', parameter_function=expr.dump())
        funcs['PRESWAT_L1'] = expr, expr_id

        pl1_pmap = {'P': 'PRESWAT_L0', 'p_range': 679.34040721}
        expr.param_map = pl1_pmap
        presL1_ctxt = ParameterContext('PRESWAT_L1', param_type=ParameterFunctionType(function=expr), variability=VariabilityEnum.TEMPORAL)
        # NOTE(review): 'S m-1' is the unit used for the conductivity contexts
        # above and looks copied from there; PRESWAT_L0 uses 'dbar' -- confirm
        # the intended unit before changing it.
        presL1_ctxt.uom = 'S m-1'
        # Register under the pressure context's own resource name; this call
        # previously reused 'test_CONDWAT_L1' from the conductivity block.
        presL1_ctxt_id = self.dataset_management.create_parameter_context(
            name='test_PRESWAT_L1',
            parameter_context=presL1_ctxt.dump(),
            parameter_function_id=expr_id)
        contexts['PRESWAT_L1'] = presL1_ctxt, presL1_ctxt_id

        # Density & practical salinity calculated using the Gibbs Seawater library - available via python-gsw project:
        #       https://code.google.com/p/python-gsw/ & http://pypi.python.org/pypi/gsw/3.0.1

        # PRACSAL = gsw.SP_from_C((CONDWAT_L1 * 10), TEMPWAT_L1, PRESWAT_L1)
        owner = 'gsw'
        sal_func = 'SP_from_C'
        sal_arglist = ['C', 't', 'p']
        expr = PythonFunction('PRACSAL', owner, sal_func, sal_arglist)
        expr_id = self.dataset_management.create_parameter_function(
            name='test_PRACSAL', parameter_function=expr.dump())
        funcs['PRACSAL'] = expr, expr_id

        # A magic function that may or may not exist actually forms the line below at runtime.
        sal_pmap = {
            'C': NumexprFunction('CONDWAT_L1*10', 'C*10', ['C'], param_map={'C': 'CONDWAT_L1'}),
            't': 'TEMPWAT_L1',
            'p': 'PRESWAT_L1',
        }
        expr.param_map = sal_pmap
        sal_ctxt = ParameterContext('PRACSAL', param_type=ParameterFunctionType(expr), variability=VariabilityEnum.TEMPORAL)
        sal_ctxt.uom = 'g kg-1'
        sal_ctxt_id = self.dataset_management.create_parameter_context(
            name='test_PRACSAL',
            parameter_context=sal_ctxt.dump(),
            parameter_function_id=expr_id)
        contexts['PRACSAL'] = sal_ctxt, sal_ctxt_id

        # absolute_salinity = gsw.SA_from_SP(PRACSAL, PRESWAT_L1, longitude, latitude)
        # conservative_temperature = gsw.CT_from_t(absolute_salinity, TEMPWAT_L1, PRESWAT_L1)
        # DENSITY = gsw.rho(absolute_salinity, conservative_temperature, PRESWAT_L1)
        owner = 'gsw'
        abs_sal_expr = PythonFunction('abs_sal', owner, 'SA_from_SP', ['PRACSAL', 'PRESWAT_L1', 'LON', 'LAT'])
        cons_temp_expr = PythonFunction('cons_temp', owner, 'CT_from_t', [abs_sal_expr, 'TEMPWAT_L1', 'PRESWAT_L1'])
        dens_expr = PythonFunction('DENSITY', owner, 'rho', [abs_sal_expr, cons_temp_expr, 'PRESWAT_L1'])
        dens_ctxt = ParameterContext('DENSITY', param_type=ParameterFunctionType(dens_expr), variability=VariabilityEnum.TEMPORAL)
        dens_ctxt.uom = 'kg m-3'
        # The density functions are nested inline; no separate parameter
        # function resource is registered for them.
        dens_ctxt_id = self.dataset_management.create_parameter_context(
            name='test_DENSITY', parameter_context=dens_ctxt.dump())
        contexts['DENSITY'] = dens_ctxt, dens_ctxt_id

        return contexts, funcs

    def test_verify_contexts(self):
        """Every context of ``ctd_parsed_param_dict`` must expose the expected metadata fields."""
        pdict_id = self.dataset_management.read_parameter_dictionary_by_name(
            name='ctd_parsed_param_dict', id_only=True)
        pcontexts = self.dataset_management.read_parameter_contexts(parameter_dictionary_id=pdict_id)

        required_fields = (
            'fill_value',
            'reference_urls',
            'internal_name',
            'display_name',
            'standard_name',
            'ooi_short_name',
            'description',
            'precision',
        )
        for pcontext in pcontexts:
            for field in required_fields:
                # assertIn reports the missing field name on failure
                self.assertIn(field, pcontext)
class BulkIngestBase(object):
    """Shared scaffolding for bulk-ingest tests driven by an external dataset agent.

    Concrete tests must implement ``build_param_contexts``,
    ``create_external_dataset``, ``get_dvr_config`` and
    ``get_retrieve_client``.  The class also relies on members it does not
    define itself: ``_start_container``, ``container``, ``addCleanup``,
    ``setup_resources``, ``name``, ``description``, ``EDA_NAME``, ``EDA_MOD``
    and ``EDA_CLS``.
    """

    def setUp(self):
        """Start the container, create the service clients and build the parameter contexts."""
        self._start_container()
        self.container.start_rel_from_url('res/deploy/r2deploy.yml')

        self.pubsub_management = PubsubManagementServiceClient()
        self.dataset_management = DatasetManagementServiceClient()
        self.data_product_management = DataProductManagementServiceClient()
        self.data_acquisition_management = DataAcquisitionManagementServiceClient()
        self.data_retriever = DataRetrieverServiceClient()
        self.process_dispatch_client = ProcessDispatcherServiceClient(
            node=self.container.node)
        self.resource_registry = self.container.resource_registry

        self.context_ids = self.build_param_contexts()
        self.setup_resources()

    # ---- hooks that child classes have to provide ----

    def build_param_contexts(self):
        """Return the parameter context ids for the dataset (child classes only)."""
        raise NotImplementedError(
            'build_param_contexts must be implemented in child classes')

    def create_external_dataset(self):
        """Create the external dataset and return its id (child classes only)."""
        raise NotImplementedError(
            'create_external_dataset must be implemented in child classes')

    def get_dvr_config(self):
        """Return the driver configuration for the agent (child classes only)."""
        raise NotImplementedError(
            'get_dvr_config must be implemented in child classes')

    def get_retrieve_client(self, dataset_id=''):
        """Retrieve data for ``dataset_id`` (child classes only)."""
        raise NotImplementedError(
            'get_retrieve_client must be implemented in child classes')

    # ---- the test itself ----

    def test_data_ingest(self):
        """Create all resources for the dataset, then start the agent and wait for ingestion."""
        self.pdict_id = self.create_parameter_dict(self.name)
        self.stream_def_id = self.create_stream_def(self.name, self.pdict_id)
        self.data_product_id = self.create_data_product(
            self.name, self.description, self.stream_def_id)
        self.dataset_id = self.get_dataset_id(self.data_product_id)
        self.stream_id, self.route = self.get_stream_id_and_route(
            self.data_product_id)
        self.external_dataset_id = self.create_external_dataset()
        self.data_producer_id = self.register_external_dataset(
            self.external_dataset_id)
        self.start_agent()

    # ---- resource helpers ----

    def create_parameter_dict(self, name=''):
        """Create a parameter dictionary from ``self.context_ids`` and return its id."""
        return self.dataset_management.create_parameter_dictionary(
            name=name,
            parameter_context_ids=self.context_ids,
            temporal_context='time')

    def create_stream_def(self, name='', pdict_id=''):
        """Create a stream definition for the parameter dictionary and return its id."""
        return self.pubsub_management.create_stream_definition(
            name=name, parameter_dictionary_id=pdict_id)

    def create_data_product(self, name='', description='', stream_def_id=''):
        """Create a data product, activate its persistence and return its id."""
        temporal, spatial = time_series_domain()
        temporal_dump = temporal.dump()
        spatial_dump = spatial.dump()

        product = DataProduct(
            name=name,
            description=description,
            processing_level_code='Parsed_Canonical',
            temporal_domain=temporal_dump,
            spatial_domain=spatial_dump)

        product_id = self.data_product_management.create_data_product(
            data_product=product, stream_definition_id=stream_def_id)
        self.data_product_management.activate_data_product_persistence(
            product_id)
        return product_id

    def register_external_dataset(self, external_dataset_id=''):
        """Register the external dataset with DAMS and return the call's result."""
        return self.data_acquisition_management.register_external_data_set(
            external_dataset_id=external_dataset_id)

    def get_dataset_id(self, data_product_id=''):
        """Return the first dataset id associated with the data product via ``hasDataset``."""
        found_ids, _assocs = self.resource_registry.find_objects(
            subject=data_product_id, predicate='hasDataset', id_only=True)
        return found_ids[0]

    def get_stream_id_and_route(self, data_product_id):
        """Return ``(stream_id, route)`` for the first stream of the data product."""
        found_ids, _ = self.resource_registry.find_objects(
            data_product_id, PRED.hasStream, RT.Stream, id_only=True)
        first_stream_id = found_ids[0]
        stream_route = self.pubsub_management.read_stream_route(
            first_stream_id)
        #self.create_logger(self.name, first_stream_id)
        return first_stream_id, stream_route

    # ---- agent control ----

    def start_agent(self):
        """Spawn the dataset agent, drive it into autosample and start listening for events."""
        agent_config = {
            'driver_config': self.get_dvr_config(),
            'stream_config': {},
            'agent': {'resource_id': self.external_dataset_id},
            'test_mode': True
        }

        self.container.spawn_process(
            name=self.EDA_NAME,
            module=self.EDA_MOD,
            cls=self.EDA_CLS,
            config=agent_config)

        self._ia_client = ResourceAgentClient(self.external_dataset_id,
                                              process=FakeProcess())

        # Agent-level commands, issued in this order
        for agent_event in (ResourceAgentEvent.INITIALIZE,
                            ResourceAgentEvent.GO_ACTIVE,
                            ResourceAgentEvent.RUN):
            self._ia_client.execute_agent(AgentCommand(command=agent_event))

        # Resource-level command to begin sampling
        autosample = AgentCommand(command=DriverEvent.START_AUTOSAMPLE)
        self._ia_client.execute_resource(command=autosample)

        self.start_listener(self.dataset_id)

    def stop_agent(self):
        """Stop autosampling and reset the agent."""
        stop_sampling = AgentCommand(command=DriverEvent.STOP_AUTOSAMPLE)
        self._ia_client.execute_resource(stop_sampling)
        reset = AgentCommand(command=ResourceAgentEvent.RESET)
        self._ia_client.execute_agent(reset)

    def start_listener(self, dataset_id=''):
        """Subscribe to ingestion events, wait up to 120 seconds, then stop the agent.

        A ``DatasetModified`` event for ``dataset_id`` triggers
        ``get_retrieve_client``; a ``DeviceCommonLifecycleEvent`` with origin
        ``'BaseDataHandler._acquire_sample'`` ends the wait early.
        """
        ingestion_done = Event()

        # Callback to use retrieve to get data from the coverage
        def on_dataset_modified(*args, **kwargs):
            self.get_retrieve_client(dataset_id=dataset_id)

        # Callback to keep execution going once dataset has been fully ingested
        def on_acquire_finished(*args, **kwargs):
            ingestion_done.set()

        modified_sub = EventSubscriber(event_type=OT.DatasetModified,
                                       callback=on_dataset_modified,
                                       origin=dataset_id)
        modified_sub.start()

        lifecycle_sub = EventSubscriber(
            event_type=OT.DeviceCommonLifecycleEvent,
            callback=on_acquire_finished,
            origin='BaseDataHandler._acquire_sample')
        lifecycle_sub.start()

        self.addCleanup(modified_sub.stop)
        self.addCleanup(lifecycle_sub.stop)

        # Let it go for up to 120 seconds, then stop the agent and reset it
        ingestion_done.wait(120)
        self.stop_agent()

    def create_logger(self, name, stream_id=''):
        """Schedule a ``StreamGranuleLogger`` process for the stream and return its pid."""
        # Logger process definition
        logger_definition = ProcessDefinition(name=name + '_logger')
        logger_definition.executable = {
            'module': 'ion.processes.data.stream_granule_logger',
            'class': 'StreamGranuleLogger'
        }

        definition_id = self.process_dispatch_client.create_process_definition(
            process_definition=logger_definition)

        logger_config = {
            'process': {
                'stream_id': stream_id,
            }
        }

        return self.process_dispatch_client.schedule_process(
            process_definition_id=definition_id, configuration=logger_config)
class CtdTransformsIntTest(IonIntegrationTestCase):
    """Integration test for the CTDBP L1 transform data process.

    The test deploys ``res/deploy/r2deploy.yml``, creates an L0 input and an
    L1 output data product, launches a ``CTDBP_L1_Transform`` data process
    between them, publishes one granule on the input stream and checks the
    granule received on the output stream.
    """

    def setUp(self):
        """Start the container and create the service clients used by the test."""
        super(CtdTransformsIntTest, self).setUp()

        self._start_container()
        self.container.start_rel_from_url("res/deploy/r2deploy.yml")

        self.pubsub = PubsubManagementServiceClient()
        self.process_dispatcher = ProcessDispatcherServiceClient()
        self.dataset_management = DatasetManagementServiceClient()
        self.data_process_management = DataProcessManagementServiceClient()
        self.dataproduct_management = DataProductManagementServiceClient()
        self.resource_registry = ResourceRegistryServiceClient()

        # This is for the time values inside the packets going into the transform
        self.i = 0

        # Cleanup of queue created by the subscriber
        self.queue_cleanup = []
        self.data_process_cleanup = []

    def _create_input_param_dict_for_test(self, parameter_dict_name=""):
        """Create and register a CTD parameter dictionary for the test.

        The dictionary holds a float64 "time" context plus float32 contexts
        for conductivity, pressure, temperature, density and salinity. The
        temperature context is named "temperature" when ``parameter_dict_name``
        is "input_param_dict" and "temp" otherwise.

        :param parameter_dict_name: name of the parameter dictionary to create
        :return: id of the created parameter dictionary
        """
        pdict = ParameterDictionary()

        t_ctxt = ParameterContext("time", param_type=QuantityType(value_encoding=numpy.dtype("float64")))
        t_ctxt.axis = AxisTypeEnum.TIME
        t_ctxt.uom = "seconds since 01-01-1900"
        pdict.add_context(t_ctxt)

        if parameter_dict_name == "input_param_dict":
            temp_name = "temperature"
        else:
            temp_name = "temp"

        # Remaining contexts only differ by name; order of insertion is kept.
        for ctxt_name in ("conductivity", "pressure", temp_name, "density", "salinity"):
            ctxt = ParameterContext(ctxt_name, param_type=QuantityType(value_encoding=numpy.dtype("float32")))
            ctxt.uom = ""
            pdict.add_context(ctxt)

        # create temp streamdef so the data product can create the stream
        pc_list = []
        for pc_k, pc in pdict.iteritems():
            ctxt_id = self.dataset_management.create_parameter_context(pc_k, pc[1].dump())
            pc_list.append(ctxt_id)
            # For the output dictionary only the "temp" context is scheduled
            # for deletion.
            # NOTE(review): presumably the other contexts are the same
            # resources already cleaned up for the input dictionary - confirm.
            if parameter_dict_name == "input_param_dict":
                self.addCleanup(self.dataset_management.delete_parameter_context, ctxt_id)
            elif parameter_dict_name == "output_param_dict" and pc[1].name == "temp":
                self.addCleanup(self.dataset_management.delete_parameter_context, ctxt_id)

        pdict_id = self.dataset_management.create_parameter_dictionary(parameter_dict_name, pc_list)
        self.addCleanup(self.dataset_management.delete_parameter_dictionary, pdict_id)

        return pdict_id

    def _get_new_ctd_L0_packet(self, stream_definition_id, length):
        """Build a granule of ``length`` records for the given stream definition.

        "time" is filled with consecutive values starting at ``self.i``; every
        other QuantityType field is filled with random values drawn from
        ``random.uniform(0.0, 75.0)``. ``self.i`` is advanced by ``length`` so
        the next packet continues the time sequence.

        :param stream_definition_id: stream definition used to build the record dictionary
        :param length: number of records in the packet
        :return: granule produced by ``rdt.to_granule()``
        """
        rdt = RecordDictionaryTool(stream_definition_id=stream_definition_id)
        rdt["time"] = numpy.arange(self.i, self.i + length)

        for field in rdt:
            # The dictionaries built by this test declare "time" as a
            # QuantityType too; skip it so the sequential time values set
            # above are not overwritten with random numbers.
            if field == "time":
                continue
            if isinstance(rdt._pdict.get_context(field).param_type, QuantityType):
                rdt[field] = numpy.array([random.uniform(0.0, 75.0) for _ in xrange(length)])

        granule = rdt.to_granule()
        self.i += length

        return granule

    def _create_calibration_coefficients_dict(self):
        """Return a DotDict whose ``process.calibration_coeffs`` holds the
        temperature, conductivity and pressure calibration coefficients
        passed to the data process as configuration.
        """
        config = DotDict()
        config.process.calibration_coeffs = {
            "temp_calibration_coeffs": {
                "TA0": 1.561342e-03,
                "TA1": 2.561486e-04,
                "TA2": 1.896537e-07,
                "TA3": 1.301189e-07,
                "TOFFSET": 0.000000e00,
            },
            "cond_calibration_coeffs": {
                "G": -9.896568e-01,
                "H": 1.316599e-01,
                "I": -2.213854e-04,
                "J": 3.292199e-05,
                "CPCOR": -9.570000e-08,
                "CTCOR": 3.250000e-06,
                "CSLOPE": 1.000000e00,
            },
            "pres_calibration_coeffs": {
                "PA0": 4.960417e-02,
                "PA1": 4.883682e-04,
                "PA2": -5.687309e-12,
                "PTCA0": 5.249802e05,
                "PTCA1": 7.595719e00,
                "PTCA2": -1.322776e-01,
                "PTCB0": 2.503125e01,
                "PTCB1": 5.000000e-05,
                "PTCB2": 0.000000e00,
                "PTEMPA0": -6.431504e01,
                "PTEMPA1": 5.168177e01,
                "PTEMPA2": -2.847757e-01,
                "POFFSET": 0.000000e00,
            },
        }

        return config

    def clean_queues(self):
        """Delete every exchange queue named in ``self.queue_cleanup``."""
        for queue in self.queue_cleanup:
            xn = self.container.ex_manager.create_xn_queue(queue)
            xn.delete()

    def cleaning_operations(self):
        """Delete every data process listed in ``self.data_process_cleanup``."""
        for dproc_id in self.data_process_cleanup:
            self.data_process_management.delete_data_process(dproc_id)

    def test_ctd_L1_all(self):
        """
        Test that packets are processed by the ctd_L1_all transform
        """
        # ----------- Data Process Definition --------------------------------
        dpd_obj = IonObject(
            RT.DataProcessDefinition,
            name="CTDBP_L1_Transform",
            description="Take granules on the L0 stream which have the C, T and P data and separately apply algorithms and output on the L1 stream.",
            module="ion.processes.data.transforms.ctdbp.ctdbp_L1",
            class_name="CTDBP_L1_Transform",
        )

        dprocdef_id = self.data_process_management.create_data_process_definition(dpd_obj)
        self.addCleanup(self.data_process_management.delete_data_process_definition, dprocdef_id)
        log.debug("created data process definition: id = %s", dprocdef_id)

        # ----------- Data Products --------------------------------
        # Construct temporal and spatial Coordinate Reference System objects
        tdom, sdom = time_series_domain()
        sdom = sdom.dump()
        tdom = tdom.dump()

        # Get the stream definition for the stream using the parameter dictionary
        L0_pdict_id = self._create_input_param_dict_for_test(parameter_dict_name="input_param_dict")
        L0_stream_def_id = self.pubsub.create_stream_definition(name="parsed", parameter_dictionary_id=L0_pdict_id)
        self.addCleanup(self.pubsub.delete_stream_definition, L0_stream_def_id)

        L1_pdict_id = self._create_input_param_dict_for_test(parameter_dict_name="output_param_dict")
        L1_stream_def_id = self.pubsub.create_stream_definition(name="L1_out", parameter_dictionary_id=L1_pdict_id)
        self.addCleanup(self.pubsub.delete_stream_definition, L1_stream_def_id)

        log.debug("Got the parsed parameter dictionary: id: %s", L0_pdict_id)
        log.debug("Got the stream def for parsed input: %s", L0_stream_def_id)
        log.debug("got the stream def for the output: %s", L1_stream_def_id)

        # Input data product
        L0_stream_dp_obj = IonObject(
            RT.DataProduct,
            name="L0_stream",
            description="L0 stream input to CTBP L1 transform",
            temporal_domain=tdom,
            spatial_domain=sdom,
        )

        input_dp_id = self.dataproduct_management.create_data_product(
            data_product=L0_stream_dp_obj, stream_definition_id=L0_stream_def_id
        )
        self.addCleanup(self.dataproduct_management.delete_data_product, input_dp_id)

        # output data product
        L1_stream_dp_obj = IonObject(
            RT.DataProduct,
            name="L1_stream",
            description="L1_stream output of CTBP L1 transform",
            temporal_domain=tdom,
            spatial_domain=sdom,
        )

        L1_stream_dp_id = self.dataproduct_management.create_data_product(
            data_product=L1_stream_dp_obj, stream_definition_id=L1_stream_def_id
        )
        self.addCleanup(self.dataproduct_management.delete_data_product, L1_stream_dp_id)

        # We need the key name here to be "L1_stream", since when the data process is launched, this name goes into
        # the config as in config.process.publish_streams.L1_stream when the config is used to launch the data process
        out_stream_ids, _ = self.resource_registry.find_objects(L1_stream_dp_id, PRED.hasStream, RT.Stream, True)
        self.assertTrue(len(out_stream_ids))
        output_stream_id = out_stream_ids[0]

        config = self._create_calibration_coefficients_dict()
        dproc_id = self.data_process_management.create_data_process(
            data_process_definition_id=dprocdef_id,
            in_data_product_ids=[input_dp_id],
            out_data_product_ids=[L1_stream_dp_id],
            configuration=config,
        )
        self.addCleanup(self.data_process_management.delete_data_process, dproc_id)
        log.debug("Created a data process for ctdbp_L1. id: %s", dproc_id)

        # Activate the data process
        # (cleanups run last-in first-out, so deactivation happens before deletion)
        self.data_process_management.activate_data_process(dproc_id)
        self.addCleanup(self.data_process_management.deactivate_data_process, dproc_id)

        # ----------- Find the stream that is associated with the input data product when it was created by create_data_product() --------------------------------
        stream_ids, _ = self.resource_registry.find_objects(input_dp_id, PRED.hasStream, RT.Stream, True)
        # Fail with an assertion (like the output stream above) rather than
        # an IndexError if the input data product has no stream.
        self.assertTrue(len(stream_ids))

        input_stream_id = stream_ids[0]
        input_stream = self.resource_registry.read(input_stream_id)
        stream_route = input_stream.stream_route

        log.debug("The input stream for the L1 transform: %s", input_stream_id)

        # ----------- Create a subscriber that will listen to the transform's output --------------------------------
        ar = gevent.event.AsyncResult()

        def subscriber(m, r, s):
            # Hand the first argument of the callback to the waiting test.
            ar.set(m)

        sub = StandaloneStreamSubscriber(exchange_name="sub", callback=subscriber)

        sub_id = self.pubsub.create_subscription(
            "subscriber_to_transform", stream_ids=[output_stream_id], exchange_name="sub"
        )
        self.addCleanup(self.pubsub.delete_subscription, sub_id)

        self.pubsub.activate_subscription(sub_id)
        self.addCleanup(self.pubsub.deactivate_subscription, sub_id)

        sub.start()
        self.addCleanup(sub.stop)

        # ----------- Publish on that stream so that the transform can receive it --------------------------------
        pub = StandaloneStreamPublisher(input_stream_id, stream_route)
        publish_granule = self._get_new_ctd_L0_packet(stream_definition_id=L0_stream_def_id, length=5)

        pub.publish(publish_granule)
        log.debug("Published the following granule: %s", publish_granule)

        granule_from_transform = ar.get(timeout=20)
        log.debug("Got the following granule from the transform: %s", granule_from_transform)

        # Check that the granule published by the L1 transform has the right properties
        self._check_granule_from_transform(granule_from_transform)

    def _check_granule_from_transform(self, granule):
        """
        An internal method to check if a granule has the right properties
        """
        rdt = RecordDictionaryTool.load_from_granule(granule)
        # The L1 output is expected to carry these four fields.
        for field_name in ("pressure", "temp", "conductivity", "time"):
            self.assertIn(field_name, rdt)