def test_visual_transform(self):
    """Push a plain CTD granule through the google_dt viz transform and
    verify the output granule carries the google-dt component payload."""
    input_data_product_id = self.ctd_plain_input_data_product()
    output_data_product_id = self.google_dt_data_product()

    # Definition of the visualization transform process.
    dpd = DataProcessDefinition(name='visual transform')
    dpd.data_process_type = DataProcessTypeEnum.TRANSFORM
    dpd.module = 'ion.processes.data.transforms.viz.google_dt'
    dpd.class_name = 'VizTransformGoogleDT'

    #--------------------------------------------------------------------------------
    # Walk before we base jump
    #--------------------------------------------------------------------------------

    data_process_definition_id = \
        self.data_process_management.create_data_process_definition(dpd)
    self.addCleanup(self.data_process_management.delete_data_process_definition,
                    data_process_definition_id)

    data_process_id = self.data_process_management.create_data_process2(
        data_process_definition_id=data_process_definition_id,
        in_data_product_ids=[input_data_product_id],
        out_data_product_ids=[output_data_product_id])
    self.addCleanup(self.data_process_management.delete_data_process2,
                    data_process_id)

    self.data_process_management.activate_data_process2(data_process_id)
    self.addCleanup(self.data_process_management.deactivate_data_process2,
                    data_process_id)

    validated = Event()

    def validation(msg, route, stream_id):
        # The transform must have filled in the google-dt components field.
        rdt = RecordDictionaryTool.load_from_granule(msg)
        self.assertTrue(rdt['google_dt_components'] is not None)
        validated.set()

    self.setup_subscriber(output_data_product_id, callback=validation)
    self.publish_to_plain_data_product(input_data_product_id)
    # Give the transform up to ten seconds to produce output.
    self.assertTrue(validated.wait(10))
def test_older_transform(self):
    """Exercise the legacy ctd_L0_all transform: one parsed CTD input fans
    out to conductivity, temperature, and pressure products, each checked
    by its own subscriber."""
    input_data_product_id = self.ctd_plain_input_data_product()

    # One output product (and its stream definition) per L0 channel.
    conductivity_data_product_id = self.make_data_product(
        'ctd_parsed_param_dict', 'conductivity_product',
        ['time', 'conductivity'])
    conductivity_stream_def_id = self.get_named_stream_def(
        'conductivity_product stream_def')
    temperature_data_product_id = self.make_data_product(
        'ctd_parsed_param_dict', 'temperature_product', ['time', 'temp'])
    temperature_stream_def_id = self.get_named_stream_def(
        'temperature_product stream_def')
    pressure_data_product_id = self.make_data_product(
        'ctd_parsed_param_dict', 'pressure_product', ['time', 'pressure'])
    pressure_stream_def_id = self.get_named_stream_def(
        'pressure_product stream_def')

    # Definition for the (older-style) L0 splitter transform.
    dpd = DataProcessDefinition(name='ctdL0')
    dpd.data_process_type = DataProcessTypeEnum.TRANSFORM
    dpd.module = 'ion.processes.data.transforms.ctd.ctd_L0_all'
    dpd.class_name = 'ctd_L0_all'
    data_process_definition_id = \
        self.data_process_management.create_data_process_definition(dpd)
    self.addCleanup(self.data_process_management.delete_data_process_definition,
                    data_process_definition_id)

    # Bind each output stream definition to its named transform port.
    for stream_def_id, binding in ((conductivity_stream_def_id, 'conductivity'),
                                   (temperature_stream_def_id, 'temperature'),
                                   (pressure_stream_def_id, 'pressure')):
        self.data_process_management.assign_stream_definition_to_data_process_definition(
            stream_def_id, data_process_definition_id, binding=binding)

    data_process_id = self.data_process_management.create_data_process2(
        data_process_definition_id=data_process_definition_id,
        in_data_product_ids=[input_data_product_id],
        out_data_product_ids=[conductivity_data_product_id,
                              temperature_data_product_id,
                              pressure_data_product_id])
    self.addCleanup(self.data_process_management.delete_data_process2,
                    data_process_id)

    self.data_process_management.activate_data_process2(data_process_id)
    self.addCleanup(self.data_process_management.deactivate_data_process2,
                    data_process_id)

    # One event + validator per output stream; each checks the known
    # L0 value derived from the canned input granule.
    conductivity_validated = Event()

    def validate_conductivity(msg, route, stream_id):
        rdt = RecordDictionaryTool.load_from_granule(msg)
        np.testing.assert_array_almost_equal(rdt['conductivity'],
                                             np.array([4.2914]))
        conductivity_validated.set()

    self.setup_subscriber(conductivity_data_product_id,
                          callback=validate_conductivity)

    temperature_validated = Event()

    def validate_temperature(msg, route, stream_id):
        rdt = RecordDictionaryTool.load_from_granule(msg)
        np.testing.assert_array_almost_equal(rdt['temp'], np.array([20.0]))
        temperature_validated.set()

    self.setup_subscriber(temperature_data_product_id,
                          callback=validate_temperature)

    pressure_validated = Event()

    def validate_pressure(msg, route, stream_id):
        rdt = RecordDictionaryTool.load_from_granule(msg)
        np.testing.assert_array_almost_equal(rdt['pressure'],
                                             np.array([3.068]))
        pressure_validated.set()

    self.setup_subscriber(pressure_data_product_id,
                          callback=validate_pressure)

    self.publish_to_plain_data_product(input_data_product_id)

    # All three outputs must arrive within their timeouts.
    self.assertTrue(conductivity_validated.wait(10))
    self.assertTrue(temperature_validated.wait(10))
    self.assertTrue(pressure_validated.wait(10))
def test_create_data_process_definition(self):
    """Creating a DPD with a parameter function must associate the two
    via the hasParameterFunction predicate."""
    pfunc = ParameterFunction(name='test_func')
    func_id = self.dataset_management.create_parameter_function(pfunc)

    data_process_definition = DataProcessDefinition()
    data_process_definition.name = 'Simple'
    dpd_id = self.dataprocessclient.create_data_process_definition(
        data_process_definition, func_id)

    # Tear down in reverse order of creation.
    self.addCleanup(self.dataprocessclient.delete_data_process_definition,
                    dpd_id)
    self.addCleanup(self.dataset_management.delete_parameter_function,
                    func_id)

    # Exactly one associated object, and it is the parameter function.
    objs, _ = self.rrclient.find_objects(dpd_id, PRED.hasParameterFunction,
                                         id_only=False)
    self.assertEquals(len(objs), 1)
    self.assertIsInstance(objs[0], ParameterFunction)
def load_parameter_function(self, row):
    """Build a ParameterFunctionResource (plus its companion
    DataProcessDefinition) from one preload spreadsheet row and return
    the new parameter function's id.

    NOTE(review): a second, near-identical definition of this method
    appears later in the file and will shadow this one.
    """
    name = row['Name']
    ftype = row['Function Type']
    func_expr = row['Function']
    owner = row['Owner']
    args = ast.literal_eval(row['Args'])
    descr = row['Description']
    data_process_management = DataProcessManagementServiceProcessClient(self)

    # Map the spreadsheet's type label onto the enum; anything else is
    # a preload conflict.
    type_by_label = {'PythonFunction': PFT.PYTHON,
                     'NumexprFunction': PFT.NUMEXPR}
    try:
        function_type = type_by_label[ftype]
    except KeyError:
        raise Conflict('Unsupported Function Type: %s' % ftype)

    parameter_function = ParameterFunctionResource(
        name=name,
        function=func_expr,
        function_type=function_type,
        owner=owner,
        args=args,
        description=descr)
    # Preserve the preload id so the resource can be looked up later.
    parameter_function.alt_ids = ['PRE:' + row['ID']]
    parameter_function_id = self.create_parameter_function(parameter_function)

    # Companion DPD so the function is usable as a data process.
    dpd = DataProcessDefinition()
    dpd.name = name
    dpd.description = 'Parameter Function Definition for %s' % name
    dpd.data_process_type = DataProcessTypeEnum.PARAMETER_FUNCTION
    dpd.parameters = args
    data_process_management.create_data_process_definition(
        dpd, parameter_function_id)

    return parameter_function_id
def load_parameter_function(self, row):
    """Create a ParameterFunctionResource and its companion
    DataProcessDefinition from one preload row; return the function id.

    NOTE(review): this duplicates an earlier definition of the same
    method in this file (this one wins at class-creation time).
    """
    # Pull the raw fields off the spreadsheet row.
    name = row['Name']
    ftype = row['Function Type']
    func_expr = row['Function']
    owner = row['Owner']
    args = ast.literal_eval(row['Args'])
    descr = row['Description']

    data_process_management = DataProcessManagementServiceProcessClient(self)

    # Translate the row's function-type label to the enum value.
    if ftype == 'PythonFunction':
        function_type = PFT.PYTHON
    elif ftype == 'NumexprFunction':
        function_type = PFT.NUMEXPR
    else:
        raise Conflict('Unsupported Function Type: %s' % ftype)

    resource = ParameterFunctionResource(name=name,
                                         function=func_expr,
                                         function_type=function_type,
                                         owner=owner,
                                         args=args,
                                         description=descr)
    # Carry the preload id along as an alternate id.
    resource.alt_ids = ['PRE:' + row['ID']]
    parameter_function_id = self.create_parameter_function(resource)

    # Register a matching PARAMETER_FUNCTION data-process definition.
    dpd = DataProcessDefinition()
    dpd.name = name
    dpd.description = 'Parameter Function Definition for %s' % name
    dpd.data_process_type = DataProcessTypeEnum.PARAMETER_FUNCTION
    dpd.parameters = args
    data_process_management.create_data_process_definition(
        dpd, parameter_function_id)

    return parameter_function_id
def test_add_parameter_to_data_product(self):
    """Attach a computed (function-backed) parameter to an existing data
    product and verify the corrected values come back on retrieval."""
    #self.preload_ui()
    # Reuse the product + data populated by test_add_parameter_function.
    self.test_add_parameter_function()
    data_product_id = self.data_product_id
    stream_def_id = self.resource_registry.find_objects(
        data_product_id, PRED.hasStreamDefinition, id_only=True)[0][0]
    pdict_id = self.resource_registry.find_objects(
        stream_def_id, PRED.hasParameterDictionary, id_only=True)[0][0]

    # Create a new data product that represents the L1 temp from the ctd simulator
    dp = DataProduct(name='CTD Simulator TEMPWAT L1',
                     category=DataProductTypeEnum.DERIVED)
    # Rebind stream_def_id to the child's (derived) stream definition.
    stream_def_id = self.pubsub_management.create_stream_definition(
        name='tempwat_l1',
        parameter_dictionary_id=pdict_id,
        available_fields=['time', 'temp'])
    dp_id = self.data_product_management.create_data_product(
        dp,
        stream_definition_id=stream_def_id,
        parent_data_product_id=data_product_id)

    # A simple linear correction: a * x + b.
    parameter_function = ParameterFunction(name='linear_corr',
                                           function_type=PFT.NUMEXPR,
                                           function='a * x + b',
                                           args=['x', 'a', 'b'])
    pf_id = self.dataset_management.create_parameter_function(
        parameter_function)

    dpd = DataProcessDefinition(name='linear_corr',
                                description='Linear Correction')
    self.data_process_management.create_data_process_definition(dpd, pf_id)

    # The computed parameter: temp corrected by fixed gain/offset.
    parameter = ParameterContext(name='temperature_corrected',
                                 parameter_type='function',
                                 parameter_function_id=pf_id,
                                 parameter_function_map={
                                     'x': 'temp',
                                     'a': 1.03,
                                     'b': 0.25
                                 },
                                 value_encoding='float32',
                                 units='deg_C',
                                 display_name='Temperature Corrected')
    p_id = self.dataset_management.create_parameter(parameter)

    # Add it to the parent or parsed data product
    self.data_product_management.add_parameter_to_data_product(
        p_id, data_product_id)

    # Then update the child's stream definition to include it
    stream_def = self.pubsub_management.read_stream_definition(stream_def_id)
    stream_def.available_fields.append('temperature_corrected')
    self.resource_registry.update(stream_def)

    dataset_id = self.RR2.find_dataset_id_of_data_product_using_has_dataset(
        data_product_id)
    # For some reason, it takes numerous seconds of yielding with gevent
    # for the coverage to actually save...
    gevent.sleep(10)

    granule = self.data_retriever.retrieve(dataset_id)
    rdt = RecordDictionaryTool.load_from_granule(granule)
    # Input temp was arange(30); expect the linear correction applied.
    np.testing.assert_array_almost_equal(
        rdt['temperature_corrected'],
        np.arange(30, dtype=np.float32) * 1.03 + 0.25,
        decimal=5)
def test_add_parameter_function(self):
    """End-to-end check of egg-based parameter functions: publish data,
    attach an `add_arrays` function as a data process, and verify the
    computed output plus DPD introspection (source and URL)."""
    # req-tag: NEW SA - 31

    # Make a CTDBP Data Product
    data_product_id = self.make_ctd_data_product()
    self.data_product_id = data_product_id
    dataset_id = self.RR2.find_dataset_id_of_data_product_using_has_dataset(
        data_product_id)
    dataset_monitor = DatasetMonitor(dataset_id)
    self.addCleanup(dataset_monitor.stop)

    # Throw some data in it
    rdt = self.ph.rdt_for_data_product(data_product_id)
    rdt['time'] = np.arange(30)
    rdt['temp'] = np.arange(30)
    rdt['pressure'] = np.arange(30)
    self.ph.publish_rdt_to_data_product(data_product_id, rdt)
    self.assertTrue(dataset_monitor.wait())
    dataset_monitor.event.clear()

    #--------------------------------------------------------------------------------
    # This is what the user defines either via preload or through the UI
    #--------------------------------------------------------------------------------
    # Where the egg is
    egg_url = self.egg_url

    # Make a parameter function
    pf = ParameterFunction(name='add_arrays',
                           function_type=PFT.PYTHON,
                           owner='ion_example.add_arrays',
                           function='add_arrays',
                           args=['a', 'b'],
                           egg_uri=egg_url)
    pfunc_id = self.dataset_management.create_parameter_function(pf)
    #--------------------------------------------------------------------------------
    self.addCleanup(self.dataset_management.delete_parameter_function,
                    pfunc_id)

    # Make a data process definition
    dpd = DataProcessDefinition(name='add_arrays',
                                description='Sums two arrays')
    dpd_id = self.data_process_management.create_data_process_definition(
        dpd, pfunc_id)

    # TODO: assert assoc exists
    # Wire temp and pressure into the function's a/b arguments.
    dp_id = self.data_process_management.create_data_process(
        dpd_id, [data_product_id],
        argument_map={'a': 'temp', 'b': 'pressure'},
        out_param_name='array_sum')

    # Verify that the function worked!
    granule = self.data_retriever.retrieve(dataset_id)
    rdt = RecordDictionaryTool.load_from_granule(granule)
    # temp + pressure = arange(30) + arange(30) = evens 0..58.
    np.testing.assert_array_equal(rdt['array_sum'], np.arange(0, 60, 2))

    # Verify that we can inspect it as well
    source_code = self.data_process_management.inspect_data_process_definition(
        dpd_id)
    self.assertEquals(source_code,
                      'def add_arrays(a, b):\n    return a+b\n')

    url = self.data_process_management.get_data_process_definition_url(
        dpd_id)
    self.assertEquals(
        url,
        'http://sddevrepo.oceanobservatories.org/releases/ion_example-0.1-py2.7.egg'
    )

    # A preloaded QC function should resolve to its github source URL.
    dpd_ids, _ = self.resource_registry.find_resources(
        name='dataqc_spiketest',
        restype=RT.DataProcessDefinition,
        id_only=True)
    dpd_id = dpd_ids[0]
    url = self.data_process_management.get_data_process_definition_url(
        dpd_id)
    self.assertEquals(
        url,
        'https://github.com/ooici/ion-functions/blob/master/ion_functions/qc/qc_functions.py'
    )