def load_data_product(self):
        dset_i = 0
        dataset_management      = DatasetManagementServiceClient()
        pubsub_management       = PubsubManagementServiceClient()
        data_product_management = DataProductManagementServiceClient()
        resource_registry       = self.container.instance.resource_registry

        tdom, sdom = time_series_domain()
        tdom = tdom.dump()
        sdom = sdom.dump()
        dp_obj = DataProduct(
            name='instrument_data_product_%i' % dset_i,
            description='ctd stream test',
            processing_level_code='Parsed_Canonical',
            temporal_domain = tdom,
            spatial_domain = sdom)
        pdict_id = dataset_management.read_parameter_dictionary_by_name('ctd_parsed_param_dict', id_only=True)
        stream_def_id = pubsub_management.create_stream_definition(name='parsed', parameter_dictionary_id=pdict_id)
        self.addCleanup(pubsub_management.delete_stream_definition, stream_def_id)
        data_product_id = data_product_management.create_data_product(data_product=dp_obj, stream_definition_id=stream_def_id)
        self.addCleanup(data_product_management.delete_data_product, data_product_id)
        data_product_management.activate_data_product_persistence(data_product_id)
        self.addCleanup(data_product_management.suspend_data_product_persistence, data_product_id)

        stream_ids, assocs = resource_registry.find_objects(subject=data_product_id, predicate='hasStream', id_only=True)
        stream_id = stream_ids[0]
        route = pubsub_management.read_stream_route(stream_id)

        dataset_ids, assocs = resource_registry.find_objects(subject=data_product_id, predicate='hasDataset', id_only=True)
        dataset_id = dataset_ids[0]

        return data_product_id, stream_id, route, stream_def_id, dataset_id
# Example 2
    def load_data_product(self):
        dset_i = 0
        dataset_management      = DatasetManagementServiceClient()
        pubsub_management       = PubsubManagementServiceClient()
        data_product_management = DataProductManagementServiceClient()
        resource_registry       = self.container.instance.resource_registry
        dp_obj = DataProduct(
            name='instrument_data_product_%i' % dset_i,
            description='ctd stream test',
            processing_level_code='Parsed_Canonical')
        pdict_id = dataset_management.read_parameter_dictionary_by_name('ctd_parsed_param_dict', id_only=True)
        stream_def_id = pubsub_management.create_stream_definition(name='parsed', parameter_dictionary_id=pdict_id)
        self.addCleanup(pubsub_management.delete_stream_definition, stream_def_id)
        data_product_id = data_product_management.create_data_product(data_product=dp_obj, stream_definition_id=stream_def_id)
        self.addCleanup(data_product_management.delete_data_product, data_product_id)
        data_product_management.activate_data_product_persistence(data_product_id)
        self.addCleanup(data_product_management.suspend_data_product_persistence, data_product_id)

        stream_ids, assocs = resource_registry.find_objects(subject=data_product_id, predicate='hasStream', id_only=True)
        stream_id = stream_ids[0]
        route = pubsub_management.read_stream_route(stream_id)

        dataset_ids, assocs = resource_registry.find_objects(subject=data_product_id, predicate='hasDataset', id_only=True)
        dataset_id = dataset_ids[0]

        return data_product_id, stream_id, route, stream_def_id, dataset_id
    @classmethod
    def publish_rdt_to_data_product(cls, data_product_id, rdt, connection_id='', connection_index=''):
        resource_registry       = Container.instance.resource_registry
        pubsub_management       = PubsubManagementServiceClient()
        stream_ids, _ = resource_registry.find_objects(data_product_id, 'hasStream', id_only=True)
        stream_id = stream_ids[0]
        route = pubsub_management.read_stream_route(stream_id)
        publisher = StandaloneStreamPublisher(stream_id, route)
        publisher.publish(rdt.to_granule(connection_id=connection_id, connection_index=connection_index))
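
# A minimal usage sketch (assumption): combining the two helpers above, a test could load a data
# product, build a RecordDictionaryTool against its stream definition, and push a granule onto it:
#
#     data_product_id, stream_id, route, stream_def_id, dataset_id = self.load_data_product()
#     rdt = RecordDictionaryTool(stream_definition_id=stream_def_id)
#     rdt['time'] = [0]
#     publish_rdt_to_data_product(data_product_id, rdt)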
class TestTransformPrime(IonIntegrationTestCase):
    def setUp(self):
        self._start_container()

        self.container.start_rel_from_url(
            'res/deploy/r2deploy.yml')  # Because hey why not?!

        self.dataset_management = DatasetManagementServiceClient()
        self.data_process_management = DataProcessManagementServiceClient()
        self.pubsub_management = PubsubManagementServiceClient()
        self.data_product_management = DataProductManagementServiceClient()

    def setup_streams(self):
        in_pdict_id = self.dataset_management.read_parameter_dictionary_by_name(
            'sbe37_L0_test', id_only=True)
        out_pdict_id = self.dataset_management.read_parameter_dictionary_by_name(
            'sbe37_L1_test', id_only=True)

        in_stream_def_id = self.pubsub_management.create_stream_definition(
            'L0 SBE37', parameter_dictionary_id=in_pdict_id)
        self.addCleanup(self.pubsub_management.delete_stream_definition,
                        in_stream_def_id)
        out_stream_def_id = self.pubsub_management.create_stream_definition(
            'L1 SBE37', parameter_dictionary_id=out_pdict_id)
        self.addCleanup(self.pubsub_management.delete_stream_definition,
                        out_stream_def_id)

        in_stream_id, in_route = self.pubsub_management.create_stream(
            'L0 input',
            stream_definition_id=in_stream_def_id,
            exchange_point='test')
        self.addCleanup(self.pubsub_management.delete_stream, in_stream_id)
        out_stream_id, out_route = self.pubsub_management.create_stream(
            'L1 output',
            stream_definition_id=out_stream_def_id,
            exchange_point='test')
        self.addCleanup(self.pubsub_management.delete_stream, out_stream_id)

        return [(in_stream_id, in_stream_def_id),
                (out_stream_id, out_stream_def_id)]

    def setup_advanced_streams(self):
        in_pdict_id = out_pdict_id = self.dataset_management.read_parameter_dictionary_by_name(
            'sbe37_LC_TEST', id_only=True)
        in_stream_def_id = self.pubsub_management.create_stream_definition(
            'sbe37_instrument',
            parameter_dictionary_id=in_pdict_id,
            available_fields=[
                'time', 'TEMPWAT_L0', 'CONDWAT_L0', 'PRESWAT_L0', 'lat', 'lon'
            ])
        self.addCleanup(self.pubsub_management.delete_stream_definition,
                        in_stream_def_id)

        out_stream_def_id = self.pubsub_management.create_stream_definition(
            'sbe37_l2',
            parameter_dictionary_id=out_pdict_id,
            available_fields=['time', 'rho', 'PRACSAL_L2'])
        self.addCleanup(self.pubsub_management.delete_stream_definition,
                        out_stream_def_id)

        in_stream_id, in_route = self.pubsub_management.create_stream(
            'instrument stream',
            stream_definition_id=in_stream_def_id,
            exchange_point='test')
        self.addCleanup(self.pubsub_management.delete_stream, in_stream_id)

        out_stream_id, out_route = self.pubsub_management.create_stream(
            'data product stream',
            stream_definition_id=out_stream_def_id,
            exchange_point='test')
        self.addCleanup(self.pubsub_management.delete_stream, out_stream_id)

        return [(in_stream_id, in_stream_def_id),
                (out_stream_id, out_stream_def_id)]

    def preload(self):
        config = DotDict()
        config.op = 'load'
        config.scenario = 'BASE,LC_TEST'
        config.categories = 'ParameterFunctions,ParameterDefs,ParameterDictionary'
        config.path = 'res/preload/r2_ioc'

        self.container.spawn_process('preload',
                                     'ion.processes.bootstrap.ion_loader',
                                     'IONLoader', config)

    def setup_advanced_transform(self):
        self.preload()
        queue_name = 'transform_prime'

        stream_info = self.setup_advanced_streams()
        in_stream_id, in_stream_def_id = stream_info[0]
        out_stream_id, out_stream_def_id = stream_info[1]

        routes = {}
        routes[(in_stream_id, out_stream_id)] = None

        config = DotDict()

        config.process.queue_name = queue_name
        config.process.routes = routes
        config.process.publish_streams = {out_stream_id: out_stream_id}

        sub_id = self.pubsub_management.create_subscription(
            queue_name, stream_ids=[in_stream_id])
        self.addCleanup(self.pubsub_management.delete_subscription, sub_id)
        self.pubsub_management.activate_subscription(sub_id)
        self.addCleanup(self.pubsub_management.deactivate_subscription, sub_id)

        self.container.spawn_process(
            'transform_prime', 'ion.processes.data.transforms.transform_prime',
            'TransformPrime', config)

        listen_sub_id = self.pubsub_management.create_subscription(
            'listener', stream_ids=[out_stream_id])
        self.addCleanup(self.pubsub_management.delete_subscription,
                        listen_sub_id)

        self.pubsub_management.activate_subscription(listen_sub_id)
        self.addCleanup(self.pubsub_management.deactivate_subscription,
                        listen_sub_id)
        return [(in_stream_id, in_stream_def_id),
                (out_stream_id, out_stream_def_id)]

    def setup_transform(self):
        self.preload()
        queue_name = 'transform_prime'

        stream_info = self.setup_streams()
        in_stream_id, in_stream_def_id = stream_info[0]
        out_stream_id, out_stream_def_id = stream_info[1]

        routes = {}
        routes[(in_stream_id, out_stream_id)] = None

        config = DotDict()

        config.process.queue_name = queue_name
        config.process.routes = routes
        config.process.publish_streams = {out_stream_id: out_stream_id}

        sub_id = self.pubsub_management.create_subscription(
            queue_name, stream_ids=[in_stream_id])
        self.addCleanup(self.pubsub_management.delete_subscription, sub_id)
        self.pubsub_management.activate_subscription(sub_id)
        self.addCleanup(self.pubsub_management.deactivate_subscription, sub_id)

        self.container.spawn_process(
            'transform_prime', 'ion.processes.data.transforms.transform_prime',
            'TransformPrime', config)

        listen_sub_id = self.pubsub_management.create_subscription(
            'listener', stream_ids=[out_stream_id])
        self.addCleanup(self.pubsub_management.delete_subscription,
                        listen_sub_id)

        self.pubsub_management.activate_subscription(listen_sub_id)
        self.addCleanup(self.pubsub_management.deactivate_subscription,
                        listen_sub_id)
        return [(in_stream_id, in_stream_def_id),
                (out_stream_id, out_stream_def_id)]

    def setup_validator(self, validator):
        listener = StandaloneStreamSubscriber('listener', validator)
        listener.start()
        self.addCleanup(listener.stop)

    @attr('LOCOINT')
    @unittest.skipIf(os.getenv('CEI_LAUNCH_TEST', False),
                     'Skip test while in CEI LAUNCH mode')
    def test_execute_advanced_transform(self):
        # Runs a transform across L0-L2 with stream definitions including available fields
        streams = self.setup_advanced_transform()
        in_stream_id, in_stream_def_id = streams[0]
        out_stream_id, out_stream_def_id = streams[1]

        validation_event = Event()

        def validator(msg, route, stream_id):
            rdt = RecordDictionaryTool.load_from_granule(msg)
            if not np.allclose(rdt['rho'], np.array([1001.0055034])):
                return
            validation_event.set()

        self.setup_validator(validator)

        in_route = self.pubsub_management.read_stream_route(in_stream_id)
        publisher = StandaloneStreamPublisher(in_stream_id, in_route)

        outbound_rdt = RecordDictionaryTool(
            stream_definition_id=in_stream_def_id)
        outbound_rdt['time'] = [0]
        outbound_rdt['TEMPWAT_L0'] = [280000]
        outbound_rdt['CONDWAT_L0'] = [100000]
        outbound_rdt['PRESWAT_L0'] = [2789]

        outbound_rdt['lat'] = [45]
        outbound_rdt['lon'] = [-71]

        outbound_granule = outbound_rdt.to_granule()

        publisher.publish(outbound_granule)

        self.assertTrue(validation_event.wait(2))

    @attr('LOCOINT')
    @unittest.skipIf(os.getenv('CEI_LAUNCH_TEST', False),
                     'Skip test while in CEI LAUNCH mode')
    def test_execute_transform(self):
        streams = self.setup_transform()
        in_stream_id, in_stream_def_id = streams[0]
        out_stream_id, out_stream_def_id = streams[1]

        validation_event = Event()

        def validator(msg, route, stream_id):
            rdt = RecordDictionaryTool.load_from_granule(msg)
            if not np.allclose(rdt['TEMPWAT_L1'], np.array([18.])):
                return
            if not np.allclose(rdt['CONDWAT_L1'], np.array([0.5])):
                return
            if not np.allclose(rdt['PRESWAT_L1'], np.array([0.04536611])):
                return
            validation_event.set()

        self.setup_validator(validator)

        in_route = self.pubsub_management.read_stream_route(in_stream_id)
        publisher = StandaloneStreamPublisher(in_stream_id, in_route)

        outbound_rdt = RecordDictionaryTool(
            stream_definition_id=in_stream_def_id)
        outbound_rdt['time'] = [0]
        outbound_rdt['TEMPWAT_L0'] = [280000]
        outbound_rdt['CONDWAT_L0'] = [100000]
        outbound_rdt['PRESWAT_L0'] = [2789]

        outbound_rdt['lat'] = [45]
        outbound_rdt['lon'] = [-71]

        outbound_granule = outbound_rdt.to_granule()

        publisher.publish(outbound_granule)

        self.assertTrue(validation_event.wait(2))
class TestTransformWorker(IonIntegrationTestCase):

    def setUp(self):
        self._start_container()
        self.container.start_rel_from_url('res/deploy/r2deploy.yml')
        # Instantiate a process to represent the test
        process=TransformWorkerTestProcess()

        self.dataset_management_client = DatasetManagementServiceClient(node=self.container.node)
        self.pubsub_client = PubsubManagementServiceClient(node=self.container.node)
        self.dataproductclient = DataProductManagementServiceClient(node=self.container.node)
        self.dataprocessclient = DataProcessManagementServiceClient(node=self.container.node)
        self.processdispatchclient = ProcessDispatcherServiceClient(node=self.container.node)
        self.damsclient = DataAcquisitionManagementServiceClient(node=self.container.node)
        self.rrclient = ResourceRegistryServiceClient(node=self.container.node)
        self.imsclient = InstrumentManagementServiceProcessClient(node=self.container.node, process = process)

        self.time_dom, self.spatial_dom = time_series_domain()

        self.ph = ParameterHelper(self.dataset_management_client, self.addCleanup)

        self.wait_time = CFG.get_safe('endpoint.receive.timeout', 10)

    def push_granule(self, data_product_id):
        '''
        Publishes and monitors that the granule arrived
        '''
        datasets, _ = self.rrclient.find_objects(data_product_id, PRED.hasDataset, id_only=True)
        dataset_monitor = DatasetMonitor(datasets[0])

        rdt = self.ph.rdt_for_data_product(data_product_id)
        self.ph.fill_parsed_rdt(rdt)
        self.ph.publish_rdt_to_data_product(data_product_id, rdt)

        assert dataset_monitor.wait()
        dataset_monitor.stop()



    @attr('LOCOINT')
    @unittest.skipIf(os.getenv('CEI_LAUNCH_TEST', False), 'Skip test while in CEI LAUNCH mode')
    def test_transform_worker(self):

        # test that a data process (type: data-product-in / data-product-out) can be defined and launched.
        # verify that the output granule fields are correctly populated

        # test that the input and output data products are linked to facilitate provenance

        self.dp_list = []
        self.data_process_objs = []
        self._output_stream_ids = []
        self.granule_verified = Event()
        self.worker_assigned_event_verified = Event()
        self.dp_created_event_verified = Event()
        self.heartbeat_event_verified = Event()

        self.parameter_dict_id = self.dataset_management_client.read_parameter_dictionary_by_name(name='ctd_parsed_param_dict', id_only=True)

        # create the StreamDefinition
        self.stream_def_id = self.pubsub_client.create_stream_definition(name='stream_def', parameter_dictionary_id=self.parameter_dict_id)
        self.addCleanup(self.pubsub_client.delete_stream_definition, self.stream_def_id)

        # create the DataProduct that is the input to the data processes
        input_dp_obj = IonObject(  RT.DataProduct, name='input_data_product', description='input test stream',
                                             temporal_domain = self.time_dom.dump(),  spatial_domain = self.spatial_dom.dump())
        self.input_dp_id = self.dataproductclient.create_data_product(data_product=input_dp_obj,  stream_definition_id=self.stream_def_id)

        # retrieve the Stream for this data product
        stream_ids, assoc_ids = self.rrclient.find_objects(self.input_dp_id, PRED.hasStream, RT.Stream, True)
        self.stream_id = stream_ids[0]

        self.start_event_listener()

        # create the DPD, DataProcess and output DataProduct
        dataprocessdef_id, dataprocess_id, dataproduct_id = self.create_data_process()
        self.dp_list.append(dataprocess_id)

        # validate the repository for data product algorithms persists the new resources  NEW SA-1
        # create_data_process call created one of each
        dpd_ids, _ = self.rrclient.find_resources(restype=OT.DataProcessDefinition, id_only=False)
        # there will be more than one because of the DPDs that represent the PFs in the data product above
        self.assertTrue(dpd_ids is not None)
        dp_ids, _ = self.rrclient.find_resources(restype=OT.DataProcess, id_only=False)
        # only one DP because the PFs in the data product above are not activated yet.
        self.assertEquals(len(dp_ids), 1)


        # validate the name and version label  NEW SA - 2
        dataprocessdef_obj = self.dataprocessclient.read_data_process_definition(dataprocessdef_id)
        self.assertEqual(dataprocessdef_obj.version_label, '1.0a')
        self.assertEqual(dataprocessdef_obj.name, 'add_arrays')

        # validate that the DPD has an attachment  NEW SA - 21
        attachment_ids, assoc_ids = self.rrclient.find_objects(dataprocessdef_id, PRED.hasAttachment, RT.Attachment, True)
        self.assertEqual(len(attachment_ids), 1)
        attachment_obj = self.rrclient.read_attachment(attachment_ids[0])
        log.debug('attachment: %s', attachment_obj)

        # validate that the data process resource has input and output data products associated
        # L4-CI-SA-RQ-364  and NEW SA-3
        outproduct_ids, assoc_ids = self.rrclient.find_objects(dataprocess_id, PRED.hasOutputProduct, RT.DataProduct, True)
        self.assertEqual(len(outproduct_ids), 1)
        inproduct_ids, assoc_ids = self.rrclient.find_objects(dataprocess_id, PRED.hasInputProduct, RT.DataProduct, True)
        self.assertEqual(len(inproduct_ids), 1)

        # Test for provenance. Get Data product produced by the data processes
        output_data_product_id,_ = self.rrclient.find_objects(subject=dataprocess_id,
            object_type=RT.DataProduct,
            predicate=PRED.hasOutputProduct,
            id_only=True)

        output_data_product_provenance = self.dataproductclient.get_data_product_provenance(output_data_product_id[0])

        # Do a basic check that there are 2 entries in the provenance graph: the parent (input) and the
        # child (output) data products; the DataProcessDefinition that created the child is recorded on the parent link.
        self.assertEquals(len(output_data_product_provenance), 2)
        self.assertTrue(self.input_dp_id in output_data_product_provenance[output_data_product_id[0]]['parents'])
        self.assertTrue(output_data_product_provenance[output_data_product_id[0]]['parents'][self.input_dp_id]['data_process_definition_id'] == dataprocessdef_id)


        # NEW SA - 4 | Data processing shall include the appropriate data product algorithm name and version number in
        # the metadata of each output data product created by the data product algorithm.
        output_data_product_obj,_ = self.rrclient.find_objects(subject=dataprocess_id,
            object_type=RT.DataProduct,
            predicate=PRED.hasOutputProduct,
            id_only=False)
        self.assertIsNotNone(output_data_product_obj[0].name)
        self.assertIsNotNone(output_data_product_obj[0]._rev)

        # retrieve subscription from data process
        subscription_objs, _ = self.rrclient.find_objects(subject=dataprocess_id, predicate=PRED.hasSubscription, object_type=RT.Subscription, id_only=False)
        log.debug('test_transform_worker subscription_obj:  %s', subscription_objs[0])

        # create a queue to catch the published granules
        self.subscription_id = self.pubsub_client.create_subscription(name='parsed_subscription', stream_ids=[self.stream_id], exchange_name=subscription_objs[0].exchange_name)
        self.addCleanup(self.pubsub_client.delete_subscription, self.subscription_id)

        self.pubsub_client.activate_subscription(self.subscription_id)
        self.addCleanup(self.pubsub_client.deactivate_subscription, self.subscription_id)

        stream_route = self.pubsub_client.read_stream_route(self.stream_id)
        self.publisher = StandaloneStreamPublisher(stream_id=self.stream_id, stream_route=stream_route )


        for n in range(1, 101):
            rdt = RecordDictionaryTool(stream_definition_id=self.stream_def_id)
            rdt['time']         = [0] # time should always come first
            rdt['conductivity'] = [1]
            rdt['pressure']     = [2]
            rdt['salinity']     = [8]

            self.publisher.publish(rdt.to_granule())

        # validate that the output granule is received and the updated value is correct
        self.assertTrue(self.granule_verified.wait(self.wait_time))


        # validate that the data process loaded into worker event is received    (L4-CI-SA-RQ-182)
        self.assertTrue(self.worker_assigned_event_verified.wait(self.wait_time))

        # validate that the data process create (with data product ids) event is received    (NEW SA -42)
        self.assertTrue(self.dp_created_event_verified.wait(self.wait_time))

        # validate that the data process heartbeat event is received (for every hundred granules processed) (L4-CI-SA-RQ-182)
        # this takes a while, so set the wait limit to a large value
        self.assertTrue(self.heartbeat_event_verified.wait(200))

        # validate that the code from the transform function can be retrieved via inspect_data_process_definition
        src = self.dataprocessclient.inspect_data_process_definition(dataprocessdef_id)
        self.assertIn( 'def add_arrays(a, b)', src)

        # now delete the DPD and DP, then verify that the resources are retired so that the information required for provenance is still available
        self.dataprocessclient.delete_data_process(dataprocess_id)
        self.dataprocessclient.delete_data_process_definition(dataprocessdef_id)

        in_dp_objs, _ = self.rrclient.find_objects(subject=dataprocess_id, predicate=PRED.hasInputProduct, object_type=RT.DataProduct, id_only=True)
        self.assertTrue(in_dp_objs is not None)

        dpd_objs, _ = self.rrclient.find_subjects(subject_type=RT.DataProcessDefinition, predicate=PRED.hasDataProcess, object=dataprocess_id, id_only=True)
        self.assertTrue(dpd_objs is not None)

    @attr('LOCOINT')
    @unittest.skipIf(os.getenv('CEI_LAUNCH_TEST', False), 'Skip test while in CEI LAUNCH mode')
    def test_transform_worker_with_instrumentdevice(self):

        # test that a data process (type: data-product-in / data-product-out) can be defined and launched.
        # verify that the output granule fields are correctly populated

        # test that the input and output data products are linked to facilitate provenance

        self.data_process_objs = []
        self._output_stream_ids = []
        self.event_verified = Event()

        # Create CTD Parsed as the initial data product
        # create a stream definition for the data from the ctd simulator
        self.parameter_dict_id = self.dataset_management_client.read_parameter_dictionary_by_name('ctd_parsed_param_dict', id_only=True)
        self.stream_def_id = self.pubsub_client.create_stream_definition(name='stream_def', parameter_dictionary_id=self.parameter_dict_id)

        # create the DataProduct that is the input to the data processes
        input_dp_obj = IonObject(  RT.DataProduct, name='input_data_product', description='input test stream',
            temporal_domain = self.time_dom.dump(),  spatial_domain = self.spatial_dom.dump())
        self.input_dp_id = self.dataproductclient.create_data_product(data_product=input_dp_obj,  stream_definition_id=self.stream_def_id)

        # retrieve the Stream for this data product
        stream_ids, assoc_ids = self.rrclient.find_objects(self.input_dp_id, PRED.hasStream, RT.Stream, True)
        self.stream_id = stream_ids[0]

        log.debug('new ctd_parsed_data_product_id = %s' % self.input_dp_id)

        # only ever need one device for testing purposes.
        instDevice_obj,_ = self.rrclient.find_resources(restype=RT.InstrumentDevice, name='test_ctd_device')
        if instDevice_obj:
            instDevice_id = instDevice_obj[0]._id
        else:
            instDevice_obj = IonObject(RT.InstrumentDevice, name='test_ctd_device', description="test_ctd_device", serial_number="12345" )
            instDevice_id = self.imsclient.create_instrument_device(instrument_device=instDevice_obj)

        self.damsclient.assign_data_product(input_resource_id=instDevice_id, data_product_id=self.input_dp_id)

        # create the DPD, DataProcess and output DataProduct
        dataprocessdef_id, dataprocess_id, dataproduct_id = self.create_data_process()

        self.addCleanup(self.dataprocessclient.delete_data_process, dataprocess_id)
        self.addCleanup(self.dataprocessclient.delete_data_process_definition, dataprocessdef_id)

        # Test for provenance. Get Data product produced by the data processes
        output_data_product_id,_ = self.rrclient.find_objects(subject=dataprocess_id,
            object_type=RT.DataProduct,
            predicate=PRED.hasOutputProduct,
            id_only=True)

        output_data_product_provenance = self.dataproductclient.get_data_product_provenance(output_data_product_id[0])

        # Do a basic check that there are 3 entries in the provenance graph: the output data product,
        # its parent (input) data product, and the instrument device the input product is assigned to.
        self.assertEquals(len(output_data_product_provenance), 3)
        self.assertTrue(self.input_dp_id in output_data_product_provenance[output_data_product_id[0]]['parents'])
        self.assertTrue(instDevice_id in output_data_product_provenance[self.input_dp_id]['parents'])
        self.assertTrue(output_data_product_provenance[instDevice_id]['type'] == 'InstrumentDevice')

    @attr('LOCOINT')
    @unittest.skipIf(os.getenv('CEI_LAUNCH_TEST', False), 'Skip test while in CEI LAUNCH mode')
    def test_transform_worker_with_platformdevice(self):

        # test that a data process (type: data-product-in / data-product-out) can be defined and launched.
        # verify that the output granule fields are correctly populated

        # test that the input and output data products are linked to facilitate provenance

        self.data_process_objs = []
        self._output_stream_ids = []
        self.event_verified = Event()

        # Create CTD Parsed as the initial data product
        # create a stream definition for the data from the ctd simulator
        self.parameter_dict_id = self.dataset_management_client.read_parameter_dictionary_by_name('ctd_parsed_param_dict', id_only=True)
        self.stream_def_id = self.pubsub_client.create_stream_definition(name='stream_def', parameter_dictionary_id=self.parameter_dict_id)

        # create the DataProduct that is the input to the data processes
        input_dp_obj = IonObject(  RT.DataProduct, name='input_data_product', description='input test stream',
            temporal_domain = self.time_dom.dump(),  spatial_domain = self.spatial_dom.dump())
        self.input_dp_id = self.dataproductclient.create_data_product(data_product=input_dp_obj,  stream_definition_id=self.stream_def_id)

        # retrieve the Stream for this data product
        stream_ids, assoc_ids = self.rrclient.find_objects(self.input_dp_id, PRED.hasStream, RT.Stream, True)
        self.stream_id = stream_ids[0]

        log.debug('new ctd_parsed_data_product_id = %s' % self.input_dp_id)

        # only ever need one device for testing purposes.
        platform_device_obj,_ = self.rrclient.find_resources(restype=RT.PlatformDevice, name='TestPlatform')
        if platform_device_obj:
            platform_device_id = platform_device_obj[0]._id
        else:
            platform_device_obj = IonObject(RT.PlatformDevice, name='TestPlatform', description="TestPlatform", serial_number="12345" )
            platform_device_id = self.imsclient.create_platform_device(platform_device=platform_device_obj)

        self.damsclient.assign_data_product(input_resource_id=platform_device_id, data_product_id=self.input_dp_id)

        # create the DPD, DataProcess and output DataProduct
        dataprocessdef_id, dataprocess_id, dataproduct_id = self.create_data_process()
        self.addCleanup(self.dataprocessclient.delete_data_process, dataprocess_id)
        self.addCleanup(self.dataprocessclient.delete_data_process_definition, dataprocessdef_id)

        # Test for provenance. Get Data product produced by the data processes
        output_data_product_id,_ = self.rrclient.find_objects(subject=dataprocess_id,
            object_type=RT.DataProduct,
            predicate=PRED.hasOutputProduct,
            id_only=True)

        output_data_product_provenance = self.dataproductclient.get_data_product_provenance(output_data_product_id[0])

        # Do a basic check that there are 3 entries in the provenance graph: the output data product,
        # its parent (input) data product, and the platform device the input product is assigned to.
        self.assertEquals(len(output_data_product_provenance), 3)
        self.assertTrue(self.input_dp_id in output_data_product_provenance[output_data_product_id[0]]['parents'])
        self.assertTrue(platform_device_id in output_data_product_provenance[self.input_dp_id]['parents'])
        self.assertTrue(output_data_product_provenance[platform_device_id]['type'] == 'PlatformDevice')


    @attr('LOCOINT')
    @unittest.skipIf(os.getenv('CEI_LAUNCH_TEST', False), 'Skip test while in CEI LAUNCH mode')
    def test_event_transform_worker(self):
        self.data_process_objs = []
        self._output_stream_ids = []
        self.event_verified = Event()


        # test that a data process (type: data-product-in / event-out) can be defined and launched.
        # verify that event fields are correctly populated


        self.parameter_dict_id = self.dataset_management_client.read_parameter_dictionary_by_name(name='ctd_parsed_param_dict', id_only=True)

        # create the StreamDefinition
        self.stream_def_id = self.pubsub_client.create_stream_definition(name='stream_def', parameter_dictionary_id=self.parameter_dict_id)
        self.addCleanup(self.pubsub_client.delete_stream_definition, self.stream_def_id)

        # create the DataProduct
        input_dp_obj = IonObject(  RT.DataProduct, name='input_data_product', description='input test stream',
                                             temporal_domain = self.time_dom.dump(),  spatial_domain = self.spatial_dom.dump())
        self.input_dp_id = self.dataproductclient.create_data_product(data_product=input_dp_obj,  stream_definition_id=self.stream_def_id)

        # retrieve the Stream for this data product
        stream_ids, assoc_ids = self.rrclient.find_objects(self.input_dp_id, PRED.hasStream, RT.Stream, True)
        self.stream_id = stream_ids[0]

        # create the DPD and two DPs
        self.event_data_process_id = self.create_event_data_processes()

        # retrieve subscription from data process
        subscription_objs, _ = self.rrclient.find_objects(subject=self.event_data_process_id, predicate=PRED.hasSubscription, object_type=RT.Subscription, id_only=False)
        log.debug('test_event_transform_worker subscription_obj:  %s', subscription_objs[0])

        # create a queue to catch the published granules
        self.subscription_id = self.pubsub_client.create_subscription(name='parsed_subscription', stream_ids=[self.stream_id], exchange_name=subscription_objs[0].exchange_name)
        self.addCleanup(self.pubsub_client.delete_subscription, self.subscription_id)

        self.pubsub_client.activate_subscription(self.subscription_id)
        self.addCleanup(self.pubsub_client.deactivate_subscription, self.subscription_id)

        stream_route = self.pubsub_client.read_stream_route(self.stream_id)
        self.publisher = StandaloneStreamPublisher(stream_id=self.stream_id, stream_route=stream_route )

        self.start_event_transform_listener()

        self.data_modified = Event()

        rdt = RecordDictionaryTool(stream_definition_id=self.stream_def_id)
        rdt['time']         = [0] # time should always come first
        rdt['conductivity'] = [1]
        rdt['pressure']     = [2]
        rdt['salinity']     = [8]

        self.publisher.publish(rdt.to_granule())

        self.assertTrue(self.event_verified.wait(self.wait_time))



    @attr('LOCOINT')
    @unittest.skipIf(os.getenv('CEI_LAUNCH_TEST', False), 'Skip test while in CEI LAUNCH mode')
    def test_bad_argument_map(self):
        self._output_stream_ids = []

        # test that a data process (type: data-product-in / data-product-out) parameter mapping is validated during
        # data process creation and that the correct exception is raised for both input and output.

        self.parameter_dict_id = self.dataset_management_client.read_parameter_dictionary_by_name(name='ctd_parsed_param_dict', id_only=True)

        # create the StreamDefinition
        self.stream_def_id = self.pubsub_client.create_stream_definition(name='stream_def', parameter_dictionary_id=self.parameter_dict_id)
        self.addCleanup(self.pubsub_client.delete_stream_definition, self.stream_def_id)

        # create the DataProduct that is the input to the data processes
        input_dp_obj = IonObject(  RT.DataProduct, name='input_data_product', description='input test stream',
                                             temporal_domain = self.time_dom.dump(),  spatial_domain = self.spatial_dom.dump())
        self.input_dp_id = self.dataproductclient.create_data_product(data_product=input_dp_obj,  stream_definition_id=self.stream_def_id)

        # two data processes using one transform and one DPD

        dp1_func_output_dp_id =  self.create_output_data_product()


        # Set up DPD and DP #2 - array add function
        tf_obj = IonObject(RT.TransformFunction,
            name='add_array_func',
            description='adds values in an array',
            function='add_arrays',
            module="ion_example.add_arrays",
            arguments=['arr1', 'arr2'],
            function_type=TransformFunctionType.TRANSFORM,
            uri='http://sddevrepo.oceanobservatories.org/releases/ion_example-0.1-py2.7.egg'
            )
        add_array_func_id, rev = self.rrclient.create(tf_obj)

        dpd_obj = IonObject(RT.DataProcessDefinition,
            name='add_arrays',
            description='adds the values of two arrays',
            data_process_type=DataProcessTypeEnum.TRANSFORM_PROCESS
            )
        add_array_dpd_id = self.dataprocessclient.create_data_process_definition(data_process_definition=dpd_obj, function_id=add_array_func_id)
        self.dataprocessclient.assign_stream_definition_to_data_process_definition(self.stream_def_id, add_array_dpd_id, binding='add_array_func' )

        # create the data process with invalid argument map
        argument_map = {"arr1": "foo", "arr2": "bar"}
        output_param = "salinity"
        with self.assertRaises(BadRequest) as cm:
            dp1_data_process_id = self.dataprocessclient.create_data_process(data_process_definition_id=add_array_dpd_id, inputs=[self.input_dp_id],
                                                                                 outputs=[dp1_func_output_dp_id], argument_map=argument_map, out_param_name=output_param)

        ex = cm.exception
        log.debug('exception raised: %s', ex)
        self.assertEqual(ex.message, "Input data product does not contain the parameters defined in argument map")

        # create the data process with invalid output parameter name
        argument_map = {"arr1": "conductivity", "arr2": "pressure"}
        output_param = "foo"
        with self.assertRaises(BadRequest) as cm:
            dp1_data_process_id = self.dataprocessclient.create_data_process(data_process_definition_id=add_array_dpd_id, inputs=[self.input_dp_id],
                                                                                 outputs=[dp1_func_output_dp_id], argument_map=argument_map, out_param_name=output_param)

        ex = cm.exception
        log.debug('exception raised: %s', ex)
        self.assertEqual(ex.message, "Output data product does not contain the output parameter name provided")


    def create_event_data_processes(self):

        # two data processes using one transform and one DPD
        argument_map= {"a": "salinity"}


        # set up DPD and DP #2 - array add function
        tf_obj = IonObject(RT.TransformFunction,
            name='validate_salinity_array',
            description='validate_salinity_array',
            function='validate_salinity_array',
            module="ion.processes.data.transforms.test.test_transform_worker",
            arguments=['a'],
            function_type=TransformFunctionType.TRANSFORM
            )

        add_array_func_id, rev = self.rrclient.create(tf_obj)

        dpd_obj = IonObject(RT.DataProcessDefinition,
            name='validate_salinity_array',
            description='validate_salinity_array',
            data_process_type=DataProcessTypeEnum.TRANSFORM_PROCESS,
            )
        add_array_dpd_id = self.dataprocessclient.create_data_process_definition(data_process_definition=dpd_obj, function_id=add_array_func_id)
        self.dataprocessclient.assign_stream_definition_to_data_process_definition(self.stream_def_id, add_array_dpd_id, binding='validate_salinity_array' )

        # create the data process
        dp1_data_process_id = self.dataprocessclient.create_data_process(data_process_definition_id=add_array_dpd_id, inputs=[self.input_dp_id],
                                                                             outputs=None, argument_map=argument_map)
        self.damsclient.register_process(dp1_data_process_id)
        self.addCleanup(self.dataprocessclient.delete_data_process, dp1_data_process_id)

        return dp1_data_process_id

    def create_data_process(self):

        # two data processes using one transform and one DPD

        dp1_func_output_dp_id =  self.create_output_data_product()
        argument_map = {"arr1": "conductivity", "arr2": "pressure"}
        output_param = "salinity"


        # set up DPD and DP #2 - array add function
        tf_obj = IonObject(RT.TransformFunction,
            name='add_array_func',
            description='adds values in an array',
            function='add_arrays',
            module="ion_example.add_arrays",
            arguments=['arr1', 'arr2'],
            function_type=TransformFunctionType.TRANSFORM,
             uri='http://sddevrepo.oceanobservatories.org/releases/ion_example-0.1-py2.7.egg'
            )
        add_array_func_id, rev = self.rrclient.create(tf_obj)

        dpd_obj = IonObject(RT.DataProcessDefinition,
            name='add_arrays',
            description='adds the values of two arrays',
            data_process_type=DataProcessTypeEnum.TRANSFORM_PROCESS,
            version_label='1.0a'
            )
        add_array_dpd_id = self.dataprocessclient.create_data_process_definition(data_process_definition=dpd_obj, function_id=add_array_func_id)
        self.dataprocessclient.assign_stream_definition_to_data_process_definition(self.stream_def_id, add_array_dpd_id, binding='add_array_func' )

        # create the data process
        dp1_data_process_id = self.dataprocessclient.create_data_process(data_process_definition_id=add_array_dpd_id, inputs=[self.input_dp_id],
                                                                             outputs=[dp1_func_output_dp_id], argument_map=argument_map, out_param_name=output_param)
        self.damsclient.register_process(dp1_data_process_id)
        #self.addCleanup(self.dataprocessclient.delete_data_process, dp1_data_process_id)

        # add an attachment object to this DPD to test new SA-21
        import msgpack
        attachment_content = 'foo bar'
        attachment_obj = IonObject( RT.Attachment,
                                name='test_attachment',
                                attachment_type=AttachmentType.ASCII,
                                content_type='text/plain',
                                content=msgpack.packb(attachment_content))
        att_id = self.rrclient.create_attachment(add_array_dpd_id, attachment_obj)
        self.addCleanup(self.rrclient.delete_attachment, att_id)

        return add_array_dpd_id, dp1_data_process_id, dp1_func_output_dp_id


    def create_output_data_product(self):
        dp1_outgoing_stream_id = self.pubsub_client.create_stream_definition(name='dp1_stream', parameter_dictionary_id=self.parameter_dict_id)

        dp1_output_dp_obj = IonObject(  RT.DataProduct,
            name='data_process1_data_product',
            description='output of add array func',
            temporal_domain = self.time_dom.dump(),
            spatial_domain = self.spatial_dom.dump())

        dp1_func_output_dp_id = self.dataproductclient.create_data_product(dp1_output_dp_obj,  dp1_outgoing_stream_id)
        self.addCleanup(self.dataproductclient.delete_data_product, dp1_func_output_dp_id)
        # retrieve the id of the OUTPUT stream from the out Data Product and add to granule logger
        stream_ids, _ = self.rrclient.find_objects(dp1_func_output_dp_id, PRED.hasStream, None, True)
        self._output_stream_ids.append(stream_ids[0])

        subscription_id = self.pubsub_client.create_subscription('validator', data_product_ids=[dp1_func_output_dp_id])
        self.addCleanup(self.pubsub_client.delete_subscription, subscription_id)

        def on_granule(msg, route, stream_id):
            log.debug('recv_packet stream_id: %s route: %s   msg: %s', stream_id, route, msg)
            self.validate_output_granule(msg, route, stream_id)
            self.granule_verified.set()

        validator = StandaloneStreamSubscriber('validator', callback=on_granule)
        validator.start()
        self.addCleanup(validator.stop)

        self.pubsub_client.activate_subscription(subscription_id)
        self.addCleanup(self.pubsub_client.deactivate_subscription, subscription_id)

        return dp1_func_output_dp_id


    def validate_event(self, *args, **kwargs):
        """
        This method is a callback function for receiving DataProcessStatusEvent.
        """
        data_process_event = args[0]
        log.debug("DataProcessStatusEvent: %s" ,  str(data_process_event.__dict__))

        # if data process already created, check origin
        if self.dp_list:
            self.assertIn( data_process_event.origin, self.dp_list)

            # if this is a heartbeat event then 100 granules have been processed
            if 'data process status update.' in data_process_event.description:
                self.heartbeat_event_verified.set()

        else:
            # else check that this is the assign event

            if 'Data process assigned to transform worker' in data_process_event.description:
                self.worker_assigned_event_verified.set()
            elif 'Data process created for data product' in data_process_event.description:
                self.dp_created_event_verified.set()


    def validate_output_granule(self, msg, route, stream_id):
        self.assertIn( stream_id, self._output_stream_ids)

        rdt = RecordDictionaryTool.load_from_granule(msg)
        log.debug('validate_output_granule  rdt: %s', rdt)
        sal_val = rdt['salinity']
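        # salinity is expected to be [3]: the add_arrays transform sums the published conductivity ([1])
        # and pressure ([2]) values, per the argument_map configured in create_data_process()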
        np.testing.assert_array_equal(sal_val, np.array([3]))

    def start_event_listener(self):

        es = EventSubscriber(event_type=OT.DataProcessStatusEvent, callback=self.validate_event)
        es.start()

        self.addCleanup(es.stop)

    def validate_transform_event(self, *args, **kwargs):
        """
        This method is a callback function for receiving DataProcessStatusEvent.
        """
        status_alert_event = args[0]

        np.testing.assert_array_equal(status_alert_event.origin, self.stream_id )
        np.testing.assert_array_equal(status_alert_event.values, np.array([self.event_data_process_id]))
        log.debug("DeviceStatusAlertEvent: %s" ,  str(status_alert_event.__dict__))
        self.event_verified.set()


    def start_event_transform_listener(self):
        es = EventSubscriber(event_type=OT.DeviceStatusAlertEvent, callback=self.validate_transform_event)
        es.start()

        self.addCleanup(es.stop)


    def test_download(self):
        egg_url = 'http://sddevrepo.oceanobservatories.org/releases/ion_example-0.1-py2.7.egg'
        egg_path = TransformWorker.download_egg(egg_url)

        import pkg_resources
        pkg_resources.working_set.add_entry(egg_path)

        from ion_example.add_arrays import add_arrays

        a = add_arrays(1,2)
        self.assertEquals(a,3)
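
# A minimal sketch (assumption) of the add_arrays transform function packaged in the ion_example egg,
# inferred from the checks above (the inspected source contains 'def add_arrays(a, b)',
# add_arrays(1, 2) == 3, and the worker's output salinity equals conductivity + pressure).
# The real egg at sddevrepo.oceanobservatories.org may differ.
import numpy as np

def add_arrays(a, b):
    # element-wise sum of the two inputs (scalars or numpy arrays)
    return np.add(a, b)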
class TestDataProcessManagementPrime(IonIntegrationTestCase):
    def setUp(self):
        self._start_container()
        self.container.start_rel_from_url('res/deploy/r2deploy.yml')

        self.dataset_management      = DatasetManagementServiceClient()
        self.resource_registry       = self.container.resource_registry
        self.pubsub_management       = PubsubManagementServiceClient()
        self.data_process_management = DataProcessManagementServiceClient()
        self.data_product_management = DataProductManagementServiceClient()

        self.validators = 0


    def lc_preload(self):
        config = DotDict()
        config.op = 'load'
        config.scenario = 'BASE,LC_TEST'
        config.categories = 'ParameterFunctions,ParameterDefs,ParameterDictionary'
        config.path = 'res/preload/r2_ioc'
        
        self.container.spawn_process('preload','ion.processes.bootstrap.ion_loader','IONLoader', config)

    def ctd_plain_input_data_product(self):
        available_fields = [
                'internal_timestamp', 
                'temp', 
                'preferred_timestamp', 
                'time', 
                'port_timestamp', 
                'quality_flag', 
                'lat', 
                'conductivity', 
                'driver_timestamp', 
                'lon', 
                'pressure']
        return self.make_data_product('ctd_parsed_param_dict', 'ctd plain test', available_fields)


    def ctd_plain_salinity(self):
        available_fields = [
                'internal_timestamp', 
                'preferred_timestamp', 
                'time', 
                'port_timestamp', 
                'quality_flag', 
                'lat', 
                'driver_timestamp', 
                'lon', 
                'salinity']
        return self.make_data_product('ctd_parsed_param_dict', 'salinity', available_fields)

    def ctd_plain_density(self):
        available_fields = [
                'internal_timestamp', 
                'preferred_timestamp', 
                'time', 
                'port_timestamp', 
                'quality_flag', 
                'lat', 
                'driver_timestamp', 
                'lon', 
                'density']
        return self.make_data_product('ctd_parsed_param_dict', 'density', available_fields)

    def ctd_instrument_data_product(self):
        available_fields = [
                'internal_timestamp', 
                'temp', 
                'preferred_timestamp', 
                'time', 
                'port_timestamp', 
                'quality_flag', 
                'lat', 
                'conductivity', 
                'driver_timestamp', 
                'lon', 
                'pressure']
        return self.make_data_product('ctd_LC_TEST', 'ctd instrument', available_fields)

    def make_data_product(self, pdict_name, dp_name, available_fields=[]):
        pdict_id = self.dataset_management.read_parameter_dictionary_by_name(pdict_name, id_only=True)
        stream_def_id = self.pubsub_management.create_stream_definition('%s stream_def' % dp_name, parameter_dictionary_id=pdict_id, available_fields=available_fields or None)
        self.addCleanup(self.pubsub_management.delete_stream_definition, stream_def_id)
        tdom, sdom = time_series_domain()
        tdom = tdom.dump()
        sdom = sdom.dump()
        dp_obj = DataProduct(name=dp_name)
        dp_obj.temporal_domain = tdom
        dp_obj.spatial_domain = sdom
        data_product_id = self.data_product_management.create_data_product(dp_obj, stream_definition_id=stream_def_id)
        self.addCleanup(self.data_product_management.delete_data_product, data_product_id)
        return data_product_id

    def google_dt_data_product(self):
        return self.make_data_product('google_dt', 'visual')

    def ctd_derived_data_product(self):
        return self.make_data_product('ctd_LC_TEST', 'ctd derived products')
        
    def publish_to_plain_data_product(self, data_product_id):
        stream_ids, _ = self.resource_registry.find_objects(subject=data_product_id, predicate=PRED.hasStream, id_only=True)
        self.assertTrue(len(stream_ids))
        stream_id = stream_ids.pop()
        route = self.pubsub_management.read_stream_route(stream_id)
        stream_definition = self.pubsub_management.read_stream_definition(stream_id=stream_id)
        stream_def_id = stream_definition._id
        publisher = StandaloneStreamPublisher(stream_id, route)
        rdt = RecordDictionaryTool(stream_definition_id=stream_def_id)
        now = time.time()
        ntp_now = now + 2208988800 # Do not use in production, this is a loose translation
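        # 2208988800 s is the offset between the NTP epoch (1900-01-01) and the Unix epoch (1970-01-01)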

        rdt['internal_timestamp'] = [ntp_now]
        rdt['temp'] = [20.0]
        rdt['preferred_timestamp'] = ['driver_timestamp']
        rdt['time'] = [ntp_now]
        rdt['port_timestamp'] = [ntp_now]
        rdt['quality_flag'] = [None]
        rdt['lat'] = [45]
        rdt['conductivity'] = [4.2914]
        rdt['driver_timestamp'] = [ntp_now]
        rdt['lon'] = [-71]
        rdt['pressure'] = [3.068]

        granule = rdt.to_granule()
        publisher.publish(granule)

    def publish_to_data_product(self, data_product_id):
        stream_ids, _ = self.resource_registry.find_objects(subject=data_product_id, predicate=PRED.hasStream, id_only=True)
        self.assertTrue(len(stream_ids))
        stream_id = stream_ids.pop()
        route = self.pubsub_management.read_stream_route(stream_id)
        stream_definition = self.pubsub_management.read_stream_definition(stream_id=stream_id)
        stream_def_id = stream_definition._id
        publisher = StandaloneStreamPublisher(stream_id, route)
        rdt = RecordDictionaryTool(stream_definition_id=stream_def_id)
        now = time.time()
        ntp_now = now + 2208988800 # Do not use in production, this is a loose translation

        rdt['internal_timestamp'] = [ntp_now]
        rdt['temp'] = [300000]
        rdt['preferred_timestamp'] = ['driver_timestamp']
        rdt['time'] = [ntp_now]
        rdt['port_timestamp'] = [ntp_now]
        rdt['quality_flag'] = [None]
        rdt['lat'] = [45]
        rdt['conductivity'] = [4341400]
        rdt['driver_timestamp'] = [ntp_now]
        rdt['lon'] = [-71]
        rdt['pressure'] = [256.8]

        granule = rdt.to_granule()
        publisher.publish(granule)

    def setup_subscriber(self, data_product_id, callback):
        stream_ids, _ = self.resource_registry.find_objects(subject=data_product_id, predicate=PRED.hasStream, id_only=True)
        self.assertTrue(len(stream_ids))
        stream_id = stream_ids.pop()

        sub_id = self.pubsub_management.create_subscription('validator_%s'%self.validators, stream_ids=[stream_id])
        self.addCleanup(self.pubsub_management.delete_subscription, sub_id)


        self.pubsub_management.activate_subscription(sub_id)
        self.addCleanup(self.pubsub_management.deactivate_subscription, sub_id)

        subscriber = StandaloneStreamSubscriber('validator_%s' % self.validators, callback=callback)
        subscriber.start()
        self.addCleanup(subscriber.stop)
        self.validators+=1

        return subscriber

    def create_density_transform_function(self):
        tf = TransformFunction(name='ctdbp_l2_density', module='ion.processes.data.transforms.ctdbp.ctdbp_L2_density', cls='CTDBP_DensityTransformAlgorithm')
        tf_id = self.data_process_management.create_transform_function(tf)
        self.addCleanup(self.data_process_management.delete_transform_function, tf_id)
        return tf_id

    def create_salinity_transform_function(self):
        tf = TransformFunction(name='ctdbp_l2_salinity', module='ion.processes.data.transforms.ctdbp.ctdbp_L2_salinity', cls='CTDBP_SalinityTransformAlgorithm')
        tf_id = self.data_process_management.create_transform_function(tf)
        self.addCleanup(self.data_process_management.delete_transform_function, tf_id)
        return tf_id

   
    @attr('LOCOINT')
    @unittest.skipIf(os.getenv('CEI_LAUNCH_TEST', False), 'Skip test while in CEI LAUNCH mode')
    def test_data_process_prime(self):
        self.lc_preload()
        instrument_data_product_id = self.ctd_instrument_data_product()
        derived_data_product_id = self.ctd_derived_data_product()

        data_process_id = self.data_process_management.create_data_process2(in_data_product_ids=[instrument_data_product_id], out_data_product_ids=[derived_data_product_id])
        self.addCleanup(self.data_process_management.delete_data_process2, data_process_id)

        self.data_process_management.activate_data_process2(data_process_id)
        self.addCleanup(self.data_process_management.deactivate_data_process2, data_process_id)
    

        validated = Event()

        def validation(msg, route, stream_id):
            rdt = RecordDictionaryTool.load_from_granule(msg)

            np.testing.assert_array_almost_equal(rdt['conductivity_L1'], np.array([42.914]))
            np.testing.assert_array_almost_equal(rdt['temp_L1'], np.array([20.]))
            np.testing.assert_array_almost_equal(rdt['pressure_L1'], np.array([3.068]))
            np.testing.assert_array_almost_equal(rdt['density'], np.array([1021.7144739593881]))
            np.testing.assert_array_almost_equal(rdt['salinity'], np.array([30.935132729668283]))

            validated.set()

        self.setup_subscriber(derived_data_product_id, callback=validation)
        self.publish_to_data_product(instrument_data_product_id)
        
        self.assertTrue(validated.wait(10))
        
    def test_older_transform(self):
        input_data_product_id = self.ctd_plain_input_data_product()

        conductivity_data_product_id = self.make_data_product('ctd_parsed_param_dict', 'conductivity_product', ['time', 'conductivity'])
        conductivity_stream_def_id = self.get_named_stream_def('conductivity_product stream_def')
        temperature_data_product_id = self.make_data_product('ctd_parsed_param_dict', 'temperature_product', ['time', 'temp'])
        temperature_stream_def_id = self.get_named_stream_def('temperature_product stream_def')
        pressure_data_product_id = self.make_data_product('ctd_parsed_param_dict', 'pressure_product', ['time', 'pressure'])
        pressure_stream_def_id = self.get_named_stream_def('pressure_product stream_def')

        dpd = DataProcessDefinition(name='ctdL0')
        dpd.data_process_type = DataProcessTypeEnum.TRANSFORM
        dpd.module = 'ion.processes.data.transforms.ctd.ctd_L0_all'
        dpd.class_name = 'ctd_L0_all'
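        # ctd_L0_all splits the parsed CTD input into conductivity, temperature, and pressure outputs;
        # each output stream definition is bound to this definition by name below.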

        data_process_definition_id = self.data_process_management.create_data_process_definition(dpd)
        self.addCleanup(self.data_process_management.delete_data_process_definition, data_process_definition_id)

        self.data_process_management.assign_stream_definition_to_data_process_definition(conductivity_stream_def_id, data_process_definition_id, binding='conductivity')
        self.data_process_management.assign_stream_definition_to_data_process_definition(temperature_stream_def_id, data_process_definition_id, binding='temperature')
        self.data_process_management.assign_stream_definition_to_data_process_definition(pressure_stream_def_id, data_process_definition_id, binding='pressure')

        data_process_id = self.data_process_management.create_data_process2(data_process_definition_id=data_process_definition_id, in_data_product_ids=[input_data_product_id], out_data_product_ids=[conductivity_data_product_id, temperature_data_product_id, pressure_data_product_id])
        self.addCleanup(self.data_process_management.delete_data_process2, data_process_id)

        self.data_process_management.activate_data_process2(data_process_id)
        self.addCleanup(self.data_process_management.deactivate_data_process2, data_process_id)

        conductivity_validated = Event()
        def validate_conductivity(msg, route, stream_id):
            rdt = RecordDictionaryTool.load_from_granule(msg)
            np.testing.assert_array_almost_equal(rdt['conductivity'], np.array([4.2914]))
            conductivity_validated.set()

        self.setup_subscriber(conductivity_data_product_id, callback=validate_conductivity)
        temperature_validated = Event()
        def validate_temperature(msg, route, stream_id):
            rdt = RecordDictionaryTool.load_from_granule(msg)
            np.testing.assert_array_almost_equal(rdt['temp'], np.array([20.0]))
            temperature_validated.set()
        self.setup_subscriber(temperature_data_product_id, callback=validate_temperature)
        pressure_validated = Event()
        def validate_pressure(msg, route, stream_id):
            rdt = RecordDictionaryTool.load_from_granule(msg)
            np.testing.assert_array_almost_equal(rdt['pressure'], np.array([3.068]))
            pressure_validated.set()
        self.setup_subscriber(pressure_data_product_id, callback=validate_pressure)
        self.publish_to_plain_data_product(input_data_product_id)
        self.assertTrue(conductivity_validated.wait(10))
        self.assertTrue(temperature_validated.wait(10))
        self.assertTrue(pressure_validated.wait(10))



    def get_named_stream_def(self, name):
        stream_def_ids, _ = self.resource_registry.find_resources(name=name, restype=RT.StreamDefinition, id_only=True)
        return stream_def_ids[0]

    def test_actors(self):
        input_data_product_id = self.ctd_plain_input_data_product()
        output_data_product_id = self.ctd_plain_density()
        actor = self.create_density_transform_function()
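        # routes maps each input data product to the output products it feeds and the transform function (actor) that produces each output.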
        route = {input_data_product_id: {output_data_product_id: actor}}
        config = DotDict()
        config.process.routes = route
        config.process.params.lat = 45.
        config.process.params.lon = -71.

        data_process_id = self.data_process_management.create_data_process2(in_data_product_ids=[input_data_product_id], out_data_product_ids=[output_data_product_id], configuration=config)
        self.addCleanup(self.data_process_management.delete_data_process2, data_process_id)

        self.data_process_management.activate_data_process2(data_process_id)
        self.addCleanup(self.data_process_management.deactivate_data_process2, data_process_id)

        validated = Event()
        def validation(msg, route, stream_id):
            rdt = RecordDictionaryTool.load_from_granule(msg)
            # The published value is a double, but the value coming back is a float32, so some precision is lost; it should still agree to 4 decimal places.
            np.testing.assert_array_almost_equal(rdt['density'], np.array([1021.6839775385847]), decimal=4) 
            validated.set()

        self.setup_subscriber(output_data_product_id, callback=validation)

        self.publish_to_plain_data_product(input_data_product_id)
        self.assertTrue(validated.wait(10))

    def test_multi_in_out(self):
        input1 = self.ctd_plain_input_data_product()
        input2 = self.make_data_product('ctd_parsed_param_dict', 'input2')

        density_dp_id = self.ctd_plain_density()
        salinity_dp_id = self.ctd_plain_salinity()

        density_actor = self.create_density_transform_function()
        salinity_actor = self.create_salinity_transform_function()

        routes = {
            input1 : {
                density_dp_id : density_actor,
                salinity_dp_id : salinity_actor
                },
            input2 : {
                density_dp_id : density_actor
                }
            }
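        # input1 feeds both the density and salinity transforms; input2 feeds density only.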

        config = DotDict()
        config.process.routes = routes
        config.process.params.lat = 45.
        config.process.params.lon = -71.


        data_process_id = self.data_process_management.create_data_process2(in_data_product_ids=[input1, input2], out_data_product_ids=[density_dp_id, salinity_dp_id], configuration=config)
        self.addCleanup(self.data_process_management.delete_data_process2, data_process_id)

        self.data_process_management.activate_data_process2(data_process_id)
        self.addCleanup(self.data_process_management.deactivate_data_process2, data_process_id)

        density_validated = Event()
        salinity_validated = Event()

        def density_validation(msg, route, stream_id):
            rdt = RecordDictionaryTool.load_from_granule(msg)
            np.testing.assert_array_almost_equal(rdt['density'], np.array([1021.6839775385847]), decimal=4) 
            density_validated.set()

        def salinity_validation(msg, route, stream_id):
            rdt = RecordDictionaryTool.load_from_granule(msg)
            np.testing.assert_array_almost_equal(rdt['salinity'], np.array([30.93513240786831]), decimal=4) 
            salinity_validated.set()

        self.setup_subscriber(density_dp_id, callback=density_validation)
        self.setup_subscriber(salinity_dp_id, callback=salinity_validation)
        
        self.publish_to_plain_data_product(input1)

        self.assertTrue(density_validated.wait(10))
        self.assertTrue(salinity_validated.wait(10))
        density_validated.clear()
        salinity_validated.clear()


        self.publish_to_plain_data_product(input2)
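        # Only the density route is wired to input2, so the salinity validator should not fire.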
        self.assertTrue(density_validated.wait(10))
        self.assertFalse(salinity_validated.wait(0.75))
        density_validated.clear()
        salinity_validated.clear()



    def test_visual_transform(self):
        input_data_product_id = self.ctd_plain_input_data_product()
        output_data_product_id = self.google_dt_data_product()
        dpd = DataProcessDefinition(name='visual transform')
        dpd.data_process_type = DataProcessTypeEnum.TRANSFORM
        dpd.module = 'ion.processes.data.transforms.viz.google_dt'
        dpd.class_name = 'VizTransformGoogleDT'

        #--------------------------------------------------------------------------------
        # Walk before we base jump
        #--------------------------------------------------------------------------------

        data_process_definition_id = self.data_process_management.create_data_process_definition(dpd)
        self.addCleanup(self.data_process_management.delete_data_process_definition, data_process_definition_id)
    
        data_process_id = self.data_process_management.create_data_process2(data_process_definition_id=data_process_definition_id, in_data_product_ids=[input_data_product_id], out_data_product_ids=[output_data_product_id])
        self.addCleanup(self.data_process_management.delete_data_process2,data_process_id)


        self.data_process_management.activate_data_process2(data_process_id)
        self.addCleanup(self.data_process_management.deactivate_data_process2, data_process_id)

        validated = Event()
        def validation(msg, route, stream_id):
            rdt = RecordDictionaryTool.load_from_granule(msg)
            self.assertTrue(rdt['google_dt_components'] is not None)
            validated.set()

        self.setup_subscriber(output_data_product_id, callback=validation)

        self.publish_to_plain_data_product(input_data_product_id)
        self.assertTrue(validated.wait(10))


class TestDMEnd2End(IonIntegrationTestCase):
    def setUp(self): # Love the non pep-8 convention
        self._start_container()

        self.container.start_rel_from_url('res/deploy/r2deploy.yml')

        self.process_dispatcher   = ProcessDispatcherServiceClient()
        self.pubsub_management    = PubsubManagementServiceClient()
        self.resource_registry    = ResourceRegistryServiceClient()
        self.dataset_management   = DatasetManagementServiceClient()
        self.ingestion_management = IngestionManagementServiceClient()
        self.data_retriever       = DataRetrieverServiceClient()
        self.pids                 = []
        self.event                = Event()
        self.exchange_space_name  = 'test_granules'
        self.exchange_point_name  = 'science_data'       
        self.i                    = 0

        self.purge_queues()
        self.queue_buffer         = []
        self.streams = []
        self.addCleanup(self.stop_all_ingestion)

    def purge_queues(self):
        xn = self.container.ex_manager.create_xn_queue('science_granule_ingestion')
        xn.purge()
        

    def tearDown(self):
        self.purge_queues()
        for pid in self.pids:
            self.container.proc_manager.terminate_process(pid)
        IngestionManagementIntTest.clean_subscriptions()
        for queue in self.queue_buffer:
            if isinstance(queue, ExchangeNameQueue):
                queue.delete()
            elif isinstance(queue, str):
                xn = self.container.ex_manager.create_xn_queue(queue)
                xn.delete()

    #--------------------------------------------------------------------------------
    # Helper/Utility methods
    #--------------------------------------------------------------------------------
        
    def create_dataset(self, parameter_dict_id=''):
        '''
        Creates a time-series dataset
        '''
        tdom, sdom = time_series_domain()
        sdom = sdom.dump()
        tdom = tdom.dump()
        if not parameter_dict_id:
            parameter_dict_id = self.dataset_management.read_parameter_dictionary_by_name('ctd_parsed_param_dict', id_only=True)

        dataset_id = self.dataset_management.create_dataset('test_dataset_%i'%self.i, parameter_dictionary_id=parameter_dict_id, spatial_domain=sdom, temporal_domain=tdom)
        return dataset_id
    
    def get_datastore(self, dataset_id):
        '''
        Gets an instance of the datastore
            This method is primarily a workaround for a bug where integration tests running in multiple containers can
            delete a CouchDB datastore while the other containers remain unaware of the datastore's new state.
        '''
        dataset = self.dataset_management.read_dataset(dataset_id)
        datastore_name = dataset.datastore_name
        datastore = self.container.datastore_manager.get_datastore(datastore_name, DataStore.DS_PROFILE.SCIDATA)
        return datastore
    
    def get_ingestion_config(self):
        '''
        Grab the ingestion configuration from the resource registry
        '''
        # The ingestion configuration should have been created by the bootstrap service 
        # which is configured through r2deploy.yml

        ingest_configs, _  = self.resource_registry.find_resources(restype=RT.IngestionConfiguration,id_only=True)
        return ingest_configs[0]

    def launch_producer(self, stream_id=''):
        '''
        Launch the producer
        '''

        pid = self.container.spawn_process('better_data_producer', 'ion.processes.data.example_data_producer', 'BetterDataProducer', {'process':{'stream_id':stream_id}})

        self.pids.append(pid)

    def make_simple_dataset(self):
        '''
        Makes a stream, a stream definition and a dataset, the essentials for most of these tests
        '''
        pdict_id             = self.dataset_management.read_parameter_dictionary_by_name('ctd_parsed_param_dict', id_only=True)
        stream_def_id        = self.pubsub_management.create_stream_definition('ctd data', parameter_dictionary_id=pdict_id)
        stream_id, route     = self.pubsub_management.create_stream('ctd stream %i' % self.i, 'xp1', stream_definition_id=stream_def_id)

        dataset_id = self.create_dataset(pdict_id)

        self.get_datastore(dataset_id)
        self.i += 1
        return stream_id, route, stream_def_id, dataset_id

    def publish_hifi(self,stream_id,stream_route,offset=0):
        '''
        Publish deterministic data
        '''

        pub = StandaloneStreamPublisher(stream_id, stream_route)

        stream_def = self.pubsub_management.read_stream_definition(stream_id=stream_id)
        stream_def_id = stream_def._id
        rdt = RecordDictionaryTool(stream_definition_id=stream_def_id)
        rdt['time'] = np.arange(10) + (offset * 10)
        rdt['temp'] = np.arange(10) + (offset * 10)
        pub.publish(rdt.to_granule())

    def publish_fake_data(self,stream_id, route):
        '''
        Make four granules
        '''
        for i in xrange(4):
            self.publish_hifi(stream_id,route,i)

    def start_ingestion(self, stream_id, dataset_id):
        '''
        Starts ingestion/persistence for a given dataset
        '''
        ingest_config_id = self.get_ingestion_config()
        self.ingestion_management.persist_data_stream(stream_id=stream_id, ingestion_configuration_id=ingest_config_id, dataset_id=dataset_id)
    
    def stop_ingestion(self, stream_id):
        ingest_config_id = self.get_ingestion_config()
        self.ingestion_management.unpersist_data_stream(stream_id=stream_id, ingestion_configuration_id=ingest_config_id)
        
    def stop_all_ingestion(self):
        try:
            for sid in self.streams:
                self.stop_ingestion(sid)
        except Exception:
            pass  # best-effort cleanup; some streams may already be unpersisted

    def validate_granule_subscription(self, msg, route, stream_id):
        '''
        Validation for granule format
        '''
        if msg == {}:
            return
        rdt = RecordDictionaryTool.load_from_granule(msg)
        log.info('%s', rdt.pretty_print())
        self.assertIsInstance(msg,Granule,'Message is improperly formatted. (%s)' % type(msg))
        self.event.set()

    def wait_until_we_have_enough_granules(self, dataset_id='',data_size=40):
        '''
        Loops until there is a sufficient amount of data in the dataset
        '''
        done = False
        with gevent.Timeout(40):
            while not done:
                extents = self.dataset_management.dataset_extents(dataset_id, 'time')[0]
                granule = self.data_retriever.retrieve_last_data_points(dataset_id, 1)
                rdt     = RecordDictionaryTool.load_from_granule(granule)
                if rdt['time'] and rdt['time'][0] != rdt._pdict.get_context('time').fill_value and extents >= data_size:
                    done = True
                else:
                    gevent.sleep(0.2)




    #--------------------------------------------------------------------------------
    # Test Methods
    #--------------------------------------------------------------------------------

    @attr('SMOKE') 
    def test_dm_end_2_end(self):
        #--------------------------------------------------------------------------------
        # Set up a stream and have a mock instrument (producer) send data
        #--------------------------------------------------------------------------------
        self.event.clear()

        # Get a precompiled parameter dictionary with basic ctd fields
        pdict_id = self.dataset_management.read_parameter_dictionary_by_name('ctd_parsed_param_dict',id_only=True)
        context_ids = self.dataset_management.read_parameter_contexts(pdict_id, id_only=True)

        # Add a field that supports binary data input.
        bin_context = ParameterContext('binary',  param_type=ArrayType())
        context_ids.append(self.dataset_management.create_parameter_context('binary', bin_context.dump()))
        # Add another field that supports dictionary elements.
        rec_context = ParameterContext('records', param_type=RecordType())
        context_ids.append(self.dataset_management.create_parameter_context('records', rec_context.dump()))

        pdict_id = self.dataset_management.create_parameter_dictionary('replay_pdict', parameter_context_ids=context_ids, temporal_context='time')
        
        stream_definition = self.pubsub_management.create_stream_definition('ctd data', parameter_dictionary_id=pdict_id)


        stream_id, route = self.pubsub_management.create_stream('producer', exchange_point=self.exchange_point_name, stream_definition_id=stream_definition)




        #--------------------------------------------------------------------------------
        # Start persisting the data on the stream 
        # - Get the ingestion configuration from the resource registry
        # - Create the dataset
        # - call persist_data_stream to set up the subscription for the ingestion workers
        #   on the specified stream, which causes the data to be persisted
        #--------------------------------------------------------------------------------

        ingest_config_id = self.get_ingestion_config()
        dataset_id = self.create_dataset(pdict_id)
        self.ingestion_management.persist_data_stream(stream_id=stream_id, ingestion_configuration_id=ingest_config_id, dataset_id=dataset_id)

        #--------------------------------------------------------------------------------
        # Now the granules are ingesting and persisted
        #--------------------------------------------------------------------------------

        self.launch_producer(stream_id)
        self.wait_until_we_have_enough_granules(dataset_id,40)
        
        #--------------------------------------------------------------------------------
        # Now get the data in one chunk using an RPC call to the data retriever
        #--------------------------------------------------------------------------------
        
        replay_data = self.data_retriever.retrieve(dataset_id)
        self.assertIsInstance(replay_data, Granule)
        rdt = RecordDictionaryTool.load_from_granule(replay_data)
        self.assertTrue((rdt['time'][:10] == np.arange(10)).all(),'%s' % rdt['time'][:])
        self.assertTrue((rdt['binary'][:10] == np.array(['hi']*10, dtype='object')).all())

        
        #--------------------------------------------------------------------------------
        # Now to try the streamed approach
        #--------------------------------------------------------------------------------
        replay_stream_id, replay_route = self.pubsub_management.create_stream('replay_out', exchange_point=self.exchange_point_name, stream_definition_id=stream_definition)
        self.replay_id, process_id =  self.data_retriever.define_replay(dataset_id=dataset_id, stream_id=replay_stream_id)
        log.info('Process ID: %s', process_id)

        replay_client = ReplayClient(process_id)

    
        #--------------------------------------------------------------------------------
        # Create the listening endpoint for the retriever to talk to
        #--------------------------------------------------------------------------------
        xp = self.container.ex_manager.create_xp(self.exchange_point_name)
        subscriber = StandaloneStreamSubscriber(self.exchange_space_name, self.validate_granule_subscription)
        self.queue_buffer.append(self.exchange_space_name)
        subscriber.start()
        subscriber.xn.bind(replay_route.routing_key, xp)

        self.data_retriever.start_replay_agent(self.replay_id)

        self.assertTrue(replay_client.await_agent_ready(5), 'The process never launched')
        replay_client.start_replay()
        
        self.assertTrue(self.event.wait(10))
        subscriber.stop()

        self.data_retriever.cancel_replay_agent(self.replay_id)


        #--------------------------------------------------------------------------------
        # Test the slicing capabilities
        #--------------------------------------------------------------------------------

        granule = self.data_retriever.retrieve(dataset_id=dataset_id, query={'tdoa':slice(0,5)})
        rdt = RecordDictionaryTool.load_from_granule(granule)
        b = rdt['time'] == np.arange(5)
        self.assertTrue(b.all() if not isinstance(b,bool) else b)
        self.streams.append(stream_id)
        self.stop_ingestion(stream_id)


    def test_coverage_transform(self):
        ph = ParameterHelper(self.dataset_management, self.addCleanup)
        pdict_id = ph.create_parsed()
        stream_def_id = self.pubsub_management.create_stream_definition('ctd parsed', parameter_dictionary_id=pdict_id)
        self.addCleanup(self.pubsub_management.delete_stream_definition, stream_def_id)

        stream_id, route = self.pubsub_management.create_stream('example', exchange_point=self.exchange_point_name, stream_definition_id=stream_def_id)
        self.addCleanup(self.pubsub_management.delete_stream, stream_id)

        ingestion_config_id = self.get_ingestion_config()
        dataset_id = self.create_dataset(pdict_id)

        self.ingestion_management.persist_data_stream(stream_id=stream_id, ingestion_configuration_id=ingestion_config_id, dataset_id=dataset_id)
        self.addCleanup(self.ingestion_management.unpersist_data_stream, stream_id, ingestion_config_id)
        publisher = StandaloneStreamPublisher(stream_id, route)
        
        rdt = ph.get_rdt(stream_def_id)
        ph.fill_parsed_rdt(rdt)

        dataset_monitor = DatasetMonitor(dataset_id)
        self.addCleanup(dataset_monitor.stop)

        publisher.publish(rdt.to_granule())
        self.assertTrue(dataset_monitor.event.wait(30))

        replay_granule = self.data_retriever.retrieve(dataset_id)
        rdt_out = RecordDictionaryTool.load_from_granule(replay_granule)

        np.testing.assert_array_almost_equal(rdt_out['time'], rdt['time'])
        np.testing.assert_array_almost_equal(rdt_out['temp'], rdt['temp'])

        np.testing.assert_array_almost_equal(rdt_out['conductivity_L1'], np.array([42.914]))
        np.testing.assert_array_almost_equal(rdt_out['temp_L1'], np.array([20.]))
        np.testing.assert_array_almost_equal(rdt_out['pressure_L1'], np.array([3.068]))
        np.testing.assert_array_almost_equal(rdt_out['density'], np.array([1021.7144739593881]))
        np.testing.assert_array_almost_equal(rdt_out['salinity'], np.array([30.935132729668283]))


    def test_qc_events(self):
        ph = ParameterHelper(self.dataset_management, self.addCleanup)
        pdict_id = ph.create_qc_pdict()
        stream_def_id = self.pubsub_management.create_stream_definition('qc stream def', parameter_dictionary_id=pdict_id)
        self.addCleanup(self.pubsub_management.delete_stream_definition, stream_def_id)

        stream_id, route = self.pubsub_management.create_stream('qc stream', exchange_point=self.exchange_point_name, stream_definition_id=stream_def_id)
        self.addCleanup(self.pubsub_management.delete_stream, stream_id)

        ingestion_config_id = self.get_ingestion_config()
        dataset_id = self.create_dataset(pdict_id)
        config = DotDict()

        self.ingestion_management.persist_data_stream(stream_id=stream_id, ingestion_configuration_id=ingestion_config_id, dataset_id=dataset_id, config=config)
        self.addCleanup(self.ingestion_management.unpersist_data_stream, stream_id, ingestion_config_id)

        publisher = StandaloneStreamPublisher(stream_id, route)
        rdt = RecordDictionaryTool(stream_definition_id=stream_def_id)
        rdt['time'] = np.arange(10)
        rdt['temp'] = np.arange(10) * 3
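        # temp ramps as 3 * t; the verification below expects a ParameterQCEvent flagging 'temp_qc' at temporal value 7.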

        verified = Event()
        def verification(event, *args, **kwargs):
            self.assertEquals(event.qc_parameter, 'temp_qc')
            self.assertEquals(event.temporal_value, 7)
            verified.set()

        es = EventSubscriber(event_type=OT.ParameterQCEvent, origin=dataset_id, callback=verification, auto_delete=True)
        es.start()
        self.addCleanup(es.stop)

        publisher.publish(rdt.to_granule())
        self.assertTrue(verified.wait(10))



    def test_lookup_values_ingest_replay(self):
        ph = ParameterHelper(self.dataset_management, self.addCleanup)
        pdict_id = ph.create_lookups()
        stream_def_id = self.pubsub_management.create_stream_definition('lookups', parameter_dictionary_id=pdict_id)
        self.addCleanup(self.pubsub_management.delete_stream_definition, stream_def_id)

        stream_id, route = self.pubsub_management.create_stream('example', exchange_point=self.exchange_point_name, stream_definition_id=stream_def_id)
        self.addCleanup(self.pubsub_management.delete_stream, stream_id)

        ingestion_config_id = self.get_ingestion_config()
        dataset_id = self.create_dataset(pdict_id)
        config = DotDict()
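        # lookup_docs names the stored-value documents used for calibration coefficients; 'test1' is populated below
        # while 'test2' stays empty, so 'offset_b' initially remains at its fill value.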
        config.process.lookup_docs = ['test1', 'test2']
        self.ingestion_management.persist_data_stream(stream_id=stream_id, ingestion_configuration_id=ingestion_config_id, dataset_id=dataset_id, config=config)
        self.addCleanup(self.ingestion_management.unpersist_data_stream, stream_id, ingestion_config_id)

        stored_value_manager = StoredValueManager(self.container)
        stored_value_manager.stored_value_cas('test1',{'offset_a':10.0, 'offset_b':13.1})
        
        publisher = StandaloneStreamPublisher(stream_id, route)
        rdt = RecordDictionaryTool(stream_definition_id=stream_def_id)
        rdt['time'] = np.arange(20)
        rdt['temp'] = [20.0] * 20

        granule = rdt.to_granule()

        dataset_monitor = DatasetMonitor(dataset_id)
        self.addCleanup(dataset_monitor.stop)

        publisher.publish(granule)
        self.assertTrue(dataset_monitor.event.wait(30))
        
        replay_granule = self.data_retriever.retrieve(dataset_id)
        rdt_out = RecordDictionaryTool.load_from_granule(replay_granule)

        np.testing.assert_array_almost_equal(rdt_out['time'], np.arange(20))
        np.testing.assert_array_almost_equal(rdt_out['temp'], np.array([20.] * 20))
        np.testing.assert_array_almost_equal(rdt_out['calibrated'], np.array([30.]*20))
        np.testing.assert_array_equal(rdt_out['offset_b'], np.array([rdt_out.fill_value('offset_b')] * 20))

        rdt = RecordDictionaryTool(stream_definition_id=stream_def_id)
        rdt['time'] = np.arange(20,40)
        rdt['temp'] = [20.0] * 20
        granule = rdt.to_granule()

        dataset_monitor.event.clear()

        stored_value_manager.stored_value_cas('test1',{'offset_a':20.0})
        stored_value_manager.stored_value_cas('coefficient_document',{'offset_b':10.0})
        gevent.sleep(2)

        publisher.publish(granule)
        self.assertTrue(dataset_monitor.event.wait(30))

        replay_granule = self.data_retriever.retrieve(dataset_id)
        rdt_out = RecordDictionaryTool.load_from_granule(replay_granule)

        np.testing.assert_array_almost_equal(rdt_out['time'], np.arange(40))
        np.testing.assert_array_almost_equal(rdt_out['temp'], np.array([20.] * 20 + [20.] * 20))
        np.testing.assert_array_equal(rdt_out['offset_b'], np.array([10.] * 40))
        np.testing.assert_array_almost_equal(rdt_out['calibrated'], np.array([30.]*20 + [40.]*20))
        np.testing.assert_array_almost_equal(rdt_out['calibrated_b'], np.array([40.] * 20 + [50.] * 20))



    @unittest.skip('Doesnt work')
    @attr('LOCOINT')
    @unittest.skipIf(os.getenv('CEI_LAUNCH_TEST', False), 'Skip test while in CEI LAUNCH mode')
    def test_replay_pause(self):
        # Get a precompiled parameter dictionary with basic ctd fields
        pdict_id = self.dataset_management.read_parameter_dictionary_by_name('ctd_parsed_param_dict',id_only=True)
        context_ids = self.dataset_management.read_parameter_contexts(pdict_id, id_only=True)

        # Add a field that supports binary data input.
        bin_context = ParameterContext('binary',  param_type=ArrayType())
        context_ids.append(self.dataset_management.create_parameter_context('binary', bin_context.dump()))
        # Add another field that supports dictionary elements.
        rec_context = ParameterContext('records', param_type=RecordType())
        context_ids.append(self.dataset_management.create_parameter_context('records', rec_context.dump()))

        pdict_id = self.dataset_management.create_parameter_dictionary('replay_pdict', parameter_context_ids=context_ids, temporal_context='time')
        

        stream_def_id = self.pubsub_management.create_stream_definition('replay_stream', parameter_dictionary_id=pdict_id)
        replay_stream, replay_route = self.pubsub_management.create_stream('replay', 'xp1', stream_definition_id=stream_def_id)
        dataset_id = self.create_dataset(pdict_id)
        scov = DatasetManagementService._get_simplex_coverage(dataset_id)

        bb = CoverageCraft(scov)
        bb.rdt['time'] = np.arange(100)
        bb.rdt['temp'] = np.random.random(100) + 30
        bb.sync_with_granule()

        DatasetManagementService._persist_coverage(dataset_id, bb.coverage) # This invalidates it for multi-host configurations
        # Set up the subscriber to verify the data
        subscriber = StandaloneStreamSubscriber(self.exchange_space_name, self.validate_granule_subscription)
        xp = self.container.ex_manager.create_xp('xp1')
        self.queue_buffer.append(self.exchange_space_name)
        subscriber.start()
        subscriber.xn.bind(replay_route.routing_key, xp)

        # Set up the replay agent and the client wrapper

        # 1) Define the Replay (dataset and stream to publish on)
        self.replay_id, process_id = self.data_retriever.define_replay(dataset_id=dataset_id, stream_id=replay_stream)
        # 2) Make a client to interact with the process (optionally provide it a process to bind with)
        replay_client = ReplayClient(process_id)
        # 3) Start the agent (launch the process)
        self.data_retriever.start_replay_agent(self.replay_id)
        # 4) Start replaying...
        replay_client.start_replay()
        
        # Wait till we get some granules
        self.assertTrue(self.event.wait(5))
        
        # We got granules, pause the replay, clear the queue and allow the process to finish consuming
        replay_client.pause_replay()
        gevent.sleep(1)
        subscriber.xn.purge()
        self.event.clear()
        
        # Make sure there are no remaining messages being consumed
        self.assertFalse(self.event.wait(1))

        # Resume the replay and wait until we start getting granules again
        replay_client.resume_replay()
        self.assertTrue(self.event.wait(5))
    
        # Stop the replay, clear the queues
        replay_client.stop_replay()
        gevent.sleep(1)
        subscriber.xn.purge()
        self.event.clear()

        # Make sure that it did indeed stop
        self.assertFalse(self.event.wait(1))

        subscriber.stop()


    def test_retrieve_and_transform(self):
        # Make a simple dataset and start ingestion, pretty standard stuff.
        ctd_stream_id, route, stream_def_id, dataset_id = self.make_simple_dataset()
        self.start_ingestion(ctd_stream_id, dataset_id)

        # Stream definition for the salinity data
        salinity_pdict_id = self.dataset_management.read_parameter_dictionary_by_name('ctd_parsed_param_dict', id_only=True)
        sal_stream_def_id = self.pubsub_management.create_stream_definition('sal data', parameter_dictionary_id=salinity_pdict_id)


        rdt = RecordDictionaryTool(stream_definition_id=stream_def_id)
        rdt['time'] = np.arange(10)
        rdt['temp'] = np.random.randn(10) * 10 + 30
        rdt['conductivity'] = np.random.randn(10) * 2 + 10
        rdt['pressure'] = np.random.randn(10) * 1 + 12

        publisher = StandaloneStreamPublisher(ctd_stream_id, route)
        publisher.publish(rdt.to_granule())

        rdt['time'] = np.arange(10,20)

        publisher.publish(rdt.to_granule())


        self.wait_until_we_have_enough_granules(dataset_id, 20)

        granule = self.data_retriever.retrieve(dataset_id, 
                                             None,
                                             None, 
                                             'ion.processes.data.transforms.ctd.ctd_L2_salinity',
                                             'CTDL2SalinityTransformAlgorithm', 
                                             kwargs=dict(params=sal_stream_def_id))
        rdt = RecordDictionaryTool.load_from_granule(granule)
        for i in rdt['salinity']:
            self.assertNotEquals(i,0)
        self.streams.append(ctd_stream_id)
        self.stop_ingestion(ctd_stream_id)

    def test_last_granule(self):
        stream_id, route, stream_def_id, dataset_id = self.make_simple_dataset()
        self.start_ingestion(stream_id, dataset_id)

        self.publish_hifi(stream_id,route, 0)
        self.publish_hifi(stream_id,route, 1)
        

        self.wait_until_we_have_enough_granules(dataset_id,20) # I just need two


        success = False
        def verifier():
            replay_granule = self.data_retriever.retrieve_last_data_points(dataset_id, 10)
            rdt = RecordDictionaryTool.load_from_granule(replay_granule)
            comp = rdt['time'] == np.arange(10) + 10
            if not isinstance(comp, bool):
                return comp.all()
            return False
        success = poll(verifier)

        self.assertTrue(success)

        success = False
        def verify_points():
            replay_granule = self.data_retriever.retrieve_last_data_points(dataset_id, 5)
            rdt = RecordDictionaryTool.load_from_granule(replay_granule)
            comp = rdt['time'] == np.arange(15, 20)
            if not isinstance(comp, bool):
                return comp.all()
            return False
        success = poll(verify_points)

        self.assertTrue(success)
        self.streams.append(stream_id)
        self.stop_ingestion(stream_id)

    def test_replay_with_parameters(self):
        #--------------------------------------------------------------------------------
        # Create the configurations and the dataset
        #--------------------------------------------------------------------------------
        # Get a precompiled parameter dictionary with basic ctd fields
        pdict_id = self.dataset_management.read_parameter_dictionary_by_name('ctd_parsed_param_dict',id_only=True)
        context_ids = self.dataset_management.read_parameter_contexts(pdict_id, id_only=True)

        # Add a field that supports binary data input.
        bin_context = ParameterContext('binary',  param_type=ArrayType())
        context_ids.append(self.dataset_management.create_parameter_context('binary', bin_context.dump()))
        # Add another field that supports dictionary elements.
        rec_context = ParameterContext('records', param_type=RecordType())
        context_ids.append(self.dataset_management.create_parameter_context('records', rec_context.dump()))

        pdict_id = self.dataset_management.create_parameter_dictionary('replay_pdict', parameter_context_ids=context_ids, temporal_context='time')
        

        stream_def_id = self.pubsub_management.create_stream_definition('replay_stream', parameter_dictionary_id=pdict_id)
        
        stream_id, route  = self.pubsub_management.create_stream('replay_with_params', exchange_point=self.exchange_point_name, stream_definition_id=stream_def_id)
        config_id  = self.get_ingestion_config()
        dataset_id = self.create_dataset(pdict_id)
        self.ingestion_management.persist_data_stream(stream_id=stream_id, ingestion_configuration_id=config_id, dataset_id=dataset_id)

        dataset_monitor = DatasetMonitor(dataset_id)

        self.addCleanup(dataset_monitor.stop)

        self.publish_fake_data(stream_id, route)

        self.assertTrue(dataset_monitor.event.wait(30))

        query = {
            'start_time': 0 - 2208988800,
            'end_time':   20 - 2208988800,
            'stride_time' : 2,
            'parameters': ['time','temp']
        }
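        # The bounds above are offset by 2208988800 s, the difference between the NTP (1900) and Unix (1970) epochs (see test_correct_time).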
        retrieved_data = self.data_retriever.retrieve(dataset_id=dataset_id,query=query)

        rdt = RecordDictionaryTool.load_from_granule(retrieved_data)
        comp = np.arange(0,20,2) == rdt['time']
        self.assertTrue(comp.all(),'%s' % rdt.pretty_print())
        self.assertEquals(set(rdt.iterkeys()), set(['time','temp']))

        extents = self.dataset_management.dataset_extents(dataset_id=dataset_id, parameters=['time','temp'])
        self.assertTrue(extents['time']>=20)
        self.assertTrue(extents['temp']>=20)

        self.streams.append(stream_id)
        self.stop_ingestion(stream_id)
        

    def test_repersist_data(self):
        stream_id, route, stream_def_id, dataset_id = self.make_simple_dataset()
        self.start_ingestion(stream_id, dataset_id)
        self.publish_hifi(stream_id,route,0)
        self.publish_hifi(stream_id,route,1)
        self.wait_until_we_have_enough_granules(dataset_id,20)
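        # Stop persistence, then re-enable it on the same dataset; granules published afterwards should append, giving 40 samples total.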
        config_id = self.get_ingestion_config()
        self.ingestion_management.unpersist_data_stream(stream_id=stream_id,ingestion_configuration_id=config_id)
        self.ingestion_management.persist_data_stream(stream_id=stream_id,ingestion_configuration_id=config_id,dataset_id=dataset_id)
        self.publish_hifi(stream_id,route,2)
        self.publish_hifi(stream_id,route,3)
        self.wait_until_we_have_enough_granules(dataset_id,40)
        success = False
        with gevent.timeout.Timeout(5):
            while not success:

                replay_granule = self.data_retriever.retrieve(dataset_id)

                rdt = RecordDictionaryTool.load_from_granule(replay_granule)

                comp = rdt['time'] == np.arange(0,40)
                if not isinstance(comp,bool):
                    success = comp.all()
                gevent.sleep(1)

        self.assertTrue(success)
        self.streams.append(stream_id)
        self.stop_ingestion(stream_id)


    @attr('LOCOINT')
    @unittest.skipIf(os.getenv('CEI_LAUNCH_TEST', False), 'Host requires file-system access to coverage files, CEI mode does not support.')
    def test_correct_time(self):

        # There are 2208988800 seconds between Jan 1 1900 and Jan 1 1970, i.e. 
        #  the conversion factor between unix and NTP time
        unix_now = np.floor(time.time())
        ntp_now  = unix_now + 2208988800 

        unix_ago = unix_now - 20
        ntp_ago  = unix_ago + 2208988800

        stream_id, route, stream_def_id, dataset_id = self.make_simple_dataset()
        coverage = DatasetManagementService._get_simplex_coverage(dataset_id)
        coverage.insert_timesteps(20)
        coverage.set_parameter_values('time', np.arange(ntp_ago,ntp_now))
        
        temporal_bounds = self.dataset_management.dataset_temporal_bounds(dataset_id)

        self.assertTrue( np.abs(temporal_bounds[0] - unix_ago) < 2)
        self.assertTrue( np.abs(temporal_bounds[1] - unix_now) < 2)


    @attr('LOCOINT')
    @unittest.skipIf(os.getenv('CEI_LAUNCH_TEST', False), 'Host requires file-system access to coverage files, CEI mode does not support.')
    def test_empty_coverage_time(self):

        stream_id, route, stream_def_id, dataset_id = self.make_simple_dataset()
        coverage = DatasetManagementService._get_coverage(dataset_id)
        temporal_bounds = self.dataset_management.dataset_temporal_bounds(dataset_id)
        self.assertEquals([coverage.get_parameter_context('time').fill_value] *2, temporal_bounds)


    @attr('LOCOINT')
    @unittest.skipIf(os.getenv('CEI_LAUNCH_TEST', False), 'Host requires file-system access to coverage files, CEI mode does not support.')
    def test_out_of_band_retrieve(self):
        # Set up the environment
        stream_id, route, stream_def_id, dataset_id = self.make_simple_dataset()
        self.start_ingestion(stream_id, dataset_id)
        
        # Fill the dataset
        self.publish_fake_data(stream_id, route)
        self.wait_until_we_have_enough_granules(dataset_id,40)

        # Retrieve the data
        granule = DataRetrieverService.retrieve_oob(dataset_id)
        rdt = RecordDictionaryTool.load_from_granule(granule)
        self.assertTrue((rdt['time'] == np.arange(40)).all())

    @attr('LOCOINT')
    @unittest.skipIf(os.getenv('CEI_LAUNCH_TEST', False), 'Host requires file-system access to coverage files, CEI mode does not support.')
    def test_retrieve_cache(self):
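        # Shorten the coverage cache refresh interval so that cache expiry can be observed quickly below.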
        DataRetrieverService._refresh_interval = 1
        datasets = [self.make_simple_dataset() for i in xrange(10)]
        for stream_id, route, stream_def_id, dataset_id in datasets:
            coverage = DatasetManagementService._get_simplex_coverage(dataset_id)
            coverage.insert_timesteps(10)
            coverage.set_parameter_values('time', np.arange(10))
            coverage.set_parameter_values('temp', np.arange(10))

        # Verify cache hit and refresh
        dataset_ids = [i[3] for i in datasets]
        self.assertTrue(dataset_ids[0] not in DataRetrieverService._retrieve_cache)
        DataRetrieverService._get_coverage(dataset_ids[0]) # Hit the cache
        cov, age = DataRetrieverService._retrieve_cache[dataset_ids[0]]
        # Verify that it was hit and it's now in there
        self.assertTrue(dataset_ids[0] in DataRetrieverService._retrieve_cache)

        gevent.sleep(DataRetrieverService._refresh_interval + 0.2)

        DataRetrieverService._get_coverage(dataset_ids[0]) # Hit the cache
        cov, age2 = DataRetrieverService._retrieve_cache[dataset_ids[0]]
        self.assertTrue(age2 != age)

        for dataset_id in dataset_ids:
            DataRetrieverService._get_coverage(dataset_id)
        
        self.assertTrue(dataset_ids[0] not in DataRetrieverService._retrieve_cache)

        stream_id, route, stream_def, dataset_id = datasets[0]
        self.start_ingestion(stream_id, dataset_id)
        DataRetrieverService._get_coverage(dataset_id)
        
        self.assertTrue(dataset_id in DataRetrieverService._retrieve_cache)

        DataRetrieverService._refresh_interval = 100
        self.publish_hifi(stream_id,route,1)
        self.wait_until_we_have_enough_granules(dataset_id, data_size=20)

        event = gevent.event.Event()
        with gevent.Timeout(20):
            while not event.wait(0.1):
                if dataset_id not in DataRetrieverService._retrieve_cache:
                    event.set()


        self.assertTrue(event.is_set())

        
    def publish_and_wait(self, dataset_id, granule):
        stream_ids, _ = self.resource_registry.find_objects(dataset_id, PRED.hasStream,id_only=True)
        stream_id=stream_ids[0]
        route = self.pubsub_management.read_stream_route(stream_id)
        publisher = StandaloneStreamPublisher(stream_id,route)
        dataset_monitor = DatasetMonitor(dataset_id)
        publisher.publish(granule)
        self.assertTrue(dataset_monitor.event.wait(10))

    @attr('LOCOINT')
    @unittest.skipIf(os.getenv('CEI_LAUNCH_TEST', False), 'Host requires file-system access to coverage files, CEI mode does not support.')
    def test_thorough_gap_analysis(self):
        dataset_id = self.test_ingestion_gap_analysis()
        vcov = DatasetManagementService._get_coverage(dataset_id)

        self.assertIsInstance(vcov,ViewCoverage)
        ccov = vcov.reference_coverage

        self.assertIsInstance(ccov, ComplexCoverage)
        self.assertEquals(len(ccov._reference_covs), 3)


    def test_ingestion_gap_analysis(self):
        stream_id, route, stream_def_id, dataset_id = self.make_simple_dataset()
        self.start_ingestion(stream_id, dataset_id)
        self.addCleanup(self.stop_ingestion, stream_id)

        connection1 = uuid4().hex
        connection2 = uuid4().hex
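        # A skipped connection_index or a change of connection_id marks a gap; each gap results in a separate
        # backing coverage (three in total here, verified in test_thorough_gap_analysis).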

        rdt = RecordDictionaryTool(stream_definition_id=stream_def_id)
        rdt['time'] = [0]
        rdt['temp'] = [0]
        self.publish_and_wait(dataset_id, rdt.to_granule(connection_id=connection1,connection_index='0'))
        rdt['time'] = [1]
        rdt['temp'] = [1]
        self.publish_and_wait(dataset_id, rdt.to_granule(connection_id=connection1,connection_index=1))
        rdt['time'] = [2]
        rdt['temp'] = [2]
        self.publish_and_wait(dataset_id, rdt.to_granule(connection_id=connection1,connection_index='3')) # Gap, missed message
        rdt['time'] = [3]
        rdt['temp'] = [3]
        self.publish_and_wait(dataset_id, rdt.to_granule(connection_id=connection2,connection_index='3')) # Gap, new connection
        rdt['time'] = [4]
        rdt['temp'] = [4]
        self.publish_and_wait(dataset_id, rdt.to_granule(connection_id=connection2,connection_index='4'))
        rdt['time'] = [5]
        rdt['temp'] = [5]
        self.publish_and_wait(dataset_id, rdt.to_granule(connection_id=connection2,connection_index=5))

        granule = self.data_retriever.retrieve(dataset_id)
        rdt = RecordDictionaryTool.load_from_granule(granule)
        np.testing.assert_array_equal(rdt['time'], np.arange(6))
        np.testing.assert_array_equal(rdt['temp'], np.arange(6))
        return dataset_id


    @unittest.skip('Outdated due to ingestion retry')
    @attr('LOCOINT')
    @unittest.skipIf(os.getenv('CEI_LAUNCH_TEST', False), 'Host requires file-system access to coverage files, CEI mode does not support.')
    def test_ingestion_failover(self):
        stream_id, route, stream_def_id, dataset_id = self.make_simple_dataset()
        self.start_ingestion(stream_id, dataset_id)
        
        event = Event()

        def cb(*args, **kwargs):
            event.set()

        sub = EventSubscriber(event_type="ExceptionEvent", callback=cb, origin="stream_exception")
        sub.start()

        self.publish_fake_data(stream_id, route)
        self.wait_until_we_have_enough_granules(dataset_id, 40)
        
        file_path = DatasetManagementService._get_coverage_path(dataset_id)
        master_file = os.path.join(file_path, '%s_master.hdf5' % dataset_id)

        with open(master_file, 'w') as f:
            f.write('this will crash HDF')

        self.publish_hifi(stream_id, route, 5)


        self.assertTrue(event.wait(10))

        sub.stop()

    @attr('LOCOINT')
    @unittest.skipIf(os.getenv('CEI_LAUNCH_TEST', False), 'Host requires file-system access to coverage files, CEI mode does not support.')
    def test_coverage_types(self):
        # Make a simple dataset and start ingestion, pretty standard stuff.
        ctd_stream_id, route, stream_def_id, dataset_id = self.make_simple_dataset()
        cov = DatasetManagementService._get_coverage(dataset_id=dataset_id)
        self.assertIsInstance(cov, ViewCoverage)

        cov = DatasetManagementService._get_simplex_coverage(dataset_id=dataset_id)
        self.assertIsInstance(cov, SimplexCoverage)


class TestTransformPrime(IonIntegrationTestCase):
    def setUp(self):
        self._start_container()

        self.container.start_rel_from_url('res/deploy/r2deploy.yml') # Because hey why not?!

        self.dataset_management      = DatasetManagementServiceClient()
        self.data_process_management = DataProcessManagementServiceClient()
        self.pubsub_management       = PubsubManagementServiceClient()
        self.data_product_management = DataProductManagementServiceClient()


    def setup_streams(self):
        in_pdict_id = self.dataset_management.read_parameter_dictionary_by_name('sbe37_L0_test', id_only=True)
        out_pdict_id = self.dataset_management.read_parameter_dictionary_by_name('sbe37_L1_test', id_only=True)

        in_stream_def_id = self.pubsub_management.create_stream_definition('L0 SBE37', parameter_dictionary_id=in_pdict_id)
        self.addCleanup(self.pubsub_management.delete_stream_definition, in_stream_def_id)
        out_stream_def_id = self.pubsub_management.create_stream_definition('L1 SBE37', parameter_dictionary_id=out_pdict_id)
        self.addCleanup(self.pubsub_management.delete_stream_definition, out_stream_def_id)

        in_stream_id, in_route = self.pubsub_management.create_stream('L0 input', stream_definition_id=in_stream_def_id, exchange_point='test')
        self.addCleanup(self.pubsub_management.delete_stream, in_stream_id)
        out_stream_id, out_route = self.pubsub_management.create_stream('L0 output', stream_definition_id=out_stream_def_id, exchange_point='test')
        self.addCleanup(self.pubsub_management.delete_stream, out_stream_id)

        return [(in_stream_id, in_stream_def_id), (out_stream_id, out_stream_def_id)]

    def setup_advanced_streams(self):
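        # Both stream definitions share a single parameter dictionary; available_fields narrows each stream
        # to the fields it carries (raw L0 inputs in, derived L2 products out).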
        in_pdict_id = out_pdict_id = self.dataset_management.read_parameter_dictionary_by_name('sbe37_LC_TEST', id_only=True)
        in_stream_def_id = self.pubsub_management.create_stream_definition('sbe37_instrument', parameter_dictionary_id=in_pdict_id, available_fields=['time', 'TEMPWAT_L0', 'CONDWAT_L0', 'PRESWAT_L0', 'lat', 'lon'])
        self.addCleanup(self.pubsub_management.delete_stream_definition, in_stream_def_id)

        out_stream_def_id = self.pubsub_management.create_stream_definition('sbe37_l2', parameter_dictionary_id=out_pdict_id, available_fields=['time', 'rho','PRACSAL_L2'])
        self.addCleanup(self.pubsub_management.delete_stream_definition, out_stream_def_id)

        in_stream_id, in_route = self.pubsub_management.create_stream('instrument stream', stream_definition_id=in_stream_def_id, exchange_point='test')
        self.addCleanup(self.pubsub_management.delete_stream, in_stream_id)

        out_stream_id, out_route = self.pubsub_management.create_stream('data product stream', stream_definition_id=out_stream_def_id, exchange_point='test')
        self.addCleanup(self.pubsub_management.delete_stream, out_stream_id)

        return [(in_stream_id, in_stream_def_id), (out_stream_id, out_stream_def_id)]


    def preload(self):
        config = DotDict()
        config.op = 'load'
        config.scenario = 'BASE,LC_TEST'
        config.categories = 'ParameterFunctions,ParameterDefs,ParameterDictionary'
        config.path = 'res/preload/r2_ioc'
        
        self.container.spawn_process('preload','ion.processes.bootstrap.ion_loader','IONLoader', config)

    def setup_advanced_transform(self):
        self.preload()
        queue_name = 'transform_prime'

        stream_info = self.setup_advanced_streams()
        in_stream_id, in_stream_def_id = stream_info[0]
        out_stream_id, out_stream_def_id = stream_info[1]

        routes = {}
        routes[(in_stream_id, out_stream_id)]= None
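        # Routes are keyed by (input stream, output stream) pairs; a value of None presumably lets
        # TransformPrime derive the parameter mapping from the two stream definitions.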

        config = DotDict()

        config.process.queue_name = queue_name
        config.process.routes = routes
        config.process.publish_streams = {out_stream_id:out_stream_id}

        sub_id = self.pubsub_management.create_subscription(queue_name, stream_ids=[in_stream_id])
        self.addCleanup(self.pubsub_management.delete_subscription, sub_id)
        self.pubsub_management.activate_subscription(sub_id)
        self.addCleanup(self.pubsub_management.deactivate_subscription, sub_id)

        self.container.spawn_process('transform_prime', 'ion.processes.data.transforms.transform_prime','TransformPrime', config)

        listen_sub_id = self.pubsub_management.create_subscription('listener', stream_ids=[out_stream_id])
        self.addCleanup(self.pubsub_management.delete_subscription, listen_sub_id)

        self.pubsub_management.activate_subscription(listen_sub_id)
        self.addCleanup(self.pubsub_management.deactivate_subscription, listen_sub_id)
        return [(in_stream_id, in_stream_def_id), (out_stream_id, out_stream_def_id)]


    def setup_transform(self):
        self.preload()
        queue_name = 'transform_prime'

        stream_info = self.setup_streams()
        in_stream_id, in_stream_def_id = stream_info[0]
        out_stream_id, out_stream_def_id = stream_info[1]

        routes = {}
        routes[(in_stream_id, out_stream_id)]= None

        config = DotDict()

        config.process.queue_name = queue_name
        config.process.routes = routes
        config.process.publish_streams = {out_stream_id:out_stream_id}

        sub_id = self.pubsub_management.create_subscription(queue_name, stream_ids=[in_stream_id])
        self.addCleanup(self.pubsub_management.delete_subscription, sub_id)
        self.pubsub_management.activate_subscription(sub_id)
        self.addCleanup(self.pubsub_management.deactivate_subscription, sub_id)

        self.container.spawn_process('transform_prime', 'ion.processes.data.transforms.transform_prime','TransformPrime', config)

        listen_sub_id = self.pubsub_management.create_subscription('listener', stream_ids=[out_stream_id])
        self.addCleanup(self.pubsub_management.delete_subscription, listen_sub_id)

        self.pubsub_management.activate_subscription(listen_sub_id)
        self.addCleanup(self.pubsub_management.deactivate_subscription, listen_sub_id)
        return [(in_stream_id, in_stream_def_id), (out_stream_id, out_stream_def_id)]

    def setup_validator(self, validator):
        listener = StandaloneStreamSubscriber('listener', validator)
        listener.start()
        self.addCleanup(listener.stop)

    
    @attr('LOCOINT')
    @unittest.skipIf(os.getenv('CEI_LAUNCH_TEST', False), 'Skip test while in CEI LAUNCH mode')
    def test_execute_advanced_transform(self):
        # Runs a transform across L0-L2 with stream definitions including available fields
        streams = self.setup_advanced_transform()
        in_stream_id, in_stream_def_id = streams[0]
        out_stream_id, out_stream_defs_id = streams[1]

        validation_event = Event()
        def validator(msg, route, stream_id):
            rdt = RecordDictionaryTool.load_from_granule(msg)
            if not np.allclose(rdt['rho'], np.array([1001.0055034])):
                return
            validation_event.set()

        self.setup_validator(validator)

        in_route = self.pubsub_management.read_stream_route(in_stream_id)
        publisher = StandaloneStreamPublisher(in_stream_id, in_route)

        outbound_rdt = RecordDictionaryTool(stream_definition_id=in_stream_def_id)
        outbound_rdt['time'] = [0]
        outbound_rdt['TEMPWAT_L0'] = [280000]
        outbound_rdt['CONDWAT_L0'] = [100000]
        outbound_rdt['PRESWAT_L0'] = [2789]

        outbound_rdt['lat'] = [45]
        outbound_rdt['lon'] = [-71]
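        # The raw L0 counts (plus position) above are converted to engineering units by the transform;
        # the validator checks the derived density ('rho').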

        outbound_granule = outbound_rdt.to_granule()

        publisher.publish(outbound_granule)

        self.assertTrue(validation_event.wait(2))


    @attr('LOCOINT')
    @unittest.skipIf(os.getenv('CEI_LAUNCH_TEST', False), 'Skip test while in CEI LAUNCH mode')
    def test_execute_transform(self):
        streams = self.setup_transform()
        in_stream_id, in_stream_def_id = streams[0]
        out_stream_id, out_stream_def_id = streams[1]


        validation_event = Event()
        def validator(msg, route, stream_id):
            rdt = RecordDictionaryTool.load_from_granule(msg)
            if not np.allclose(rdt['TEMPWAT_L1'], np.array([18.])):
                return
            if not np.allclose(rdt['CONDWAT_L1'], np.array([0.5])):
                return
            if not np.allclose(rdt['PRESWAT_L1'], np.array([0.04536611])):
                return
            validation_event.set()

        self.setup_validator(validator)

        in_route = self.pubsub_management.read_stream_route(in_stream_id)
        publisher = StandaloneStreamPublisher(in_stream_id, in_route)

        outbound_rdt = RecordDictionaryTool(stream_definition_id=in_stream_def_id)
        outbound_rdt['time'] = [0]
        outbound_rdt['TEMPWAT_L0'] = [280000]
        outbound_rdt['CONDWAT_L0'] = [100000]
        outbound_rdt['PRESWAT_L0'] = [2789]

        outbound_rdt['lat'] = [45]
        outbound_rdt['lon'] = [-71]

        outbound_granule = outbound_rdt.to_granule()

        publisher.publish(outbound_granule)

        self.assertTrue(validation_event.wait(2))
Beispiel #9
class PubsubManagementIntTest(IonIntegrationTestCase):

    def setUp(self):
        self._start_container()
        self.container.start_rel_from_url('res/deploy/r2deploy.yml')
        self.pubsub_management       = PubsubManagementServiceClient()
        self.resource_registry       = ResourceRegistryServiceClient()
        self.dataset_management      = DatasetManagementServiceClient()
        self.data_product_management = DataProductManagementServiceClient()

        self.pdicts = {}
        self.queue_cleanup = list()
        self.exchange_cleanup = list()
        self.context_ids = set()

    def tearDown(self):
        for queue in self.queue_cleanup:
            xn = self.container.ex_manager.create_xn_queue(queue)
            xn.delete()
        for exchange in self.exchange_cleanup:
            xp = self.container.ex_manager.create_xp(exchange)
            xp.delete()

        self.cleanup_contexts()
    
    def test_stream_def_crud(self):

        # Test Creation
        pdict = DatasetManagementService.get_parameter_dictionary_by_name('ctd_parsed_param_dict')
        stream_definition_id = self.pubsub_management.create_stream_definition('ctd parsed', parameter_dictionary_id=pdict.identifier)
        self.addCleanup(self.pubsub_management.delete_stream_definition, stream_definition_id)

        # Make sure there is an assoc
        self.assertTrue(self.resource_registry.find_associations(subject=stream_definition_id, predicate=PRED.hasParameterDictionary, object=pdict.identifier, id_only=True))

        # Test Reading
        stream_definition = self.pubsub_management.read_stream_definition(stream_definition_id)
        self.assertTrue(PubsubManagementService._compare_pdicts(pdict.dump(), stream_definition.parameter_dictionary))


        # Test comparisons
        in_stream_definition_id = self.pubsub_management.create_stream_definition('L0 products', parameter_dictionary_id=pdict.identifier, available_fields=['time','temp','conductivity','pressure'])
        self.addCleanup(self.pubsub_management.delete_stream_definition, in_stream_definition_id)

        out_stream_definition_id = in_stream_definition_id
        self.assertTrue(self.pubsub_management.compare_stream_definition(in_stream_definition_id, out_stream_definition_id))
        self.assertTrue(self.pubsub_management.compatible_stream_definitions(in_stream_definition_id, out_stream_definition_id))

        out_stream_definition_id = self.pubsub_management.create_stream_definition('L2 Products', parameter_dictionary_id=pdict.identifier, available_fields=['time','salinity','density'])
        self.addCleanup(self.pubsub_management.delete_stream_definition, out_stream_definition_id)
        self.assertFalse(self.pubsub_management.compare_stream_definition(in_stream_definition_id, out_stream_definition_id))

        self.assertTrue(self.pubsub_management.compatible_stream_definitions(in_stream_definition_id, out_stream_definition_id))
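
        # (added note) As exercised above, compare_stream_definition() is only
        # True when both definitions share the same available_fields as well as
        # the same parameter dictionary, while compatible_stream_definitions()
        # is satisfied as long as they are built from the same parameter
        # dictionary.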

    @unittest.skip('Needs to be refactored for cleanup')
    def test_validate_stream_defs(self):
        self.addCleanup(self.cleanup_contexts)
        #test no input 
        incoming_pdict_id = self._get_pdict(['TIME', 'LAT', 'LON', 'TEMPWAT_L0', 'CONDWAT_L0', 'PRESWAT_L0'])
        outgoing_pdict_id = self._get_pdict(['DENSITY', 'PRACSAL', 'TEMPWAT_L1', 'CONDWAT_L1', 'PRESWAT_L1'])
        available_fields_in = []
        available_fields_out = []
        incoming_stream_def_id = self.pubsub_management.create_stream_definition('in_sd_0', parameter_dictionary_id=incoming_pdict_id, available_fields=available_fields_in)
        self.addCleanup(self.pubsub_management.delete_stream_definition, incoming_stream_def_id)
        outgoing_stream_def_id = self.pubsub_management.create_stream_definition('out_sd_0', parameter_dictionary_id=outgoing_pdict_id, available_fields=available_fields_out)
        self.addCleanup(self.pubsub_management.delete_stream_definition, outgoing_stream_def_id)
        result = self.pubsub_management.validate_stream_defs(incoming_stream_def_id, outgoing_stream_def_id)
        self.assertFalse(result)
    
        #test input with no output
        incoming_pdict_id = self._get_pdict(['TIME', 'LAT', 'LON', 'TEMPWAT_L0', 'CONDWAT_L0', 'PRESWAT_L0'])
        outgoing_pdict_id = self._get_pdict(['DENSITY', 'PRACSAL', 'TEMPWAT_L1', 'CONDWAT_L1', 'PRESWAT_L1'])
        available_fields_in = ['TIME', 'LAT', 'LON', 'TEMPWAT_L0', 'CONDWAT_L0', 'PRESWAT_L0']
        available_fields_out = []
        incoming_stream_def_id = self.pubsub_management.create_stream_definition('in_sd_1', parameter_dictionary_id=incoming_pdict_id, available_fields=available_fields_in)
        self.addCleanup(self.pubsub_management.delete_stream_definition, incoming_stream_def_id)
        outgoing_stream_def_id = self.pubsub_management.create_stream_definition('out_sd_1', parameter_dictionary_id=outgoing_pdict_id, available_fields=available_fields_out)
        self.addCleanup(self.pubsub_management.delete_stream_definition, outgoing_stream_def_id)
        result = self.pubsub_management.validate_stream_defs(incoming_stream_def_id, outgoing_stream_def_id)
        self.assertTrue(result)
        
        #test available field missing parameter context definition -- missing PRESWAT_L0
        incoming_pdict_id = self._get_pdict(['TIME', 'LAT', 'LON', 'TEMPWAT_L0', 'CONDWAT_L0'])
        outgoing_pdict_id = self._get_pdict(['DENSITY', 'PRACSAL', 'TEMPWAT_L1', 'CONDWAT_L1', 'PRESWAT_L1'])
        available_fields_in = ['TIME', 'LAT', 'LON', 'TEMPWAT_L0', 'CONDWAT_L0', 'PRESWAT_L0']
        available_fields_out = ['DENSITY']
        incoming_stream_def_id = self.pubsub_management.create_stream_definition('in_sd_2', parameter_dictionary_id=incoming_pdict_id, available_fields=available_fields_in)
        self.addCleanup(self.pubsub_management.delete_stream_definition, incoming_stream_def_id)
        outgoing_stream_def_id = self.pubsub_management.create_stream_definition('out_sd_2', parameter_dictionary_id=outgoing_pdict_id, available_fields=available_fields_out)
        self.addCleanup(self.pubsub_management.delete_stream_definition, outgoing_stream_def_id)
        result = self.pubsub_management.validate_stream_defs(incoming_stream_def_id, outgoing_stream_def_id)
        self.assertFalse(result)

        #test l1 from l0
        incoming_pdict_id = self._get_pdict(['TIME', 'LAT', 'LON', 'TEMPWAT_L0', 'CONDWAT_L0', 'PRESWAT_L0'])
        outgoing_pdict_id = self._get_pdict(['TEMPWAT_L1', 'CONDWAT_L1', 'PRESWAT_L1'])
        available_fields_in = ['TIME', 'LAT', 'LON', 'TEMPWAT_L0', 'CONDWAT_L0', 'PRESWAT_L0']
        available_fields_out = ['TEMPWAT_L1', 'CONDWAT_L1', 'PRESWAT_L1']
        incoming_stream_def_id = self.pubsub_management.create_stream_definition('in_sd_3', parameter_dictionary_id=incoming_pdict_id, available_fields=available_fields_in)
        self.addCleanup(self.pubsub_management.delete_stream_definition, incoming_stream_def_id)
        outgoing_stream_def_id = self.pubsub_management.create_stream_definition('out_sd_3', parameter_dictionary_id=outgoing_pdict_id, available_fields=available_fields_out)
        self.addCleanup(self.pubsub_management.delete_stream_definition, outgoing_stream_def_id)
        result = self.pubsub_management.validate_stream_defs(incoming_stream_def_id, outgoing_stream_def_id)
        self.assertTrue(result)

        #test l2 from l0
        incoming_pdict_id = self._get_pdict(['TIME', 'LAT', 'LON', 'TEMPWAT_L0', 'CONDWAT_L0', 'PRESWAT_L0'])
        outgoing_pdict_id = self._get_pdict(['TEMPWAT_L1', 'CONDWAT_L1', 'PRESWAT_L1', 'DENSITY', 'PRACSAL'])
        available_fields_in = ['TIME', 'LAT', 'LON', 'TEMPWAT_L0', 'CONDWAT_L0', 'PRESWAT_L0']
        available_fields_out = ['DENSITY', 'PRACSAL']
        incoming_stream_def_id = self.pubsub_management.create_stream_definition('in_sd_4', parameter_dictionary_id=incoming_pdict_id, available_fields=available_fields_in)
        self.addCleanup(self.pubsub_management.delete_stream_definition, incoming_stream_def_id)
        outgoing_stream_def_id = self.pubsub_management.create_stream_definition('out_sd_4', parameter_dictionary_id=outgoing_pdict_id, available_fields=available_fields_out)
        self.addCleanup(self.pubsub_management.delete_stream_definition, outgoing_stream_def_id)
        result = self.pubsub_management.validate_stream_defs(incoming_stream_def_id, outgoing_stream_def_id)
        self.assertTrue(result)
        
        #test Ln from L0
        incoming_pdict_id = self._get_pdict(['TIME', 'LAT', 'LON', 'TEMPWAT_L0', 'CONDWAT_L0', 'PRESWAT_L0'])
        outgoing_pdict_id = self._get_pdict(['DENSITY','PRACSAL','TEMPWAT_L1', 'CONDWAT_L1', 'PRESWAT_L1'])
        available_fields_in = ['TIME', 'LAT', 'LON', 'TEMPWAT_L0', 'CONDWAT_L0', 'PRESWAT_L0']
        available_fields_out = ['DENSITY', 'PRACSAL', 'TEMPWAT_L1', 'CONDWAT_L1', 'PRESWAT_L1']
        incoming_stream_def_id = self.pubsub_management.create_stream_definition('in_sd_5', parameter_dictionary_id=incoming_pdict_id, available_fields=available_fields_in)
        self.addCleanup(self.pubsub_management.delete_stream_definition, incoming_stream_def_id)
        outgoing_stream_def_id = self.pubsub_management.create_stream_definition('out_sd_5', parameter_dictionary_id=outgoing_pdict_id, available_fields=available_fields_out)
        self.addCleanup(self.pubsub_management.delete_stream_definition, outgoing_stream_def_id)
        result = self.pubsub_management.validate_stream_defs(incoming_stream_def_id, outgoing_stream_def_id)
        self.assertTrue(result)
        
        #test L2 from L1
        incoming_pdict_id = self._get_pdict(['TIME', 'LAT', 'LON', 'TEMPWAT_L1', 'CONDWAT_L1', 'PRESWAT_L1'])
        outgoing_pdict_id = self._get_pdict(['DENSITY','PRACSAL','TEMPWAT_L1', 'CONDWAT_L1', 'PRESWAT_L1'])
        available_fields_in = ['TIME', 'LAT', 'LON', 'TEMPWAT_L1', 'CONDWAT_L1', 'PRESWAT_L1']
        available_fields_out = ['DENSITY', 'PRACSAL']
        incoming_stream_def_id = self.pubsub_management.create_stream_definition('in_sd_6', parameter_dictionary_id=incoming_pdict_id, available_fields=available_fields_in)
        self.addCleanup(self.pubsub_management.delete_stream_definition, incoming_stream_def_id)
        outgoing_stream_def_id = self.pubsub_management.create_stream_definition('out_sd_6', parameter_dictionary_id=outgoing_pdict_id, available_fields=available_fields_out)
        self.addCleanup(self.pubsub_management.delete_stream_definition, outgoing_stream_def_id)
        result = self.pubsub_management.validate_stream_defs(incoming_stream_def_id, outgoing_stream_def_id)
        self.assertTrue(result)
        
        #test L1 from L0 missing L0
        incoming_pdict_id = self._get_pdict(['TIME', 'LAT', 'LON'])
        outgoing_pdict_id = self._get_pdict(['TEMPWAT_L1', 'CONDWAT_L1', 'PRESWAT_L1'])
        available_fields_in = ['TIME', 'LAT', 'LON']
        available_fields_out = ['DENSITY', 'PRACSAL']
        incoming_stream_def_id = self.pubsub_management.create_stream_definition('in_sd_7', parameter_dictionary_id=incoming_pdict_id, available_fields=available_fields_in)
        self.addCleanup(self.pubsub_management.delete_stream_definition, incoming_stream_def_id)
        outgoing_stream_def_id = self.pubsub_management.create_stream_definition('out_sd_7', parameter_dictionary_id=outgoing_pdict_id, available_fields=available_fields_out)
        self.addCleanup(self.pubsub_management.delete_stream_definition, outgoing_stream_def_id)
        result = self.pubsub_management.validate_stream_defs(incoming_stream_def_id, outgoing_stream_def_id)
        self.assertFalse(result)
        
        #test L2 from L0 missing L0
        incoming_pdict_id = self._get_pdict(['TIME', 'LAT', 'LON'])
        outgoing_pdict_id = self._get_pdict(['DENSITY', 'PRACSAL', 'TEMPWAT_L1', 'CONDWAT_L1', 'PRESWAT_L1'])
        available_fields_in = ['TIME', 'LAT', 'LON']
        available_fields_out = ['DENSITY', 'PRACSAL']
        incoming_stream_def_id = self.pubsub_management.create_stream_definition('in_sd_8', parameter_dictionary_id=incoming_pdict_id, available_fields=available_fields_in)
        self.addCleanup(self.pubsub_management.delete_stream_definition, incoming_stream_def_id)
        outgoing_stream_def_id = self.pubsub_management.create_stream_definition('out_sd_8', parameter_dictionary_id=outgoing_pdict_id, available_fields=available_fields_out)
        self.addCleanup(self.pubsub_management.delete_stream_definition, outgoing_stream_def_id)
        result = self.pubsub_management.validate_stream_defs(incoming_stream_def_id, outgoing_stream_def_id)
        self.assertFalse(result)
        
        #test L2 from L0 missing L1
        incoming_pdict_id = self._get_pdict(['TIME', 'LAT', 'LON', 'TEMPWAT_L0', 'CONDWAT_L0', 'PRESWAT_L0'])
        outgoing_pdict_id = self._get_pdict(['DENSITY', 'PRACSAL'])
        available_fields_in = ['TIME', 'LAT', 'LON', 'TEMPWAT_L0', 'CONDWAT_L0', 'PRESWAT_L0']
        available_fields_out = ['DENSITY', 'PRACSAL']
        incoming_stream_def_id = self.pubsub_management.create_stream_definition('in_sd_9', parameter_dictionary_id=incoming_pdict_id, available_fields=available_fields_in)
        self.addCleanup(self.pubsub_management.delete_stream_definition, incoming_stream_def_id)
        outgoing_stream_def_id = self.pubsub_management.create_stream_definition('out_sd_9', parameter_dictionary_id=outgoing_pdict_id, available_fields=available_fields_out)
        self.addCleanup(self.pubsub_management.delete_stream_definition, outgoing_stream_def_id)
        result = self.pubsub_management.validate_stream_defs(incoming_stream_def_id, outgoing_stream_def_id)
        self.assertFalse(result)
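
        # (added note) Taken together, the cases above suggest the rule that
        # validate_stream_defs() enforces: the incoming definition must expose
        # some fields, and every parameter exposed by the outgoing definition
        # must be derivable -- directly or through its parameter functions --
        # from the fields available on the incoming definition.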
    
    def publish_on_stream(self, stream_id, msg):
        stream = self.pubsub_management.read_stream(stream_id)
        stream_route = stream.stream_route
        publisher = StandaloneStreamPublisher(stream_id=stream_id, stream_route=stream_route)
        publisher.publish(msg)

    def test_stream_crud(self):
        stream_def_id = self.pubsub_management.create_stream_definition('test_definition', stream_type='stream')
        self.addCleanup(self.pubsub_management.delete_stream_definition, stream_def_id)
        topic_id = self.pubsub_management.create_topic(name='test_topic', exchange_point='test_exchange')
        self.addCleanup(self.pubsub_management.delete_topic, topic_id)
        self.exchange_cleanup.append('test_exchange')
        topic2_id = self.pubsub_management.create_topic(name='another_topic', exchange_point='outside')
        self.addCleanup(self.pubsub_management.delete_topic, topic2_id)
        stream_id, route = self.pubsub_management.create_stream(name='test_stream', topic_ids=[topic_id, topic2_id], exchange_point='test_exchange', stream_definition_id=stream_def_id)

        topics, assocs = self.resource_registry.find_objects(subject=stream_id, predicate=PRED.hasTopic, id_only=True)
        self.assertEquals(topics,[topic_id])

        defs, assocs = self.resource_registry.find_objects(subject=stream_id, predicate=PRED.hasStreamDefinition, id_only=True)
        self.assertTrue(len(defs))

        stream = self.pubsub_management.read_stream(stream_id)
        self.assertEquals(stream.name,'test_stream')
        self.pubsub_management.delete_stream(stream_id)
        
        with self.assertRaises(NotFound):
            self.pubsub_management.read_stream(stream_id)

        defs, assocs = self.resource_registry.find_objects(subject=stream_id, predicate=PRED.hasStreamDefinition, id_only=True)
        self.assertFalse(len(defs))

        topics, assocs = self.resource_registry.find_objects(subject=stream_id, predicate=PRED.hasTopic, id_only=True)
        self.assertFalse(len(topics))



    def test_data_product_subscription(self):
        pdict_id = self.dataset_management.read_parameter_dictionary_by_name('ctd_parsed_param_dict', id_only=True)
        stream_def_id = self.pubsub_management.create_stream_definition('ctd parsed', parameter_dictionary_id=pdict_id)
        self.addCleanup(self.pubsub_management.delete_stream_definition, stream_def_id)

        tdom, sdom = time_series_domain()
        dp = DataProduct(name='ctd parsed')
        dp.spatial_domain = sdom.dump()
        dp.temporal_domain = tdom.dump()

        data_product_id = self.data_product_management.create_data_product(data_product=dp, stream_definition_id=stream_def_id)
        self.addCleanup(self.data_product_management.delete_data_product, data_product_id)

        subscription_id = self.pubsub_management.create_subscription('validator', data_product_ids=[data_product_id])
        self.addCleanup(self.pubsub_management.delete_subscription, subscription_id)

        validated = Event()
        def validation(msg, route, stream_id):
            validated.set()

        stream_ids, _ = self.resource_registry.find_objects(subject=data_product_id, predicate=PRED.hasStream, id_only=True)
        dp_stream_id = stream_ids.pop()

        validator = StandaloneStreamSubscriber('validator', callback=validation)
        validator.start()
        self.addCleanup(validator.stop)

        self.pubsub_management.activate_subscription(subscription_id)
        self.addCleanup(self.pubsub_management.deactivate_subscription, subscription_id)

        route = self.pubsub_management.read_stream_route(dp_stream_id)

        publisher = StandaloneStreamPublisher(dp_stream_id, route)
        publisher.publish('hi')
        self.assertTrue(validated.wait(10))
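
        # (added note) The subscription above was created from a data product id
        # rather than a stream id; the product's stream (found here through the
        # hasStream association) is what the publish has to target for the
        # 'validator' subscriber to fire.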
            

    def test_subscription_crud(self):
        stream_def_id = self.pubsub_management.create_stream_definition('test_definition', stream_type='stream')
        stream_id, route = self.pubsub_management.create_stream(name='test_stream', exchange_point='test_exchange', stream_definition_id=stream_def_id)
        subscription_id = self.pubsub_management.create_subscription(name='test subscription', stream_ids=[stream_id], exchange_name='test_queue')
        self.exchange_cleanup.append('test_exchange')

        subs, assocs = self.resource_registry.find_objects(subject=subscription_id,predicate=PRED.hasStream,id_only=True)
        self.assertEquals(subs,[stream_id])

        res, _ = self.resource_registry.find_resources(restype=RT.ExchangeName, name='test_queue', id_only=True)
        self.assertEquals(len(res),1)

        subs, assocs = self.resource_registry.find_subjects(object=subscription_id, predicate=PRED.hasSubscription, id_only=True)
        self.assertEquals(subs[0], res[0])

        subscription = self.pubsub_management.read_subscription(subscription_id)
        self.assertEquals(subscription.exchange_name, 'test_queue')

        self.pubsub_management.delete_subscription(subscription_id)
        
        subs, assocs = self.resource_registry.find_objects(subject=subscription_id,predicate=PRED.hasStream,id_only=True)
        self.assertFalse(len(subs))

        subs, assocs = self.resource_registry.find_subjects(object=subscription_id, predicate=PRED.hasSubscription, id_only=True)
        self.assertFalse(len(subs))


        self.pubsub_management.delete_stream(stream_id)
        self.pubsub_management.delete_stream_definition(stream_def_id)

    def test_move_before_activate(self):
        stream_id, route = self.pubsub_management.create_stream(name='test_stream', exchange_point='test_xp')

        #--------------------------------------------------------------------------------
        # Test moving before activate
        #--------------------------------------------------------------------------------

        subscription_id = self.pubsub_management.create_subscription('first_queue', stream_ids=[stream_id])

        xn_ids, _ = self.resource_registry.find_resources(restype=RT.ExchangeName, name='first_queue', id_only=True)
        subjects, _ = self.resource_registry.find_subjects(object=subscription_id, predicate=PRED.hasSubscription, id_only=True)
        self.assertEquals(xn_ids[0], subjects[0])

        self.pubsub_management.move_subscription(subscription_id, exchange_name='second_queue')

        xn_ids, _ = self.resource_registry.find_resources(restype=RT.ExchangeName, name='second_queue', id_only=True)
        subjects, _ = self.resource_registry.find_subjects(object=subscription_id, predicate=PRED.hasSubscription, id_only=True)

        self.assertEquals(len(subjects),1)
        self.assertEquals(subjects[0], xn_ids[0])

        self.pubsub_management.delete_subscription(subscription_id)
        self.pubsub_management.delete_stream(stream_id)

    def test_move_activated_subscription(self):

        stream_id, route = self.pubsub_management.create_stream(name='test_stream', exchange_point='test_xp')
        #--------------------------------------------------------------------------------
        # Test moving after activate
        #--------------------------------------------------------------------------------

        subscription_id = self.pubsub_management.create_subscription('first_queue', stream_ids=[stream_id])
        self.pubsub_management.activate_subscription(subscription_id)

        xn_ids, _ = self.resource_registry.find_resources(restype=RT.ExchangeName, name='first_queue', id_only=True)
        subjects, _ = self.resource_registry.find_subjects(object=subscription_id, predicate=PRED.hasSubscription, id_only=True)
        self.assertEquals(xn_ids[0], subjects[0])

        self.verified = Event()

        def verify(m,r,s):
            self.assertEquals(m,'verified')
            self.verified.set()

        subscriber = StandaloneStreamSubscriber('second_queue', verify)
        subscriber.start()

        self.pubsub_management.move_subscription(subscription_id, exchange_name='second_queue')

        xn_ids, _ = self.resource_registry.find_resources(restype=RT.ExchangeName, name='second_queue', id_only=True)
        subjects, _ = self.resource_registry.find_subjects(object=subscription_id, predicate=PRED.hasSubscription, id_only=True)

        self.assertEquals(len(subjects),1)
        self.assertEquals(subjects[0], xn_ids[0])

        publisher = StandaloneStreamPublisher(stream_id, route)
        publisher.publish('verified')

        self.assertTrue(self.verified.wait(2))

        self.pubsub_management.deactivate_subscription(subscription_id)

        self.pubsub_management.delete_subscription(subscription_id)
        self.pubsub_management.delete_stream(stream_id)

    def test_queue_cleanup(self):
        stream_id, route = self.pubsub_management.create_stream('test_stream','xp1')
        xn_objs, _ = self.resource_registry.find_resources(restype=RT.ExchangeName, name='queue1')
        for xn_obj in xn_objs:
            xn = self.container.ex_manager.create_xn_queue(xn_obj.name)
            xn.delete()
        subscription_id = self.pubsub_management.create_subscription('queue1',stream_ids=[stream_id])
        xn_ids, _ = self.resource_registry.find_resources(restype=RT.ExchangeName, name='queue1')
        self.assertEquals(len(xn_ids),1)

        self.pubsub_management.delete_subscription(subscription_id)

        xn_ids, _ = self.resource_registry.find_resources(restype=RT.ExchangeName, name='queue1')
        self.assertEquals(len(xn_ids),0)
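
        # (added note) The assertions above document that deleting a subscription
        # also removes the ExchangeName resource created for its queue.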

    def test_activation_and_deactivation(self):
        stream_id, route = self.pubsub_management.create_stream('stream1','xp1')
        subscription_id = self.pubsub_management.create_subscription('sub1', stream_ids=[stream_id])

        self.check1 = Event()

        def verifier(m,r,s):
            self.check1.set()


        subscriber = StandaloneStreamSubscriber('sub1',verifier)
        subscriber.start()

        publisher = StandaloneStreamPublisher(stream_id, route)
        publisher.publish('should not receive')

        self.assertFalse(self.check1.wait(0.25))

        self.pubsub_management.activate_subscription(subscription_id)

        publisher.publish('should receive')
        self.assertTrue(self.check1.wait(2))

        self.check1.clear()
        self.assertFalse(self.check1.is_set())

        self.pubsub_management.deactivate_subscription(subscription_id)

        publisher.publish('should not receive')
        self.assertFalse(self.check1.wait(0.5))

        self.pubsub_management.activate_subscription(subscription_id)

        publisher.publish('should receive')
        self.assertTrue(self.check1.wait(2))

        subscriber.stop()

        self.pubsub_management.deactivate_subscription(subscription_id)
        self.pubsub_management.delete_subscription(subscription_id)
        self.pubsub_management.delete_stream(stream_id)

        

    def test_topic_crud(self):

        topic_id = self.pubsub_management.create_topic(name='test_topic', exchange_point='test_xp')
        self.exchange_cleanup.append('test_xp')

        topic = self.pubsub_management.read_topic(topic_id)

        self.assertEquals(topic.name,'test_topic')
        self.assertEquals(topic.exchange_point, 'test_xp')

        self.pubsub_management.delete_topic(topic_id)
        with self.assertRaises(NotFound):
            self.pubsub_management.read_topic(topic_id)

    def test_full_pubsub(self):

        self.sub1_sat = Event()
        self.sub2_sat = Event()

        def subscriber1(m,r,s):
            self.sub1_sat.set()

        def subscriber2(m,r,s):
            self.sub2_sat.set()

        sub1 = StandaloneStreamSubscriber('sub1', subscriber1)
        sub1.start()
        self.addCleanup(sub1.stop)

        sub2 = StandaloneStreamSubscriber('sub2', subscriber2)
        sub2.start()
        self.addCleanup(sub2.stop)

        log_topic = self.pubsub_management.create_topic('instrument_logs', exchange_point='instruments')
        self.addCleanup(self.pubsub_management.delete_topic, log_topic)
        science_topic = self.pubsub_management.create_topic('science_data', exchange_point='instruments')
        self.addCleanup(self.pubsub_management.delete_topic, science_topic)
        events_topic = self.pubsub_management.create_topic('notifications', exchange_point='events')
        self.addCleanup(self.pubsub_management.delete_topic, events_topic)


        log_stream, route = self.pubsub_management.create_stream('instrument1-logs', topic_ids=[log_topic], exchange_point='instruments')
        self.addCleanup(self.pubsub_management.delete_stream, log_stream)
        ctd_stream, route = self.pubsub_management.create_stream('instrument1-ctd', topic_ids=[science_topic], exchange_point='instruments')
        self.addCleanup(self.pubsub_management.delete_stream, ctd_stream)
        event_stream, route = self.pubsub_management.create_stream('notifications', topic_ids=[events_topic], exchange_point='events')
        self.addCleanup(self.pubsub_management.delete_stream, event_stream)
        raw_stream, route = self.pubsub_management.create_stream('temp', exchange_point='global.data')
        self.addCleanup(self.pubsub_management.delete_stream, raw_stream)


        subscription1 = self.pubsub_management.create_subscription('subscription1', stream_ids=[log_stream,event_stream], exchange_name='sub1')
        self.addCleanup(self.pubsub_management.delete_subscription, subscription1)
        subscription2 = self.pubsub_management.create_subscription('subscription2', exchange_points=['global.data'], stream_ids=[ctd_stream], exchange_name='sub2')
        self.addCleanup(self.pubsub_management.delete_subscription, subscription2)

        self.pubsub_management.activate_subscription(subscription1)
        self.addCleanup(self.pubsub_management.deactivate_subscription, subscription1)
        self.pubsub_management.activate_subscription(subscription2)
        self.addCleanup(self.pubsub_management.deactivate_subscription, subscription2)

        self.publish_on_stream(log_stream, 1)
        self.assertTrue(self.sub1_sat.wait(4))
        self.assertFalse(self.sub2_sat.is_set())

        # raw_stream lives on the 'global.data' exchange point and subscription2
        # was created with exchange_points=['global.data'], so this publish should
        # reach sub2 even though raw_stream is not in its stream_ids.
        self.publish_on_stream(raw_stream, 1)
        self.assertTrue(self.sub2_sat.wait(4))
    
    def test_topic_craziness(self):

        self.msg_queue = Queue()

        def subscriber1(m,r,s):
            self.msg_queue.put(m)

        sub1 = StandaloneStreamSubscriber('sub1', subscriber1)
        sub1.start()
        self.addCleanup(sub1.stop)

        topic1 = self.pubsub_management.create_topic('topic1', exchange_point='xp1')
        self.addCleanup(self.pubsub_management.delete_topic, topic1)
        topic2 = self.pubsub_management.create_topic('topic2', exchange_point='xp1', parent_topic_id=topic1)
        self.addCleanup(self.pubsub_management.delete_topic, topic2)
        topic3 = self.pubsub_management.create_topic('topic3', exchange_point='xp1', parent_topic_id=topic1)
        self.addCleanup(self.pubsub_management.delete_topic, topic3)
        topic4 = self.pubsub_management.create_topic('topic4', exchange_point='xp1', parent_topic_id=topic2)
        self.addCleanup(self.pubsub_management.delete_topic, topic4)
        topic5 = self.pubsub_management.create_topic('topic5', exchange_point='xp1', parent_topic_id=topic2)
        self.addCleanup(self.pubsub_management.delete_topic, topic5)
        topic6 = self.pubsub_management.create_topic('topic6', exchange_point='xp1', parent_topic_id=topic3)
        self.addCleanup(self.pubsub_management.delete_topic, topic6)
        topic7 = self.pubsub_management.create_topic('topic7', exchange_point='xp1', parent_topic_id=topic3)
        self.addCleanup(self.pubsub_management.delete_topic, topic7)

        # Tree 2
        topic8 = self.pubsub_management.create_topic('topic8', exchange_point='xp2')
        self.addCleanup(self.pubsub_management.delete_topic, topic8)
        topic9 = self.pubsub_management.create_topic('topic9', exchange_point='xp2', parent_topic_id=topic8)
        self.addCleanup(self.pubsub_management.delete_topic, topic9)
        topic10 = self.pubsub_management.create_topic('topic10', exchange_point='xp2', parent_topic_id=topic9)
        self.addCleanup(self.pubsub_management.delete_topic, topic10)
        topic11 = self.pubsub_management.create_topic('topic11', exchange_point='xp2', parent_topic_id=topic9)
        self.addCleanup(self.pubsub_management.delete_topic, topic11)
        topic12 = self.pubsub_management.create_topic('topic12', exchange_point='xp2', parent_topic_id=topic11)
        self.addCleanup(self.pubsub_management.delete_topic, topic12)
        topic13 = self.pubsub_management.create_topic('topic13', exchange_point='xp2', parent_topic_id=topic11)
        self.addCleanup(self.pubsub_management.delete_topic, topic13)
        self.exchange_cleanup.extend(['xp1','xp2'])
        
        stream1_id, route = self.pubsub_management.create_stream('stream1', topic_ids=[topic7, topic4, topic5], exchange_point='xp1')
        self.addCleanup(self.pubsub_management.delete_stream, stream1_id)
        stream2_id, route = self.pubsub_management.create_stream('stream2', topic_ids=[topic8], exchange_point='xp2')
        self.addCleanup(self.pubsub_management.delete_stream, stream2_id)
        stream3_id, route = self.pubsub_management.create_stream('stream3', topic_ids=[topic10,topic13], exchange_point='xp2')
        self.addCleanup(self.pubsub_management.delete_stream, stream3_id)
        stream4_id, route = self.pubsub_management.create_stream('stream4', topic_ids=[topic9], exchange_point='xp2')
        self.addCleanup(self.pubsub_management.delete_stream, stream4_id)
        stream5_id, route = self.pubsub_management.create_stream('stream5', topic_ids=[topic11], exchange_point='xp2')
        self.addCleanup(self.pubsub_management.delete_stream, stream5_id)

        subscription1 = self.pubsub_management.create_subscription('sub1', topic_ids=[topic1])
        self.addCleanup(self.pubsub_management.delete_subscription, subscription1)
        subscription2 = self.pubsub_management.create_subscription('sub2', topic_ids=[topic8], exchange_name='sub1')
        self.addCleanup(self.pubsub_management.delete_subscription, subscription2)
        subscription3 = self.pubsub_management.create_subscription('sub3', topic_ids=[topic9], exchange_name='sub1')
        self.addCleanup(self.pubsub_management.delete_subscription, subscription3)
        subscription4 = self.pubsub_management.create_subscription('sub4', topic_ids=[topic10,topic13, topic11], exchange_name='sub1')
        self.addCleanup(self.pubsub_management.delete_subscription, subscription4)
        #--------------------------------------------------------------------------------
        self.pubsub_management.activate_subscription(subscription1)

        self.publish_on_stream(stream1_id,1)

        self.assertEquals(self.msg_queue.get(timeout=10), 1)
        with self.assertRaises(Empty):
            self.msg_queue.get(timeout=0.1)


        self.pubsub_management.deactivate_subscription(subscription1)
        #--------------------------------------------------------------------------------
        self.pubsub_management.activate_subscription(subscription2)
        
        self.publish_on_stream(stream2_id,2)
        self.assertEquals(self.msg_queue.get(timeout=10), 2)
        with self.assertRaises(Empty):
            self.msg_queue.get(timeout=0.1)

        self.pubsub_management.deactivate_subscription(subscription2)

        #--------------------------------------------------------------------------------
        self.pubsub_management.activate_subscription(subscription3)

        self.publish_on_stream(stream2_id, 3)
        with self.assertRaises(Empty):
            self.msg_queue.get(timeout=0.3)

        self.publish_on_stream(stream3_id, 4)
        self.assertEquals(self.msg_queue.get(timeout=10),4)


        self.pubsub_management.deactivate_subscription(subscription3)

        #--------------------------------------------------------------------------------
        self.pubsub_management.activate_subscription(subscription4)

        self.publish_on_stream(stream4_id, 5)
        with self.assertRaises(Empty):
            self.msg_queue.get(timeout=0.3)

        self.publish_on_stream(stream5_id, 6)
        self.assertEquals(self.msg_queue.get(timeout=10),6)
        with self.assertRaises(Empty):
            self.msg_queue.get(timeout=0.3)

        self.pubsub_management.deactivate_subscription(subscription4)
        
        #--------------------------------------------------------------------------------
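        # (added note) Routing summary for the cases above: a subscription on a
        # topic receives streams attached to that topic or to any of its
        # descendants (sub1 on topic1 saw stream1; sub3 on topic9 saw stream3 on
        # topics 10/13), but not streams attached only to an ancestor topic
        # (stream2 on topic8 and stream4 on topic9 were never delivered).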
    
    def cleanup_contexts(self):
        for context_id in self.context_ids:
            self.dataset_management.delete_parameter_context(context_id)

    def add_context_to_cleanup(self, context_id):
        self.context_ids.add(context_id)

    def _get_pdict(self, filter_values):
        t_ctxt = ParameterContext('TIME', param_type=QuantityType(value_encoding=np.dtype('int64')))
        t_ctxt.uom = 'seconds since 01-01-1900'
        t_ctxt_id = self.dataset_management.create_parameter_context(name='TIME', parameter_context=t_ctxt.dump(), parameter_type='quantity<int64>', units=t_ctxt.uom)
        self.add_context_to_cleanup(t_ctxt_id)

        lat_ctxt = ParameterContext('LAT', param_type=ConstantType(QuantityType(value_encoding=np.dtype('float32'))), fill_value=-9999)
        lat_ctxt.axis = AxisTypeEnum.LAT
        lat_ctxt.uom = 'degree_north'
        lat_ctxt_id = self.dataset_management.create_parameter_context(name='LAT', parameter_context=lat_ctxt.dump(), parameter_type='quantity<float32>', units=lat_ctxt.uom)
        self.add_context_to_cleanup(lat_ctxt_id)


        lon_ctxt = ParameterContext('LON', param_type=ConstantType(QuantityType(value_encoding=np.dtype('float32'))), fill_value=-9999)
        lon_ctxt.axis = AxisTypeEnum.LON
        lon_ctxt.uom = 'degree_east'
        lon_ctxt_id = self.dataset_management.create_parameter_context(name='LON', parameter_context=lon_ctxt.dump(), parameter_type='quantity<float32>', units=lon_ctxt.uom)
        self.add_context_to_cleanup(lon_ctxt_id)


        # Independent Parameters
        # Temperature - values expected to be the decimal results of conversion from hex
        temp_ctxt = ParameterContext('TEMPWAT_L0', param_type=QuantityType(value_encoding=np.dtype('float32')), fill_value=-9999)
        temp_ctxt.uom = 'deg_C'
        temp_ctxt_id = self.dataset_management.create_parameter_context(name='TEMPWAT_L0', parameter_context=temp_ctxt.dump(), parameter_type='quantity<float32>', units=temp_ctxt.uom)
        self.add_context_to_cleanup(temp_ctxt_id)


        # Conductivity - values expected to be the decimal results of conversion from hex
        cond_ctxt = ParameterContext('CONDWAT_L0', param_type=QuantityType(value_encoding=np.dtype('float32')), fill_value=-9999)
        cond_ctxt.uom = 'S m-1'
        cond_ctxt_id = self.dataset_management.create_parameter_context(name='CONDWAT_L0', parameter_context=cond_ctxt.dump(), parameter_type='quantity<float32>', units=cond_ctxt.uom)
        self.add_context_to_cleanup(cond_ctxt_id)


        # Pressure - values expected to be the decimal results of conversion from hex
        press_ctxt = ParameterContext('PRESWAT_L0', param_type=QuantityType(value_encoding=np.dtype('float32')), fill_value=-9999)
        press_ctxt.uom = 'dbar'
        press_ctxt_id = self.dataset_management.create_parameter_context(name='PRESWAT_L0', parameter_context=press_ctxt.dump(), parameter_type='quantity<float32>', units=press_ctxt.uom)
        self.add_context_to_cleanup(press_ctxt_id)


        # Dependent Parameters

        # TEMPWAT_L1 = (TEMPWAT_L0 / 10000) - 10
        tl1_func = '(T / 10000) - 10'
        tl1_pmap = {'T': 'TEMPWAT_L0'}
        expr = NumexprFunction('TEMPWAT_L1', tl1_func, ['T'], param_map=tl1_pmap)
        tempL1_ctxt = ParameterContext('TEMPWAT_L1', param_type=ParameterFunctionType(function=expr), variability=VariabilityEnum.TEMPORAL)
        tempL1_ctxt.uom = 'deg_C'
        tempL1_ctxt_id = self.dataset_management.create_parameter_context(name=tempL1_ctxt.name, parameter_context=tempL1_ctxt.dump(), parameter_type='pfunc', units=tempL1_ctxt.uom)
        self.add_context_to_cleanup(tempL1_ctxt_id)


        # CONDWAT_L1 = (CONDWAT_L0 / 100000) - 0.5
        cl1_func = '(C / 100000) - 0.5'
        cl1_pmap = {'C': 'CONDWAT_L0'}
        expr = NumexprFunction('CONDWAT_L1', cl1_func, ['C'], param_map=cl1_pmap)
        condL1_ctxt = ParameterContext('CONDWAT_L1', param_type=ParameterFunctionType(function=expr), variability=VariabilityEnum.TEMPORAL)
        condL1_ctxt.uom = 'S m-1'
        condL1_ctxt_id = self.dataset_management.create_parameter_context(name=condL1_ctxt.name, parameter_context=condL1_ctxt.dump(), parameter_type='pfunc', units=condL1_ctxt.uom)
        self.add_context_to_cleanup(condL1_ctxt_id)


        # Equation uses p_range, which is a calibration coefficient - Fixing to 679.34040721
        #   PRESWAT_L1 = (PRESWAT_L0 * p_range / (0.85 * 65536)) - (0.05 * p_range)
        pl1_func = '(P * p_range / (0.85 * 65536)) - (0.05 * p_range)'
        pl1_pmap = {'P': 'PRESWAT_L0', 'p_range': 679.34040721}
        expr = NumexprFunction('PRESWAT_L1', pl1_func, ['P', 'p_range'], param_map=pl1_pmap)
        presL1_ctxt = ParameterContext('PRESWAT_L1', param_type=ParameterFunctionType(function=expr), variability=VariabilityEnum.TEMPORAL)
        presL1_ctxt.uom = 'dbar'
        presL1_ctxt_id = self.dataset_management.create_parameter_context(name=presL1_ctxt.name, parameter_context=presL1_ctxt.dump(), parameter_type='pfunc', units=presL1_ctxt.uom)
        self.add_context_to_cleanup(presL1_ctxt_id)


        # Density & practical salinity calculated using the Gibbs Seawater library - available via the python-gsw project:
        #       https://code.google.com/p/python-gsw/ & http://pypi.python.org/pypi/gsw/3.0.1

        # PRACSAL = gsw.SP_from_C((CONDWAT_L1 * 10), TEMPWAT_L1, PRESWAT_L1)
        owner = 'gsw'
        sal_func = 'SP_from_C'
        sal_arglist = ['C', 't', 'p']
        sal_pmap = {'C': NumexprFunction('CONDWAT_L1*10', 'C*10', ['C'], param_map={'C': 'CONDWAT_L1'}), 't': 'TEMPWAT_L1', 'p': 'PRESWAT_L1'}
        sal_kwargmap = None
        expr = PythonFunction('PRACSAL', owner, sal_func, sal_arglist, sal_kwargmap, sal_pmap)
        sal_ctxt = ParameterContext('PRACSAL', param_type=ParameterFunctionType(expr), variability=VariabilityEnum.TEMPORAL)
        sal_ctxt.uom = 'g kg-1'
        sal_ctxt_id = self.dataset_management.create_parameter_context(name=sal_ctxt.name, parameter_context=sal_ctxt.dump(), parameter_type='pfunc', units=sal_ctxt.uom)
        self.add_context_to_cleanup(sal_ctxt_id)


        # absolute_salinity = gsw.SA_from_SP(PRACSAL, PRESWAT_L1, longitude, latitude)
        # conservative_temperature = gsw.CT_from_t(absolute_salinity, TEMPWAT_L1, PRESWAT_L1)
        # DENSITY = gsw.rho(absolute_salinity, conservative_temperature, PRESWAT_L1)
        owner = 'gsw'
        abs_sal_expr = PythonFunction('abs_sal', owner, 'SA_from_SP', ['PRACSAL', 'PRESWAT_L1', 'LON','LAT'])
        cons_temp_expr = PythonFunction('cons_temp', owner, 'CT_from_t', [abs_sal_expr, 'TEMPWAT_L1', 'PRESWAT_L1'])
        dens_expr = PythonFunction('DENSITY', owner, 'rho', [abs_sal_expr, cons_temp_expr, 'PRESWAT_L1'])
        dens_ctxt = ParameterContext('DENSITY', param_type=ParameterFunctionType(dens_expr), variability=VariabilityEnum.TEMPORAL)
        dens_ctxt.uom = 'kg m-3'
        dens_ctxt_id = self.dataset_management.create_parameter_context(name=dens_ctxt.name, parameter_context=dens_ctxt.dump(), parameter_type='pfunc', units=dens_ctxt.uom)
        self.add_context_to_cleanup(dens_ctxt_id)

        
        ids = [t_ctxt_id, lat_ctxt_id, lon_ctxt_id, temp_ctxt_id, cond_ctxt_id, press_ctxt_id, tempL1_ctxt_id, condL1_ctxt_id, presL1_ctxt_id, sal_ctxt_id, dens_ctxt_id]
        contexts = [t_ctxt, lat_ctxt, lon_ctxt, temp_ctxt, cond_ctxt, press_ctxt, tempL1_ctxt, condL1_ctxt, presL1_ctxt, sal_ctxt, dens_ctxt]
        context_ids = [ids[i] for i,ctxt in enumerate(contexts) if ctxt.name in filter_values]
        pdict_name = '_'.join([ctxt.name for ctxt in contexts if ctxt.name in filter_values])

        if pdict_name in self.pdicts:
            return self.pdicts[pdict_name]

        pdict_id = self.dataset_management.create_parameter_dictionary(pdict_name, parameter_context_ids=context_ids, temporal_context='time')
        self.addCleanup(self.dataset_management.delete_parameter_dictionary, pdict_id)
        self.pdicts[pdict_name] = pdict_id
        return pdict_id


class TestTransformWorker(IonIntegrationTestCase):

    def setUp(self):
        self._start_container()
        self.container.start_rel_from_url('res/deploy/r2deploy.yml')
        self.dataset_management_client = DatasetManagementServiceClient(node=self.container.node)
        self.pubsub_client = PubsubManagementServiceClient(node=self.container.node)
        self.dataproductclient = DataProductManagementServiceClient(node=self.container.node)
        self.dataprocessclient = DataProcessManagementServiceClient(node=self.container.node)
        self.processdispatchclient = ProcessDispatcherServiceClient(node=self.container.node)
        self.rrclient = ResourceRegistryServiceClient(node=self.container.node)

        self.time_dom, self.spatial_dom = time_series_domain()



    @attr('LOCOINT')
    @unittest.skipIf(os.getenv('CEI_LAUNCH_TEST', False), 'Skip test while in CEI LAUNCH mode')
    def test_transform_worker(self):
        self.loggerpids = []
        self.data_process_objs = []
        self._output_stream_ids = []

        self.start_transform_worker()

        self.parameter_dict_id = self.dataset_management_client.read_parameter_dictionary_by_name(name='ctd_parsed_param_dict', id_only=True)

        # create the StreamDefinition
        self.stream_def_id = self.pubsub_client.create_stream_definition(name='stream_def', parameter_dictionary_id=self.parameter_dict_id)
        self.addCleanup(self.pubsub_client.delete_stream_definition, self.stream_def_id)

        # create the DataProduct
        input_dp_obj = IonObject(  RT.DataProduct, name='input_data_product', description='input test stream',
                                             temporal_domain = self.time_dom.dump(),  spatial_domain = self.spatial_dom.dump())
        self.input_dp_id = self.dataproductclient.create_data_product(data_product=input_dp_obj,  stream_definition_id=self.stream_def_id)

        #retrieve the Stream for this data product
        stream_ids, assoc_ids = self.rrclient.find_objects(self.input_dp_id, PRED.hasStream, RT.Stream, True)
        self.stream_id = stream_ids[0]

        #create a queue to catch the published granules
        self.subscription_id = self.pubsub_client.create_subscription(name='parsed_subscription', stream_ids=[self.stream_id], exchange_name='parsed_subscription')
        self.addCleanup(self.pubsub_client.delete_subscription, self.subscription_id)

        self.pubsub_client.activate_subscription(self.subscription_id)
        self.addCleanup(self.pubsub_client.deactivate_subscription, self.subscription_id)

        stream_route = self.pubsub_client.read_stream_route(self.stream_id)
        self.publisher = StandaloneStreamPublisher(stream_id=self.stream_id, stream_route=stream_route )

        self.start_event_listener()

        self.dp_list = self.create_data_processes()

        # data_modified is never set by this test, so these wait(5) calls simply
        # give the spawned worker and data processes a fixed amount of time to
        # come up (and, later, to process the published granule).
        self.data_modified = Event()
        self.data_modified.wait(5)

        rdt = RecordDictionaryTool(stream_definition_id=self.stream_def_id)
        rdt['time']         = [0] # time should always come first
        rdt['conductivity'] = [1]
        rdt['pressure']     = [2]
        rdt['salinity']     = [8]

        self.publisher.publish(rdt.to_granule())


        self.data_modified.wait(5)

        # Cleanup processes
        for pid in self.loggerpids:
            self.processdispatchclient.cancel_process(pid)


    def create_data_processes(self):

        #two data processes using one transform and one DPD

        dp1_func_output_dp_id, dp2_func_output_dp_id =  self.create_output_data_products()
        configuration = { 'argument_map':{'arr1':'conductivity', 'arr2':'pressure'}, 'output_param' : 'salinity' }
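        # (added note) With this argument_map the worker is expected to publish
        # salinity = conductivity + pressure; for the granule published above
        # ([1] + [2]) that is [3], which validate_output_granule() asserts.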

        # Set up DPD and DP #2 - array add function
        tf_obj = IonObject(RT.TransformFunction,
            name='add_array_func',
            description='adds values in an array',
            function='add_arrays',
            module="ion_example.add_arrays",
            arguments=['arr1', 'arr2'],
            function_type=TransformFunctionType.TRANSFORM)
        add_array_func_id, rev = self.rrclient.create(tf_obj)

        dpd_obj = IonObject(RT.DataProcessDefinition,
            name='add_arrays',
            description='adds the values of two arrays',
            data_process_type=DataProcessTypeEnum.TRANSFORM_PROCESS,
            )
        self.add_array_dpd_id = self.dataprocessclient.create_data_process_definition_new(data_process_definition=dpd_obj, function_id=add_array_func_id)
        self.dataprocessclient.assign_stream_definition_to_data_process_definition(self.stream_def_id, self.add_array_dpd_id, binding='add_array_func' )

        # Create the data process
        dp1_data_process_id = self.dataprocessclient.create_data_process_new(data_process_definition_id=self.add_array_dpd_id, in_data_product_ids=[self.input_dp_id],
                                                                             out_data_product_ids=[dp1_func_output_dp_id], configuration=configuration)

        # Create the data process
        dp2_func_data_process_id = self.dataprocessclient.create_data_process_new(data_process_definition_id=self.add_array_dpd_id, in_data_product_ids=[self.input_dp_id],
                                                                                  out_data_product_ids=[dp2_func_output_dp_id], configuration=configuration)

        return [dp1_data_process_id, dp2_func_data_process_id]


    def create_output_data_products(self):

        dp1_outgoing_stream_id = self.pubsub_client.create_stream_definition(name='dp1_stream', parameter_dictionary_id=self.parameter_dict_id)

        dp1_output_dp_obj = IonObject(  RT.DataProduct,
            name='data_process1_data_product',
            description='output of add array func',
            temporal_domain = self.time_dom.dump(),
            spatial_domain = self.spatial_dom.dump())

        dp1_func_output_dp_id = self.dataproductclient.create_data_product(dp1_output_dp_obj,  dp1_outgoing_stream_id)
        self.addCleanup(self.dataproductclient.delete_data_product, dp1_func_output_dp_id)
        # Retrieve the id of the OUTPUT stream from the out Data Product and add to granule logger
        stream_ids, _ = self.rrclient.find_objects(dp1_func_output_dp_id, PRED.hasStream, None, True)
        self._output_stream_ids.append(stream_ids[0])


        dp2_func_outgoing_stream_id = self.pubsub_client.create_stream_definition(name='dp2_stream', parameter_dictionary_id=self.parameter_dict_id)

        dp2_func_output_dp_obj = IonObject(  RT.DataProduct,
            name='data_process2_data_product',
            description='output of add array func',
            temporal_domain = self.time_dom.dump(),
            spatial_domain = self.spatial_dom.dump())

        dp2_func_output_dp_id = self.dataproductclient.create_data_product(dp2_func_output_dp_obj,  dp2_func_outgoing_stream_id)
        self.addCleanup(self.dataproductclient.delete_data_product, dp2_func_output_dp_id)
        # Retrieve the id of the OUTPUT stream from the out Data Product and add to granule logger
        stream_ids, _ = self.rrclient.find_objects(dp2_func_output_dp_id, PRED.hasStream, None, True)
        self._output_stream_ids.append(stream_ids[0])


        subscription_id = self.pubsub_client.create_subscription('validator', data_product_ids=[dp1_func_output_dp_id, dp2_func_output_dp_id])
        self.addCleanup(self.pubsub_client.delete_subscription, subscription_id)

        def on_granule(msg, route, stream_id):
            log.debug('recv_packet stream_id: %s route: %s   msg: %s', stream_id, route, msg)
            self.validate_output_granule(msg, route, stream_id)


        validator = StandaloneStreamSubscriber('validator', callback=on_granule)
        validator.start()
        self.addCleanup(validator.stop)

        self.pubsub_client.activate_subscription(subscription_id)
        self.addCleanup(self.pubsub_client.deactivate_subscription, subscription_id)

        return dp1_func_output_dp_id, dp2_func_output_dp_id


    def validate_event(self, *args, **kwargs):
        """
        This method is a callback function for receiving DataProcessStatusEvent.
        """
        data_process_event = args[0]
        log.debug("DataProcessStatusEvent: %s" ,  str(data_process_event.__dict__))
        self.assertTrue( data_process_event.origin in self.dp_list)


    def validate_output_granule(self, msg, route, stream_id):
        self.assertTrue( stream_id in self._output_stream_ids)

        rdt = RecordDictionaryTool.load_from_granule(msg)
        log.debug('validate_output_granule  rdt: %s', rdt)
        sal_val = rdt['salinity']
        #self.assertTrue( sal_val == 3)
        np.testing.assert_array_equal(sal_val, np.array([3]))

    def start_event_listener(self):

        es = EventSubscriber(event_type=OT.DataProcessStatusEvent, callback=self.validate_event)
        es.start()

        self.addCleanup(es.stop)


    def start_transform_worker(self):
        config = DotDict()
        config.process.queue_name = 'parsed_subscription'

        self.container.spawn_process(
            name='transform_worker',
            module='ion.processes.data.transforms.transform_worker',
            cls='TransformWorker',
            config=config
        )


    def test_download(self):
        egg_url = 'http://sddevrepo.oceanobservatories.org/releases/ion_example-0.1-py2.7.egg'
        egg_path = TransformWorker.download_egg(egg_url)

        import pkg_resources
        pkg_resources.working_set.add_entry(egg_path)

        from ion_example.add_arrays import add_arrays

        a = add_arrays(1,2)
        self.assertEquals(a,3)
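
# --- Illustrative sketch (added; not part of the original tests) ----------------
# The TestTransformWorker tests above rely on an external egg, ion_example, that
# provides an add_arrays(arr1, arr2) transform.  A minimal implementation that is
# consistent with the assertions above (add_arrays(1, 2) == 3, and the published
# salinity equalling conductivity + pressure element-wise) might look like the
# sketch below; the real egg may well differ.
def _example_add_arrays(arr1, arr2):
    # Element-wise sum of two array-like inputs; scalars work as 0-d arrays.
    import numpy as np
    return np.asarray(arr1) + np.asarray(arr2)
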
class TestGranulePublish(IonIntegrationTestCase):
    def setUp(self):
        # Start container
        self._start_container()

        self.container.start_rel_from_url('res/deploy/r2deploy.yml')

        # Now create client to DataProductManagementService
        self.rrclient = ResourceRegistryServiceClient(node=self.container.node)
        self.pubsubclient = PubsubManagementServiceClient(
            node=self.container.node)
        self.dpclient = DataProductManagementServiceClient(
            node=self.container.node)
        self.processdispatchclient = ProcessDispatcherServiceClient(
            node=self.container.node)
        self.dataprocessclient = DataProcessManagementServiceClient(
            node=self.container.node)
        self.dataproductclient = DataProductManagementServiceClient(
            node=self.container.node)
        self.dataset_management = DatasetManagementServiceClient()

    def create_logger(self, name, stream_id=''):

        # logger process
        producer_definition = ProcessDefinition(name=name + '_logger')
        producer_definition.executable = {
            'module': 'ion.processes.data.stream_granule_logger',
            'class': 'StreamGranuleLogger'
        }

        logger_procdef_id = self.processdispatchclient.create_process_definition(
            process_definition=producer_definition)
        configuration = {
            'process': {
                'stream_id': stream_id,
            }
        }
        pid = self.processdispatchclient.schedule_process(
            process_definition_id=logger_procdef_id,
            configuration=configuration)

        return pid

    #overriding trigger function here to use new granule
    def test_granule_publish(self):
        log.debug("test_granule_publish ")
        self.loggerpids = []

        #retrieve the param dict from the repository
        pdict_id = self.dataset_management.read_parameter_dictionary_by_name(
            'ctd_parsed_param_dict', id_only=True)
        stream_definition_id = self.pubsubclient.create_stream_definition(
            'parsed stream', parameter_dictionary_id=pdict_id)

        tdom, sdom = time_series_domain()

        dp_obj = IonObject(RT.DataProduct,
                           name=str(uuid.uuid4()),
                           description='ctd stream test',
                           temporal_domain=tdom.dump(),
                           spatial_domain=sdom.dump())

        data_product_id1 = self.dpclient.create_data_product(
            data_product=dp_obj, stream_definition_id=stream_definition_id)

        # Retrieve the id of the output stream of the out Data Product
        stream_ids, _ = self.rrclient.find_objects(data_product_id1,
                                                   PRED.hasStream, None, True)
        log.debug('test_granule_publish: Data product streams1 = %s',
                  stream_ids)

        pid = self.create_logger('ctd_parsed', stream_ids[0])
        self.loggerpids.append(pid)

        rdt = RecordDictionaryTool(stream_definition_id=stream_definition_id)

        #create the publisher from the stream route
        stream_route = self.pubsubclient.read_stream_route(stream_ids[0])
        publisher = StandaloneStreamPublisher(stream_ids[0], stream_route)

        # this is one sample from the ctd driver
        tomato = {
            "driver_timestamp": 3555971105.1268806,
            "instrument_id": "ABC-123",
            "pkt_format_id": "JSON_Data",
            "pkt_version": 1,
            "preferred_timestamp": "driver_timestamp",
            "quality_flag": "ok",
            "stream_name": "parsed",
            "values": [
                {"value": 22.9304, "value_id": "temp"},
                {"value": 51.57381, "value_id": "conductivity"},
                {"value": 915.551, "value_id": "pressure"},
            ]
        }

        for value in tomato['values']:
            log.debug(
                "test_granule_publish: Looping tomato values  value_id: %s    value: %s ",
                str(value['value_id']), str(value['value']))

            if value['value_id'] in rdt:
                rdt[value['value_id']] = numpy.array([value['value']])
                log.debug(
                    "test_granule_publish: Added data item  %s  value: %s ",
                    str(value['value_id']), str(value['value']))

        g = rdt.to_granule()

        publisher.publish(g)

        gevent.sleep(3)

        for pid in self.loggerpids:
            self.processdispatchclient.cancel_process(pid)

        #--------------------------------------------------------------------------------
        # Cleanup data products
        #--------------------------------------------------------------------------------
        dp_ids, _ = self.rrclient.find_resources(restype=RT.DataProduct,
                                                 id_only=True)

        for dp_id in dp_ids:
            self.dataproductclient.delete_data_product(dp_id)


class TestTransformWorkerSubscriptions(IonIntegrationTestCase):

    def setUp(self):
        self._start_container()
        self.container.start_rel_from_url('res/deploy/r2deploy.yml')
        self.dataset_management_client = DatasetManagementServiceClient(node=self.container.node)
        self.pubsub_client = PubsubManagementServiceClient(node=self.container.node)
        self.dataproductclient = DataProductManagementServiceClient(node=self.container.node)
        self.dataprocessclient = DataProcessManagementServiceClient(node=self.container.node)
        self.processdispatchclient = ProcessDispatcherServiceClient(node=self.container.node)
        self.damsclient = DataAcquisitionManagementServiceClient(node=self.container.node)
        self.rrclient = ResourceRegistryServiceClient(node=self.container.node)

        self.time_dom, self.spatial_dom = time_series_domain()
        self.wait_time = CFG.get_safe('endpoint.receive.timeout', 10)



    @attr('LOCOINT')
    @unittest.skipIf(os.getenv('CEI_LAUNCH_TEST', False), 'Skip test while in CEI LAUNCH mode')
    def test_multi_subscriptions(self):
        self.dp_list = []
        self.event1_verified = Event()
        self.event2_verified = Event()

        self.parameter_dict_id = self.dataset_management_client.read_parameter_dictionary_by_name(name='ctd_parsed_param_dict', id_only=True)

        # create the StreamDefinition
        self.stream_def_id = self.pubsub_client.create_stream_definition(name='stream_def', parameter_dictionary_id=self.parameter_dict_id)
        self.addCleanup(self.pubsub_client.delete_stream_definition, self.stream_def_id)

        # create the DataProduct
        input_dp_obj = IonObject(  RT.DataProduct, name='input_data_product_one', description='input test stream one',
                                             temporal_domain = self.time_dom.dump(),  spatial_domain = self.spatial_dom.dump())
        self.input_dp_one_id = self.dataproductclient.create_data_product(data_product=input_dp_obj,  stream_definition_id=self.stream_def_id)

        input_dp_obj = IonObject(  RT.DataProduct, name='input_data_product_two', description='input test stream two',
                                             temporal_domain = self.time_dom.dump(),  spatial_domain = self.spatial_dom.dump())
        self.input_dp_two_id = self.dataproductclient.create_data_product(data_product=input_dp_obj,  stream_definition_id=self.stream_def_id)

        #retrieve the Stream for this data product
        stream_ids, assoc_ids = self.rrclient.find_objects(self.input_dp_one_id, PRED.hasStream, RT.Stream, True)
        self.stream_one_id = stream_ids[0]

        stream_ids, assoc_ids = self.rrclient.find_objects(self.input_dp_two_id, PRED.hasStream, RT.Stream, True)
        self.stream_two_id = stream_ids[0]


        dpd_id = self.create_data_process_definition()
        dp1_func_output_dp_id, dp2_func_output_dp_id =  self.create_output_data_products()
        first_dp_id = self.create_data_process_one(dpd_id, dp1_func_output_dp_id)

        second_dp_id = self.create_data_process_two(dpd_id, self.input_dp_two_id, dp2_func_output_dp_id)

        #retrieve subscription from data process
        subscription_objs, _ = self.rrclient.find_objects(subject=first_dp_id, predicate=PRED.hasSubscription, object_type=RT.Subscription, id_only=False)
        log.debug('test_transform_worker subscription_obj:  %s', subscription_objs[0])

        #create subscription to stream ONE, create data process and publish granule on stream ONE

        #create a queue to catch the published granules of stream ONE
        self.subscription_one_id = self.pubsub_client.create_subscription(name='parsed_subscription_one', stream_ids=[self.stream_one_id], exchange_name=subscription_objs[0].exchange_name)
        self.addCleanup(self.pubsub_client.delete_subscription, self.subscription_one_id)

        self.pubsub_client.activate_subscription(self.subscription_one_id)
        self.addCleanup(self.pubsub_client.deactivate_subscription, self.subscription_one_id)

        stream_route_one = self.pubsub_client.read_stream_route(self.stream_one_id)
        self.publisher_one = StandaloneStreamPublisher(stream_id=self.stream_one_id, stream_route=stream_route_one )

        self.start_event_listener()

        #data process 1 adds conductivity + pressure and puts the result in salinity
        rdt = RecordDictionaryTool(stream_definition_id=self.stream_def_id)
        rdt['time']         = [0] # time should always come first
        rdt['conductivity'] = [1]
        rdt['pressure']     = [2]
        rdt['salinity']     = [8]

        self.publisher_one.publish(msg=rdt.to_granule(), stream_id=self.stream_one_id)
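        # Expected result for this granule (checked in validate_output_granule):
        # data process one computes salinity = conductivity + pressure = 1 + 2 = 3.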

        #retrieve subscription from data process
        subscription_objs, _ = self.rrclient.find_objects(subject=second_dp_id, predicate=PRED.hasSubscription, object_type=RT.Subscription, id_only=False)
        log.debug('test_transform_worker subscription_obj:  %s', subscription_objs[0])

        #create subscription to stream TWO (on data process two's queue), then publish granules on streams ONE and TWO

        #create a queue to catch the published granules of stream TWO
        self.subscription_two_id = self.pubsub_client.create_subscription(name='parsed_subscription_one_two', stream_ids=[self.stream_two_id], exchange_name=subscription_objs[0].exchange_name)
        self.addCleanup(self.pubsub_client.delete_subscription, self.subscription_two_id)

        self.pubsub_client.activate_subscription(self.subscription_two_id)
        self.addCleanup(self.pubsub_client.deactivate_subscription, self.subscription_two_id)

        stream_route_two = self.pubsub_client.read_stream_route(self.stream_two_id)
        self.publisher_two = StandaloneStreamPublisher(stream_id=self.stream_two_id, stream_route=stream_route_two )

        #data process 1 adds conductivity + pressure and puts the result in salinity
        rdt = RecordDictionaryTool(stream_definition_id=self.stream_def_id)
        rdt['time']         = [0] # time should always come first
        rdt['conductivity'] = [1]
        rdt['pressure']     = [2]
        rdt['salinity']     = [8]

        self.publisher_one.publish(msg=rdt.to_granule(), stream_id=self.stream_one_id)

        #data process 2 adds salinity + pressure and puts the result in conductivity
        rdt = RecordDictionaryTool(stream_definition_id=self.stream_def_id)
        rdt['time']         = [0] # time should always come first
        rdt['conductivity'] = [22]
        rdt['pressure']     = [4]
        rdt['salinity']     = [1]

        self.publisher_two.publish(msg=rdt.to_granule(), stream_id=self.stream_two_id)
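        # Expected result for this granule (checked in validate_output_granule):
        # data process two computes conductivity = salinity + pressure = 1 + 4 = 5.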


        self.assertTrue(self.event2_verified.wait(self.wait_time))
        self.assertTrue(self.event1_verified.wait(self.wait_time))


    @attr('LOCOINT')
    @unittest.skipIf(os.getenv('CEI_LAUNCH_TEST', False), 'Skip test while in CEI LAUNCH mode')
    def test_two_transforms_inline(self):
        self.dp_list = []
        self.event1_verified = Event()
        self.event2_verified = Event()

        self.parameter_dict_id = self.dataset_management_client.read_parameter_dictionary_by_name(name='ctd_parsed_param_dict', id_only=True)

        # create the StreamDefinition
        self.stream_def_id = self.pubsub_client.create_stream_definition(name='stream_def', parameter_dictionary_id=self.parameter_dict_id)
        self.addCleanup(self.pubsub_client.delete_stream_definition, self.stream_def_id)

        # create the DataProduct
        input_dp_obj = IonObject(  RT.DataProduct, name='input_data_product_one', description='input test stream one',
                                             temporal_domain = self.time_dom.dump(),  spatial_domain = self.spatial_dom.dump())
        self.input_dp_one_id = self.dataproductclient.create_data_product(data_product=input_dp_obj,  stream_definition_id=self.stream_def_id)


        dpd_id = self.create_data_process_definition()
        dp1_func_output_dp_id, dp2_func_output_dp_id =  self.create_output_data_products()

        first_dp_id = self.create_data_process_one(dpd_id, dp1_func_output_dp_id)
        second_dp_id = self.create_data_process_two(dpd_id, dp1_func_output_dp_id, dp2_func_output_dp_id)

        #retrieve subscription from data process one
        subscription_objs, _ = self.rrclient.find_objects(subject=first_dp_id, predicate=PRED.hasSubscription, object_type=RT.Subscription, id_only=False)
        log.debug('test_transform_worker subscription_obj:  %s', subscription_objs[0])

        #retrieve the Streams for these data products
        stream_ids, assoc_ids = self.rrclient.find_objects(self.input_dp_one_id, PRED.hasStream, RT.Stream, True)
        self.stream_one_id = stream_ids[0]
        #the input to data process two is the output from data process one
        stream_ids, assoc_ids = self.rrclient.find_objects(dp1_func_output_dp_id, PRED.hasStream, RT.Stream, True)
        self.stream_two_id = stream_ids[0]

        # Run provenance on the output dataproduct of the second data process to see all the links
        # are as expected
        output_data_product_provenance = self.dataproductclient.get_data_product_provenance(dp2_func_output_dp_id)

        # Do a basic check to see that there are 3 entries in the provenance graph:
        # the input data product, the intermediate (dp1) output, and the final (dp2) output.
        self.assertEqual(len(output_data_product_provenance), 3)
        # confirm that the linking from the output dataproduct to input dataproduct is correct
        self.assertTrue(dp1_func_output_dp_id in output_data_product_provenance[dp2_func_output_dp_id]['parents'])
        self.assertTrue(self.input_dp_one_id in output_data_product_provenance[dp1_func_output_dp_id]['parents'])

        #create subscription to stream ONE and stream TWO, then publish a granule on stream ONE

        #create a queue to catch the published granules of both streams
        subscription_id = self.pubsub_client.create_subscription(name='parsed_subscription', stream_ids=[self.stream_one_id, self.stream_two_id], exchange_name=subscription_objs[0].exchange_name)
        self.addCleanup(self.pubsub_client.delete_subscription, subscription_id)

        self.pubsub_client.activate_subscription(subscription_id)
        self.addCleanup(self.pubsub_client.deactivate_subscription, subscription_id)

        stream_route_one = self.pubsub_client.read_stream_route(self.stream_one_id)
        self.publisher_one = StandaloneStreamPublisher(stream_id=self.stream_one_id, stream_route=stream_route_one )


        #retrieve subscription from data process
        subscription_objs, _ = self.rrclient.find_objects(subject=second_dp_id, predicate=PRED.hasSubscription, object_type=RT.Subscription, id_only=False)
        log.debug('test_transform_worker subscription_obj:  %s', subscription_objs[0])

        #data process 1 adds conductivity + pressure and puts the result in salinity
        #data process 2 adds salinity + pressure and puts the result in conductivity

        self.start_event_listener()

        rdt = RecordDictionaryTool(stream_definition_id=self.stream_def_id)
        rdt['time']         = [0] # time should always come first
        rdt['conductivity'] = [1]
        rdt['pressure']     = [2]
        rdt['salinity']     = [8]

        self.publisher_one.publish(msg=rdt.to_granule(), stream_id=self.stream_one_id)
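        # Expected results (checked in validate_output_granule): data process one should emit
        # salinity = 1 + 2 = 3 on its output stream, and data process two, chained off that
        # output, should emit conductivity = 5 on its own output stream.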


        self.assertTrue(self.event2_verified.wait(self.wait_time))
        self.assertTrue(self.event1_verified.wait(self.wait_time))



    def create_data_process_definition(self):

        #two data processes using one transform and one DPD

        # Set up DPD and DP #2 - array add function
        tf_obj = IonObject(RT.TransformFunction,
            name='add_array_func',
            description='adds values in an array',
            function='add_arrays',
            module="ion_example.add_arrays",
            arguments=['arr1', 'arr2'],
            function_type=TransformFunctionType.TRANSFORM,
            uri='http://sddevrepo.oceanobservatories.org/releases/ion_example-0.1-py2.7.egg'
            )
        add_array_func_id, rev = self.rrclient.create(tf_obj)

        dpd_obj = IonObject(RT.DataProcessDefinition,
            name='add_arrays',
            description='adds the values of two arrays',
            data_process_type=DataProcessTypeEnum.TRANSFORM_PROCESS,
            )
        add_array_dpd_id = self.dataprocessclient.create_data_process_definition(data_process_definition=dpd_obj, function_id=add_array_func_id)
        self.dataprocessclient.assign_stream_definition_to_data_process_definition(self.stream_def_id, add_array_dpd_id, binding='add_array_func' )

        return add_array_dpd_id
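
    # The 'add_arrays' function named in the TransformFunction above ships in the external
    # ion_example egg and is not included in this listing. Purely as an illustration of the
    # element-wise sum these tests rely on, it presumably looks something like the sketch
    # below (the numpy-based body is an assumption, not the packaged implementation):
    #
    #     import numpy as np
    #
    #     def add_arrays(arr1, arr2):
    #         # element-wise sum of the two mapped input parameters
    #         return np.asarray(arr1) + np.asarray(arr2)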

    def create_data_process_one(self, data_process_definition_id, output_dataproduct):

        # Create the data process
        #data process 1 adds conductivity + pressure and puts the result in salinity
        argument_map = {"arr1":"conductivity", "arr2":"pressure"}
        output_param = "salinity" 
        dp1_data_process_id = self.dataprocessclient.create_data_process(
                    data_process_definition_id=data_process_definition_id, 
                    inputs=[self.input_dp_one_id], 
                    outputs=[output_dataproduct], 
                    argument_map=argument_map, 
                    out_param_name=output_param)
        self.damsclient.register_process(dp1_data_process_id)
        self.addCleanup(self.dataprocessclient.delete_data_process, dp1_data_process_id)
        self.dp_list.append(dp1_data_process_id)

        return dp1_data_process_id


    def create_data_process_two(self, data_process_definition_id, input_dataproduct, output_dataproduct):

        # Create the data process
        #data process 2 adds salinity + pressure and puts the result in conductivity
        argument_map = {'arr1':'salinity', 'arr2':'pressure'}
        output_param = 'conductivity'
        dp2_func_data_process_id = self.dataprocessclient.create_data_process(
                    data_process_definition_id=data_process_definition_id, 
                    inputs=[input_dataproduct],
                    outputs=[output_dataproduct], 
                    argument_map=argument_map, 
                    out_param_name=output_param)
        self.damsclient.register_process(dp2_func_data_process_id)
        self.addCleanup(self.dataprocessclient.delete_data_process, dp2_func_data_process_id)
        self.dp_list.append(dp2_func_data_process_id)

        return  dp2_func_data_process_id


    def create_output_data_products(self):

        dp1_outgoing_stream_id = self.pubsub_client.create_stream_definition(name='dp1_stream', parameter_dictionary_id=self.parameter_dict_id)

        dp1_output_dp_obj = IonObject(  RT.DataProduct,
            name='data_process1_data_product',
            description='output of add array func',
            temporal_domain = self.time_dom.dump(),
            spatial_domain = self.spatial_dom.dump())

        dp1_func_output_dp_id = self.dataproductclient.create_data_product(dp1_output_dp_obj,  dp1_outgoing_stream_id)
        self.addCleanup(self.dataproductclient.delete_data_product, dp1_func_output_dp_id)
        # Retrieve the id of the OUTPUT stream of the output Data Product (used later by the granule validator)
        stream_ids, _ = self.rrclient.find_objects(dp1_func_output_dp_id, PRED.hasStream, None, True)
        self._output_stream_one_id = stream_ids[0]


        dp2_func_outgoing_stream_id = self.pubsub_client.create_stream_definition(name='dp2_stream', parameter_dictionary_id=self.parameter_dict_id)

        dp2_func_output_dp_obj = IonObject(  RT.DataProduct,
            name='data_process2_data_product',
            description='output of add array func',
            temporal_domain = self.time_dom.dump(),
            spatial_domain = self.spatial_dom.dump())

        dp2_func_output_dp_id = self.dataproductclient.create_data_product(dp2_func_output_dp_obj,  dp2_func_outgoing_stream_id)
        self.addCleanup(self.dataproductclient.delete_data_product, dp2_func_output_dp_id)
        # Retrieve the id of the OUTPUT stream of the output Data Product (used later by the granule validator)
        stream_ids, _ = self.rrclient.find_objects(dp2_func_output_dp_id, PRED.hasStream, None, True)
        self._output_stream_two_id = stream_ids[0]


        subscription_id = self.pubsub_client.create_subscription('validator', data_product_ids=[dp1_func_output_dp_id, dp2_func_output_dp_id])
        self.addCleanup(self.pubsub_client.delete_subscription, subscription_id)

        def on_granule(msg, route, stream_id):
            log.debug('recv_packet stream_id: %s route: %s   msg: %s', stream_id, route, msg)
            self.validate_output_granule(msg, route, stream_id)


        validator = StandaloneStreamSubscriber('validator', callback=on_granule)
        validator.start()
        self.addCleanup(validator.stop)

        self.pubsub_client.activate_subscription(subscription_id)
        self.addCleanup(self.pubsub_client.deactivate_subscription, subscription_id)

        return dp1_func_output_dp_id, dp2_func_output_dp_id


    def validate_event(self, *args, **kwargs):
        """
        This method is a callback function for receiving DataProcessStatusEvent.
        """
        data_process_event = args[0]
        log.debug("DataProcessStatusEvent: %s" ,  str(data_process_event.__dict__))

        #skip the initial 'data process assigned to transform worker' notification; any other
        #status event should originate from one of the data processes created by this test
        if 'data process assigned to transform worker' not in data_process_event.description:
            self.assertIn( data_process_event.origin, self.dp_list)


    def validate_output_granule(self, msg, route, stream_id):
        self.assertTrue( stream_id in [self._output_stream_one_id, self._output_stream_two_id])

        rdt = RecordDictionaryTool.load_from_granule(msg)
        log.debug('validate_output_granule  stream_id: %s', stream_id)

        if stream_id == self._output_stream_one_id:
            sal_val = rdt['salinity']
            log.debug('validate_output_granule  sal_val: %s', sal_val)
            np.testing.assert_array_equal(sal_val, np.array([3]))
            self.event1_verified.set()
        else:
            cond_val = rdt['conductivity']
            log.debug('validate_output_granule  cond_val: %s', cond_val)
            np.testing.assert_array_equal(cond_val, np.array([5]))
            self.event2_verified.set()

    def start_event_listener(self):

        es = EventSubscriber(event_type=OT.DataProcessStatusEvent, callback=self.validate_event)
        es.start()

        self.addCleanup(es.stop)


class ExternalDatasetAgentTestBase(object):

    # Agent parameters.
    EDA_RESOURCE_ID = '123xyz'
    EDA_NAME = 'ExampleEDA'
    EDA_MOD = 'ion.agents.data.external_dataset_agent'
    EDA_CLS = 'ExternalDatasetAgent'

    """
    Test cases for instrument agent class. Functions in this class provide
    instrument agent integration tests and provide a tutorial on use of
    the agent setup and interface.
    """
    def setUp(self):
        """
        Initialize test members.
        """

        #log.warn('Starting the container')
        # Start container.
        self._start_container()

        # Bring up services in a deploy file
        #log.warn('Starting the rel')
        self.container.start_rel_from_url('res/deploy/r2deploy.yml')

        # Create a pubsub client to create streams.
        #        log.warn('Init a pubsub client')
        self._pubsub_client = PubsubManagementServiceClient(node=self.container.node)
        #        log.warn('Init a ContainerAgentClient')
        self._container_client = ContainerAgentClient(node=self.container.node, name=self.container.name)

        # Data async and subscription  TODO: Replace with new subscriber
        self._finished_count = None
        #TODO: Switch to gevent.queue.Queue
        self._async_finished_result = AsyncResult()
        self._finished_events_received = []
        self._finished_event_subscriber = None
        self._start_finished_event_subscriber()
        self.addCleanup(self._stop_finished_event_subscriber)

        # TODO: Finish dealing with the resources and whatnot
        # TODO: DVR_CONFIG and (potentially) stream_config could both be reconfigured in self._setup_resources()
        self._setup_resources()

        #TG: Setup/configure the granule logger to log granules as they're published

        # Create agent config.
        agent_config = {
            'driver_config': self.DVR_CONFIG,
            'stream_config': {},
            'agent': {'resource_id': self.EDA_RESOURCE_ID},
            'test_mode': True
        }

        # Start instrument agent.
        self._ia_pid = None
        log.debug('TestInstrumentAgent.setup(): starting EDA.')
        self._ia_pid = self._container_client.spawn_process(
            name=self.EDA_NAME,
            module=self.EDA_MOD,
            cls=self.EDA_CLS,
            config=agent_config
        )
        log.info('Agent pid=%s.', str(self._ia_pid))

        # Start a resource agent client to talk with the instrument agent.
        self._ia_client = None
        self._ia_client = ResourceAgentClient(self.EDA_RESOURCE_ID, process=FakeProcess())
        log.info('Got ia client %s.', str(self._ia_client))

    ########################################
    # Private "setup" functions
    ########################################

    def _setup_resources(self):
        raise NotImplementedError('_setup_resources must be implemented in the subclass')

    def create_stream_and_logger(self, name, stream_id='', pdict=None):

        stream_def_id = ''
        if not stream_id:
            if pdict:
                stream_def_id = self._pubsub_client.create_stream_definition(parameter_dictionary=pdict.dump(), stream_type='stream')
            stream_id, route = self._pubsub_client.create_stream(name=name, exchange_point='science_data', stream_definition_id=stream_def_id)
        else:
            route = self._pubsub_client.read_stream_route(stream_id=stream_id)
            stream_def = self._pubsub_client.read_stream_definition(stream_id=stream_id)
            stream_def_id = stream_def._id

        pid = self._container_client.spawn_process(
            name=name + '_logger',
            module='ion.processes.data.stream_granule_logger',
            cls='StreamGranuleLogger',
            config={'process': {'stream_id': stream_id}}
        )
        log.info('Started StreamGranuleLogger \'{0}\' subscribed to stream_id={1}'.format(pid, stream_id))

        return stream_id, route, stream_def_id

    def _start_finished_event_subscriber(self):

        def consume_event(*args, **kwargs):
            if args[0].description == 'TestingFinished':
                log.debug('TestingFinished event received')
                self._finished_events_received.append(args[0])
                if self._finished_count and self._finished_count == len(self._finished_events_received):
                    log.debug('Finishing test...')
                    self._async_finished_result.set(len(self._finished_events_received))
                    log.debug('Called self._async_finished_result.set({0})'.format(len(self._finished_events_received)))

        self._finished_event_subscriber = EventSubscriber(event_type='DeviceEvent', callback=consume_event)
        self._finished_event_subscriber.start()

    def _stop_finished_event_subscriber(self):
        if self._finished_event_subscriber:
            self._finished_event_subscriber.stop()
            self._finished_event_subscriber = None

    ########################################
    # Custom assertion functions
    ########################################
    def assertListsEqual(self, lst1, lst2):
        # Compare the lists order-insensitively and actually assert, so a mismatch fails the test.
        self.assertEqual(sorted(lst1), sorted(lst2))

    def assertSampleDict(self, val):
        """
        Verify the value is a sample dictionary for the sbe37.
        """
        #{'p': [-6.945], 'c': [0.08707], 't': [20.002], 'time': [1333752198.450622]}
        self.assertTrue(isinstance(val, dict))
        self.assertTrue('c' in val)
        self.assertTrue('t' in val)
        self.assertTrue('p' in val)
        self.assertTrue('time' in val)
        c = val['c'][0]
        t = val['t'][0]
        p = val['p'][0]
        time = val['time'][0]

        self.assertTrue(isinstance(c, float))
        self.assertTrue(isinstance(t, float))
        self.assertTrue(isinstance(p, float))
        self.assertTrue(isinstance(time, float))

    def assertParamDict(self, pd, all_params=False):
        """
        Verify all device parameters exist and are correct type.
        """
        if all_params:
            self.assertEqual(set(pd.keys()), set(PARAMS.keys()))
            for (key, type_val) in PARAMS.iteritems():
                if type_val == list or type_val == tuple:
                    self.assertTrue(isinstance(pd[key], (list, tuple)))
                else:
                    self.assertTrue(isinstance(pd[key], type_val))

        else:
            for (key, val) in pd.iteritems():
                self.assertTrue(key in PARAMS)
                self.assertTrue(isinstance(val, PARAMS[key]))

    def assertParamVals(self, params, correct_params):
        """
        Verify parameters take the correct values.
        """
        self.assertEqual(set(params.keys()), set(correct_params.keys()))
        for (key, val) in params.iteritems():
            correct_val = correct_params[key]
            if isinstance(val, float):
                # Verify to 1% of the larger value.
                max_val = max(abs(val), abs(correct_val))
                self.assertAlmostEqual(val, correct_val, delta=max_val * .01)

            elif isinstance(val, (list, tuple)):
                # list or tuple.
                self.assertEqual(list(val), list(correct_val))

            else:
                # int, bool, str.
                self.assertEqual(val, correct_val)

    ########################################
    # Test functions
    ########################################
    def test_acquire_data_while_streaming(self):
        # Test instrument driver execute interface to start and stop streaming mode.
        state = self._ia_client.get_agent_state()
        self.assertEqual(state, ResourceAgentState.UNINITIALIZED)

        cmd = AgentCommand(command=ResourceAgentEvent.INITIALIZE)
        self._ia_client.execute_agent(cmd)
        state = self._ia_client.get_agent_state()
        self.assertEqual(state, ResourceAgentState.INACTIVE)

        cmd = AgentCommand(command=ResourceAgentEvent.GO_ACTIVE)
        self._ia_client.execute_agent(cmd)
        state = self._ia_client.get_agent_state()
        self.assertEqual(state, ResourceAgentState.IDLE)

        cmd = AgentCommand(command=ResourceAgentEvent.RUN)
        self._ia_client.execute_agent(cmd)
        state = self._ia_client.get_agent_state()
        self.assertEqual(state, ResourceAgentState.COMMAND)

        params = {
            'POLLING_INTERVAL': 3
        }
        self._ia_client.set_resource(params)

        self._finished_count = 1

        cmd = AgentCommand(command=DriverEvent.START_AUTOSAMPLE)
        self._ia_client.execute_resource(cmd)
        state = self._ia_client.get_agent_state()
        self.assertEqual(state, ResourceAgentState.STREAMING)

        config = get_safe(self.DVR_CONFIG, 'dh_cfg', {})
        log.info('Send a constrained request for data: constraints = HIST_CONSTRAINTS_1')
        config['stream_id'], config['stream_route'], _ = self.create_stream_and_logger(name='stream_id_for_historical_1')
        config['constraints'] = self.HIST_CONSTRAINTS_1
        cmd = AgentCommand(command=DriverEvent.ACQUIRE_SAMPLE, args=[config])
        self._ia_client.execute_resource(cmd)

        cmd = AgentCommand(command=DriverEvent.STOP_AUTOSAMPLE)
        self._ia_client.execute_resource(cmd)
        state = self._ia_client.get_agent_state()
        self.assertEqual(state, ResourceAgentState.COMMAND)

        finished = self._async_finished_result.get(timeout=120)
        self.assertEqual(finished, self._finished_count)

        cmd = AgentCommand(command=ResourceAgentEvent.RESET)
        self._ia_client.execute_agent(cmd)
        state = self._ia_client.get_agent_state()
        self.assertEqual(state, ResourceAgentState.UNINITIALIZED)

    def test_acquire_data(self):

        cmd = AgentCommand(command=ResourceAgentEvent.INITIALIZE)
        self._ia_client.execute_agent(cmd)
        state = self._ia_client.get_agent_state()
        self.assertEqual(state, ResourceAgentState.INACTIVE)

        cmd = AgentCommand(command=ResourceAgentEvent.GO_ACTIVE)
        self._ia_client.execute_agent(cmd)
        state = self._ia_client.get_agent_state()
        self.assertEqual(state, ResourceAgentState.IDLE)

        cmd = AgentCommand(command=ResourceAgentEvent.RUN)
        self._ia_client.execute_agent(cmd)
        state = self._ia_client.get_agent_state()
        self.assertEqual(state, ResourceAgentState.COMMAND)

        log.warn('Send an unconstrained request for data (\'new data\')')
        cmd = AgentCommand(command=DriverEvent.ACQUIRE_SAMPLE)
        self._ia_client.execute_resource(command=cmd)
        state = self._ia_client.get_agent_state()
        log.info(state)
        self.assertEqual(state, ResourceAgentState.COMMAND)

        self._finished_count = 2

        config_mods = {}

        log.info('Send a constrained request for data: constraints = HIST_CONSTRAINTS_1')
        config_mods['stream_id'], config_mods['stream_route'], _ = self.create_stream_and_logger(name='stream_id_for_historical_1')
        config_mods['constraints'] = self.HIST_CONSTRAINTS_1
        cmd = AgentCommand(command=DriverEvent.ACQUIRE_SAMPLE, args=[config_mods])
        self._ia_client.execute_resource(cmd)
        state = self._ia_client.get_agent_state()
        self.assertEqual(state, ResourceAgentState.COMMAND)

        log.info('Send a second constrained request for data: constraints = HIST_CONSTRAINTS_2')
        config_mods['stream_id'], config_mods['stream_route'], _ = self.create_stream_and_logger(name='stream_id_for_historical_2')
        config_mods['constraints'] = self.HIST_CONSTRAINTS_2
        cmd = AgentCommand(command=DriverEvent.ACQUIRE_SAMPLE, args=[config_mods])
        self._ia_client.execute_resource(cmd)
        state = self._ia_client.get_agent_state()
        self.assertEqual(state, ResourceAgentState.COMMAND)

        finished = self._async_finished_result.get(timeout=120)
        self.assertEqual(finished, self._finished_count)

        cmd = AgentCommand(command=ResourceAgentEvent.RESET)
        self._ia_client.execute_agent(cmd)
        state = self._ia_client.get_agent_state()
        self.assertEqual(state, ResourceAgentState.UNINITIALIZED)

    def test_streaming(self):
        state = self._ia_client.get_agent_state()
        self.assertEqual(state, ResourceAgentState.UNINITIALIZED)

        cmd = AgentCommand(command=ResourceAgentEvent.INITIALIZE)
        self._ia_client.execute_agent(cmd)
        state = self._ia_client.get_agent_state()
        self.assertEqual(state, ResourceAgentState.INACTIVE)

        cmd = AgentCommand(command=ResourceAgentEvent.GO_ACTIVE)
        self._ia_client.execute_agent(cmd)
        state = self._ia_client.get_agent_state()
        self.assertEqual(state, ResourceAgentState.IDLE)

        cmd = AgentCommand(command=ResourceAgentEvent.RUN)
        self._ia_client.execute_agent(cmd)
        state = self._ia_client.get_agent_state()
        self.assertEqual(state, ResourceAgentState.COMMAND)

        params = {
            'POLLING_INTERVAL': 3
        }
        self._ia_client.set_resource(params)

        self._finished_count = 3

        cmd = AgentCommand(command=DriverEvent.START_AUTOSAMPLE)
        self._ia_client.execute_resource(cmd)
        state = self._ia_client.get_agent_state()
        self.assertEqual(state, ResourceAgentState.STREAMING)

        #Assert that data was received
        #        self._async_finished_result.get(timeout=600)
        #        self.assertTrue(len(self._finished_events_received) >= 3)

        cmd = AgentCommand(command=DriverEvent.STOP_AUTOSAMPLE)
        self._ia_client.execute_resource(cmd)
        state = self._ia_client.get_agent_state()
        self.assertEqual(state, ResourceAgentState.COMMAND)

        cmd = AgentCommand(command=ResourceAgentEvent.RESET)
        self._ia_client.execute_agent(cmd)
        state = self._ia_client.get_agent_state()
        self.assertEqual(state, ResourceAgentState.UNINITIALIZED)

    def test_command(self):
        # Test instrument driver get and set interface.

        state = self._ia_client.get_agent_state()
        self.assertEqual(state, ResourceAgentState.UNINITIALIZED)

        cmd = AgentCommand(command=ResourceAgentEvent.INITIALIZE)
        self._ia_client.execute_agent(cmd)
        state = self._ia_client.get_agent_state()
        self.assertEqual(state, ResourceAgentState.INACTIVE)

        cmd = AgentCommand(command=ResourceAgentEvent.GO_ACTIVE)
        self._ia_client.execute_agent(cmd)
        state = self._ia_client.get_agent_state()
        self.assertEqual(state, ResourceAgentState.IDLE)

        cmd = AgentCommand(command=ResourceAgentEvent.RUN)
        self._ia_client.execute_agent(cmd)
        state = self._ia_client.get_agent_state()
        self.assertEqual(state, ResourceAgentState.COMMAND)

        # Retrieve all resource parameters.
        reply = self._ia_client.get_resource(params=['DRIVER_PARAMETER_ALL'])
        self.assertParamDict(reply, True)

        # Retrieve a subset of resource parameters.
        params = [
            'POLLING_INTERVAL'
        ]
        reply = self._ia_client.get_resource(params=params)
        self.assertParamDict(reply)
        orig_params = reply

        # Set a subset of resource parameters.
        new_params = {
            'POLLING_INTERVAL': (orig_params['POLLING_INTERVAL'] * 2),
            }
        self._ia_client.set_resource(params=new_params)
        check_new_params = self._ia_client.get_resource(params)
        self.assertParamVals(check_new_params, new_params)

        cmd = AgentCommand(command=ResourceAgentEvent.RESET)
        self._ia_client.execute_agent(cmd)
        state = self._ia_client.get_agent_state()
        self.assertEqual(state, ResourceAgentState.UNINITIALIZED)

    def test_get_set_resource(self):
        cmd = AgentCommand(command=ResourceAgentEvent.INITIALIZE)
        self._ia_client.execute_agent(cmd)

        cmd = AgentCommand(command=ResourceAgentEvent.GO_ACTIVE)
        self._ia_client.execute_agent(cmd)

        cmd = AgentCommand(command=ResourceAgentEvent.RUN)
        self._ia_client.execute_agent(cmd)

        # Get a couple parameters
        retval = self._ia_client.get_resource(['POLLING_INTERVAL', 'PATCHABLE_CONFIG_KEYS'])
        log.debug('Retrieved parameters from agent: {0}'.format(retval))
        self.assertTrue(isinstance(retval, dict))
        self.assertEqual(type(retval['POLLING_INTERVAL']), int)
        self.assertEqual(type(retval['PATCHABLE_CONFIG_KEYS']), list)

        # Attempt to get a parameter that doesn't exist
        log.debug('Try getting a non-existent parameter \'BAD_PARAM\'')
        with self.assertRaises(ServerError):
            self._ia_client.get_resource(['BAD_PARAM'])

        # Set the polling_interval to a new value, then get it to make sure it set properly
        self._ia_client.set_resource({'POLLING_INTERVAL': 10})
        retval = self._ia_client.get_resource(['POLLING_INTERVAL'])
        log.debug('Retrieved parameters from agent: {0}'.format(retval))
        self.assertTrue(isinstance(retval, dict))
        self.assertEqual(retval['POLLING_INTERVAL'], 10)

        # Attempt to set a parameter that doesn't exist
        log.debug('Try setting a non-existent parameter \'BAD_PARAM\'')
        with self.assertRaises(ServerError):
            self._ia_client.set_resource({'BAD_PARAM': 'bad_val'})

        # Attempt to set one parameter that does exist, and one that doesn't
        with self.assertRaises(ServerError):
            self._ia_client.set_resource({'POLLING_INTERVAL': 20, 'BAD_PARAM': 'bad_val'})

        retval = self._ia_client.get_resource(['POLLING_INTERVAL'])
        log.debug('Retrieved parameters from agent: {0}'.format(retval))
        self.assertTrue(isinstance(retval, dict))
        self.assertEqual(retval['POLLING_INTERVAL'], 20)

        cmd = AgentCommand(command=ResourceAgentEvent.RESET)
        self._ia_client.execute_agent(cmd)
        state = self._ia_client.get_agent_state()
        self.assertEqual(state, ResourceAgentState.UNINITIALIZED)

    def test_initialize(self):
        # Test agent initialize command. This causes creation of driver process and transition to inactive.

        # We start in uninitialized state.
        # In this state there is no driver process.
        state = self._ia_client.get_agent_state()
        self.assertEqual(state, ResourceAgentState.UNINITIALIZED)

        # Initialize the agent.
        # The agent is spawned with a driver config, but you can pass one in
        # optionally with the initialize command. This validates the driver
        # config, launches a driver process and connects to it via messaging.
        # If successful, we switch to the inactive state.
        cmd = AgentCommand(command=ResourceAgentEvent.INITIALIZE)
        self._ia_client.execute_agent(cmd)
        state = self._ia_client.get_agent_state()
        self.assertEqual(state, ResourceAgentState.INACTIVE)

        # Reset the agent. This causes the driver messaging to be stopped,
        # the driver process to end and switches us back to uninitialized.
        cmd = AgentCommand(command=ResourceAgentEvent.RESET)
        self._ia_client.execute_agent(cmd)
        state = self._ia_client.get_agent_state()
        self.assertEqual(state, ResourceAgentState.UNINITIALIZED)

    def test_states(self):
        # Test agent state transitions.

        state = self._ia_client.get_agent_state()
        self.assertEqual(state, ResourceAgentState.UNINITIALIZED)

        cmd = AgentCommand(command=ResourceAgentEvent.INITIALIZE)
        self._ia_client.execute_agent(cmd)
        state = self._ia_client.get_agent_state()
        self.assertEqual(state, ResourceAgentState.INACTIVE)

        cmd = AgentCommand(command=ResourceAgentEvent.GO_ACTIVE)
        self._ia_client.execute_agent(cmd)
        state = self._ia_client.get_agent_state()
        self.assertEqual(state, ResourceAgentState.IDLE)

        cmd = AgentCommand(command=ResourceAgentEvent.RUN)
        self._ia_client.execute_agent(cmd)
        state = self._ia_client.get_agent_state()
        self.assertEqual(state, ResourceAgentState.COMMAND)

        cmd = AgentCommand(command=ResourceAgentEvent.PAUSE)
        self._ia_client.execute_agent(cmd)
        state = self._ia_client.get_agent_state()
        self.assertEqual(state, ResourceAgentState.STOPPED)

        cmd = AgentCommand(command=ResourceAgentEvent.RESUME)
        self._ia_client.execute_agent(cmd)
        state = self._ia_client.get_agent_state()
        self.assertEqual(state, ResourceAgentState.COMMAND)

        cmd = AgentCommand(command=ResourceAgentEvent.CLEAR)
        self._ia_client.execute_agent(cmd)
        state = self._ia_client.get_agent_state()
        self.assertEqual(state, ResourceAgentState.IDLE)

        cmd = AgentCommand(command=ResourceAgentEvent.RUN)
        self._ia_client.execute_agent(cmd)
        state = self._ia_client.get_agent_state()
        self.assertEqual(state, ResourceAgentState.COMMAND)

        cmd = AgentCommand(command=ResourceAgentEvent.PAUSE)
        self._ia_client.execute_agent(cmd)
        state = self._ia_client.get_agent_state()
        self.assertEqual(state, ResourceAgentState.STOPPED)

        cmd = AgentCommand(command=ResourceAgentEvent.CLEAR)
        self._ia_client.execute_agent(cmd)
        state = self._ia_client.get_agent_state()
        self.assertEqual(state, ResourceAgentState.IDLE)

        cmd = AgentCommand(command=ResourceAgentEvent.RUN)
        self._ia_client.execute_agent(cmd)
        state = self._ia_client.get_agent_state()
        self.assertEqual(state, ResourceAgentState.COMMAND)

        cmd = AgentCommand(command=DriverEvent.START_AUTOSAMPLE)
        self._ia_client.execute_resource(cmd)
        state = self._ia_client.get_agent_state()
        self.assertEqual(state, ResourceAgentState.STREAMING)

        cmd = AgentCommand(command=DriverEvent.STOP_AUTOSAMPLE)
        self._ia_client.execute_resource(cmd)
        state = self._ia_client.get_agent_state()
        self.assertEqual(state, ResourceAgentState.COMMAND)

        cmd = AgentCommand(command=ResourceAgentEvent.RESET)
        self._ia_client.execute_agent(cmd)
        state = self._ia_client.get_agent_state()
        self.assertEqual(state, ResourceAgentState.UNINITIALIZED)

    def test_capabilities(self):
        """
        Test the ability to retrieve agent and resource parameter and command
        capabilities in various system states.
        """

        # Test the ability to retrieve agent and resource parameter and command capabilities.
        acmds = self._ia_client.get_capabilities(['AGT_CMD'])
        log.debug('Agent Commands: {0}'.format(acmds))
        #        acmds = [item[1] for item in acmds]
        self.assertListsEqual(acmds, AGT_CMDS.keys())
        apars = self._ia_client.get_capabilities(['AGT_PAR'])
        log.debug('Agent Parameters: {0}'.format(apars))

        state = self._ia_client.get_agent_state()
        self.assertEqual(state, ResourceAgentState.UNINITIALIZED)

        cmd = AgentCommand(command=ResourceAgentEvent.INITIALIZE)
        self._ia_client.execute_agent(cmd)
        state = self._ia_client.get_agent_state()
        self.assertEqual(state, ResourceAgentState.INACTIVE)

        rcmds = self._ia_client.get_capabilities(['RES_CMD'])
        log.debug('Resource Commands: {0}'.format(rcmds))
        #        rcmds = [item[1] for item in rcmds]
        self.assertListsEqual(rcmds, CMDS.keys())

        rpars = self._ia_client.get_capabilities(['RES_PAR'])
        log.debug('Resource Parameters: {0}'.format(rpars))
        #        rpars = [item[1] for item in rpars]
        self.assertListsEqual(rpars, PARAMS.keys())

        cmd = AgentCommand(command=ResourceAgentEvent.RESET)
        self._ia_client.execute_agent(cmd)
        state = self._ia_client.get_agent_state()
        self.assertEqual(state, ResourceAgentState.UNINITIALIZED)

    def test_errors(self):
        # Test illegal behavior and replies.

        state = self._ia_client.get_agent_state()
        self.assertEqual(state, ResourceAgentState.UNINITIALIZED)

        # Can't go active in uninitialized state.
        # Status 660 is state error.
        cmd = AgentCommand(command=ResourceAgentEvent.GO_ACTIVE)
        with self.assertRaises(Conflict):
            self._ia_client.execute_agent(cmd)

        # Can't command driver in this state.
        cmd = AgentCommand(command=DriverEvent.ACQUIRE_SAMPLE)
        with self.assertRaises(Conflict):
            self._ia_client.execute_resource(cmd)
            #self.assertEqual(reply.status, 660)

        cmd = AgentCommand(command=ResourceAgentEvent.INITIALIZE)
        self._ia_client.execute_agent(cmd)
        state = self._ia_client.get_agent_state()
        self.assertEqual(state, ResourceAgentState.INACTIVE)

        cmd = AgentCommand(command=ResourceAgentEvent.GO_ACTIVE)
        self._ia_client.execute_agent(cmd)
        state = self._ia_client.get_agent_state()
        self.assertEqual(state, ResourceAgentState.IDLE)

        cmd = AgentCommand(command=ResourceAgentEvent.RUN)
        self._ia_client.execute_agent(cmd)
        state = self._ia_client.get_agent_state()
        self.assertEqual(state, ResourceAgentState.COMMAND)

        # 404 unknown agent command.
        cmd = AgentCommand(command='kiss_edward')
        with self.assertRaises(BadRequest):
            self._ia_client.execute_agent(cmd)

        # 670 unknown driver command.
        cmd = AgentCommand(command='acquire_sample_please')
        with self.assertRaises(ServerError):
            self._ia_client.execute_resource(cmd)

        # 630 Parameter error.
        #self.assertRaises(InstParameterError, self._ia_client.get_param, 'bogus bogus')

        cmd = AgentCommand(command=ResourceAgentEvent.RESET)
        self._ia_client.execute_agent(cmd)
        state = self._ia_client.get_agent_state()
        self.assertEqual(state, ResourceAgentState.UNINITIALIZED)


class TestGranulePublish(IonIntegrationTestCase):

    def setUp(self):
        # Start container
        self._start_container()

        self.container.start_rel_from_url('res/deploy/r2deploy.yml')

        # Now create client to DataProductManagementService
        self.rrclient = ResourceRegistryServiceClient(node=self.container.node)
        self.pubsubclient =  PubsubManagementServiceClient(node=self.container.node)
        self.dpclient = DataProductManagementServiceClient(node=self.container.node)
        self.processdispatchclient = ProcessDispatcherServiceClient(node=self.container.node)
        self.dataprocessclient = DataProcessManagementServiceClient(node=self.container.node)
        self.dataproductclient = DataProductManagementServiceClient(node=self.container.node)
        self.dataset_management = DatasetManagementServiceClient()


    def create_logger(self, name, stream_id=''):

        # logger process
        producer_definition = ProcessDefinition(name=name+'_logger')
        producer_definition.executable = {
            'module':'ion.processes.data.stream_granule_logger',
            'class':'StreamGranuleLogger'
        }

        logger_procdef_id = self.processdispatchclient.create_process_definition(process_definition=producer_definition)
        configuration = {
            'process':{
                'stream_id':stream_id,
                }
        }
        pid = self.processdispatchclient.schedule_process(process_definition_id=logger_procdef_id,
                                                          configuration=configuration)

        return pid

    #overriding trigger function here to use new granule
    def test_granule_publish(self):
        log.debug("test_granule_publish ")
        self.loggerpids = []


        #retrieve the param dict from the repository
        pdict_id = self.dataset_management.read_parameter_dictionary_by_name('ctd_parsed_param_dict',id_only=True)
        stream_definition_id = self.pubsubclient.create_stream_definition('parsed stream', parameter_dictionary_id=pdict_id)


        tdom, sdom = time_series_domain()

        dp_obj = IonObject(RT.DataProduct,
            name='the parsed data',
            description='ctd stream test',
            temporal_domain = tdom.dump(),
            spatial_domain = sdom.dump())

        data_product_id1 = self.dpclient.create_data_product(data_product=dp_obj, stream_definition_id=stream_definition_id)


        # Retrieve the id of the output stream of the output Data Product
        stream_ids, _ = self.rrclient.find_objects(data_product_id1, PRED.hasStream, None, True)
        log.debug( 'test_granule_publish: Data product streams1 = %s', stream_ids)

        pid = self.create_logger('ctd_parsed', stream_ids[0] )
        self.loggerpids.append(pid)

        rdt = RecordDictionaryTool(stream_definition_id=stream_definition_id)

        #create the publisher from the stream route
        stream_route = self.pubsubclient.read_stream_route(stream_ids[0])
        publisher = StandaloneStreamPublisher(stream_ids[0], stream_route)

        # this is one sample from the ctd driver
        tomato = {
            "driver_timestamp": 3555971105.1268806,
            "instrument_id": "ABC-123",
            "pkt_format_id": "JSON_Data",
            "pkt_version": 1,
            "preferred_timestamp": "driver_timestamp",
            "quality_flag": "ok",
            "stream_name": "parsed",
            "values": [
                {"value": 22.9304, "value_id": "temp"},
                {"value": 51.57381, "value_id": "conductivity"},
                {"value": 915.551, "value_id": "pressure"}
            ]
        }

        for value in tomato['values']:
            log.debug("test_granule_publish: Looping tomato values  key: %s    val: %s ", str(value['value_id']), str(value['value']))

            if value['value_id'] in rdt:
                rdt[value['value_id']] = numpy.array( [ value['value'] ] )
                log.debug("test_granule_publish: Added data item  %s  val: %s ", str(value['value_id']), str(value['value']) )

        g = rdt.to_granule()

        publisher.publish(g)

        time.sleep(3)

        for pid in self.loggerpids:
            self.processdispatchclient.cancel_process(pid)


class TestOmsLaunch(IonIntegrationTestCase):
    def setUp(self):
        self._start_container()

        self.container.start_rel_from_url('res/deploy/r2deploy.yml')

        self.rrclient = ResourceRegistryServiceClient(node=self.container.node)
        self.omsclient = ObservatoryManagementServiceClient(
            node=self.container.node)
        self.imsclient = InstrumentManagementServiceClient(
            node=self.container.node)
        self.damsclient = DataAcquisitionManagementServiceClient(
            node=self.container.node)
        self.dpclient = DataProductManagementServiceClient(
            node=self.container.node)
        self.pubsubcli = PubsubManagementServiceClient(
            node=self.container.node)
        self.processdispatchclient = ProcessDispatcherServiceClient(
            node=self.container.node)
        self.dataprocessclient = DataProcessManagementServiceClient(
            node=self.container.node)
        self.dataset_management = DatasetManagementServiceClient()

        # Use the network definition provided by RSN OMS directly.
        rsn_oms = CIOMSClientFactory.create_instance(DVR_CONFIG['oms_uri'])
        self._network_definition = RsnOmsUtil.build_network_definition(rsn_oms)
        # get serialized version for the configuration:
        self._network_definition_ser = NetworkUtil.serialize_network_definition(
            self._network_definition)
        if log.isEnabledFor(logging.DEBUG):
            log.debug("NetworkDefinition serialization:\n%s",
                      self._network_definition_ser)

        self.platformModel_id = None

        self.all_platforms = {}
        self.agent_streamconfig_map = {}

        self._async_data_result = AsyncResult()
        self._data_subscribers = []
        self._samples_received = []
        self.addCleanup(self._stop_data_subscribers)

        self._async_event_result = AsyncResult()
        self._event_subscribers = []
        self._events_received = []
        self.addCleanup(self._stop_event_subscribers)
        self._start_event_subscriber()

        self._set_up_DataProduct_obj()
        self._set_up_PlatformModel_obj()

    def _set_up_DataProduct_obj(self):
        # Create data product object to be used for each of the platform log streams
        tdom, sdom = time_series_domain()
        sdom = sdom.dump()
        tdom = tdom.dump()

        self.pdict_id = self.dataset_management.read_parameter_dictionary_by_name(
            'platform_eng_parsed', id_only=True)
        self.platform_eng_stream_def_id = self.pubsubcli.create_stream_definition(
            name='platform_eng', parameter_dictionary_id=self.pdict_id)
        self.dp_obj = IonObject(RT.DataProduct,
                                name='platform_eng data',
                                description='platform_eng test',
                                temporal_domain=tdom,
                                spatial_domain=sdom)

    def _set_up_PlatformModel_obj(self):
        # Create PlatformModel
        platformModel_obj = IonObject(RT.PlatformModel,
                                      name='RSNPlatformModel',
                                      description="RSNPlatformModel")
        try:
            self.platformModel_id = self.imsclient.create_platform_model(
                platformModel_obj)
        except BadRequest as ex:
            self.fail("failed to create new PLatformModel: %s" % ex)
        log.debug('new PlatformModel id = %s', self.platformModel_id)

    def _traverse(self, pnode, platform_id, parent_platform_objs=None):
        """
        Recursive routine that repeatedly calls _prepare_platform to build
        the object dictionary for each platform.

        @param pnode PlatformNode
        @param platform_id ID of the platform to be visited
        @param parent_platform_objs dict of objects associated to parent
                        platform, if any.

        @retval the dict returned by _prepare_platform at this level.
        """

        log.info("Starting _traverse for %r", platform_id)

        plat_objs = self._prepare_platform(pnode, platform_id,
                                           parent_platform_objs)

        self.all_platforms[platform_id] = plat_objs

        # now, traverse the children:
        for sub_pnode in pnode.subplatforms.itervalues():
            subplatform_id = sub_pnode.platform_id
            self._traverse(sub_pnode, subplatform_id, plat_objs)

        return plat_objs

    def _prepare_platform(self, pnode, platform_id, parent_platform_objs):
        """
        This routine generalizes the manual construction originally done in
        test_oms_launch.py. It is called by the recursive _traverse method so
        all platforms starting from a given base platform are prepared.

        Note: For simplicity in this test, sites are organized in the same
        hierarchical way as the platforms themselves.

        @param pnode PlatformNode
        @param platform_id ID of the platform to be visited
        @param parent_platform_objs dict of objects associated to parent
                        platform, if any.

        @retval a dict of associated objects similar to those in
                test_oms_launch
        """

        site__obj = IonObject(RT.PlatformSite,
                              name='%s_PlatformSite' % platform_id,
                              description='%s_PlatformSite platform site' %
                              platform_id)

        site_id = self.omsclient.create_platform_site(site__obj)

        if parent_platform_objs:
            # establish hasSite association with the parent
            self.rrclient.create_association(
                subject=parent_platform_objs['site_id'],
                predicate=PRED.hasSite,
                object=site_id)

        # prepare platform attributes and ports:
        monitor_attribute_objs, monitor_attribute_dicts = self._prepare_platform_attributes(
            pnode, platform_id)

        port_objs, port_dicts = self._prepare_platform_ports(
            pnode, platform_id)

        device__obj = IonObject(
            RT.PlatformDevice,
            name='%s_PlatformDevice' % platform_id,
            description='%s_PlatformDevice platform device' % platform_id,
            #                        ports=port_objs,
            #                        platform_monitor_attributes = monitor_attribute_objs
        )

        device__dict = dict(
            ports=port_dicts,
            platform_monitor_attributes=monitor_attribute_dicts)

        self.device_id = self.imsclient.create_platform_device(device__obj)

        self.imsclient.assign_platform_model_to_platform_device(
            self.platformModel_id, self.device_id)
        self.rrclient.create_association(subject=site_id,
                                         predicate=PRED.hasDevice,
                                         object=self.device_id)
        self.damsclient.register_instrument(instrument_id=self.device_id)

        if parent_platform_objs:
            # establish hasDevice association with the parent
            self.rrclient.create_association(
                subject=parent_platform_objs['device_id'],
                predicate=PRED.hasDevice,
                object=self.device_id)

        agent__obj = IonObject(RT.PlatformAgent,
                               name='%s_PlatformAgent' % platform_id,
                               description='%s_PlatformAgent platform agent' %
                               platform_id)

        agent_id = self.imsclient.create_platform_agent(agent__obj)

        if parent_platform_objs:
            # add this platform_id to parent's children:
            parent_platform_objs['children'].append(platform_id)

        self.imsclient.assign_platform_model_to_platform_agent(
            self.platformModel_id, agent_id)

        #        agent_instance_obj = IonObject(RT.PlatformAgentInstance,
        #                                name='%s_PlatformAgentInstance' % platform_id,
        #                                description="%s_PlatformAgentInstance" % platform_id)
        #
        #        agent_instance_id = self.imsclient.create_platform_agent_instance(
        #                            agent_instance_obj, agent_id, device_id)

        plat_objs = {
            'platform_id': platform_id,
            'site__obj': site__obj,
            'site_id': site_id,
            'device__obj': device__obj,
            'device_id': self.device_id,
            'agent__obj': agent__obj,
            'agent_id': agent_id,
            #            'agent_instance_obj': agent_instance_obj,
            #            'agent_instance_id':  agent_instance_id,
            'children': []
        }

        log.info("plat_objs for platform_id %r = %s", platform_id,
                 str(plat_objs))

        stream_config = self._create_stream_config(plat_objs)
        self.agent_streamconfig_map[platform_id] = stream_config
        #        self.agent_streamconfig_map[platform_id] = None
        #        self._start_data_subscriber(agent_instance_id, stream_config)

        return plat_objs

    def _prepare_platform_attributes(self, pnode, platform_id):
        """
        Returns the list of PlatformMonitorAttributes objects, along with a
        parallel list of plain dicts, for the attributes associated with the
        given platform.
        """
        # TODO complete the clean-up of this method
        ret_infos = dict((n, a.defn) for (n, a) in pnode.attrs.iteritems())

        monitor_attribute_objs = []
        monitor_attribute_dicts = []
        for attrName, attrDfn in ret_infos.iteritems():
            log.debug("platform_id=%r: preparing attribute=%r", platform_id,
                      attrName)

            monitor_rate = attrDfn['monitorCycleSeconds']
            units = attrDfn['units']

            plat_attr_obj = IonObject(OT.PlatformMonitorAttributes,
                                      id=attrName,
                                      monitor_rate=monitor_rate,
                                      units=units)

            plat_attr_dict = dict(id=attrName,
                                  monitor_rate=monitor_rate,
                                  units=units)

            monitor_attribute_objs.append(plat_attr_obj)
            monitor_attribute_dicts.append(plat_attr_dict)

        return monitor_attribute_objs, monitor_attribute_dicts

    def _prepare_platform_ports(self, pnode, platform_id):
        """
        Returns the list of PlatformPort objects, along with a parallel list of
        plain dicts, for the ports associated with the given platform.
        """
        # TODO complete the clean-up of this method

        port_objs = []
        port_dicts = []
        for port_id, network in pnode.ports.iteritems():
            log.debug("platform_id=%r: preparing port=%r network=%s",
                      platform_id, port_id, network)

            #
            # Note: the attribute formerly named "IP" address is now called the
            # "network" address in the CI-OMS interface spec.
            #
            plat_port_obj = IonObject(OT.PlatformPort,
                                      port_id=port_id,
                                      ip_address=network)

            plat_port_dict = dict(port_id=port_id, network=network)

            port_objs.append(plat_port_obj)

            port_dicts.append(plat_port_dict)

        return port_objs, port_dicts

    def _create_stream_config(self, plat_objs):
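        # Create the platform_eng data product for this platform, associate it
        # with the device, and derive the agent stream configuration from the
        # product's output stream.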

        platform_id = plat_objs['platform_id']
        device_id = plat_objs['device_id']

        #create the log data product
        self.dp_obj.name = '%s platform_eng data' % platform_id
        self.data_product_id = self.dpclient.create_data_product(
            data_product=self.dp_obj,
            stream_definition_id=self.platform_eng_stream_def_id)
        self.damsclient.assign_data_product(
            input_resource_id=device_id,
            data_product_id=self.data_product_id)
        # Retrieve the id of the OUTPUT stream from the out Data Product
        stream_ids, _ = self.rrclient.find_objects(self.data_product_id,
                                                   PRED.hasStream, None, True)

        stream_config = self._build_stream_config(stream_ids[0])
        return stream_config

    def _build_stream_config(self, stream_id=''):
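        # Gather what the platform agent needs in order to publish on this
        # stream: the routing key and exchange point from the stream route, the
        # stream definition id, and the serialized platform_eng parameter
        # dictionary.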

        platform_eng_dictionary = DatasetManagementService.get_parameter_dictionary_by_name(
            'platform_eng_parsed')

        #get the streamroute object from pubsub by passing the stream_id
        stream_def_ids, _ = self.rrclient.find_objects(
            stream_id, PRED.hasStreamDefinition, RT.StreamDefinition, True)

        stream_route = self.pubsubcli.read_stream_route(stream_id=stream_id)
        stream_config = {
            'routing_key': stream_route.routing_key,
            'stream_id': stream_id,
            'stream_definition_ref': stream_def_ids[0],
            'exchange_point': stream_route.exchange_point,
            'parameter_dictionary': platform_eng_dictionary.dump()
        }

        return stream_config

    def _set_platform_agent_instances(self):
        """
        Once most of the objs/defs associated with all platforms are in
        place, this method creates and associates the PlatformAgentInstance
        elements.
        """

        self.platform_configs = {}
        for platform_id, plat_objs in self.all_platforms.iteritems():

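            # Note: the config handed to the agent (PLATFORM_CONFIG) carries no
            # stream map, while self.platform_configs keeps the full
            # agent_streamconfig_map for use by the test itself.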
            PLATFORM_CONFIG = {
                'platform_id': platform_id,
                'agent_streamconfig_map': None,  #self.agent_streamconfig_map,
                'driver_config': DVR_CONFIG,
                'network_definition': self._network_definition_ser
            }

            self.platform_configs[platform_id] = {
                'platform_id': platform_id,
                'agent_streamconfig_map': self.agent_streamconfig_map,
                'driver_config': DVR_CONFIG,
                'network_definition': self._network_definition_ser
            }

            agent_config = {
                'platform_config': PLATFORM_CONFIG,
            }

            self.stream_id = self.agent_streamconfig_map[platform_id][
                'stream_id']

            #            import pprint
            #            print '============== platform id within unit test: %s ===========' % platform_id
            #            pprint.pprint(agent_config)
            #agent_config['platform_config']['agent_streamconfig_map'] = None

            agent_instance_obj = IonObject(
                RT.PlatformAgentInstance,
                name='%s_PlatformAgentInstance' % platform_id,
                description="%s_PlatformAgentInstance" % platform_id,
                agent_config=agent_config)

            agent_id = plat_objs['agent_id']
            device_id = plat_objs['device_id']
            agent_instance_id = self.imsclient.create_platform_agent_instance(
                agent_instance_obj, agent_id, device_id)

            plat_objs['agent_instance_obj'] = agent_instance_obj
            plat_objs['agent_instance_id'] = agent_instance_id

            stream_config = self.agent_streamconfig_map[platform_id]
            self._start_data_subscriber(agent_instance_id, stream_config)

    def _start_data_subscriber(self, stream_name, stream_config):
        """
        Starts data subscriber for the given stream_name and stream_config
        """
        def consume_data(message, stream_route, stream_id):
            # A callback for processing subscribed-to data.
            log.info('Subscriber received data message: %s.', str(message))
            self._samples_received.append(message)
            self._async_data_result.set()

        log.info('_start_data_subscriber stream_name=%r', stream_name)

        stream_id = self.stream_id  #stream_config['stream_id']

        # Create subscription for the stream
        exchange_name = '%s_queue' % stream_name
        self.container.ex_manager.create_xn_queue(exchange_name).purge()
        sub = StandaloneStreamSubscriber(exchange_name, consume_data)
        sub.start()
        self._data_subscribers.append(sub)
        sub_id = self.pubsubcli.create_subscription(name=exchange_name,
                                                    stream_ids=[stream_id])
        self.pubsubcli.activate_subscription(sub_id)
        sub.subscription_id = sub_id

    def _stop_data_subscribers(self):
        """
        Stop the data subscribers on cleanup.
        """
        try:
            for sub in self._data_subscribers:
                if hasattr(sub, 'subscription_id'):
                    try:
                        self.pubsubcli.deactivate_subscription(
                            sub.subscription_id)
                    except:
                        pass
                    self.pubsubcli.delete_subscription(sub.subscription_id)
                sub.stop()
        finally:
            self._data_subscribers = []

    def _start_event_subscriber(self,
                                event_type="DeviceEvent",
                                sub_type="platform_event"):
        """
        Starts event subscriber for events of given event_type ("DeviceEvent"
        by default) and given sub_type ("platform_event" by default).
        """
        def consume_event(evt, *args, **kwargs):
            # A callback for consuming events.
            log.info('Event subscriber received evt: %s.', str(evt))
            self._events_received.append(evt)
            self._async_event_result.set(evt)

        sub = EventSubscriber(event_type=event_type,
                              sub_type=sub_type,
                              callback=consume_event)

        sub.start()
        log.info("registered event subscriber for event_type=%r, sub_type=%r",
                 event_type, sub_type)

        self._event_subscribers.append(sub)
        sub._ready_event.wait(timeout=EVENT_TIMEOUT)

    def _stop_event_subscribers(self):
        """
        Stops the event subscribers on cleanup.
        """
        try:
            for sub in self._event_subscribers:
                if hasattr(sub, 'subscription_id'):
                    try:
                        self.pubsubcli.deactivate_subscription(
                            sub.subscription_id)
                    except:
                        pass
                    self.pubsubcli.delete_subscription(sub.subscription_id)
                sub.stop()
        finally:
            self._event_subscribers = []

    @skip("IMS does't net implement topology")
    def test_hierarchy(self):
        self._create_launch_verify(BASE_PLATFORM_ID)

    @skip("Needs alignment with recent IMS changes")
    def test_single_platform(self):
        self._create_launch_verify('LJ01D')

    def _create_launch_verify(self, base_platform_id):
        # and trigger the traversal of the branch rooted at that base platform
        # to create corresponding ION objects and configuration dictionaries:

        pnode = self._network_definition.pnodes[base_platform_id]
        base_platform_objs = self._traverse(pnode, base_platform_id)

        # now that most of the topology information is there, add the
        # PlatformAgentInstance elements
        self._set_platform_agent_instances()

        base_platform_config = self.platform_configs[base_platform_id]

        log.info("base_platform_id = %r", base_platform_id)

        #-------------------------------------------------------------------------------------
        # Create Data Process Definition and Data Process for the eng stream monitor process
        #-------------------------------------------------------------------------------------
        dpd_obj = IonObject(
            RT.DataProcessDefinition,
            name='DemoStreamAlertTransform',
            description='For testing EventTriggeredTransform_B',
            module='ion.processes.data.transforms.event_alert_transform',
            class_name='DemoStreamAlertTransform')
        self.platform_dprocdef_id = self.dataprocessclient.create_data_process_definition(
            dpd_obj)

        #THERE SHOULD BE NO STREAMDEF REQUIRED HERE.
        platform_streamdef_id = self.pubsubcli.create_stream_definition(
            name='platform_eng_parsed', parameter_dictionary_id=self.pdict_id)
        self.dataprocessclient.assign_stream_definition_to_data_process_definition(
            platform_streamdef_id, self.platform_dprocdef_id, binding='output')

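        # Configuration for the alert transform: check 'input_voltage' every 5
        # seconds and flag values falling outside the [-100, 100] range.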
        config = {
            'process': {
                'timer_interval': 5,
                'queue_name': 'a_queue',
                'variable_name': 'input_voltage',
                'time_field_name': 'preferred_timestamp',
                'valid_values': [-100, 100],
                'timer_origin': 'Interval Timer'
            }
        }

        platform_data_process_id = self.dataprocessclient.create_data_process(
            self.platform_dprocdef_id, [self.data_product_id], {}, config)
        self.dataprocessclient.activate_data_process(platform_data_process_id)
        self.addCleanup(self.dataprocessclient.delete_data_process,
                        platform_data_process_id)

        #-------------------------------
        # Launch Base Platform AgentInstance, connect to the resource agent client
        #-------------------------------

        agent_instance_id = base_platform_objs['agent_instance_id']
        log.debug(
            "about to call imsclient.start_platform_agent_instance with id=%s",
            agent_instance_id)
        pid = self.imsclient.start_platform_agent_instance(
            platform_agent_instance_id=agent_instance_id)
        log.debug("start_platform_agent_instance returned pid=%s", pid)

        #wait for start
        instance_obj = self.imsclient.read_platform_agent_instance(
            agent_instance_id)
        gate = ProcessStateGate(self.processdispatchclient.read_process,
                                instance_obj.agent_process_id,
                                ProcessStateEnum.RUNNING)
        self.assertTrue(
            gate.await(90),
            "The platform agent instance did not spawn in 90 seconds")

        agent_instance_obj = self.imsclient.read_platform_agent_instance(
            agent_instance_id)
        log.debug(
            'test_oms_create_and_launch: Platform agent instance obj: %s',
            str(agent_instance_obj))

        # Start a resource agent client to talk with the platform agent.
        self._pa_client = ResourceAgentClient(
            'paclient',
            name=agent_instance_obj.agent_process_id,
            process=FakeProcess())
        log.debug(" test_oms_create_and_launch:: got pa client %s",
                  str(self._pa_client))

        log.debug("base_platform_config =\n%s", base_platform_config)

        # ping_agent can be issued before INITIALIZE
        retval = self._pa_client.ping_agent(timeout=TIMEOUT)
        log.debug('Base Platform ping_agent = %s', str(retval))

        # issue INITIALIZE command to the base platform, which will launch the
        # creation of the whole platform hierarchy rooted at base_platform_config['platform_id']
        #        cmd = AgentCommand(command=PlatformAgentEvent.INITIALIZE, kwargs=dict(plat_config=base_platform_config))
        cmd = AgentCommand(command=PlatformAgentEvent.INITIALIZE)
        retval = self._pa_client.execute_agent(cmd, timeout=TIMEOUT)
        log.debug('Base Platform INITIALIZE = %s', str(retval))

        # GO_ACTIVE
        cmd = AgentCommand(command=PlatformAgentEvent.GO_ACTIVE)
        retval = self._pa_client.execute_agent(cmd, timeout=TIMEOUT)
        log.debug('Base Platform GO_ACTIVE = %s', str(retval))

        # RUN:
        cmd = AgentCommand(command=PlatformAgentEvent.RUN)
        retval = self._pa_client.execute_agent(cmd, timeout=TIMEOUT)
        log.debug('Base Platform RUN = %s', str(retval))

        # START_MONITORING:
        cmd = AgentCommand(command=PlatformAgentEvent.START_MONITORING)
        retval = self._pa_client.execute_agent(cmd, timeout=TIMEOUT)
        log.debug('Base Platform START_MONITORING = %s', str(retval))

        # wait for data sample
        # just wait for at least one -- see consume_data above
        log.info("waiting for reception of a data sample...")
        self._async_data_result.get(timeout=DATA_TIMEOUT)
        self.assertTrue(len(self._samples_received) >= 1)

        log.info("waiting a bit more for reception of more data samples...")
        sleep(15)
        log.info("Got data samples: %d", len(self._samples_received))

        # wait for event
        # just wait for at least one event -- see consume_event above
        log.info("waiting for reception of an event...")
        self._async_event_result.get(timeout=EVENT_TIMEOUT)
        log.info("Received events: %s", len(self._events_received))

        #get the extended platform which will include platform aggregate status fields
        extended_platform = self.imsclient.get_platform_device_extension(
            self.device_id)
        #        log.debug( 'test_single_platform   extended_platform: %s', str(extended_platform) )
        #        log.debug( 'test_single_platform   power_status_roll_up: %s', str(extended_platform.computed.power_status_roll_up.value) )
        #        log.debug( 'test_single_platform   comms_status_roll_up: %s', str(extended_platform.computed.communications_status_roll_up.value) )

        # STOP_MONITORING:
        cmd = AgentCommand(command=PlatformAgentEvent.STOP_MONITORING)
        retval = self._pa_client.execute_agent(cmd, timeout=TIMEOUT)
        log.debug('Base Platform STOP_MONITORING = %s', str(retval))

        # GO_INACTIVE
        cmd = AgentCommand(command=PlatformAgentEvent.GO_INACTIVE)
        retval = self._pa_client.execute_agent(cmd, timeout=TIMEOUT)
        log.debug('Base Platform GO_INACTIVE = %s', str(retval))

        # RESET: Resets the base platform agent, which includes termination of
        # its sub-platforms processes:
        cmd = AgentCommand(command=PlatformAgentEvent.RESET)
        retval = self._pa_client.execute_agent(cmd, timeout=TIMEOUT)
        log.debug('Base Platform RESET = %s', str(retval))

        #-------------------------------
        # Stop Base Platform AgentInstance
        #-------------------------------
        self.imsclient.stop_platform_agent_instance(
            platform_agent_instance_id=agent_instance_id)

class TestDMEnd2End(IonIntegrationTestCase):
    def setUp(self): # Love the non pep-8 convention
        self._start_container()

        self.container.start_rel_from_url('res/deploy/r2deploy.yml')

        self.process_dispatcher   = ProcessDispatcherServiceClient()
        self.pubsub_management    = PubsubManagementServiceClient()
        self.resource_registry    = ResourceRegistryServiceClient()
        self.dataset_management   = DatasetManagementServiceClient()
        self.ingestion_management = IngestionManagementServiceClient()
        self.data_retriever       = DataRetrieverServiceClient()
        self.event                = Event()
        self.exchange_space_name  = 'test_granules'
        self.exchange_point_name  = 'science_data'       
        self.i                    = 0
        self.cci                  = 0

    #--------------------------------------------------------------------------------
    # Helper/Utility methods
    #--------------------------------------------------------------------------------
        
    def create_dataset(self, parameter_dict_id=''):
        '''
        Creates a time-series dataset
        '''
        if not parameter_dict_id:
            parameter_dict_id = self.dataset_management.read_parameter_dictionary_by_name('ctd_parsed_param_dict', id_only=True)

        dataset = Dataset('test_dataset_%i'%self.i)
        dataset_id = self.dataset_management.create_dataset(dataset, parameter_dictionary_id=parameter_dict_id)
        self.addCleanup(self.dataset_management.delete_dataset, dataset_id)
        return dataset_id
    
    def get_datastore(self, dataset_id):
        '''
        Gets an instance of the datastore for the given dataset.
            This method is primarily used to work around a bug where integration tests running in multiple
            containers may delete a CouchDB datastore while the other containers remain unaware of the
            datastore's new state.
        '''
        dataset = self.dataset_management.read_dataset(dataset_id)
        datastore_name = dataset.datastore_name
        datastore = self.container.datastore_manager.get_datastore(datastore_name, DataStore.DS_PROFILE.SCIDATA)
        return datastore
    
    def get_ingestion_config(self):
        '''
        Grab the ingestion configuration from the resource registry
        '''
        # The ingestion configuration should have been created by the bootstrap service 
        # which is configured through r2deploy.yml

        ingest_configs, _  = self.resource_registry.find_resources(restype=RT.IngestionConfiguration,id_only=True)
        return ingest_configs[0]

    def launch_producer(self, stream_id=''):
        '''
        Launch the producer
        '''
        pid = self.container.spawn_process('better_data_producer', 'ion.processes.data.example_data_producer', 'BetterDataProducer', {'process':{'stream_id':stream_id}})
        self.addCleanup(self.container.terminate_process, pid)

    def make_simple_dataset(self):
        '''
        Makes a stream, a stream definition and a dataset, the essentials for most of these tests
        '''
        pdict_id             = self.dataset_management.read_parameter_dictionary_by_name('ctd_parsed_param_dict', id_only=True)
        stream_def_id        = self.pubsub_management.create_stream_definition('ctd data %i' % self.i, parameter_dictionary_id=pdict_id)
        self.addCleanup(self.pubsub_management.delete_stream_definition, stream_def_id)
        stream_id, route     = self.pubsub_management.create_stream('ctd stream %i' % self.i, 'xp1', stream_definition_id=stream_def_id)
        self.addCleanup(self.pubsub_management.delete_stream, stream_id)

        dataset_id = self.create_dataset(pdict_id)

        # self.get_datastore(dataset_id)
        self.i += 1
        return stream_id, route, stream_def_id, dataset_id

    def publish_hifi(self,stream_id,stream_route,offset=0):
        '''
        Publish one granule of ten deterministic samples; the offset shifts the
        time and temp values by offset * 10 so successive granules are distinguishable.
        '''

        pub = StandaloneStreamPublisher(stream_id, stream_route)

        stream_def = self.pubsub_management.read_stream_definition(stream_id=stream_id)
        stream_def_id = stream_def._id
        rdt = RecordDictionaryTool(stream_definition_id=stream_def_id)
        rdt['time'] = np.arange(10) + (offset * 10)
        rdt['temp'] = np.arange(10) + (offset * 10)
        pub.publish(rdt.to_granule())

    def publish_fake_data(self,stream_id, route):
        '''
        Make four granules
        '''
        for i in xrange(4):
            self.publish_hifi(stream_id,route,i)

    def start_ingestion(self, stream_id, dataset_id):
        '''
        Starts ingestion/persistence for a given dataset
        '''
        ingest_config_id = self.get_ingestion_config()
        self.ingestion_management.persist_data_stream(stream_id=stream_id, ingestion_configuration_id=ingest_config_id, dataset_id=dataset_id)
    
    def stop_ingestion(self, stream_id):
        ingest_config_id = self.get_ingestion_config()
        self.ingestion_management.unpersist_data_stream(stream_id=stream_id, ingestion_configuration_id=ingest_config_id)

    def validate_granule_subscription(self, msg, route, stream_id):
        '''
        Validation for granule format
        '''
        if msg == {}:
            return
        rdt = RecordDictionaryTool.load_from_granule(msg)
        log.info('%s', rdt.pretty_print())
        self.assertIsInstance(msg,Granule,'Message is improperly formatted. (%s)' % type(msg))
        self.event.set()

    def wait_until_we_have_enough_granules(self, dataset_id='',data_size=40):
        '''
        Loops until there is a sufficient amount of data in the dataset
        '''
        done = False
        with gevent.Timeout(40):
            while not done:
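                # Done once the newest time value is a real sample (not the
                # fill value) and the dataset extents have reached data_size.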
                extents = self.dataset_management.dataset_extents(dataset_id, 'time')
                granule = self.data_retriever.retrieve_last_data_points(dataset_id, 1)
                rdt     = RecordDictionaryTool.load_from_granule(granule)
                if rdt['time'] and rdt['time'][0] != rdt._pdict.get_context('time').fill_value and extents >= data_size:
                    done = True
                else:
                    gevent.sleep(0.2)


    #--------------------------------------------------------------------------------
    # Test Methods
    #--------------------------------------------------------------------------------

    def test_dm_end_2_end(self):
        #--------------------------------------------------------------------------------
        # Set up a stream and have a mock instrument (producer) send data
        #--------------------------------------------------------------------------------
        self.event.clear()

        # Get a precompiled parameter dictionary with basic ctd fields
        pdict_id = self.dataset_management.read_parameter_dictionary_by_name('ctd_parsed_param_dict',id_only=True)
        context_ids = self.dataset_management.read_parameter_contexts(pdict_id, id_only=True)

        # Add a field that supports binary data input.
        bin_context = ParameterContext('binary',  param_type=ArrayType())
        context_ids.append(self.dataset_management.create_parameter_context('binary', bin_context.dump()))
        # Add another field that supports dictionary elements.
        rec_context = ParameterContext('records', param_type=RecordType())
        context_ids.append(self.dataset_management.create_parameter_context('records', rec_context.dump()))

        pdict_id = self.dataset_management.create_parameter_dictionary('replay_pdict', parameter_context_ids=context_ids, temporal_context='time')
        
        stream_definition = self.pubsub_management.create_stream_definition('ctd data', parameter_dictionary_id=pdict_id)


        stream_id, route = self.pubsub_management.create_stream('producer', exchange_point=self.exchange_point_name, stream_definition_id=stream_definition)

        #--------------------------------------------------------------------------------
        # Start persisting the data on the stream 
        # - Get the ingestion configuration from the resource registry
        # - Create the dataset
        # - call persist_data_stream to setup the subscription for the ingestion workers
        #   on the stream that you specify which causes the data to be persisted
        #--------------------------------------------------------------------------------

        ingest_config_id = self.get_ingestion_config()
        dataset_id = self.create_dataset(pdict_id)
        self.ingestion_management.persist_data_stream(stream_id=stream_id, ingestion_configuration_id=ingest_config_id, dataset_id=dataset_id)
        self.addCleanup(self.stop_ingestion, stream_id)

        #--------------------------------------------------------------------------------
        # Now the granules are ingesting and persisted
        #--------------------------------------------------------------------------------

        self.launch_producer(stream_id)
        self.wait_until_we_have_enough_granules(dataset_id,40)
        
        #--------------------------------------------------------------------------------
        # Now get the data in one chunk using an RPC call to the data retriever's retrieve operation
        #--------------------------------------------------------------------------------
        
        replay_data = self.data_retriever.retrieve(dataset_id)
        self.assertIsInstance(replay_data, Granule)
        rdt = RecordDictionaryTool.load_from_granule(replay_data)
        self.assertTrue((rdt['time'][:10] == np.arange(10)).all(),'%s' % rdt['time'][:])
        self.assertTrue((rdt['binary'][:10] == np.array(['hi']*10, dtype='object')).all())

        
        #--------------------------------------------------------------------------------
        # Now to try the streamed approach
        #--------------------------------------------------------------------------------
        replay_stream_id, replay_route = self.pubsub_management.create_stream('replay_out', exchange_point=self.exchange_point_name, stream_definition_id=stream_definition)
        self.replay_id, process_id =  self.data_retriever.define_replay(dataset_id=dataset_id, stream_id=replay_stream_id)
        log.info('Process ID: %s', process_id)

        replay_client = ReplayClient(process_id)

    
        #--------------------------------------------------------------------------------
        # Create the listening endpoint for the retriever to talk to
        #--------------------------------------------------------------------------------
        sub_id = self.pubsub_management.create_subscription(self.exchange_space_name,stream_ids=[replay_stream_id])
        self.addCleanup(self.pubsub_management.delete_subscription, sub_id)
        self.pubsub_management.activate_subscription(sub_id)
        self.addCleanup(self.pubsub_management.deactivate_subscription, sub_id)
        subscriber = StandaloneStreamSubscriber(self.exchange_space_name, self.validate_granule_subscription)
        subscriber.start()
        self.addCleanup(subscriber.stop)

        self.data_retriever.start_replay_agent(self.replay_id)

        self.assertTrue(replay_client.await_agent_ready(5), 'The process never launched')
        replay_client.start_replay()
        
        self.assertTrue(self.event.wait(10))

        self.data_retriever.cancel_replay_agent(self.replay_id)


        #--------------------------------------------------------------------------------
        # Test the slicing capabilities
        #--------------------------------------------------------------------------------

        granule = self.data_retriever.retrieve(dataset_id=dataset_id, query={'tdoa':slice(0,5)})
        rdt = RecordDictionaryTool.load_from_granule(granule)
        b = rdt['time'] == np.arange(5)
        self.assertTrue(b.all() if not isinstance(b,bool) else b)


    def test_coverage_transform(self):
        ph = ParameterHelper(self.dataset_management, self.addCleanup)
        pdict_id = ph.create_parsed()
        stream_def_id = self.pubsub_management.create_stream_definition('ctd parsed', parameter_dictionary_id=pdict_id)
        self.addCleanup(self.pubsub_management.delete_stream_definition, stream_def_id)

        stream_id, route = self.pubsub_management.create_stream('example', exchange_point=self.exchange_point_name, stream_definition_id=stream_def_id)
        self.addCleanup(self.pubsub_management.delete_stream, stream_id)

        ingestion_config_id = self.get_ingestion_config()
        dataset_id = self.create_dataset(pdict_id)

        self.ingestion_management.persist_data_stream(stream_id=stream_id, ingestion_configuration_id=ingestion_config_id, dataset_id=dataset_id)
        self.addCleanup(self.ingestion_management.unpersist_data_stream, stream_id, ingestion_config_id)
        publisher = StandaloneStreamPublisher(stream_id, route)
        
        rdt = ph.get_rdt(stream_def_id)
        ph.fill_parsed_rdt(rdt)

        dataset_monitor = DatasetMonitor(dataset_id)
        self.addCleanup(dataset_monitor.stop)

        publisher.publish(rdt.to_granule())
        self.assertTrue(dataset_monitor.wait())

        replay_granule = self.data_retriever.retrieve(dataset_id)
        rdt_out = RecordDictionaryTool.load_from_granule(replay_granule)

        np.testing.assert_array_almost_equal(rdt_out['time'], rdt['time'])
        np.testing.assert_array_almost_equal(rdt_out['temp'], rdt['temp'])

        np.testing.assert_allclose(rdt_out['conductivity_L1'], np.array([42.914]))
        np.testing.assert_allclose(rdt_out['temp_L1'], np.array([20.]))
        np.testing.assert_allclose(rdt_out['pressure_L1'], np.array([3.068]))
        np.testing.assert_allclose(rdt_out['density'], np.array([1021.7144739593881], dtype='float32'))
        np.testing.assert_allclose(rdt_out['salinity'], np.array([30.935132729668283], dtype='float32'))


    def test_ingestion_pause(self):
        ctd_stream_id, route, stream_def_id, dataset_id = self.make_simple_dataset()
        ingestion_config_id = self.get_ingestion_config()
        self.start_ingestion(ctd_stream_id, dataset_id)
        self.addCleanup(self.stop_ingestion, ctd_stream_id)

        rdt = RecordDictionaryTool(stream_definition_id=stream_def_id)
        rdt['time'] = np.arange(10)

        publisher = StandaloneStreamPublisher(ctd_stream_id, route)
        monitor = DatasetMonitor(dataset_id)
        self.addCleanup(monitor.stop)
        publisher.publish(rdt.to_granule())
        self.assertTrue(monitor.wait())
        granule = self.data_retriever.retrieve(dataset_id)


        self.ingestion_management.pause_data_stream(ctd_stream_id, ingestion_config_id)

        monitor.event.clear()
        rdt['time'] = np.arange(10,20)
        publisher.publish(rdt.to_granule())
        self.assertFalse(monitor.event.wait(1))

        self.ingestion_management.resume_data_stream(ctd_stream_id, ingestion_config_id)

        self.assertTrue(monitor.wait())

        granule = self.data_retriever.retrieve(dataset_id)
        rdt2 = RecordDictionaryTool.load_from_granule(granule)
        np.testing.assert_array_almost_equal(rdt2['time'], np.arange(20))

    def test_last_granule(self):
        stream_id, route, stream_def_id, dataset_id = self.make_simple_dataset()
        self.start_ingestion(stream_id, dataset_id)
        self.addCleanup(self.stop_ingestion, stream_id)

        self.publish_hifi(stream_id,route, 0)
        self.publish_hifi(stream_id,route, 1)
        

        self.wait_until_we_have_enough_granules(dataset_id,20) # I just need two


        success = False
        def verifier():
            replay_granule = self.data_retriever.retrieve_last_data_points(dataset_id, 10)
            rdt = RecordDictionaryTool.load_from_granule(replay_granule)
            comp = rdt['time'] == np.arange(10) + 10
            if not isinstance(comp, bool):
                return comp.all()
            return False
        success = poll(verifier)

        self.assertTrue(success)

        success = False
        def verify_points():
            replay_granule = self.data_retriever.retrieve_last_data_points(dataset_id, 5)
            rdt = RecordDictionaryTool.load_from_granule(replay_granule)
            comp = rdt['time'] == np.arange(15, 20)
            if not isinstance(comp, bool):
                return comp.all()
            return False
        success = poll(verify_points)

        self.assertTrue(success)

    def test_replay_with_parameters(self):
        #--------------------------------------------------------------------------------
        # Create the configurations and the dataset
        #--------------------------------------------------------------------------------
        # Get a precompiled parameter dictionary with basic ctd fields
        pdict_id = self.dataset_management.read_parameter_dictionary_by_name('ctd_parsed_param_dict',id_only=True)
        context_ids = self.dataset_management.read_parameter_contexts(pdict_id, id_only=True)

        # Add a field that supports binary data input.
        bin_context = ParameterContext('binary',  param_type=ArrayType())
        context_ids.append(self.dataset_management.create_parameter_context('binary', bin_context.dump()))
        # Add another field that supports dictionary elements.
        rec_context = ParameterContext('records', param_type=RecordType())
        context_ids.append(self.dataset_management.create_parameter_context('records', rec_context.dump()))

        pdict_id = self.dataset_management.create_parameter_dictionary('replay_pdict', parameter_context_ids=context_ids, temporal_context='time')
        

        stream_def_id = self.pubsub_management.create_stream_definition('replay_stream', parameter_dictionary_id=pdict_id)
        
        stream_id, route  = self.pubsub_management.create_stream('replay_with_params', exchange_point=self.exchange_point_name, stream_definition_id=stream_def_id)
        config_id  = self.get_ingestion_config()
        dataset_id = self.create_dataset(pdict_id)
        self.ingestion_management.persist_data_stream(stream_id=stream_id, ingestion_configuration_id=config_id, dataset_id=dataset_id)
        self.addCleanup(self.stop_ingestion, stream_id)

        dataset_monitor = DatasetMonitor(dataset_id)
        self.addCleanup(dataset_monitor.stop)

        self.publish_fake_data(stream_id, route)

        self.assertTrue(dataset_monitor.wait())

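        # Retrieve the first 20 samples (the query times are unix-referenced,
        # i.e. NTP minus 2208988800), with a stride of 2 and only the time and
        # temp parameters.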
        query = {
            'start_time': 0 - 2208988800,
            'end_time':   19 - 2208988800,
            'stride_time' : 2,
            'parameters': ['time','temp']
        }
        retrieved_data = self.data_retriever.retrieve(dataset_id=dataset_id,query=query)

        rdt = RecordDictionaryTool.load_from_granule(retrieved_data)
        np.testing.assert_array_equal(rdt['time'], np.arange(0,20,2))
        self.assertEquals(set(rdt.iterkeys()), set(['time','temp']))

        extents = self.dataset_management.dataset_extents(dataset_id=dataset_id, parameters=['time','temp'])
        self.assertTrue(extents['time']>=20)
        self.assertTrue(extents['temp']>=20)

    def test_repersist_data(self):
        stream_id, route, stream_def_id, dataset_id = self.make_simple_dataset()
        self.start_ingestion(stream_id, dataset_id)
        self.publish_hifi(stream_id,route,0)
        self.publish_hifi(stream_id,route,1)
        self.wait_until_we_have_enough_granules(dataset_id,20)
        config_id = self.get_ingestion_config()
        self.ingestion_management.unpersist_data_stream(stream_id=stream_id,ingestion_configuration_id=config_id)
        self.ingestion_management.persist_data_stream(stream_id=stream_id,ingestion_configuration_id=config_id,dataset_id=dataset_id)
        self.addCleanup(self.stop_ingestion, stream_id)
        self.publish_hifi(stream_id,route,2)
        self.publish_hifi(stream_id,route,3)
        self.wait_until_we_have_enough_granules(dataset_id,40)
        success = False
        with gevent.timeout.Timeout(5):
            while not success:

                replay_granule = self.data_retriever.retrieve(dataset_id)

                rdt = RecordDictionaryTool.load_from_granule(replay_granule)

                comp = rdt['time'] == np.arange(0,40)
                if not isinstance(comp,bool):
                    success = comp.all()
                gevent.sleep(1)

        self.assertTrue(success)


    @unittest.skip('deprecated')
    def test_correct_time(self):

        # There are 2208988800 seconds between Jan 1 1900 and Jan 1 1970, i.e. 
        #  the conversion factor between unix and NTP time
        unix_now = np.floor(time.time())
        ntp_now  = unix_now + 2208988800 

        unix_ago = unix_now - 20
        ntp_ago  = unix_ago + 2208988800

        stream_id, route, stream_def_id, dataset_id = self.make_simple_dataset()
        coverage = DatasetManagementService._get_simplex_coverage(dataset_id, mode='a')
        coverage.insert_timesteps(20)
        coverage.set_parameter_values('time', np.arange(ntp_ago,ntp_now))
        
        temporal_bounds = self.dataset_management.dataset_temporal_bounds(dataset_id)

        self.assertTrue( np.abs(temporal_bounds[0] - unix_ago) < 2)
        self.assertTrue( np.abs(temporal_bounds[1] - unix_now) < 2)


    @attr('LOCOINT')
    @unittest.skipIf(os.getenv('CEI_LAUNCH_TEST', False), 'Host requires file-system access to coverage files, CEI mode does not support.')
    def test_out_of_band_retrieve(self):
        # Setup the environment
        stream_id, route, stream_def_id, dataset_id = self.make_simple_dataset()
        self.start_ingestion(stream_id, dataset_id)
        
        # Fill the dataset
        self.publish_fake_data(stream_id, route)
        self.wait_until_we_have_enough_granules(dataset_id,40)

        # Retrieve the data
        granule = DataRetrieverService.retrieve_oob(dataset_id)
        rdt = RecordDictionaryTool.load_from_granule(granule)
        self.assertTrue((rdt['time'] == np.arange(40)).all())

    def publish_and_wait(self, dataset_id, granule):
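        # Publish the given granule on the dataset's stream and block until the
        # dataset monitor reports that it has been ingested.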
        stream_ids, _ = self.resource_registry.find_objects(dataset_id, PRED.hasStream,id_only=True)
        stream_id=stream_ids[0]
        route = self.pubsub_management.read_stream_route(stream_id)
        publisher = StandaloneStreamPublisher(stream_id,route)
        dataset_monitor = DatasetMonitor(dataset_id)
        self.addCleanup(dataset_monitor.stop)
        publisher.publish(granule)
        self.assertTrue(dataset_monitor.wait())


    def test_sparse_values(self):
        ph = ParameterHelper(self.dataset_management, self.addCleanup)
        pdict_id = ph.create_sparse()
        stream_def_id = self.pubsub_management.create_stream_definition('sparse', parameter_dictionary_id=pdict_id)
        self.addCleanup(self.pubsub_management.delete_stream_definition, stream_def_id)
        stream_id, route = self.pubsub_management.create_stream('example', exchange_point=self.exchange_point_name, stream_definition_id=stream_def_id)
        self.addCleanup(self.pubsub_management.delete_stream, stream_id)
        dataset_id = self.create_dataset(pdict_id)
        self.start_ingestion(stream_id,dataset_id)
        self.addCleanup(self.stop_ingestion, stream_id)

        # Publish initial granule
        # the first one has the sparse value set inside it, sets lat to 45 and lon to -71
        ntp_now = time.time() + 2208988800
        rdt = ph.get_rdt(stream_def_id)
        rdt['time'] = [ntp_now]
        rdt['internal_timestamp'] = [ntp_now]
        rdt['temp'] = [300000]
        rdt['preferred_timestamp'] = ['driver_timestamp']
        rdt['port_timestamp'] = [ntp_now]
        rdt['quality_flag'] = ['']
        rdt['lat'] = [45]
        rdt['conductivity'] = [4341400]
        rdt['driver_timestamp'] = [ntp_now]
        rdt['lon'] = [-71]
        rdt['pressure'] = [256.8]

        publisher = StandaloneStreamPublisher(stream_id, route)
        dataset_monitor = DatasetMonitor(dataset_id)
        self.addCleanup(dataset_monitor.stop)
        publisher.publish(rdt.to_granule())
        self.assertTrue(dataset_monitor.wait())
        dataset_monitor.reset()

        replay_granule = self.data_retriever.retrieve(dataset_id)
        rdt_out = RecordDictionaryTool.load_from_granule(replay_granule)

        # Check the values and make sure they're correct
        np.testing.assert_allclose(rdt_out['time'], rdt['time'])
        np.testing.assert_allclose(rdt_out['temp'], rdt['temp'])
        np.testing.assert_allclose(rdt_out['lat'], np.array([45]))
        np.testing.assert_allclose(rdt_out['lon'], np.array([-71]))

        np.testing.assert_allclose(rdt_out['conductivity_L1'], np.array([42.914]))
        np.testing.assert_allclose(rdt_out['temp_L1'], np.array([20.]))
        np.testing.assert_allclose(rdt_out['pressure_L1'], np.array([3.068]))
        np.testing.assert_allclose(rdt_out['density'], np.array([1021.7144739593881], dtype='float32'))
        np.testing.assert_allclose(rdt_out['salinity'], np.array([30.935132729668283], dtype='float32'))


        # We're going to change the lat/lon
        rdt = ph.get_rdt(stream_def_id)
        rdt['time'] = time.time() + 2208988800
        rdt['lat'] = [46]
        rdt['lon'] = [-73]
        
        publisher.publish(rdt.to_granule())
        self.assertTrue(dataset_monitor.wait())
        dataset_monitor.reset()


        replay_granule = self.data_retriever.retrieve(dataset_id)
        rdt_out = RecordDictionaryTool.load_from_granule(replay_granule)

        np.testing.assert_allclose(rdt_out['time'], rdt['time'])
        
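        # Publish nine more granules without lat/lon; the sparse parameters
        # should carry the last values set (46, -73) forward.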
        for i in xrange(9):
            ntp_now = time.time() + 2208988800
            rdt['time'] = [ntp_now]
            rdt['internal_timestamp'] = [ntp_now]
            rdt['temp'] = [300000]
            rdt['preferred_timestamp'] = ['driver_timestamp']
            rdt['port_timestamp'] = [ntp_now]
            rdt['quality_flag'] = [None]
            rdt['conductivity'] = [4341400]
            rdt['driver_timestamp'] = [ntp_now]
            rdt['pressure'] = [256.8]

            publisher.publish(rdt.to_granule())
            self.assertTrue(dataset_monitor.wait())
            dataset_monitor.reset()

        replay_granule = self.data_retriever.retrieve(dataset_id)
        rdt_out = RecordDictionaryTool.load_from_granule(replay_granule)

        np.testing.assert_allclose(rdt_out['pressure'], np.array([256.8] * 10))
        np.testing.assert_allclose(rdt_out['lat'], np.array([45] + [46] * 9))
        np.testing.assert_allclose(rdt_out['lon'], np.array([-71] + [-73] * 9))
Beispiel #17
class TestDataProductManagementServiceIntegration(IonIntegrationTestCase):
    def setUp(self):
        # Start container
        #print 'instantiating container'
        self._start_container()

        self.container.start_rel_from_url('res/deploy/r2deploy.yml')

        self.dpsc_cli = DataProductManagementServiceClient()
        self.rrclient = ResourceRegistryServiceClient()
        self.damsclient = DataAcquisitionManagementServiceClient()
        self.pubsubcli = PubsubManagementServiceClient()
        self.ingestclient = IngestionManagementServiceClient()
        self.process_dispatcher = ProcessDispatcherServiceClient()
        self.dataset_management = DatasetManagementServiceClient()
        self.unsc = UserNotificationServiceClient()
        self.data_retriever = DataRetrieverServiceClient()

        #------------------------------------------
        # Create the environment
        #------------------------------------------

        self.stream_def_id = self.pubsubcli.create_stream_definition(
            name='SBE37_CDM')

        self.process_definitions = {}
        ingestion_worker_definition = ProcessDefinition(
            name='ingestion worker')
        ingestion_worker_definition.executable = {
            'module':
            'ion.processes.data.ingestion.science_granule_ingestion_worker',
            'class': 'ScienceGranuleIngestionWorker'
        }
        process_definition_id = self.process_dispatcher.create_process_definition(
            process_definition=ingestion_worker_definition)
        self.process_definitions['ingestion_worker'] = process_definition_id

        self.pids = []
        self.exchange_points = []
        self.exchange_names = []

        #------------------------------------------------------------------------------------------------
        # First launch the ingestors
        #------------------------------------------------------------------------------------------------
        self.exchange_space = 'science_granule_ingestion'
        self.exchange_point = 'science_data'
        config = DotDict()
        config.process.datastore_name = 'datasets'
        config.process.queue_name = self.exchange_space

        self.exchange_names.append(self.exchange_space)
        self.exchange_points.append(self.exchange_point)

        pid = self.process_dispatcher.schedule_process(
            self.process_definitions['ingestion_worker'], configuration=config)
        log.debug("the ingestion worker process id: %s", pid)
        self.pids.append(pid)

        self.addCleanup(self.cleaning_up)

    def cleaning_up(self):
        for pid in self.pids:
            log.debug("number of pids to be terminated: %s", len(self.pids))
            try:
                self.process_dispatcher.cancel_process(pid)
                log.debug("Terminated the process: %s", pid)
            except:
                log.debug("could not terminate the process id: %s" % pid)
        IngestionManagementIntTest.clean_subscriptions()

        for xn in self.exchange_names:
            xni = self.container.ex_manager.create_xn_queue(xn)
            xni.delete()
        for xp in self.exchange_points:
            xpi = self.container.ex_manager.create_xp(xp)
            xpi.delete()

    def get_datastore(self, dataset_id):
        dataset = self.dataset_management.read_dataset(dataset_id)
        datastore_name = dataset.datastore_name
        datastore = self.container.datastore_manager.get_datastore(
            datastore_name, DataStore.DS_PROFILE.SCIDATA)
        return datastore

    @attr('EXT')
    @attr('PREP')
    def test_create_data_product(self):

        #------------------------------------------------------------------------------------------------
        # create a stream definition for the data from the ctd simulator
        #------------------------------------------------------------------------------------------------
        parameter_dictionary = self.dataset_management.read_parameter_dictionary_by_name(
            'ctd_parsed_param_dict')
        ctd_stream_def_id = self.pubsubcli.create_stream_definition(
            name='Simulated CTD data',
            parameter_dictionary_id=parameter_dictionary._id)
        log.debug("Created stream def id %s" % ctd_stream_def_id)

        #------------------------------------------------------------------------------------------------
        # test creating a new data product w/o a stream definition
        #------------------------------------------------------------------------------------------------

        dp_obj = IonObject(RT.DataProduct,
                           name='DP1',
                           description='some new dp')

        dp_obj.geospatial_bounds.geospatial_latitude_limit_north = 10.0
        dp_obj.geospatial_bounds.geospatial_latitude_limit_south = -10.0
        dp_obj.geospatial_bounds.geospatial_longitude_limit_east = 10.0
        dp_obj.geospatial_bounds.geospatial_longitude_limit_west = -10.0
        dp_obj.ooi_product_name = "PRODNAME"

        #------------------------------------------------------------------------------------------------
        # Create a set of ParameterContext objects to define the parameters in the coverage, add each to the ParameterDictionary
        #------------------------------------------------------------------------------------------------

        dp_id = self.dpsc_cli.create_data_product(
            data_product=dp_obj, stream_definition_id=ctd_stream_def_id)
        # Assert that the data product has an associated stream at this stage
        stream_ids, _ = self.rrclient.find_objects(dp_id, PRED.hasStream,
                                                   RT.Stream, True)
        self.assertNotEquals(len(stream_ids), 0)

        # Assert that the data product has an associated stream def at this stage
        stream_ids, _ = self.rrclient.find_objects(dp_id,
                                                   PRED.hasStreamDefinition,
                                                   RT.StreamDefinition, True)
        self.assertNotEquals(len(stream_ids), 0)

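        # Activate persistence so that granules published on the product's
        # stream are ingested into a dataset.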
        self.dpsc_cli.activate_data_product_persistence(dp_id)

        dp_obj = self.dpsc_cli.read_data_product(dp_id)
        self.assertIsNotNone(dp_obj)
        self.assertEquals(dp_obj.geospatial_point_center.lat, 0.0)
        log.debug('Created data product %s', dp_obj)
        #------------------------------------------------------------------------------------------------
        # test creating a new data product with a stream definition
        #------------------------------------------------------------------------------------------------
        log.debug('Creating new data product with a stream definition')
        dp_obj = IonObject(RT.DataProduct,
                           name='DP2',
                           description='some new dp')

        dp_id2 = self.dpsc_cli.create_data_product(dp_obj, ctd_stream_def_id)
        self.dpsc_cli.activate_data_product_persistence(dp_id2)
        log.debug('new dp_id = %s' % dp_id2)

        #------------------------------------------------------------------------------------------------
        #make sure data product is associated with stream def
        #------------------------------------------------------------------------------------------------
        streamdefs = []
        streams, _ = self.rrclient.find_objects(dp_id2, PRED.hasStream,
                                                RT.Stream, True)
        for s in streams:
            log.debug("Checking stream %s" % s)
            sdefs, _ = self.rrclient.find_objects(s, PRED.hasStreamDefinition,
                                                  RT.StreamDefinition, True)
            for sd in sdefs:
                log.debug("Checking streamdef %s" % sd)
                streamdefs.append(sd)
        self.assertIn(ctd_stream_def_id, streamdefs)

        group_names = self.dpsc_cli.get_data_product_group_list()
        self.assertIn("PRODNAME", group_names)

        # test reading a non-existent data product
        log.debug('reading non-existent data product')

        with self.assertRaises(NotFound):
            dp_obj = self.dpsc_cli.read_data_product('some_fake_id')

        # update a data product (tests read also)
        log.debug('Updating data product')
        # first get the existing dp object
        dp_obj = self.dpsc_cli.read_data_product(dp_id)

        # now tweak the object
        dp_obj.description = 'the very first dp'
        dp_obj.geospatial_bounds.geospatial_latitude_limit_north = 20.0
        dp_obj.geospatial_bounds.geospatial_latitude_limit_south = -20.0
        dp_obj.geospatial_bounds.geospatial_longitude_limit_east = 20.0
        dp_obj.geospatial_bounds.geospatial_longitude_limit_west = -20.0
        # now write the dp back to the registry
        update_result = self.dpsc_cli.update_data_product(dp_obj)

        # now get the dp back to see if it was updated
        dp_obj = self.dpsc_cli.read_data_product(dp_id)
        self.assertEquals(dp_obj.description, 'the very first dp')
        self.assertEquals(dp_obj.geospatial_point_center.lat, 0.0)
        log.debug('Updated data product %s', dp_obj)

        #test extension
        extended_product = self.dpsc_cli.get_data_product_extension(dp_id)
        self.assertEqual(dp_id, extended_product._id)
        self.assertEqual(
            ComputedValueAvailability.PROVIDED,
            extended_product.computed.product_download_size_estimated.status)
        self.assertEqual(
            0, extended_product.computed.product_download_size_estimated.value)

        self.assertEqual(ComputedValueAvailability.PROVIDED,
                         extended_product.computed.parameters.status)

        #log.debug("test_create_data_product: parameters %s" % extended_product.computed.parameters.value)

        def ion_object_encoder(obj):
            return obj.__dict__

        #test prepare for create
        data_product_data = self.dpsc_cli.prepare_data_product_support()

        #print simplejson.dumps(data_product_data, default=ion_object_encoder, indent= 2)

        self.assertEqual(data_product_data._id, "")
        self.assertEqual(data_product_data.type_, OT.DataProductPrepareSupport)
        self.assertEqual(
            len(data_product_data.associations['StreamDefinition'].resources),
            2)
        self.assertEqual(
            len(data_product_data.associations['Dataset'].resources), 0)
        self.assertEqual(
            len(data_product_data.associations['StreamDefinition'].
                associated_resources), 0)
        self.assertEqual(
            len(data_product_data.associations['Dataset'].associated_resources
                ), 0)

        #test prepare for update
        data_product_data = self.dpsc_cli.prepare_data_product_support(dp_id)

        #print simplejson.dumps(data_product_data, default=ion_object_encoder, indent= 2)

        self.assertEqual(data_product_data._id, dp_id)
        self.assertEqual(data_product_data.type_, OT.DataProductPrepareSupport)
        self.assertEqual(
            len(data_product_data.associations['StreamDefinition'].resources),
            2)

        self.assertEqual(
            len(data_product_data.associations['Dataset'].resources), 1)

        self.assertEqual(
            len(data_product_data.associations['StreamDefinition'].
                associated_resources), 1)
        self.assertEqual(
            data_product_data.associations['StreamDefinition'].
            associated_resources[0].s, dp_id)

        self.assertEqual(
            len(data_product_data.associations['Dataset'].associated_resources
                ), 1)
        self.assertEqual(
            data_product_data.associations['Dataset'].associated_resources[0].
            s, dp_id)

        # now 'delete' the data product
        log.debug("deleting data product: %s" % dp_id)
        self.dpsc_cli.delete_data_product(dp_id)

        # Assert that there are no associated streams leftover after deleting the data product
        stream_ids, assoc_ids = self.rrclient.find_objects(
            dp_id, PRED.hasStream, RT.Stream, True)
        self.assertEquals(len(stream_ids), 0)
        self.assertEquals(len(assoc_ids), 0)

        self.dpsc_cli.force_delete_data_product(dp_id)

        # now try to get the deleted dp object
        with self.assertRaises(NotFound):
            dp_obj = self.dpsc_cli.read_data_product(dp_id)

        # Get the events corresponding to the data product
        ret = self.unsc.get_recent_events(resource_id=dp_id)
        events = ret.value

        for event in events:
            log.debug("event time: %s" % event.ts_created)

        self.assertTrue(len(events) > 0)

    def test_data_product_stream_def(self):
        pdict_id = self.dataset_management.read_parameter_dictionary_by_name(
            'ctd_parsed_param_dict', id_only=True)
        ctd_stream_def_id = self.pubsubcli.create_stream_definition(
            name='Simulated CTD data', parameter_dictionary_id=pdict_id)

        dp_obj = IonObject(RT.DataProduct,
                           name='DP1',
                           description='some new dp')
        dp_id = self.dpsc_cli.create_data_product(
            data_product=dp_obj, stream_definition_id=ctd_stream_def_id)

        stream_def_id = self.dpsc_cli.get_data_product_stream_definition(dp_id)
        self.assertEquals(ctd_stream_def_id, stream_def_id)

    def test_derived_data_product(self):
        pdict_id = self.dataset_management.read_parameter_dictionary_by_name(
            'ctd_parsed_param_dict', id_only=True)
        ctd_stream_def_id = self.pubsubcli.create_stream_definition(
            name='ctd parsed', parameter_dictionary_id=pdict_id)
        self.addCleanup(self.pubsubcli.delete_stream_definition,
                        ctd_stream_def_id)

        dp = DataProduct(name='Instrument DP')
        dp_id = self.dpsc_cli.create_data_product(
            dp, stream_definition_id=ctd_stream_def_id)
        self.addCleanup(self.dpsc_cli.force_delete_data_product, dp_id)

        self.dpsc_cli.activate_data_product_persistence(dp_id)
        self.addCleanup(self.dpsc_cli.suspend_data_product_persistence, dp_id)

        dataset_ids, _ = self.rrclient.find_objects(subject=dp_id,
                                                    predicate=PRED.hasDataset,
                                                    id_only=True)
        if not dataset_ids:
            raise NotFound("Data Product %s dataset  does not exist" %
                           str(dp_id))
        dataset_id = dataset_ids[0]

        # Make the derived data product
        simple_stream_def_id = self.pubsubcli.create_stream_definition(
            name='TEMPWAT stream def',
            parameter_dictionary_id=pdict_id,
            available_fields=['time', 'temp'])
        tempwat_dp = DataProduct(name='TEMPWAT',
                                 category=DataProductTypeEnum.DERIVED)
        tempwat_dp_id = self.dpsc_cli.create_data_product(
            tempwat_dp,
            stream_definition_id=simple_stream_def_id,
            parent_data_product_id=dp_id)
        self.addCleanup(self.dpsc_cli.delete_data_product, tempwat_dp_id)
        # Check that the streams associated with the data product are persisted
        stream_ids, _ = self.rrclient.find_objects(dp_id, PRED.hasStream,
                                                   RT.Stream, True)
        for stream_id in stream_ids:
            self.assertTrue(self.ingestclient.is_persisted(stream_id))

        stream_id = stream_ids[0]
        route = self.pubsubcli.read_stream_route(stream_id=stream_id)

        rdt = RecordDictionaryTool(stream_definition_id=ctd_stream_def_id)
        rdt['time'] = np.arange(20)
        rdt['temp'] = np.arange(20)
        rdt['pressure'] = np.arange(20)

        publisher = StandaloneStreamPublisher(stream_id, route)

        dataset_modified = Event()

        def cb(*args, **kwargs):
            dataset_modified.set()

        es = EventSubscriber(event_type=OT.DatasetModified,
                             callback=cb,
                             origin=dataset_id,
                             auto_delete=True)
        es.start()
        self.addCleanup(es.stop)

        publisher.publish(rdt.to_granule())

        self.assertTrue(dataset_modified.wait(30))

        tempwat_dataset_ids, _ = self.rrclient.find_objects(tempwat_dp_id,
                                                            PRED.hasDataset,
                                                            id_only=True)
        tempwat_dataset_id = tempwat_dataset_ids[0]
        granule = self.data_retriever.retrieve(
            tempwat_dataset_id, delivery_format=simple_stream_def_id)
        rdt = RecordDictionaryTool.load_from_granule(granule)
        np.testing.assert_array_equal(rdt['time'], np.arange(20))
        self.assertEquals(set(rdt.fields), set(['time', 'temp']))

    def test_activate_suspend_data_product(self):

        #------------------------------------------------------------------------------------------------
        # create a stream definition for the data from the ctd simulator
        #------------------------------------------------------------------------------------------------
        pdict_id = self.dataset_management.read_parameter_dictionary_by_name(
            'ctd_parsed_param_dict', id_only=True)
        ctd_stream_def_id = self.pubsubcli.create_stream_definition(
            name='Simulated CTD data', parameter_dictionary_id=pdict_id)
        log.debug("Created stream def id %s" % ctd_stream_def_id)

        #------------------------------------------------------------------------------------------------
        # test creating a new data product using the CTD stream definition
        #------------------------------------------------------------------------------------------------

        dp_obj = IonObject(RT.DataProduct,
                           name='DP1',
                           description='some new dp')

        log.debug("Created an IonObject for a data product: %s" % dp_obj)

        #------------------------------------------------------------------------------------------------
        # Create the data product, passing the stream definition created above
        #------------------------------------------------------------------------------------------------

        dp_id = self.dpsc_cli.create_data_product(
            data_product=dp_obj, stream_definition_id=ctd_stream_def_id)

        #------------------------------------------------------------------------------------------------
        # Subscribe to persist events
        #------------------------------------------------------------------------------------------------
        queue = gevent.queue.Queue()

        def info_event_received(message, headers):
            queue.put(message)

        es = EventSubscriber(event_type=OT.InformationContentStatusEvent,
                             callback=info_event_received,
                             origin=dp_id,
                             auto_delete=True)
        es.start()
        self.addCleanup(es.stop)

        #------------------------------------------------------------------------------------------------
        # test activate and suspend data product persistence
        #------------------------------------------------------------------------------------------------
        self.dpsc_cli.activate_data_product_persistence(dp_id)

        dp_obj = self.dpsc_cli.read_data_product(dp_id)
        self.assertIsNotNone(dp_obj)

        dataset_ids, _ = self.rrclient.find_objects(subject=dp_id,
                                                    predicate=PRED.hasDataset,
                                                    id_only=True)
        if not dataset_ids:
            raise NotFound("Data Product %s dataset  does not exist" %
                           str(dp_id))
        dataset_id = dataset_ids[0]

        # Check that the streams associated with the data product are persisted
        stream_ids, _ = self.rrclient.find_objects(dp_id, PRED.hasStream,
                                                   RT.Stream, True)
        for stream_id in stream_ids:
            self.assertTrue(self.ingestclient.is_persisted(stream_id))

        stream_id = stream_ids[0]
        route = self.pubsubcli.read_stream_route(stream_id=stream_id)

        rdt = RecordDictionaryTool(stream_definition_id=ctd_stream_def_id)
        rdt['time'] = np.arange(20)
        rdt['temp'] = np.arange(20)

        publisher = StandaloneStreamPublisher(stream_id, route)

        dataset_modified = Event()

        def cb(*args, **kwargs):
            dataset_modified.set()

        es = EventSubscriber(event_type=OT.DatasetModified,
                             callback=cb,
                             origin=dataset_id,
                             auto_delete=True)
        es.start()
        self.addCleanup(es.stop)

        publisher.publish(rdt.to_granule())

        self.assertTrue(dataset_modified.wait(30))

        #--------------------------------------------------------------------------------
        # Now get the data in one chunk using an RPC call to the data retriever
        #--------------------------------------------------------------------------------

        replay_data = self.data_retriever.retrieve(dataset_ids[0])
        self.assertIsInstance(replay_data, Granule)

        log.debug(
            "The data retriever was able to replay the dataset attached to the data product "
            "we wanted to be persisted. The data product was therefore indeed persisted; otherwise "
            "we could not have retrieved its dataset using the data retriever. This demonstrates "
            "that L4-CI-SA-RQ-267 is satisfied: 'Data product management shall persist data products'"
        )

        data_product_object = self.rrclient.read(dp_id)
        self.assertEquals(data_product_object.name, 'DP1')
        self.assertEquals(data_product_object.description, 'some new dp')

        log.debug(
            "Towards L4-CI-SA-RQ-308: 'Data product management shall persist data product metadata'. "
            "Attributes used to create the data product object, name='%s', description='%s', match those of "
            "the object read from the resource registry, name='%s', description='%s'" %
            (dp_obj.name, dp_obj.description, data_product_object.name,
             data_product_object.description))

        #------------------------------------------------------------------------------------------------
        # test suspend data product persistence
        #------------------------------------------------------------------------------------------------
        self.dpsc_cli.suspend_data_product_persistence(dp_id)

        dataset_modified.clear()

        rdt['time'] = np.arange(20, 40)

        publisher.publish(rdt.to_granule())
        self.assertFalse(dataset_modified.wait(2))

        self.dpsc_cli.activate_data_product_persistence(dp_id)
        dataset_modified.clear()

        publisher.publish(rdt.to_granule())
        self.assertTrue(dataset_modified.wait(30))

        granule = self.data_retriever.retrieve(dataset_id)
        rdt = RecordDictionaryTool.load_from_granule(granule)
        np.testing.assert_array_almost_equal(rdt['time'], np.arange(40))

        dataset_ids, _ = self.rrclient.find_objects(dp_id,
                                                    PRED.hasDataset,
                                                    id_only=True)
        self.assertEquals(len(dataset_ids), 1)

        self.dpsc_cli.suspend_data_product_persistence(dp_id)
        self.dpsc_cli.force_delete_data_product(dp_id)
        # now try to get the deleted dp object

        with self.assertRaises(NotFound):
            dp_obj = self.rrclient.read(dp_id)

        info_event_counter = 0
        runtime = 0
        starttime = time.time()
        caught_events = []

        # check that the four InformationContentStatusEvents were received
        while info_event_counter < 4 and runtime < 60:
            a = queue.get(timeout=60)
            caught_events.append(a)
            info_event_counter += 1
            runtime = time.time() - starttime

        self.assertEquals(info_event_counter, 4)
class TestPreloadThenLoadDataset(IonIntegrationTestCase):
    """ Uses the preload system to define the ExternalDataset and related resources,
        then invokes services to perform the load
    """

    def setUp(self):
        # Start container
        self._start_container()
        self.container.start_rel_from_url('res/deploy/r2deploy.yml')
        config = dict(op="load", scenario="NOSE", attachments="res/preload/r2_ioc/attachments")
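        # op="load" runs the preload operation for the "NOSE" scenario; the attachments
        # path points the loader at the files it should attach to the preloaded resources.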
        self.container.spawn_process("Loader", "ion.processes.bootstrap.ion_loader", "IONLoader", config=config)
        self.pubsub = PubsubManagementServiceClient()
        self.dams = DataAcquisitionManagementServiceClient()

    @unittest.skip("deprecated test, now in mi repo")
    def test_use_case(self):
        # setUp() has already started the container and performed the preload
#        self.assert_dataset_loaded('Test External CTD Dataset') # make sure we have the ExternalDataset resources
        self.assert_dataset_loaded('Unit Test SMB37')           # association changed -- now use device name
        self.do_listen_for_incoming()                           # listen for any data being received from the dataset
        self.do_read_dataset()                                  # call services to load dataset
        self.assert_data_received()                             # check that data was received as expected
        self.do_shutdown()

    def assert_dataset_loaded(self, name):
        rr = self.container.resource_registry
#        self.external_dataset = self.find_object_by_name(name, RT.ExternalDataset)
        devs, _ = rr.find_resources(RT.InstrumentDevice, name=name, id_only=False)
        self.assertEquals(len(devs), 1)
        self.device = devs[0]
        obj,_ = rr.find_objects(subject=self.device._id, predicate=PRED.hasAgentInstance, object_type=RT.ExternalDatasetAgentInstance)
        self.agent_instance = obj[0]
        obj,_ = rr.find_objects(object_type=RT.ExternalDatasetAgent, predicate=PRED.hasAgentDefinition, subject=self.agent_instance._id)
        self.agent = obj[0]

        driver_cfg = self.agent_instance.driver_config
        #stream_definition_id = driver_cfg['dh_cfg']['stream_def'] if 'dh_cfg' in driver_cfg else driver_cfg['stream_def']
        #self.stream_definition = rr.read(stream_definition_id)

        self.data_product = rr.read_object(subject=self.device._id, predicate=PRED.hasOutputProduct, object_type=RT.DataProduct)

        self.dataset_id = rr.read_object(subject=self.data_product._id, predicate=PRED.hasDataset, object_type=RT.Dataset, id_only=True)

        ids,_ = rr.find_objects(subject=self.data_product._id, predicate=PRED.hasStream, object_type=RT.Stream, id_only=True)
        self.stream_id = ids[0]
        self.route = self.pubsub.read_stream_route(self.stream_id)

    def do_listen_for_incoming(self):
        subscription_id = self.pubsub.create_subscription('validator', data_product_ids=[self.data_product._id])
        self.addCleanup(self.pubsub.delete_subscription, subscription_id)

        self.granule_capture = []
        self.granule_count = 0
        def on_granule(msg, route, stream_id):
            self.granule_count += 1
            if self.granule_count < 5:
                self.granule_capture.append(msg)
        validator = StandaloneStreamSubscriber('validator', callback=on_granule)
        validator.start()
        self.addCleanup(validator.stop)

        self.pubsub.activate_subscription(subscription_id)
        self.addCleanup(self.pubsub.deactivate_subscription, subscription_id)

        self.dataset_modified = Event()
        def cb2(*args, **kwargs):
            self.dataset_modified.set()
            # TODO: event isn't using the ExternalDataset, but a different ID for a Dataset
        es = EventSubscriber(event_type=OT.DatasetModified, callback=cb2, origin=self.dataset_id)
        es.start()
        self.addCleanup(es.stop)

    def do_read_dataset(self):
        self.dams.start_external_dataset_agent_instance(self.agent_instance._id)
        #
        # should I wait for the process (above) to start
        # before launching the client (below)?
        #
        self.client = None
        end = time.time() + MAX_AGENT_START_TIME
        while not self.client and time.time() < end:
            try:
                self.client = ResourceAgentClient(self.device._id, process=FakeProcess())
            except NotFound:
                time.sleep(2)
        if not self.client:
            self.fail(msg='external dataset agent process did not start in %d seconds' % MAX_AGENT_START_TIME)
        self.client.execute_agent(AgentCommand(command=ResourceAgentEvent.INITIALIZE))
        self.client.execute_agent(AgentCommand(command=ResourceAgentEvent.GO_ACTIVE))
        self.client.execute_agent(AgentCommand(command=ResourceAgentEvent.RUN))
        self.client.execute_resource(command=AgentCommand(command=DriverEvent.START_AUTOSAMPLE))

    def assert_data_received(self):

        # give it up to 30 more seconds to receive data before checking results
        if not self.dataset_modified.is_set():
            self.dataset_modified.wait(30)
        self.assertTrue(self.granule_count > 2, msg='granule count = %d'%self.granule_count)

        rdt = RecordDictionaryTool.load_from_granule(self.granule_capture[0])
        self.assertAlmostEqual(0, rdt['oxygen'][0], delta=0.01)
        self.assertAlmostEqual(309.77, rdt['pressure'][0], delta=0.01)
        self.assertAlmostEqual(37.9848, rdt['conductivity'][0], delta=0.01)
        self.assertAlmostEqual(9.5163, rdt['temp'][0], delta=0.01)
        self.assertAlmostEqual(3527207897.0, rdt['time'][0], delta=1)

    def do_shutdown(self):
        self.dams.stop_external_dataset_agent_instance(self.agent_instance._id)
Beispiel #19
class ExhaustiveParameterTest(IonIntegrationTestCase):
    def setUp(self):
        self.i=0
        self._start_container()
        self.container.start_rel_from_url('res/deploy/r2params.yml')

        self.dataset_management      = DatasetManagementServiceClient()
        self.pubsub_management       = PubsubManagementServiceClient()
        self.data_product_management = DataProductManagementServiceClient()
        self.resource_registry       = self.container.resource_registry
        self.data_retriever          = DataRetrieverServiceClient()

        pdicts, _ = self.resource_registry.find_resources(restype='ParameterDictionary', id_only=False)
        self.dp_ids = []
        for pdict in pdicts:
            stream_def_id = self.pubsub_management.create_stream_definition(pdict.name, parameter_dictionary_id=pdict._id)
            dp_id = self.make_dp(stream_def_id)
            if dp_id: self.dp_ids.append(dp_id)

    def make_dp(self, stream_def_id):
        stream_def = self.resource_registry.read(stream_def_id)
        dp_obj = DataProduct(
                name=stream_def.name,
                description=stream_def.name,
                processing_level_code='Parsed_Canonical')


        data_product_id = self.data_product_management.create_data_product(dp_obj, stream_definition_id=stream_def_id)
        self.data_product_management.activate_data_product_persistence(data_product_id)
        return data_product_id

    def fill_values(self, ptype, size):
        if isinstance(ptype, ArrayType):
            return ['blah'] * size
        elif isinstance(ptype, QuantityType):
            return np.sin(np.arange(size, dtype=ptype.value_encoding) * 2 * np.pi / 3)
        elif isinstance(ptype, RecordType):
            return [{'record': 'ok'}] * size
        elif isinstance(ptype, ConstantRangeType):
            return (1,1000)
        elif isinstance(ptype, ConstantType):
            return np.dtype(ptype.value_encoding).type(1)
        elif isinstance(ptype, CategoryType):
            return ptype.categories.keys()[0]
        else:
            return


    def wait_until_we_have_enough_granules(self, dataset_id='',data_size=40):
        '''
        Loops until there is a sufficient amount of data in the dataset
        '''
        done = False
        with gevent.Timeout(40):
            while not done:
                granule = self.data_retriever.retrieve_last_data_points(dataset_id, 1)
                rdt     = RecordDictionaryTool.load_from_granule(granule)
                time_name  = rdt._pdict.temporal_parameter_name
                extents    = self.dataset_management.dataset_extents(dataset_id, time_name)[0]
                fill_value = rdt._pdict.get_context(time_name).fill_value
                if rdt[time_name] and rdt[time_name][0] != fill_value and extents >= data_size:
                    done = True
                else:
                    gevent.sleep(0.2)

    def write_to_data_product(self,data_product_id):

        dataset_ids, _ = self.resource_registry.find_objects(data_product_id, 'hasDataset', id_only=True)
        dataset_id = dataset_ids.pop()

        stream_ids , _ = self.resource_registry.find_objects(data_product_id, 'hasStream', id_only=True)
        stream_id = stream_ids.pop()
        stream_def_ids, _ = self.resource_registry.find_objects(stream_id, 'hasStreamDefinition', id_only=True)
        stream_def_id = stream_def_ids.pop()

        route = self.pubsub_management.read_stream_route(stream_id)

        rdt = RecordDictionaryTool(stream_definition_id=stream_def_id)

        time_param = rdt._pdict.temporal_parameter_name
        if time_param is None:
            print '%s has no temporal parameter' % self.resource_registry.read(data_product_id).name 
            return
        rdt[time_param] = np.arange(40)


        for field in rdt.fields:
            if field == rdt._pdict.temporal_parameter_name:
                continue
            rdt[field] = self.fill_values(rdt._pdict.get_context(field).param_type,40)

        publisher = StandaloneStreamPublisher(stream_id, route)
        publisher.publish(rdt.to_granule())

        self.wait_until_we_have_enough_granules(dataset_id,40)


        granule = self.data_retriever.retrieve(dataset_id)
        rdt_out = RecordDictionaryTool.load_from_granule(granule)

        bad = []

        for field in rdt.fields:
            if not np.array_equal(rdt[field], rdt_out[field]):
                print '%s' % field
                print '%s != %s' % (rdt[field], rdt_out[field])
                bad.append(field)

        return bad

        
    def test_data_products(self):
        bad_data_products = {}
        for dp_id in self.dp_ids:
            try:
                bad_fields = self.write_to_data_product(dp_id)
                if bad_fields:
                    bad_data_products[dp_id] = "Couldn't write and retrieve %s." % bad_fields
            except:
                import traceback
                bad_data_products[dp_id] = traceback.format_exc()


        for dp_id, tb in bad_data_products.iteritems():
            print '----------'
            print 'Problem with %s' % self.resource_registry.read(dp_id).name
            print tb
            print '----------'


        if bad_data_products:
            raise AssertionError('There are bad parameter dictionaries.')
class BulkIngestBase(object):
    """
    Awkward, non-obvious test class! Subclasses implement the data-specific methods and
    this test class parses the sample file and asserts that data was read.

    test_data_ingest: creates resources and calls...
        start_agent: starts the agent and then calls...
            start_listener: starts listeners for data, including one that, when a granule is received, calls...
                get_retrieve_client: asserts that the callback received some data

    See the replacement TestPreloadThenLoadDataset. A little more declarative and straightforward, but much slower (requires preload).
    """

    def setUp(self):
        self._start_container()
        self.container.start_rel_from_url("res/deploy/r2deploy.yml")

        self.pubsub_management = PubsubManagementServiceClient()
        self.dataset_management = DatasetManagementServiceClient()
        self.data_product_management = DataProductManagementServiceClient()
        self.data_acquisition_management = DataAcquisitionManagementServiceClient()
        self.data_retriever = DataRetrieverServiceClient()
        self.process_dispatch_client = ProcessDispatcherServiceClient(node=self.container.node)
        self.resource_registry = self.container.resource_registry
        self.context_ids = self.build_param_contexts()
        self.setup_resources()

    def build_param_contexts(self):
        raise NotImplementedError("build_param_contexts must be implemented in child classes")

    def create_external_dataset(self):
        raise NotImplementedError("create_external_dataset must be implemented in child classes")

    def get_dvr_config(self):
        raise NotImplementedError("get_dvr_config must be implemented in child classes")

    def get_retrieve_client(self, dataset_id=""):
        raise NotImplementedError("get_retrieve_client must be implemented in child classes")

    def test_data_ingest(self):
        self.pdict_id = self.create_parameter_dict(self.name)
        self.stream_def_id = self.create_stream_def(self.name, self.pdict_id)
        self.data_product_id = self.create_data_product(self.name, self.description, self.stream_def_id)
        self.dataset_id = self.get_dataset_id(self.data_product_id)
        self.stream_id, self.route = self.get_stream_id_and_route(self.data_product_id)
        self.external_dataset_id = self.create_external_dataset()
        self.data_producer_id = self.register_external_dataset(self.external_dataset_id)
        self.start_agent()

    def create_parameter_dict(self, name=""):
        return self.dataset_management.create_parameter_dictionary(
            name=name, parameter_context_ids=self.context_ids, temporal_context="time"
        )

    def create_stream_def(self, name="", pdict_id=""):
        return self.pubsub_management.create_stream_definition(name=name, parameter_dictionary_id=pdict_id)

    def create_data_product(self, name="", description="", stream_def_id=""):
        tdom, sdom = time_series_domain()
        tdom = tdom.dump()
        sdom = sdom.dump()
        dp_obj = DataProduct(
            name=name,
            description=description,
            processing_level_code="Parsed_Canonical",
            temporal_domain=tdom,
            spatial_domain=sdom,
        )

        data_product_id = self.data_product_management.create_data_product(
            data_product=dp_obj, stream_definition_id=stream_def_id
        )
        self.data_product_management.activate_data_product_persistence(data_product_id)
        return data_product_id

    def register_external_dataset(self, external_dataset_id=""):
        return self.data_acquisition_management.register_external_data_set(external_dataset_id=external_dataset_id)

    def get_dataset_id(self, data_product_id=""):
        dataset_ids, assocs = self.resource_registry.find_objects(
            subject=data_product_id, predicate="hasDataset", id_only=True
        )
        return dataset_ids[0]

    def get_stream_id_and_route(self, data_product_id):
        stream_ids, _ = self.resource_registry.find_objects(data_product_id, PRED.hasStream, RT.Stream, id_only=True)
        stream_id = stream_ids[0]
        route = self.pubsub_management.read_stream_route(stream_id)
        # self.create_logger(self.name, stream_id)
        return stream_id, route

    def start_agent(self):
        agent_config = {
            "driver_config": self.get_dvr_config(),
            "stream_config": {},
            "agent": {"resource_id": self.external_dataset_id},
            "test_mode": True,
        }

        self._ia_pid = self.container.spawn_process(
            name=self.EDA_NAME, module=self.EDA_MOD, cls=self.EDA_CLS, config=agent_config
        )

        self._ia_client = ResourceAgentClient(self.external_dataset_id, process=FakeProcess())

        cmd = AgentCommand(command=ResourceAgentEvent.INITIALIZE)
        self._ia_client.execute_agent(cmd)
        cmd = AgentCommand(command=ResourceAgentEvent.GO_ACTIVE)
        self._ia_client.execute_agent(cmd)
        cmd = AgentCommand(command=ResourceAgentEvent.RUN)
        self._ia_client.execute_agent(cmd)
        cmd = AgentCommand(command=DriverEvent.START_AUTOSAMPLE)
        self._ia_client.execute_resource(command=cmd)

        self.start_listener(self.dataset_id)

    def stop_agent(self):
        cmd = AgentCommand(command=DriverEvent.STOP_AUTOSAMPLE)
        self._ia_client.execute_resource(cmd)
        cmd = AgentCommand(command=ResourceAgentEvent.RESET)
        self._ia_client.execute_agent(cmd)
        self.container.terminate_process(self._ia_pid)

    def start_listener(self, dataset_id=""):
        dataset_modified = Event()
        # callback to use retrieve to get data from the coverage
        def cb(*args, **kwargs):
            self.get_retrieve_client(dataset_id=dataset_id)

        # callback to keep execution going once dataset has been fully ingested
        def cb2(*args, **kwargs):
            dataset_modified.set()

        es = EventSubscriber(event_type=OT.DatasetModified, callback=cb, origin=dataset_id)
        es.start()

        es2 = EventSubscriber(
            event_type=OT.DeviceCommonLifecycleEvent, callback=cb2, origin="BaseDataHandler._acquire_sample"
        )
        es2.start()

        self.addCleanup(es.stop)
        self.addCleanup(es2.stop)

        # let it go for up to 120 seconds, then stop the agent and reset it
        dataset_modified.wait(120)
        self.stop_agent()

    def create_logger(self, name, stream_id=""):

        # logger process
        producer_definition = ProcessDefinition(name=name + "_logger")
        producer_definition.executable = {
            "module": "ion.processes.data.stream_granule_logger",
            "class": "StreamGranuleLogger",
        }

        logger_procdef_id = self.process_dispatch_client.create_process_definition(
            process_definition=producer_definition
        )
        configuration = {"process": {"stream_id": stream_id}}
        pid = self.process_dispatch_client.schedule_process(
            process_definition_id=logger_procdef_id, configuration=configuration
        )

        return pid
Beispiel #21
class CtdbpTransformsIntTest(IonIntegrationTestCase):
    def setUp(self):
        super(CtdbpTransformsIntTest, self).setUp()

        self._start_container()
        self.container.start_rel_from_url('res/deploy/r2deploy.yml')

        self.pubsub            = PubsubManagementServiceClient()
        self.process_dispatcher = ProcessDispatcherServiceClient()
        self.dataset_management = DatasetManagementServiceClient()
        self.data_process_management = DataProcessManagementServiceClient()
        self.dataproduct_management = DataProductManagementServiceClient()
        self.resource_registry = ResourceRegistryServiceClient()

        # This is for the time values inside the packets going into the transform
        self.i = 0

        # Cleanup of queue created by the subscriber

    def _get_new_ctd_packet(self, stream_definition_id, length):

        rdt = RecordDictionaryTool(stream_definition_id=stream_definition_id)
        rdt['time'] = numpy.arange(self.i, self.i+length)

        for field in rdt:
            if isinstance(rdt._pdict.get_context(field).param_type, QuantityType):
                rdt[field] = numpy.array([random.uniform(0.0,75.0)  for i in xrange(length)])

        g = rdt.to_granule()
        self.i+=length

        return g

    def _create_input_param_dict_for_test(self, parameter_dict_name = ''):

        pdict = ParameterDictionary()

        t_ctxt = ParameterContext('time', param_type=QuantityType(value_encoding=numpy.dtype('float64')))
        t_ctxt.axis = AxisTypeEnum.TIME
        t_ctxt.uom = 'seconds since 01-01-1900'
        pdict.add_context(t_ctxt)

        cond_ctxt = ParameterContext('conductivity', param_type=QuantityType(value_encoding=numpy.dtype('float32')))
        cond_ctxt.uom = ''
        pdict.add_context(cond_ctxt)

        pres_ctxt = ParameterContext('pressure', param_type=QuantityType(value_encoding=numpy.dtype('float32')))
        pres_ctxt.uom = ''
        pdict.add_context(pres_ctxt)

        temp_ctxt = ParameterContext('temperature', param_type=QuantityType(value_encoding=numpy.dtype('float32')))
        temp_ctxt.uom = ''
        pdict.add_context(temp_ctxt)

        dens_ctxt = ParameterContext('density', param_type=QuantityType(value_encoding=numpy.dtype('float32')))
        dens_ctxt.uom = ''
        pdict.add_context(dens_ctxt)

        sal_ctxt = ParameterContext('salinity', param_type=QuantityType(value_encoding=numpy.dtype('float32')))
        sal_ctxt.uom = ''
        pdict.add_context(sal_ctxt)

        #create temp streamdef so the data product can create the stream
        pc_list = []
        for pc_k, pc in pdict.iteritems():
            ctxt_id = self.dataset_management.create_parameter_context(pc_k, pc[1].dump())
            pc_list.append(ctxt_id)
            self.addCleanup(self.dataset_management.delete_parameter_context,ctxt_id)

        pdict_id = self.dataset_management.create_parameter_dictionary(parameter_dict_name, pc_list)
        self.addCleanup(self.dataset_management.delete_parameter_dictionary, pdict_id)

        return pdict_id

    def test_ctdbp_L0_all(self):
        """
        Test packets processed by the ctdbp_L0_all transform
        """

        #----------- Data Process Definition --------------------------------

        dpd_obj = IonObject(RT.DataProcessDefinition,
            name='CTDBP_L0_all',
            description='Take parsed stream and put the C, T and P into three separate L0 streams.',
            module='ion.processes.data.transforms.ctdbp.ctdbp_L0',
            class_name='CTDBP_L0_all')

        dprocdef_id = self.data_process_management.create_data_process_definition(dpd_obj)
        self.addCleanup(self.data_process_management.delete_data_process_definition, dprocdef_id)

        log.debug("created data process definition: id = %s", dprocdef_id)

        #----------- Data Products --------------------------------

        # Construct temporal and spatial Coordinate Reference System objects
        tdom, sdom = time_series_domain()

        sdom = sdom.dump()
        tdom = tdom.dump()

        input_param_dict = self._create_input_param_dict_for_test(parameter_dict_name = 'fictitious_ctdp_param_dict')

        # Get the stream definition for the stream using the parameter dictionary
#        input_param_dict = self.dataset_management.read_parameter_dictionary_by_name('ctdbp_cdef_sample', id_only=True)
        input_stream_def_dict = self.pubsub.create_stream_definition(name='parsed', parameter_dictionary_id=input_param_dict)
        self.addCleanup(self.pubsub.delete_stream_definition, input_stream_def_dict)

        log.debug("Got the parsed parameter dictionary: id: %s", input_param_dict)
        log.debug("Got the stream def for parsed input: %s", input_stream_def_dict)

        # Input data product
        parsed_stream_dp_obj = IonObject(RT.DataProduct,
            name='parsed_stream',
            description='Parsed stream input to CTDBP L0 transform',
            temporal_domain = tdom,
            spatial_domain = sdom)

        input_dp_id = self.dataproduct_management.create_data_product(data_product=parsed_stream_dp_obj,
            stream_definition_id=input_stream_def_dict
        )
        self.addCleanup(self.dataproduct_management.delete_data_product, input_dp_id)

        # output data product
        L0_stream_dp_obj = IonObject(RT.DataProduct,
            name='L0_stream',
            description='L0_stream output of CTDBP L0 transform',
            temporal_domain = tdom,
            spatial_domain = sdom)

        L0_stream_dp_id = self.dataproduct_management.create_data_product(data_product=L0_stream_dp_obj,
                                                                    stream_definition_id=input_stream_def_dict
                                                                    )
        self.addCleanup(self.dataproduct_management.delete_data_product, L0_stream_dp_id)

        # The key name here must be "L0_stream": when the data process is launched, this name
        # becomes config.process.publish_streams.L0_stream in the process configuration.
        self.output_products = {'L0_stream' : L0_stream_dp_id}
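        # Hedged sketch (illustration only, values are placeholders) of the relevant part of the
        # configuration the launched transform is expected to receive:
        #
        #     config = {
        #         'process': {
        #             'publish_streams': {
        #                 'L0_stream': '<stream id of the L0_stream data product>',
        #             },
        #         },
        #     }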
        out_stream_ids, _ = self.resource_registry.find_objects(L0_stream_dp_id, PRED.hasStream, RT.Stream, True)
        self.assertTrue(len(out_stream_ids))
        output_stream_id = out_stream_ids[0]

        dproc_id = self.data_process_management.create_data_process( dprocdef_id, [input_dp_id], self.output_products)
        self.addCleanup(self.data_process_management.delete_data_process, dproc_id)

        log.debug("Created a data process for ctdbp_L0. id: %s", dproc_id)

        # Activate the data process
        self.data_process_management.activate_data_process(dproc_id)
        self.addCleanup(self.data_process_management.deactivate_data_process, dproc_id)

        #----------- Find the stream that is associated with the input data product when it was created by create_data_product() --------------------------------

        stream_ids, _ = self.resource_registry.find_objects(input_dp_id, PRED.hasStream, RT.Stream, True)
        self.assertTrue(len(stream_ids))

        input_stream_id = stream_ids[0]
        stream_route = self.pubsub.read_stream_route(input_stream_id)

        log.debug("The input stream for the L0 transform: %s", input_stream_id)

        #----------- Create a subscriber that will listen to the transform's output --------------------------------

        ar = gevent.event.AsyncResult()
        def subscriber(m,r,s):
            ar.set(m)

        sub = StandaloneStreamSubscriber(exchange_name='sub', callback=subscriber)

        sub_id = self.pubsub.create_subscription('subscriber_to_transform',
            stream_ids=[output_stream_id],
            exchange_name='sub')
        self.addCleanup(self.pubsub.delete_subscription, sub_id)

        self.pubsub.activate_subscription(sub_id)
        self.addCleanup(self.pubsub.deactivate_subscription, sub_id)


        sub.start()
        self.addCleanup(sub.stop)

        #----------- Publish on that stream so that the transform can receive it --------------------------------

        pub = StandaloneStreamPublisher(input_stream_id, stream_route)
        publish_granule = self._get_new_ctd_packet(stream_definition_id=input_stream_def_dict, length = 5)

        pub.publish(publish_granule)

        log.debug("Published the following granule: %s", publish_granule)

        granule_from_transform = ar.get(timeout=20)

        log.debug("Got the following granule from the transform: %s", granule_from_transform)

        # Check that the granule published by the L0 transform has the right properties
        self._check_granule_from_transform(granule_from_transform)


    def _check_granule_from_transform(self, granule):
        """
        An internal method to check if a granule has the right properties
        """

        pass
class TestPreloadThenLoadDataset(IonIntegrationTestCase):
    """ replicates the TestHypm_WPF_CTD test (same handler/parser/data file)
        but uses the preload system to define the ExternalDataset and related resources,
        then invokes services to perform the load
    """

    def setUp(self):
        # Start container
        self._start_container()
        self.container.start_rel_from_url('res/deploy/r2deploy.yml')
        config = dict(op="load", scenario="BETA,NOSE", attachments="res/preload/r2_ioc/attachments")
        self.container.spawn_process("Loader", "ion.processes.bootstrap.ion_loader", "IONLoader", config=config)
        self.pubsub = PubsubManagementServiceClient()
        self.dams = DataAcquisitionManagementServiceClient()

    def find_object_by_name(self, name, resource_type):
        objects,_ = self.container.resource_registry.find_resources(resource_type)
        self.assertTrue(len(objects) >= 1)
#        filtered_objs = [obj for obj in objects if obj.name == name]
        filtered_objs = []
        for obj in objects:
            if obj.name==name:
                filtered_objs.append(obj)
        self.assertEquals(len(filtered_objs), 1, msg='Found %d objects with name %s'%(len(filtered_objs),name))
        return filtered_objs[0]

    def test_use_case(self):
        # setUp() has already started the container and performed the preload
#        self.assert_dataset_loaded('Test External CTD Dataset') # make sure we have the ExternalDataset resources
        self.assert_dataset_loaded('Unit Test SMB37')           # association changed -- now use device name
        self.do_listen_for_incoming()                           # listen for any data being received from the dataset
        self.do_read_dataset()                                  # call services to load dataset
        self.assert_data_received()                             # check that data was received as expected
        self.do_shutdown()

    def assert_dataset_loaded(self, name):
#        self.external_dataset = self.find_object_by_name(name, RT.ExternalDataset)
        self.device = self.find_object_by_name(name, RT.InstrumentDevice)
        rr = self.container.resource_registry
        obj,_ = rr.find_objects(subject=self.device._id, predicate=PRED.hasAgentInstance, object_type=RT.ExternalDatasetAgentInstance)
        self.agent_instance = obj[0]
        obj,_ = rr.find_objects(object_type=RT.ExternalDatasetAgent, predicate=PRED.hasAgentDefinition, subject=self.agent_instance._id)
        self.agent = obj[0]
        driver_cfg = self.agent_instance.dataset_driver_config
        stream_definition_id = (driver_cfg['dh_cfg']['stream_def']
                                if 'dh_cfg' in driver_cfg
                                else driver_cfg['stream_def'])
        self.stream_definition = rr.read(stream_definition_id)
#        data_producer_id = self.agent_instance.dataset_driver_config['dh_cfg']['data_producer_id'] if 'dh_cfg' in self.agent_instance.dataset_driver_config else self.agent_instance.dataset_driver_config['data_producer_id']
#        self.data_producer = rr.read(data_producer_id) #subject="", predicate="", object_type="", assoc="", id_only=False)
#        self.data_product = rr.read_object(object_type=RT.DataProduct, predicate=PRED.hasOutputProduct, subject=self.external_dataset._id)
        self.data_product = rr.read_object(object_type=RT.DataProduct, predicate=PRED.hasOutputProduct, subject=self.device._id)
        ids,_ = rr.find_objects(self.data_product._id, PRED.hasStream, RT.Stream, id_only=True)
        self.stream_id = ids[0]
        self.route = self.pubsub.read_stream_route(self.stream_id)

    def do_listen_for_incoming(self):
        subscription_id = self.pubsub.create_subscription('validator', data_product_ids=[self.data_product._id])
        self.addCleanup(self.pubsub.delete_subscription, subscription_id)

        self.granule_capture = []
        self.granule_count = 0
        def on_granule(msg, route, stream_id):
            self.granule_count += 1
            if self.granule_count<5:
                self.granule_capture.append(msg)
        validator = StandaloneStreamSubscriber('validator', callback=on_granule)
        validator.start()
        self.addCleanup(validator.stop)

        self.pubsub.activate_subscription(subscription_id)
        self.addCleanup(self.pubsub.deactivate_subscription, subscription_id)

        self.dataset_modified = Event()
        def cb2(*args, **kwargs):
            self.dataset_modified.set()
            # TODO: event isn't using the ExternalDataset, but a different ID for a Dataset
        es = EventSubscriber(event_type=OT.DatasetModified, callback=cb2, origin=self.device._id)
        es.start()
        self.addCleanup(es.stop)

    def do_read_dataset(self):

        self.dams.start_external_dataset_agent_instance(self.agent_instance._id)
        #
        # should I wait for the process (above) to start
        # before launching the client (below)?
        #
        self.client = ResourceAgentClient(self.device._id, process=FakeProcess())
        self.client.execute_agent(AgentCommand(command=ResourceAgentEvent.INITIALIZE))
        self.client.execute_agent(AgentCommand(command=ResourceAgentEvent.GO_ACTIVE))
        self.client.execute_agent(AgentCommand(command=ResourceAgentEvent.RUN))
        self.client.execute_resource(command=AgentCommand(command=DriverEvent.START_AUTOSAMPLE))

    def assert_data_received(self):

        # give it up to 30 more seconds to receive data before checking results
        if not self.dataset_modified.is_set():
            self.dataset_modified.wait(30)
        self.assertTrue(self.granule_count>2, msg='granule count = %d'%self.granule_count)

        rdt = RecordDictionaryTool.load_from_granule(self.granule_capture[0])
        self.assertAlmostEqual(0, rdt['oxygen'][0], delta=0.01)
        self.assertAlmostEqual(309.77, rdt['pressure'][0], delta=0.01)
        self.assertAlmostEqual(37.9848, rdt['conductivity'][0], delta=0.01)
        self.assertAlmostEqual(9.5163, rdt['temp'][0], delta=0.01)
        self.assertAlmostEqual(1318219097, rdt['time'][0], delta=1)

    def do_shutdown(self):
        self.dams.stop_external_dataset_agent_instance(self.agent_instance._id)
class TestOmsLaunch(IonIntegrationTestCase):

    def setUp(self):
        self._start_container()

        self.container.start_rel_from_url('res/deploy/r2deploy.yml')

        self.rrclient = ResourceRegistryServiceClient(node=self.container.node)
        self.omsclient = ObservatoryManagementServiceClient(node=self.container.node)
        self.imsclient = InstrumentManagementServiceClient(node=self.container.node)
        self.damsclient = DataAcquisitionManagementServiceClient(node=self.container.node)
        self.dpclient = DataProductManagementServiceClient(node=self.container.node)
        self.pubsubcli = PubsubManagementServiceClient(node=self.container.node)
        self.processdispatchclient = ProcessDispatcherServiceClient(node=self.container.node)
        self.dataset_management = DatasetManagementServiceClient()


        self.platformModel_id = None

        # rsn_oms: to retrieve network structure and information from RSN-OMS:
        # Note that OmsClientFactory will create an "embedded" RSN OMS
        # simulator object by default.
        self.rsn_oms = OmsClientFactory.create_instance()

        self.all_platforms = {}
        self.topology = {}
        self.agent_device_map = {}
        self.agent_streamconfig_map = {}

        self._async_data_result = AsyncResult()
        self._data_subscribers = []
        self._samples_received = []
        self.addCleanup(self._stop_data_subscribers)

        self._async_event_result = AsyncResult()
        self._event_subscribers = []
        self._events_received = []
        self.addCleanup(self._stop_event_subscribers)
        self._start_event_subscriber()

        self._set_up_DataProduct_obj()
        self._set_up_PlatformModel_obj()

    def _set_up_DataProduct_obj(self):
        # Create data product object to be used for each of the platform log streams
        tdom, sdom = time_series_domain()
        sdom = sdom.dump()
        tdom = tdom.dump()

        pdict_id = self.dataset_management.read_parameter_dictionary_by_name('platform_eng_parsed', id_only=True)
        self.platform_eng_stream_def_id = self.pubsubcli.create_stream_definition(
            name='platform_eng', parameter_dictionary_id=pdict_id)
        self.dp_obj = IonObject(RT.DataProduct,
            name='platform_eng data',
            description='platform_eng test',
            temporal_domain = tdom,
            spatial_domain = sdom)

    def _set_up_PlatformModel_obj(self):
        # Create PlatformModel
        platformModel_obj = IonObject(RT.PlatformModel,
                                      name='RSNPlatformModel',
                                      description="RSNPlatformModel")
        try:
            self.platformModel_id = self.imsclient.create_platform_model(platformModel_obj)
        except BadRequest as ex:
            self.fail("failed to create new PlatformModel: %s" % ex)
        log.debug( 'new PlatformModel id = %s', self.platformModel_id)

    def _traverse(self, platform_id, parent_platform_objs=None):
        """
        Recursive routine that repeatedly calls _prepare_platform to build
        the object dictionary for each platform.

        @param platform_id ID of the platform to be visited
        @param parent_platform_objs dict of objects associated to parent
                        platform, if any.

        @retval the dict returned by _prepare_platform at this level.
        """

        log.info("Starting _traverse for %r", platform_id)

        plat_objs = self._prepare_platform(platform_id, parent_platform_objs)

        self.all_platforms[platform_id] = plat_objs

        # now, traverse the children:
        retval = self.rsn_oms.getSubplatformIDs(platform_id)
        subplatform_ids = retval[platform_id]
        for subplatform_id in subplatform_ids:
            self._traverse(subplatform_id, plat_objs)

        # note, topology indexed by platform_id
        self.topology[platform_id] = plat_objs['children']

        return plat_objs

    def _prepare_platform(self, platform_id, parent_platform_objs):
        """
        This routine generalizes the manual construction currently done in
        test_oms_launch.py. It is called by the recursive _traverse method so
        all platforms starting from a given base platform are prepared.

        Note: For simplicity in this test, sites are organized in the same
        hierarchical way as the platforms themselves.

        @param platform_id ID of the platform to be visited
        @param parent_platform_objs dict of objects associated to parent
                        platform, if any.

        @retval a dict of associated objects similar to those in
                test_oms_launch
        """

        site__obj = IonObject(RT.PlatformSite,
                            name='%s_PlatformSite' % platform_id,
                            description='%s_PlatformSite platform site' % platform_id)

        site_id = self.omsclient.create_platform_site(site__obj)

        if parent_platform_objs:
            # establish hasSite association with the parent
            self.rrclient.create_association(
                subject=parent_platform_objs['site_id'],
                predicate=PRED.hasSite,
                object=site_id)

        # prepare platform attributes and ports:
        monitor_attributes = self._prepare_platform_attributes(platform_id)
        ports =              self._prepare_platform_ports(platform_id)

        device__obj = IonObject(RT.PlatformDevice,
                        name='%s_PlatformDevice' % platform_id,
                        description='%s_PlatformDevice platform device' % platform_id,
                        ports=ports,
                        platform_monitor_attributes = monitor_attributes)

        device_id = self.imsclient.create_platform_device(device__obj)

        self.imsclient.assign_platform_model_to_platform_device(self.platformModel_id, device_id)
        self.rrclient.create_association(subject=site_id, predicate=PRED.hasDevice, object=device_id)
        self.damsclient.register_instrument(instrument_id=device_id)


        if parent_platform_objs:
            # establish hasDevice association with the parent
            self.rrclient.create_association(
                subject=parent_platform_objs['device_id'],
                predicate=PRED.hasDevice,
                object=device_id)

        agent__obj = IonObject(RT.PlatformAgent,
                            name='%s_PlatformAgent' % platform_id,
                            description='%s_PlatformAgent platform agent' % platform_id)

        agent_id = self.imsclient.create_platform_agent(agent__obj)

        if parent_platform_objs:
            # add this platform_id to parent's children:
            parent_platform_objs['children'].append(platform_id)


        self.imsclient.assign_platform_model_to_platform_agent(self.platformModel_id, agent_id)

#        agent_instance_obj = IonObject(RT.PlatformAgentInstance,
#                                name='%s_PlatformAgentInstance' % platform_id,
#                                description="%s_PlatformAgentInstance" % platform_id)
#
#        agent_instance_id = self.imsclient.create_platform_agent_instance(
#                            agent_instance_obj, agent_id, device_id)

        plat_objs = {
            'platform_id':        platform_id,
            'site__obj':          site__obj,
            'site_id':            site_id,
            'device__obj':        device__obj,
            'device_id':          device_id,
            'agent__obj':         agent__obj,
            'agent_id':           agent_id,
#            'agent_instance_obj': agent_instance_obj,
#            'agent_instance_id':  agent_instance_id,
            'children':           []
        }

        log.info("plat_objs for platform_id %r = %s", platform_id, str(plat_objs))

        self.agent_device_map[platform_id] = device__obj

        stream_config = self._create_stream_config(plat_objs)
        self.agent_streamconfig_map[platform_id] = stream_config
#        self._start_data_subscriber(agent_instance_id, stream_config)

        return plat_objs

    def _prepare_platform_attributes(self, platform_id):
        """
        Returns the list of PlatformMonitorAttributes objects corresponding to
        the attributes associated to the given platform.
        """
        result = self.rsn_oms.getPlatformAttributes(platform_id)
        self.assertTrue(platform_id in result)
        ret_infos = result[platform_id]

        monitor_attributes = []
        for attrName, attrDfn in ret_infos.iteritems():
            log.debug("platform_id=%r: preparing attribute=%r", platform_id, attrName)

            monitor_rate = attrDfn['monitorCycleSeconds']
            units =        attrDfn['units']

            plat_attr_obj = IonObject(OT.PlatformMonitorAttributes,
                                      id=attrName,
                                      monitor_rate=monitor_rate,
                                      units=units)

            monitor_attributes.append(plat_attr_obj)

        return monitor_attributes

    def _prepare_platform_ports(self, platform_id):
        """
        Returns the list of PlatformPort objects corresponding to the ports
        associated to the given platform.
        """
        result = self.rsn_oms.getPlatformPorts(platform_id)
        self.assertTrue(platform_id in result)
        port_dict = result[platform_id]

        ports = []
        for port_id, port in port_dict.iteritems():
            log.debug("platform_id=%r: preparing port=%r", platform_id, port_id)
            ip_address = port['comms']['ip']
            plat_port_obj = IonObject(OT.PlatformPort,
                                      port_id=port_id,
                                      ip_address=ip_address)

            ports.append(plat_port_obj)

        return ports

    def _create_stream_config(self, plat_objs):

        platform_id = plat_objs['platform_id']
        device_id =   plat_objs['device_id']


        #create the log data product
        self.dp_obj.name = '%s platform_eng data' % platform_id
        data_product_id = self.dpclient.create_data_product(data_product=self.dp_obj, stream_definition_id=self.platform_eng_stream_def_id)
        self.damsclient.assign_data_product(input_resource_id=device_id, data_product_id=data_product_id)
        # Retrieve the id of the OUTPUT stream from the out Data Product
        stream_ids, _ = self.rrclient.find_objects(data_product_id, PRED.hasStream, None, True)

        stream_config = self._build_stream_config(stream_ids[0])
        return stream_config

    def _build_stream_config(self, stream_id=''):

        platform_eng_dictionary = DatasetManagementService.get_parameter_dictionary_by_name('platform_eng_parsed')

        # look up the stream definition for this stream; the stream route is read from pubsub below
        stream_def_ids, _ = self.rrclient.find_objects(stream_id,
            PRED.hasStreamDefinition,
            RT.StreamDefinition,
            True)


        stream_route = self.pubsubcli.read_stream_route(stream_id=stream_id)
        stream_config = {'routing_key' : stream_route.routing_key,
                         'stream_id' : stream_id,
                         'stream_definition_ref' : stream_def_ids[0],
                         'exchange_point' : stream_route.exchange_point,
                         'parameter_dictionary':platform_eng_dictionary.dump()}

        return stream_config
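
    # A minimal illustrative sketch (not part of the original test) of how the
    # stream_config built above could be used to publish a granule directly.  It
    # assumes the StandaloneStreamPublisher and RecordDictionaryTool helpers used
    # by the other examples in this file are importable here, and that 'time' is a
    # parameter of the platform_eng parameter dictionary.
    def _example_publish_with_stream_config(self, stream_config):
        route = self.pubsubcli.read_stream_route(stream_id=stream_config['stream_id'])
        publisher = StandaloneStreamPublisher(stream_config['stream_id'], route)
        rdt = RecordDictionaryTool(stream_definition_id=stream_config['stream_definition_ref'])
        rdt['time'] = [0]
        publisher.publish(rdt.to_granule())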

    def _set_platform_agent_instances(self):
        """
        Once most of the objs/defs associated with all platforms are in
        place, this method creates and associates the PlatformAgentInstance
        elements.
        """

        self.platform_configs = {}
        for platform_id, plat_objs in self.all_platforms.iteritems():

            PLATFORM_CONFIG = self.platform_configs[platform_id] = {
                'platform_id':             platform_id,
                'platform_topology':       self.topology,

#                'agent_device_map':        self.agent_device_map,
                'agent_streamconfig_map':  self.agent_streamconfig_map,

                'driver_config':           DVR_CONFIG,
            }

            agent_config = {
                'platform_config': PLATFORM_CONFIG,
            }

            agent_instance_obj = IonObject(RT.PlatformAgentInstance,
                                    name='%s_PlatformAgentInstance' % platform_id,
                                    description="%s_PlatformAgentInstance" % platform_id,
                                    agent_config=agent_config)

            agent_id = plat_objs['agent_id']
            device_id = plat_objs['device_id']
            agent_instance_id = self.imsclient.create_platform_agent_instance(
                                agent_instance_obj, agent_id, device_id)

            plat_objs['agent_instance_obj'] = agent_instance_obj
            plat_objs['agent_instance_id']  = agent_instance_id


            stream_config = self.agent_streamconfig_map[platform_id]
            self._start_data_subscriber(agent_instance_id, stream_config)


    def _start_data_subscriber(self, stream_name, stream_config):
        """
        Starts data subscriber for the given stream_name and stream_config
        """

        def consume_data(message, stream_route, stream_id):
            # A callback for processing subscribed-to data.
            log.info('Subscriber received data message: %s.', str(message))
            self._samples_received.append(message)
            self._async_data_result.set()

        log.info('_start_data_subscriber stream_name=%r', stream_name)

        stream_id = stream_config['stream_id']

        # Create subscription for the stream
        exchange_name = '%s_queue' % stream_name
        self.container.ex_manager.create_xn_queue(exchange_name).purge()
        sub = StandaloneStreamSubscriber(exchange_name, consume_data)
        sub.start()
        self._data_subscribers.append(sub)
        sub_id = self.pubsubcli.create_subscription(name=exchange_name, stream_ids=[stream_id])
        self.pubsubcli.activate_subscription(sub_id)
        sub.subscription_id = sub_id

    def _stop_data_subscribers(self):
        """
        Stop the data subscribers on cleanup.
        """
        try:
            for sub in self._data_subscribers:
                if hasattr(sub, 'subscription_id'):
                    try:
                        self.pubsubcli.deactivate_subscription(sub.subscription_id)
                    except:
                        pass
                    self.pubsubcli.delete_subscription(sub.subscription_id)
                sub.stop()
        finally:
            self._data_subscribers = []

    def _start_event_subscriber(self, event_type="PlatformAlarmEvent", sub_type="power"):
        """
        Starts event subscriber for events of given event_type ("PlatformAlarmEvent"
        by default) and given sub_type ("power" by default).
        """
        # TODO note: ion-definitions still using 'PlatformAlarmEvent' but we
        # should probably define 'PlatformExternalEvent' or something like that.

        def consume_event(evt, *args, **kwargs):
            # A callback for consuming events.
            log.info('Event subscriber received evt: %s.', str(evt))
            self._events_received.append(evt)
            self._async_event_result.set(evt)

        sub = EventSubscriber(event_type=event_type,
                              sub_type=sub_type,
                              callback=consume_event)

        sub.start()
        log.info("registered event subscriber for event_type=%r, sub_type=%r",
                 event_type, sub_type)

        self._event_subscribers.append(sub)
        sub._ready_event.wait(timeout=EVENT_TIMEOUT)

    def _stop_event_subscribers(self):
        """
        Stops the event subscribers on cleanup.
        """
        try:
            for sub in self._event_subscribers:
                if hasattr(sub, 'subscription_id'):
                    try:
                        self.pubsubcli.deactivate_subscription(sub.subscription_id)
                    except:
                        pass
                    self.pubsubcli.delete_subscription(sub.subscription_id)
                sub.stop()
        finally:
            self._event_subscribers = []

    def test_oms_create_and_launch(self):

        # pick a base platform:
        base_platform_id = BASE_PLATFORM_ID

        # and trigger the traversal of the branch rooted at that base platform
        # to create corresponding ION objects and configuration dictionaries:
        base_platform_objs = self._traverse(base_platform_id)

        # now that most of the topology information is there, add the
        # PlatformAgentInstance elements
        self._set_platform_agent_instances()

        base_platform_config = self.platform_configs[base_platform_id]

        log.info("base_platform_id = %r", base_platform_id)
        log.info("topology = %s", str(self.topology))


        #-------------------------------
        # Launch Base Platform AgentInstance, connect to the resource agent client
        #-------------------------------

        agent_instance_id = base_platform_objs['agent_instance_id']
        pid = self.imsclient.start_platform_agent_instance(platform_agent_instance_id=agent_instance_id)
        log.debug("start_platform_agent_instance returned pid=%s", pid)

        #wait for start
        instance_obj = self.imsclient.read_platform_agent_instance(agent_instance_id)
        gate = ProcessStateGate(self.processdispatchclient.read_process,
                                instance_obj.agent_process_id,
                                ProcessStateEnum.RUNNING)
        self.assertTrue(gate.await(90), "The platform agent instance did not spawn in 90 seconds")

        agent_instance_obj = self.imsclient.read_platform_agent_instance(agent_instance_id)
        log.debug('test_oms_create_and_launch: Platform agent instance obj: %s', str(agent_instance_obj))

        # Start a resource agent client to talk with the platform agent.
        self._pa_client = ResourceAgentClient('paclient', name=agent_instance_obj.agent_process_id, process=FakeProcess())
        log.debug("test_oms_create_and_launch: got pa client %s", str(self._pa_client))

        log.debug("base_platform_config =\n%s", base_platform_config)

        # ping_agent can be issued before INITIALIZE
        retval = self._pa_client.ping_agent(timeout=TIMEOUT)
        log.debug( 'Base Platform ping_agent = %s', str(retval) )

        # issue INITIALIZE command to the base platform, which will launch the
        # creation of the whole platform hierarchy rooted at base_platform_config['platform_id']
#        cmd = AgentCommand(command=PlatformAgentEvent.INITIALIZE, kwargs=dict(plat_config=base_platform_config))
        cmd = AgentCommand(command=PlatformAgentEvent.INITIALIZE)
        retval = self._pa_client.execute_agent(cmd, timeout=TIMEOUT)
        log.debug( 'Base Platform INITIALIZE = %s', str(retval) )


        # GO_ACTIVE
        cmd = AgentCommand(command=PlatformAgentEvent.GO_ACTIVE)
        retval = self._pa_client.execute_agent(cmd, timeout=TIMEOUT)
        log.debug( 'Base Platform GO_ACTIVE = %s', str(retval) )

        # RUN: this command includes the launch of the resource monitoring greenlets
        cmd = AgentCommand(command=PlatformAgentEvent.RUN)
        retval = self._pa_client.execute_agent(cmd, timeout=TIMEOUT)
        log.debug( 'Base Platform RUN = %s', str(retval) )

        # START_EVENT_DISPATCH
        kwargs = dict(params="TODO set params")
        cmd = AgentCommand(command=PlatformAgentEvent.START_EVENT_DISPATCH, kwargs=kwargs)
        retval = self._pa_client.execute_agent(cmd, timeout=TIMEOUT)
        self.assertTrue(retval.result is not None)

        # wait for data sample
        # just wait for at least one -- see consume_data above
        log.info("waiting for reception of a data sample...")
        self._async_data_result.get(timeout=DATA_TIMEOUT)
        self.assertTrue(len(self._samples_received) >= 1)

        log.info("waiting a bit more for reception of more data samples...")
        sleep(10)
        log.info("Got data samples: %d", len(self._samples_received))


        # wait for event
        # just wait for at least one event -- see consume_event above
        log.info("waiting for reception of an event...")
        self._async_event_result.get(timeout=EVENT_TIMEOUT)
        log.info("Received events: %s", len(self._events_received))


        # STOP_EVENT_DISPATCH
        cmd = AgentCommand(command=PlatformAgentEvent.STOP_EVENT_DISPATCH)
        retval = self._pa_client.execute_agent(cmd, timeout=TIMEOUT)
        self.assertTrue(retval.result is not None)

        # GO_INACTIVE
        cmd = AgentCommand(command=PlatformAgentEvent.GO_INACTIVE)
        retval = self._pa_client.execute_agent(cmd, timeout=TIMEOUT)
        log.debug( 'Base Platform GO_INACTIVE = %s', str(retval) )

        # RESET: Resets the base platform agent, which includes termination of
        # its sub-platforms processes:
        cmd = AgentCommand(command=PlatformAgentEvent.RESET)
        retval = self._pa_client.execute_agent(cmd, timeout=TIMEOUT)
        log.debug( 'Base Platform RESET = %s', str(retval) )

        #-------------------------------
        # Stop Base Platform AgentInstance
        #-------------------------------
        self.imsclient.stop_platform_agent_instance(platform_agent_instance_id=agent_instance_id)
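
    # A minimal sketch (an illustration, not part of the original test) that collapses
    # the INITIALIZE / GO_ACTIVE / RUN command sequence issued above into a reusable helper:
    def _example_run_agent_lifecycle(self, pa_client):
        for event in (PlatformAgentEvent.INITIALIZE,
                      PlatformAgentEvent.GO_ACTIVE,
                      PlatformAgentEvent.RUN):
            retval = pa_client.execute_agent(AgentCommand(command=event), timeout=TIMEOUT)
            log.debug('%s returned: %s', event, retval)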
Beispiel #24
0
class BulkIngestBase(object):
    """
    awkward, non-obvious test class!  subclasses will implement data-specific methods and
    this test class will parse sample file and assert data was read.

    test_data_ingest: create resources and call...
        start_agent: starts agent and then call...
            start_listener: starts listeners for data, including one that when granule is received calls...
                get_retrieve_client: asserts that callback had some data

    See replacement TestPreloadThenLoadDataset.  A little more declarative and straight-forward, but much slower (requires preload).
    """
    def setUp(self):
        self._start_container()
        self.container.start_rel_from_url('res/deploy/r2deploy.yml')

        self.pubsub_management    = PubsubManagementServiceClient()
        self.dataset_management   = DatasetManagementServiceClient()
        self.data_product_management = DataProductManagementServiceClient()
        self.data_acquisition_management = DataAcquisitionManagementServiceClient()
        self.data_retriever = DataRetrieverServiceClient()
        self.process_dispatch_client = ProcessDispatcherServiceClient(node=self.container.node)
        self.resource_registry       = self.container.resource_registry
        self.context_ids = self.build_param_contexts()
        self.setup_resources()

    def build_param_contexts(self):
        raise NotImplementedError('build_param_contexts must be implemented in child classes')

    def create_external_dataset(self):
        raise NotImplementedError('create_external_dataset must be implemented in child classes')

    def get_dvr_config(self):
        raise NotImplementedError('get_dvr_config must be implemented in child classes')

    def get_retrieve_client(self, dataset_id=''):
        raise NotImplementedError('get_retrieve_client must be implemented in child classes')

    def test_data_ingest(self):
        self.pdict_id = self.create_parameter_dict(self.name)
        self.stream_def_id = self.create_stream_def(self.name, self.pdict_id)
        self.data_product_id = self.create_data_product(self.name, self.description, self.stream_def_id)
        self.dataset_id = self.get_dataset_id(self.data_product_id)
        self.stream_id, self.route = self.get_stream_id_and_route(self.data_product_id)
        self.external_dataset_id = self.create_external_dataset()
        self.data_producer_id = self.register_external_dataset(self.external_dataset_id)
        self.start_agent()

    def create_parameter_dict(self, name=''):
        return self.dataset_management.create_parameter_dictionary(name=name, parameter_context_ids=self.context_ids, temporal_context='time')

    def create_stream_def(self, name='', pdict_id=''):
        return self.pubsub_management.create_stream_definition(name=name, parameter_dictionary_id=pdict_id)

    def create_data_product(self, name='', description='', stream_def_id=''):
        tdom, sdom = time_series_domain()
        tdom = tdom.dump()
        sdom = sdom.dump()
        dp_obj = DataProduct(
            name=name,
            description=description,
            processing_level_code='Parsed_Canonical',
            temporal_domain=tdom,
            spatial_domain=sdom)

        data_product_id = self.data_product_management.create_data_product(data_product=dp_obj, stream_definition_id=stream_def_id)
        self.data_product_management.activate_data_product_persistence(data_product_id)
        self.addCleanup(self.data_product_management.suspend_data_product_persistence, data_product_id)
        return data_product_id

    def register_external_dataset(self, external_dataset_id=''):
        return self.data_acquisition_management.register_external_data_set(external_dataset_id=external_dataset_id)

    def get_dataset_id(self, data_product_id=''):
        dataset_ids, assocs = self.resource_registry.find_objects(subject=data_product_id, predicate='hasDataset', id_only=True)
        return dataset_ids[0]

    def get_stream_id_and_route(self, data_product_id):
        stream_ids, _ = self.resource_registry.find_objects(data_product_id, PRED.hasStream, RT.Stream, id_only=True)
        stream_id = stream_ids[0]
        route = self.pubsub_management.read_stream_route(stream_id)
        #self.create_logger(self.name, stream_id)
        return stream_id, route

    def start_agent(self):
        agent_config = {
            'driver_config': self.get_dvr_config(),
            'stream_config': {},
            'agent': {'resource_id': self.external_dataset_id},
            'test_mode': True
        }

        self._ia_pid = self.container.spawn_process(
            name=self.EDA_NAME,
            module=self.EDA_MOD,
            cls=self.EDA_CLS,
            config=agent_config)

        self._ia_client = ResourceAgentClient(self.external_dataset_id, process=FakeProcess())

        cmd = AgentCommand(command=ResourceAgentEvent.INITIALIZE)
        self._ia_client.execute_agent(cmd)
        cmd = AgentCommand(command=ResourceAgentEvent.GO_ACTIVE)
        self._ia_client.execute_agent(cmd)
        cmd = AgentCommand(command=ResourceAgentEvent.RUN)
        self._ia_client.execute_agent(cmd)
        cmd = AgentCommand(command=DriverEvent.START_AUTOSAMPLE)
        self._ia_client.execute_resource(command=cmd)

        self.start_listener(self.dataset_id)

    def stop_agent(self):
        cmd = AgentCommand(command=DriverEvent.STOP_AUTOSAMPLE)
        self._ia_client.execute_resource(cmd)
        cmd = AgentCommand(command=ResourceAgentEvent.RESET)
        self._ia_client.execute_agent(cmd)
        self.container.terminate_process(self._ia_pid)

    def start_listener(self, dataset_id=''):
        dataset_modified = Event()
        #callback to use retrieve to get data from the coverage
        def cb(*args, **kwargs):
            self.get_retrieve_client(dataset_id=dataset_id)

        #callback to keep execution going once dataset has been fully ingested
        def cb2(*args, **kwargs):
            dataset_modified.set()

        es = EventSubscriber(event_type=OT.DatasetModified, callback=cb, origin=dataset_id)
        es.start()

        es2 = EventSubscriber(event_type=OT.DeviceCommonLifecycleEvent, callback=cb2, origin='BaseDataHandler._acquire_sample')
        es2.start()

        self.addCleanup(es.stop)
        self.addCleanup(es2.stop)

        #let it go for up to 120 seconds, then stop the agent and reset it
        dataset_modified.wait(120)
        self.stop_agent()

    def create_logger(self, name, stream_id=''):

        # logger process
        producer_definition = ProcessDefinition(name=name+'_logger')
        producer_definition.executable = {
            'module':'ion.processes.data.stream_granule_logger',
            'class':'StreamGranuleLogger'
        }

        logger_procdef_id = self.process_dispatch_client.create_process_definition(process_definition=producer_definition)
        configuration = {
            'process':{
                'stream_id':stream_id,
                }
        }
        pid = self.process_dispatch_client.schedule_process(process_definition_id=logger_procdef_id, configuration=configuration)

        return pid
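
# A hypothetical minimal subclass of BulkIngestBase, included only to illustrate the
# hooks that the docstring above describes.  All names, modules and configuration
# values below are assumptions, not the actual data-specific subclasses:
class ExampleBulkIngest(BulkIngestBase, IonIntegrationTestCase):

    def setup_resources(self):
        self.name = 'example_bulk_ingest'
        self.description = 'example external dataset ingest'
        # agent spawn parameters consumed by start_agent() above (assumed values)
        self.EDA_NAME = 'ExampleDatasetAgent'
        self.EDA_MOD = 'ion.agents.data.dataset_agent'
        self.EDA_CLS = 'DataSetAgent'

    def build_param_contexts(self):
        # return the ids of the ParameterContext resources making up the dictionary;
        # a pre-existing 'time' context is assumed to be loadable by name (assumed service operation)
        t_ctxt_id = self.dataset_management.read_parameter_context_by_name('time', id_only=True)
        return [t_ctxt_id]

    def create_external_dataset(self):
        ds_obj = IonObject(RT.ExternalDataset, name=self.name, description=self.description)
        return self.data_acquisition_management.create_external_dataset(external_dataset=ds_obj)

    def get_dvr_config(self):
        # driver config is handler-specific; only the commonly used keys are sketched
        return {'dvr_mod': 'ion.agents.data.handlers.example_data_handler',
                'dvr_cls': 'ExampleDataHandler',
                'dh_cfg': {'stream_id': self.stream_id, 'stream_route': self.route}}

    def get_retrieve_client(self, dataset_id=''):
        granule = self.data_retriever.retrieve_last_data_points(dataset_id, 10)
        rdt = RecordDictionaryTool.load_from_granule(granule)
        self.assertIsNotNone(rdt['time'])
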
Beispiel #25
0
class TestPreloadThenLoadDataset(IonIntegrationTestCase):
    """ Uses the preload system to define the ExternalDataset and related resources,
        then invokes services to perform the load
    """
    def setUp(self):
        # Start container
        self._start_container()
        self.container.start_rel_from_url('res/deploy/r2deploy.yml')
        config = dict(op="load",
                      scenario="NOSE",
                      attachments="res/preload/r2_ioc/attachments")
        self.container.spawn_process("Loader",
                                     "ion.processes.bootstrap.ion_loader",
                                     "IONLoader",
                                     config=config)
        self.pubsub = PubsubManagementServiceClient()
        self.dams = DataAcquisitionManagementServiceClient()

    @unittest.skip("depricated test, now in mi repo")
    def test_use_case(self):
        # setUp() has already started the container and performed the preload
        #        self.assert_dataset_loaded('Test External CTD Dataset') # make sure we have the ExternalDataset resources
        self.assert_dataset_loaded(
            'Unit Test SMB37')  # association changed -- now use device name
        self.do_listen_for_incoming(
        )  # listen for any data being received from the dataset
        self.do_read_dataset()  # call services to load dataset
        self.assert_data_received()  # check that data was received as expected
        self.do_shutdown()

    def assert_dataset_loaded(self, name):
        rr = self.container.resource_registry
        #        self.external_dataset = self.find_object_by_name(name, RT.ExternalDataset)
        devs, _ = rr.find_resources(RT.InstrumentDevice,
                                    name=name,
                                    id_only=False)
        self.assertEquals(len(devs), 1)
        self.device = devs[0]
        obj, _ = rr.find_objects(subject=self.device._id,
                                 predicate=PRED.hasAgentInstance,
                                 object_type=RT.ExternalDatasetAgentInstance)
        self.agent_instance = obj[0]
        obj, _ = rr.find_objects(object_type=RT.ExternalDatasetAgent,
                                 predicate=PRED.hasAgentDefinition,
                                 subject=self.agent_instance._id)
        self.agent = obj[0]

        driver_cfg = self.agent_instance.driver_config
        #stream_definition_id = driver_cfg['dh_cfg']['stream_def'] if 'dh_cfg' in driver_cfg else driver_cfg['stream_def']
        #self.stream_definition = rr.read(stream_definition_id)

        self.data_product = rr.read_object(subject=self.device._id,
                                           predicate=PRED.hasOutputProduct,
                                           object_type=RT.DataProduct)

        self.dataset_id = rr.read_object(subject=self.data_product._id,
                                         predicate=PRED.hasDataset,
                                         object_type=RT.Dataset,
                                         id_only=True)

        ids, _ = rr.find_objects(subject=self.data_product._id,
                                 predicate=PRED.hasStream,
                                 object_type=RT.Stream,
                                 id_only=True)
        self.stream_id = ids[0]
        self.route = self.pubsub.read_stream_route(self.stream_id)

    def do_listen_for_incoming(self):
        subscription_id = self.pubsub.create_subscription(
            'validator', data_product_ids=[self.data_product._id])
        self.addCleanup(self.pubsub.delete_subscription, subscription_id)

        self.granule_capture = []
        self.granule_count = 0

        def on_granule(msg, route, stream_id):
            self.granule_count += 1
            if self.granule_count < 5:
                self.granule_capture.append(msg)

        validator = StandaloneStreamSubscriber('validator',
                                               callback=on_granule)
        validator.start()
        self.addCleanup(validator.stop)

        self.pubsub.activate_subscription(subscription_id)
        self.addCleanup(self.pubsub.deactivate_subscription, subscription_id)

        self.dataset_modified = Event()

        def cb2(*args, **kwargs):
            self.dataset_modified.set()
            # TODO: event isn't using the ExternalDataset, but a different ID for a Dataset

        es = EventSubscriber(event_type=OT.DatasetModified,
                             callback=cb2,
                             origin=self.dataset_id)
        es.start()
        self.addCleanup(es.stop)

    def do_read_dataset(self):
        self.dams.start_external_dataset_agent_instance(
            self.agent_instance._id)
        #
        # should i wait for process (above) to start
        # before launching client (below)?
        #
        self.client = None
        end = time.time() + MAX_AGENT_START_TIME
        while not self.client and time.time() < end:
            try:
                self.client = ResourceAgentClient(self.device._id,
                                                  process=FakeProcess())
            except NotFound:
                time.sleep(2)
        if not self.client:
            self.fail(
                msg='external dataset agent process did not start in %d seconds'
                % MAX_AGENT_START_TIME)
        self.client.execute_agent(
            AgentCommand(command=ResourceAgentEvent.INITIALIZE))
        self.client.execute_agent(
            AgentCommand(command=ResourceAgentEvent.GO_ACTIVE))
        self.client.execute_agent(AgentCommand(command=ResourceAgentEvent.RUN))
        self.client.execute_resource(command=AgentCommand(
            command=DriverEvent.START_AUTOSAMPLE))

    def assert_data_received(self):

        # wait up to 30 more seconds for the dataset-modified event before checking the granule count
        if not self.dataset_modified.is_set():
            self.dataset_modified.wait(30)
        self.assertTrue(self.granule_count > 2,
                        msg='granule count = %d' % self.granule_count)

        rdt = RecordDictionaryTool.load_from_granule(self.granule_capture[0])
        self.assertAlmostEqual(0, rdt['oxygen'][0], delta=0.01)
        self.assertAlmostEqual(309.77, rdt['pressure'][0], delta=0.01)
        self.assertAlmostEqual(37.9848, rdt['conductivity'][0], delta=0.01)
        self.assertAlmostEqual(9.5163, rdt['temp'][0], delta=0.01)
        self.assertAlmostEqual(3527207897.0, rdt['time'][0], delta=1)

    def do_shutdown(self):
        self.dams.stop_external_dataset_agent_instance(self.agent_instance._id)
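
# A small illustrative helper (not part of the test above) showing how the ingested
# dataset could be read back and inspected once the DatasetModified event fires, using
# the same retrieve/load pattern as the other examples in this file.  The function name
# and its direct use of DataRetrieverServiceClient are assumptions for illustration:
def dump_first_record(dataset_id):
    data_retriever = DataRetrieverServiceClient()
    granule = data_retriever.retrieve(dataset_id)
    rdt = RecordDictionaryTool.load_from_granule(granule)
    for field in rdt.fields:
        values = rdt[field]
        if values is not None and len(values):
            log.info('%s[0] = %s', field, values[0])
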
Beispiel #26
0
class ExhaustiveParameterTest(IonIntegrationTestCase):
    def setUp(self):
        self.i = 0
        self._start_container()
        self.container.start_rel_from_url('res/deploy/r2params.yml')

        self.dataset_management = DatasetManagementServiceClient()
        self.pubsub_management = PubsubManagementServiceClient()
        self.data_product_management = DataProductManagementServiceClient()
        self.resource_registry = self.container.resource_registry
        self.data_retriever = DataRetrieverServiceClient()

        pdicts, _ = self.resource_registry.find_resources(
            restype='ParameterDictionary', id_only=False)
        self.dp_ids = []
        for pdict in pdicts:
            stream_def_id = self.pubsub_management.create_stream_definition(
                pdict.name, parameter_dictionary_id=pdict._id)
            dp_id = self.make_dp(stream_def_id)
            if dp_id: self.dp_ids.append(dp_id)

    def make_dp(self, stream_def_id):
        tdom, sdom = time_series_domain()
        tdom = tdom.dump()
        sdom = sdom.dump()
        stream_def = self.resource_registry.read(stream_def_id)
        dp_obj = DataProduct(name=stream_def.name,
                             description=stream_def.name,
                             processing_level_code='Parsed_Canonical',
                             temporal_domain=tdom,
                             spatial_domain=sdom)

        data_product_id = self.data_product_management.create_data_product(
            dp_obj, stream_definition_id=stream_def_id)
        self.data_product_management.activate_data_product_persistence(
            data_product_id)
        return data_product_id

    def fill_values(self, ptype, size):
        if isinstance(ptype, ArrayType):
            return ['blah'] * size
        elif isinstance(ptype, QuantityType):
            return np.sin(
                np.arange(size, dtype=ptype.value_encoding) * 2 * np.pi / 3)
        elif isinstance(ptype, RecordType):
            return [{'record': 'ok'}] * size
        elif isinstance(ptype, ConstantRangeType):
            return (1, 1000)
        elif isinstance(ptype, ConstantType):
            return np.dtype(ptype.value_encoding).type(1)
        elif isinstance(ptype, CategoryType):
            return ptype.categories.keys()[0]
        else:
            return

    def wait_until_we_have_enough_granules(self, dataset_id='', data_size=40):
        '''
        Loops until there is a sufficient amount of data in the dataset
        '''
        done = False
        with gevent.Timeout(40):
            while not done:
                granule = self.data_retriever.retrieve_last_data_points(dataset_id, 1)
                rdt = RecordDictionaryTool.load_from_granule(granule)
                tname = rdt._pdict.temporal_parameter_name
                extents = self.dataset_management.dataset_extents(dataset_id, tname)[0]
                fill_value = rdt._pdict.get_context(tname).fill_value
                if rdt[tname] and rdt[tname][0] != fill_value and extents >= data_size:
                    done = True
                else:
                    gevent.sleep(0.2)

    def write_to_data_product(self, data_product_id):

        dataset_ids, _ = self.resource_registry.find_objects(data_product_id,
                                                             'hasDataset',
                                                             id_only=True)
        dataset_id = dataset_ids.pop()

        stream_ids, _ = self.resource_registry.find_objects(data_product_id,
                                                            'hasStream',
                                                            id_only=True)
        stream_id = stream_ids.pop()
        stream_def_ids, _ = self.resource_registry.find_objects(
            stream_id, 'hasStreamDefinition', id_only=True)
        stream_def_id = stream_def_ids.pop()

        route = self.pubsub_management.read_stream_route(stream_id)

        rdt = RecordDictionaryTool(stream_definition_id=stream_def_id)

        time_param = rdt._pdict.temporal_parameter_name
        if time_param is None:
            print '%s has no temporal parameter' % self.resource_registry.read(data_product_id).name
            return
        rdt[time_param] = np.arange(40)

        for field in rdt.fields:
            if field == rdt._pdict.temporal_parameter_name:
                continue
            rdt[field] = self.fill_values(
                rdt._pdict.get_context(field).param_type, 40)

        publisher = StandaloneStreamPublisher(stream_id, route)
        publisher.publish(rdt.to_granule())

        self.wait_until_we_have_enough_granules(dataset_id, 40)

        granule = self.data_retriever.retrieve(dataset_id)
        rdt_out = RecordDictionaryTool.load_from_granule(granule)

        bad = []

        for field in rdt.fields:
            if not np.array_equal(rdt[field], rdt_out[field]):
                print '%s' % field
                print '%s != %s' % (rdt[field], rdt_out[field])
                bad.append(field)

        return bad

    def test_data_products(self):
        bad_data_products = {}
        for dp_id in self.dp_ids:
            try:
                bad_fields = self.write_to_data_product(dp_id)
                if bad_fields:
                    bad_data_products[dp_id] = "Couldn't write and retrieve %s." % bad_fields
            except:
                import traceback
                bad_data_products[dp_id] = traceback.format_exc()

        for dp_id, tb in bad_data_products.iteritems():
            print '----------'
            print 'Problem with %s' % self.resource_registry.read(dp_id).name
            print tb
            print '----------'

        if bad_data_products:
            raise AssertionError('There are bad parameter dictionaries.')
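
# The TestTransformWorker tests below register a TransformFunction that points at
# 'add_arrays' in the ion_example.add_arrays module (packaged as an egg) and later
# assert that its source contains 'def add_arrays(a, b)'.  The packaged code is not
# reproduced in this file; the following is a minimal sketch only, assuming the
# function performs element-wise addition of the two mapped input parameters:
def add_arrays(a, b):
    # element-wise sum of the inputs (e.g. conductivity + pressure), published as the
    # mapped output parameter (e.g. 'salinity')
    return np.asarray(a) + np.asarray(b)
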
class TestTransformWorker(IonIntegrationTestCase):
    def setUp(self):
        self._start_container()
        self.container.start_rel_from_url('res/deploy/r2deploy.yml')
        # Instantiate a process to represent the test
        process = TransformWorkerTestProcess()

        self.dataset_management_client = DatasetManagementServiceClient(
            node=self.container.node)
        self.pubsub_client = PubsubManagementServiceClient(
            node=self.container.node)
        self.dataproductclient = DataProductManagementServiceClient(
            node=self.container.node)
        self.dataprocessclient = DataProcessManagementServiceClient(
            node=self.container.node)
        self.processdispatchclient = ProcessDispatcherServiceClient(
            node=self.container.node)
        self.damsclient = DataAcquisitionManagementServiceClient(
            node=self.container.node)
        self.rrclient = ResourceRegistryServiceClient(node=self.container.node)
        self.imsclient = InstrumentManagementServiceProcessClient(
            node=self.container.node, process=process)

        self.time_dom, self.spatial_dom = time_series_domain()

        self.ph = ParameterHelper(self.dataset_management_client,
                                  self.addCleanup)

        self.wait_time = CFG.get_safe('endpoint.receive.timeout', 10)

    def push_granule(self, data_product_id):
        '''
        Publishes and monitors that the granule arrived
        '''
        datasets, _ = self.rrclient.find_objects(data_product_id,
                                                 PRED.hasDataset,
                                                 id_only=True)
        dataset_monitor = DatasetMonitor(datasets[0])

        rdt = self.ph.rdt_for_data_product(data_product_id)
        self.ph.fill_parsed_rdt(rdt)
        self.ph.publish_rdt_to_data_product(data_product_id, rdt)

        assert dataset_monitor.wait()
        dataset_monitor.stop()

    @attr('LOCOINT')
    @unittest.skipIf(os.getenv('CEI_LAUNCH_TEST', False),
                     'Skip test while in CEI LAUNCH mode')
    def test_transform_worker(self):

        # test that a data process (type: data-product-in / data-product-out) can be defined and launched.
        # verify that the output granule fields are correctly populated

        # test that the input and output data products are linked to facilitate provenance

        self.dp_list = []
        self.data_process_objs = []
        self._output_stream_ids = []
        self.granule_verified = Event()
        self.worker_assigned_event_verified = Event()
        self.dp_created_event_verified = Event()
        self.heartbeat_event_verified = Event()

        self.parameter_dict_id = self.dataset_management_client.read_parameter_dictionary_by_name(
            name='ctd_parsed_param_dict', id_only=True)

        # create the StreamDefinition
        self.stream_def_id = self.pubsub_client.create_stream_definition(
            name='stream_def', parameter_dictionary_id=self.parameter_dict_id)
        self.addCleanup(self.pubsub_client.delete_stream_definition,
                        self.stream_def_id)

        # create the DataProduct that is the input to the data processes
        input_dp_obj = IonObject(RT.DataProduct,
                                 name='input_data_product',
                                 description='input test stream')
        self.input_dp_id = self.dataproductclient.create_data_product(
            data_product=input_dp_obj, stream_definition_id=self.stream_def_id)

        # retrieve the Stream for this data product
        stream_ids, assoc_ids = self.rrclient.find_objects(
            self.input_dp_id, PRED.hasStream, RT.Stream, True)
        self.stream_id = stream_ids[0]

        self.start_event_listener()

        # create the DPD, DataProcess and output DataProduct
        dataprocessdef_id, dataprocess_id, dataproduct_id = self.create_data_process()
        self.dp_list.append(dataprocess_id)

        # validate the repository for data product algorithms persists the new resources  NEW SA-1
        # create_data_process call created one of each
        dpd_ids, _ = self.rrclient.find_resources(
            restype=OT.DataProcessDefinition, id_only=False)
        # there will be more than one because of the DPDs that represent the PFs in the data product above
        self.assertTrue(dpd_ids is not None)
        dp_ids, _ = self.rrclient.find_resources(restype=OT.DataProcess,
                                                 id_only=False)
        # only one DP because the PFs that are in the data product above are not activated yet.
        self.assertEquals(len(dp_ids), 1)

        # validate the name and version label  NEW SA - 2
        dataprocessdef_obj = self.dataprocessclient.read_data_process_definition(
            dataprocessdef_id)
        self.assertEqual(dataprocessdef_obj.version_label, '1.0a')
        self.assertEqual(dataprocessdef_obj.name, 'add_arrays')

        # validate that the DPD has an attachment  NEW SA - 21
        attachment_ids, assoc_ids = self.rrclient.find_objects(
            dataprocessdef_id, PRED.hasAttachment, RT.Attachment, True)
        self.assertEqual(len(attachment_ids), 1)
        attachment_obj = self.rrclient.read_attachment(attachment_ids[0])
        log.debug('attachment: %s', attachment_obj)

        # validate that the data process resource has input and output data products associated
        # L4-CI-SA-RQ-364  and NEW SA-3
        outproduct_ids, assoc_ids = self.rrclient.find_objects(
            dataprocess_id, PRED.hasOutputProduct, RT.DataProduct, True)
        self.assertEqual(len(outproduct_ids), 1)
        inproduct_ids, assoc_ids = self.rrclient.find_objects(
            dataprocess_id, PRED.hasInputProduct, RT.DataProduct, True)
        self.assertEqual(len(inproduct_ids), 1)

        # Test for provenance. Get Data product produced by the data processes
        output_data_product_id, _ = self.rrclient.find_objects(
            subject=dataprocess_id,
            object_type=RT.DataProduct,
            predicate=PRED.hasOutputProduct,
            id_only=True)

        output_data_product_provenance = self.dataproductclient.get_data_product_provenance(
            output_data_product_id[0])

        # Do a basic check that there are 2 entries in the provenance graph: the parent (input) data product
        # and the child (output) data product created from it by the DataProcessDefinition.
        self.assertTrue(len(output_data_product_provenance) == 2)
        self.assertTrue(self.input_dp_id in output_data_product_provenance[
            output_data_product_id[0]]['parents'])
        self.assertTrue(output_data_product_provenance[
            output_data_product_id[0]]['parents'][self.input_dp_id]
                        ['data_process_definition_id'] == dataprocessdef_id)

        # NEW SA - 4 | Data processing shall include the appropriate data product algorithm name and version number in
        # the metadata of each output data product created by the data product algorithm.
        output_data_product_obj, _ = self.rrclient.find_objects(
            subject=dataprocess_id,
            object_type=RT.DataProduct,
            predicate=PRED.hasOutputProduct,
            id_only=False)
        self.assertTrue(output_data_product_obj[0].name != None)
        self.assertTrue(output_data_product_obj[0]._rev != None)

        # retrieve subscription from data process
        subscription_objs, _ = self.rrclient.find_objects(
            subject=dataprocess_id,
            predicate=PRED.hasSubscription,
            object_type=RT.Subscription,
            id_only=False)
        log.debug('test_transform_worker subscription_obj:  %s',
                  subscription_objs[0])

        # create a queue to catch the published granules
        self.subscription_id = self.pubsub_client.create_subscription(
            name='parsed_subscription',
            stream_ids=[self.stream_id],
            exchange_name=subscription_objs[0].exchange_name)
        self.addCleanup(self.pubsub_client.delete_subscription,
                        self.subscription_id)

        self.pubsub_client.activate_subscription(self.subscription_id)
        self.addCleanup(self.pubsub_client.deactivate_subscription,
                        self.subscription_id)

        stream_route = self.pubsub_client.read_stream_route(self.stream_id)
        self.publisher = StandaloneStreamPublisher(stream_id=self.stream_id,
                                                   stream_route=stream_route)

        for n in range(1, 101):
            rdt = RecordDictionaryTool(stream_definition_id=self.stream_def_id)
            rdt['time'] = [0]  # time should always come first
            rdt['conductivity'] = [1]
            rdt['pressure'] = [2]
            rdt['salinity'] = [8]

            self.publisher.publish(rdt.to_granule())

        # validate that the output granule is received and the updated value is correct
        self.assertTrue(self.granule_verified.wait(self.wait_time))

        # validate that the data process loaded into worker event is received    (L4-CI-SA-RQ-182)
        self.assertTrue(
            self.worker_assigned_event_verified.wait(self.wait_time))

        # validate that the data process create (with data product ids) event is received    (NEW SA -42)
        self.assertTrue(self.dp_created_event_verified.wait(self.wait_time))

        # validate that the data process heartbeat event is received (for every hundred granules processed) (L4-CI-SA-RQ-182)
        #this takes a while so set wait limit to large value
        self.assertTrue(self.heartbeat_event_verified.wait(200))

        # validate that the code from the transform function can be retrieve via inspect_data_process_definition
        src = self.dataprocessclient.inspect_data_process_definition(
            dataprocessdef_id)
        self.assertIn('def add_arrays(a, b)', src)

        # now delete the DPD and DP then verify that the resources are retired so that information required for provenance are still available
        self.dataprocessclient.delete_data_process(dataprocess_id)
        self.dataprocessclient.delete_data_process_definition(
            dataprocessdef_id)

        in_dp_objs, _ = self.rrclient.find_objects(
            subject=dataprocess_id,
            predicate=PRED.hasInputProduct,
            object_type=RT.DataProduct,
            id_only=True)
        self.assertTrue(in_dp_objs is not None)

        dpd_objs, _ = self.rrclient.find_subjects(
            subject_type=RT.DataProcessDefinition,
            predicate=PRED.hasDataProcess,
            object=dataprocess_id,
            id_only=True)
        self.assertTrue(dpd_objs is not None)

    @attr('LOCOINT')
    @unittest.skipIf(os.getenv('CEI_LAUNCH_TEST', False),
                     'Skip test while in CEI LAUNCH mode')
    def test_transform_worker_with_instrumentdevice(self):

        # test that a data process (type: data-product-in / data-product-out) can be defined and launched.
        # verify that the output granule fields are correctly populated

        # test that the input and output data products are linked to facilitate provenance

        self.data_process_objs = []
        self._output_stream_ids = []
        self.event_verified = Event()

        # Create CTD Parsed as the initial data product
        # create a stream definition for the data from the ctd simulator
        self.parameter_dict_id = self.dataset_management_client.read_parameter_dictionary_by_name(
            'ctd_parsed_param_dict', id_only=True)
        self.stream_def_id = self.pubsub_client.create_stream_definition(
            name='stream_def', parameter_dictionary_id=self.parameter_dict_id)

        # create the DataProduct that is the input to the data processes
        input_dp_obj = IonObject(RT.DataProduct,
                                 name='input_data_product',
                                 description='input test stream')
        self.input_dp_id = self.dataproductclient.create_data_product(
            data_product=input_dp_obj, stream_definition_id=self.stream_def_id)

        # retrieve the Stream for this data product
        stream_ids, assoc_ids = self.rrclient.find_objects(
            self.input_dp_id, PRED.hasStream, RT.Stream, True)
        self.stream_id = stream_ids[0]

        log.debug('new ctd_parsed_data_product_id = %s' % self.input_dp_id)

        # only ever need one device for testing purposes.
        instDevice_obj, _ = self.rrclient.find_resources(
            restype=RT.InstrumentDevice, name='test_ctd_device')
        if instDevice_obj:
            instDevice_id = instDevice_obj[0]._id
        else:
            instDevice_obj = IonObject(RT.InstrumentDevice,
                                       name='test_ctd_device',
                                       description="test_ctd_device",
                                       serial_number="12345")
            instDevice_id = self.imsclient.create_instrument_device(
                instrument_device=instDevice_obj)

        self.damsclient.assign_data_product(input_resource_id=instDevice_id,
                                            data_product_id=self.input_dp_id)

        # create the DPD, DataProcess and output DataProduct
        dataprocessdef_id, dataprocess_id, dataproduct_id = self.create_data_process()

        self.addCleanup(self.dataprocessclient.delete_data_process,
                        dataprocess_id)
        self.addCleanup(self.dataprocessclient.delete_data_process_definition,
                        dataprocessdef_id)

        # Test for provenance. Get Data product produced by the data processes
        output_data_product_id, _ = self.rrclient.find_objects(
            subject=dataprocess_id,
            object_type=RT.DataProduct,
            predicate=PRED.hasOutputProduct,
            id_only=True)

        output_data_product_provenance = self.dataproductclient.get_data_product_provenance(
            output_data_product_id[0])

        # Do a basic check to see if there were 3 entries in the provenance graph. Parent and Child and the
        # DataProcessDefinition creating the child from the parent.
        self.assertTrue(len(output_data_product_provenance) == 3)
        self.assertTrue(self.input_dp_id in output_data_product_provenance[
            output_data_product_id[0]]['parents'])
        self.assertTrue(instDevice_id in output_data_product_provenance[
            self.input_dp_id]['parents'])
        self.assertTrue(output_data_product_provenance[instDevice_id]['type']
                        == 'InstrumentDevice')

    @attr('LOCOINT')
    @unittest.skipIf(os.getenv('CEI_LAUNCH_TEST', False),
                     'Skip test while in CEI LAUNCH mode')
    def test_transform_worker_with_platformdevice(self):

        # test that a data process (type: data-product-in / data-product-out) can be defined and launched.
        # verify that the output granule fields are correctly populated

        # test that the input and output data products are linked to facilitate provenance

        self.data_process_objs = []
        self._output_stream_ids = []
        self.event_verified = Event()

        # Create CTD Parsed as the initial data product
        # create a stream definition for the data from the ctd simulator
        self.parameter_dict_id = self.dataset_management_client.read_parameter_dictionary_by_name(
            'ctd_parsed_param_dict', id_only=True)
        self.stream_def_id = self.pubsub_client.create_stream_definition(
            name='stream_def', parameter_dictionary_id=self.parameter_dict_id)

        # create the DataProduct that is the input to the data processes
        input_dp_obj = IonObject(RT.DataProduct,
                                 name='input_data_product',
                                 description='input test stream')
        self.input_dp_id = self.dataproductclient.create_data_product(
            data_product=input_dp_obj, stream_definition_id=self.stream_def_id)

        # retrieve the Stream for this data product
        stream_ids, assoc_ids = self.rrclient.find_objects(
            self.input_dp_id, PRED.hasStream, RT.Stream, True)
        self.stream_id = stream_ids[0]

        log.debug('new ctd_parsed_data_product_id = %s' % self.input_dp_id)

        # only ever need one device for testing purposes.
        platform_device_obj, _ = self.rrclient.find_resources(
            restype=RT.PlatformDevice, name='TestPlatform')
        if platform_device_obj:
            platform_device_id = platform_device_obj[0]._id
        else:
            platform_device_obj = IonObject(RT.PlatformDevice,
                                            name='TestPlatform',
                                            description="TestPlatform",
                                            serial_number="12345")
            platform_device_id = self.imsclient.create_platform_device(
                platform_device=platform_device_obj)

        self.damsclient.assign_data_product(
            input_resource_id=platform_device_id,
            data_product_id=self.input_dp_id)

        # create the DPD, DataProcess and output DataProduct
        dataprocessdef_id, dataprocess_id, dataproduct_id = self.create_data_process()
        self.addCleanup(self.dataprocessclient.delete_data_process,
                        dataprocess_id)
        self.addCleanup(self.dataprocessclient.delete_data_process_definition,
                        dataprocessdef_id)

        # Test for provenance. Get Data product produced by the data processes
        output_data_product_id, _ = self.rrclient.find_objects(
            subject=dataprocess_id,
            object_type=RT.DataProduct,
            predicate=PRED.hasOutputProduct,
            id_only=True)

        output_data_product_provenance = self.dataproductclient.get_data_product_provenance(
            output_data_product_id[0])

        # Do a basic check to see if there were 3 entries in the provenance graph. Parent and Child and the
        # DataProcessDefinition creating the child from the parent.
        self.assertTrue(len(output_data_product_provenance) == 3)
        self.assertTrue(self.input_dp_id in output_data_product_provenance[
            output_data_product_id[0]]['parents'])
        self.assertTrue(platform_device_id in output_data_product_provenance[
            self.input_dp_id]['parents'])
        self.assertTrue(output_data_product_provenance[platform_device_id]
                        ['type'] == 'PlatformDevice')

    @attr('LOCOINT')
    @unittest.skipIf(os.getenv('CEI_LAUNCH_TEST', False),
                     'Skip test while in CEI LAUNCH mode')
    def test_event_transform_worker(self):
        self.data_process_objs = []
        self._output_stream_ids = []
        self.event_verified = Event()

        # test that a data process (type: data-product-in / event-out) can be defined and launched.
        # verify that event fields are correctly populated

        self.parameter_dict_id = self.dataset_management_client.read_parameter_dictionary_by_name(
            name='ctd_parsed_param_dict', id_only=True)

        # create the StreamDefinition
        self.stream_def_id = self.pubsub_client.create_stream_definition(
            name='stream_def', parameter_dictionary_id=self.parameter_dict_id)
        self.addCleanup(self.pubsub_client.delete_stream_definition,
                        self.stream_def_id)

        # create the DataProduct
        input_dp_obj = IonObject(RT.DataProduct,
                                 name='input_data_product',
                                 description='input test stream')
        self.input_dp_id = self.dataproductclient.create_data_product(
            data_product=input_dp_obj, stream_definition_id=self.stream_def_id)

        # retrieve the Stream for this data product
        stream_ids, assoc_ids = self.rrclient.find_objects(
            self.input_dp_id, PRED.hasStream, RT.Stream, True)
        self.stream_id = stream_ids[0]

        # create the DPD and two DPs
        self.event_data_process_id = self.create_event_data_processes()

        # retrieve subscription from data process
        subscription_objs, _ = self.rrclient.find_objects(
            subject=self.event_data_process_id,
            predicate=PRED.hasSubscription,
            object_type=RT.Subscription,
            id_only=False)
        log.debug('test_event_transform_worker subscription_obj:  %s',
                  subscription_objs[0])

        # create a queue to catch the published granules
        self.subscription_id = self.pubsub_client.create_subscription(
            name='parsed_subscription',
            stream_ids=[self.stream_id],
            exchange_name=subscription_objs[0].exchange_name)
        self.addCleanup(self.pubsub_client.delete_subscription,
                        self.subscription_id)

        self.pubsub_client.activate_subscription(self.subscription_id)
        self.addCleanup(self.pubsub_client.deactivate_subscription,
                        self.subscription_id)

        stream_route = self.pubsub_client.read_stream_route(self.stream_id)
        self.publisher = StandaloneStreamPublisher(stream_id=self.stream_id,
                                                   stream_route=stream_route)

        self.start_event_transform_listener()

        self.data_modified = Event()

        rdt = RecordDictionaryTool(stream_definition_id=self.stream_def_id)
        rdt['time'] = [0]  # time should always come first
        rdt['conductivity'] = [1]
        rdt['pressure'] = [2]
        rdt['salinity'] = [8]

        self.publisher.publish(rdt.to_granule())

        self.assertTrue(self.event_verified.wait(self.wait_time))

    @attr('LOCOINT')
    @unittest.skipIf(os.getenv('CEI_LAUNCH_TEST', False),
                     'Skip test while in CEI LAUNCH mode')
    def test_bad_argument_map(self):
        self._output_stream_ids = []

        # test that a data process (type: data-product-in / data-product-out) parameter mapping it validated during
        # data process creation and that the correct exception is raised for both input and output.

        self.parameter_dict_id = self.dataset_management_client.read_parameter_dictionary_by_name(
            name='ctd_parsed_param_dict', id_only=True)

        # create the StreamDefinition
        self.stream_def_id = self.pubsub_client.create_stream_definition(
            name='stream_def', parameter_dictionary_id=self.parameter_dict_id)
        self.addCleanup(self.pubsub_client.delete_stream_definition,
                        self.stream_def_id)

        # create the DataProduct that is the input to the data processes
        input_dp_obj = IonObject(RT.DataProduct,
                                 name='input_data_product',
                                 description='input test stream')
        self.input_dp_id = self.dataproductclient.create_data_product(
            data_product=input_dp_obj, stream_definition_id=self.stream_def_id)

        # two data processes using one transform and one DPD

        dp1_func_output_dp_id = self.create_output_data_product()

        # Set up DPD and DP #2 - array add function
        tf_obj = IonObject(
            RT.TransformFunction,
            name='add_array_func',
            description='adds values in an array',
            function='add_arrays',
            module="ion_example.add_arrays",
            arguments=['arr1', 'arr2'],
            function_type=TransformFunctionType.TRANSFORM,
            uri='http://sddevrepo.oceanobservatories.org/releases/ion_example-0.1-py2.7.egg')
        add_array_func_id, rev = self.rrclient.create(tf_obj)

        dpd_obj = IonObject(
            RT.DataProcessDefinition,
            name='add_arrays',
            description='adds the values of two arrays',
            data_process_type=DataProcessTypeEnum.TRANSFORM_PROCESS)
        add_array_dpd_id = self.dataprocessclient.create_data_process_definition(
            data_process_definition=dpd_obj, function_id=add_array_func_id)
        self.dataprocessclient.assign_stream_definition_to_data_process_definition(
            self.stream_def_id, add_array_dpd_id, binding='add_array_func')

        # create the data process with invalid argument map
        argument_map = {"arr1": "foo", "arr2": "bar"}
        output_param = "salinity"
        with self.assertRaises(BadRequest) as cm:
            dp1_data_process_id = self.dataprocessclient.create_data_process(
                data_process_definition_id=add_array_dpd_id,
                inputs=[self.input_dp_id],
                outputs=[dp1_func_output_dp_id],
                argument_map=argument_map,
                out_param_name=output_param)

        ex = cm.exception
        log.debug(' exception raised: %s', cm)
        self.assertEqual(
            ex.message,
            "Input data product does not contain the parameters defined in argument map"
        )

        # create the data process with invalid output parameter name
        argument_map = {"arr1": "conductivity", "arr2": "pressure"}
        output_param = "foo"
        with self.assertRaises(BadRequest) as cm:
            dp1_data_process_id = self.dataprocessclient.create_data_process(
                data_process_definition_id=add_array_dpd_id,
                inputs=[self.input_dp_id],
                outputs=[dp1_func_output_dp_id],
                argument_map=argument_map,
                out_param_name=output_param)

        ex = cm.exception
        log.debug('exception raised: %s', ex)
        self.assertEqual(
            ex.message,
            "Output data product does not contain the output parameter name provided"
        )

    def create_event_data_processes(self):

        # two data processes using one transform and one DPD
        argument_map = {"a": "salinity"}

        # set up DPD and DP - salinity array validation function
        tf_obj = IonObject(
            RT.TransformFunction,
            name='validate_salinity_array',
            description='validate_salinity_array',
            function='validate_salinity_array',
            module="ion.processes.data.transforms.test.test_transform_worker",
            arguments=['a'],
            function_type=TransformFunctionType.TRANSFORM)
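        # (validate_salinity_array is assumed to receive the 'salinity' array as
        #  argument 'a' and to publish a DeviceStatusAlertEvent while processing;
        #  validate_transform_event further below waits for that event.)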

        add_array_func_id, rev = self.rrclient.create(tf_obj)

        dpd_obj = IonObject(
            RT.DataProcessDefinition,
            name='validate_salinity_array',
            description='validate_salinity_array',
            data_process_type=DataProcessTypeEnum.TRANSFORM_PROCESS,
        )
        add_array_dpd_id = self.dataprocessclient.create_data_process_definition(
            data_process_definition=dpd_obj, function_id=add_array_func_id)
        self.dataprocessclient.assign_stream_definition_to_data_process_definition(
            self.stream_def_id,
            add_array_dpd_id,
            binding='validate_salinity_array')

        # create the data process
        dp1_data_process_id = self.dataprocessclient.create_data_process(
            data_process_definition_id=add_array_dpd_id,
            inputs=[self.input_dp_id],
            outputs=None,
            argument_map=argument_map)
        self.damsclient.register_process(dp1_data_process_id)
        self.addCleanup(self.dataprocessclient.delete_data_process,
                        dp1_data_process_id)

        return dp1_data_process_id

    def create_data_process(self):

        # two data processes using one transform and one DPD

        dp1_func_output_dp_id = self.create_output_data_product()
        argument_map = {"arr1": "conductivity", "arr2": "pressure"}
        output_param = "salinity"

        # set up DPD and DP #2 - array add function
        tf_obj = IonObject(
            RT.TransformFunction,
            name='add_array_func',
            description='adds values in an array',
            function='add_arrays',
            module="ion_example.add_arrays",
            arguments=['arr1', 'arr2'],
            function_type=TransformFunctionType.TRANSFORM,
            uri=
            'http://sddevrepo.oceanobservatories.org/releases/ion_example-0.1-py2.7.egg'
        )
        add_array_func_id, rev = self.rrclient.create(tf_obj)

        dpd_obj = IonObject(
            RT.DataProcessDefinition,
            name='add_arrays',
            description='adds the values of two arrays',
            data_process_type=DataProcessTypeEnum.TRANSFORM_PROCESS,
            version_label='1.0a')
        add_array_dpd_id = self.dataprocessclient.create_data_process_definition(
            data_process_definition=dpd_obj, function_id=add_array_func_id)
        self.dataprocessclient.assign_stream_definition_to_data_process_definition(
            self.stream_def_id, add_array_dpd_id, binding='add_array_func')

        # create the data process
        dp1_data_process_id = self.dataprocessclient.create_data_process(
            data_process_definition_id=add_array_dpd_id,
            inputs=[self.input_dp_id],
            outputs=[dp1_func_output_dp_id],
            argument_map=argument_map,
            out_param_name=output_param)
        self.damsclient.register_process(dp1_data_process_id)
        #self.addCleanup(self.dataprocessclient.delete_data_process, dp1_data_process_id)

        # add an attachment object to this DPD to test new SA-21
        import msgpack
        attachment_content = 'foo bar'
        attachment_obj = IonObject(RT.Attachment,
                                   name='test_attachment',
                                   attachment_type=AttachmentType.ASCII,
                                   content_type='text/plain',
                                   content=msgpack.packb(attachment_content))
        att_id = self.rrclient.create_attachment(add_array_dpd_id,
                                                 attachment_obj)
        self.addCleanup(self.rrclient.delete_attachment, att_id)
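        # (the attachment content was packed with msgpack above; a reader is
        #  expected to recover the original string with msgpack.unpackb(content))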

        return add_array_dpd_id, dp1_data_process_id, dp1_func_output_dp_id

    def create_output_data_product(self):
        dp1_outgoing_stream_id = self.pubsub_client.create_stream_definition(
            name='dp1_stream', parameter_dictionary_id=self.parameter_dict_id)

        dp1_output_dp_obj = IonObject(RT.DataProduct,
                                      name='data_process1_data_product',
                                      description='output of add array func')

        dp1_func_output_dp_id = self.dataproductclient.create_data_product(
            dp1_output_dp_obj, dp1_outgoing_stream_id)
        self.addCleanup(self.dataproductclient.delete_data_product,
                        dp1_func_output_dp_id)
        # retrieve the id of the OUTPUT stream from the out Data Product and add to granule logger
        stream_ids, _ = self.rrclient.find_objects(dp1_func_output_dp_id,
                                                   PRED.hasStream, None, True)
        self._output_stream_ids.append(stream_ids[0])

        subscription_id = self.pubsub_client.create_subscription(
            'validator', data_product_ids=[dp1_func_output_dp_id])
        self.addCleanup(self.pubsub_client.delete_subscription,
                        subscription_id)

        def on_granule(msg, route, stream_id):
            log.debug('recv_packet stream_id: %s route: %s   msg: %s',
                      stream_id, route, msg)
            self.validate_output_granule(msg, route, stream_id)
            self.granule_verified.set()

        validator = StandaloneStreamSubscriber('validator',
                                               callback=on_granule)
        validator.start()
        self.addCleanup(validator.stop)

        self.pubsub_client.activate_subscription(subscription_id)
        self.addCleanup(self.pubsub_client.deactivate_subscription,
                        subscription_id)

        return dp1_func_output_dp_id

    def validate_event(self, *args, **kwargs):
        """
        This method is a callback function for receiving DataProcessStatusEvent.
        """
        data_process_event = args[0]
        log.debug("DataProcessStatusEvent: %s",
                  str(data_process_event.__dict__))

        # if data process already created, check origin
        if self.dp_list:
            self.assertIn(data_process_event.origin, self.dp_list)

            # if this is a heartbeat event then 100 granules have been processed
            if 'data process status update.' in data_process_event.description:
                self.heartbeat_event_verified.set()

        else:
            # else check that this is the assign event

            if 'Data process assigned to transform worker' in data_process_event.description:
                self.worker_assigned_event_verified.set()
            elif 'Data process created for data product' in data_process_event.description:
                self.dp_created_event_verified.set()

    def validate_output_granule(self, msg, route, stream_id):
        self.assertIn(stream_id, self._output_stream_ids)

        rdt = RecordDictionaryTool.load_from_granule(msg)
        log.debug('validate_output_granule  rdt: %s', rdt)
        sal_val = rdt['salinity']
        np.testing.assert_array_equal(sal_val, np.array([3]))

    def start_event_listener(self):

        es = EventSubscriber(event_type=OT.DataProcessStatusEvent,
                             callback=self.validate_event)
        es.start()

        self.addCleanup(es.stop)

    def validate_transform_event(self, *args, **kwargs):
        """
        This method is a callback function for receiving DataProcessStatusEvent.
        """
        status_alert_event = args[0]

        np.testing.assert_array_equal(status_alert_event.origin,
                                      self.stream_id)
        np.testing.assert_array_equal(status_alert_event.values,
                                      np.array([self.event_data_process_id]))
        log.debug("DeviceStatusAlertEvent: %s",
                  str(status_alert_event.__dict__))
        self.event_verified.set()

    def start_event_transform_listener(self):
        es = EventSubscriber(event_type=OT.DeviceStatusAlertEvent,
                             callback=self.validate_transform_event)
        es.start()

        self.addCleanup(es.stop)

    def test_download(self):
        egg_url = 'http://sddevrepo.oceanobservatories.org/releases/ion_example-0.1-py2.7.egg'
        egg_path = TransformWorker.download_egg(egg_url)

        import pkg_resources
        pkg_resources.working_set.add_entry(egg_path)

        from ion_example.add_arrays import add_arrays

        a = add_arrays(1, 2)
        self.assertEquals(a, 3)
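
        # For reference, a minimal sketch of the function the downloaded egg is
        # assumed to provide (hypothetical; the real ion_example.add_arrays may differ):
        #
        #     def add_arrays(arr1, arr2):
        #         # element-wise (or scalar) addition of the two inputs
        #         return arr1 + arr2
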
class TestTransformPrime(IonIntegrationTestCase):
    def setUp(self):
        self._start_container()

        self.container.start_rel_from_url('res/deploy/r2deploy.yml') # Because hey why not?!

        self.dataset_management      = DatasetManagementServiceClient()
        self.data_process_management = DataProcessManagementServiceClient()
        self.pubsub_management       = PubsubManagementServiceClient()
        self.data_product_management = DataProductManagementServiceClient()


    def _create_proc_def(self):
        dpd_obj = DataProcessDefinition(
            name='Optimus',
            description='It\'s a transformer',
            module='ion.processes.data.transforms.transform_prime',
            class_name='TransformPrime')
        return self.data_process_management.create_data_process_definition(dpd_obj)
    

    def _L0_pdict(self):

        t_ctxt = ParameterContext('time', param_type=QuantityType(value_encoding=np.dtype('int64')))
        t_ctxt.uom = 'seconds since 01-01-1900'
        t_ctxt.fill_value = -9999
        t_ctxt_id = self.dataset_management.create_parameter_context(name='time', parameter_context=t_ctxt.dump(), parameter_type='quantity<int64>', unit_of_measure=t_ctxt.uom)

        lat_ctxt = ParameterContext('lat', param_type=ConstantType(QuantityType(value_encoding=np.dtype('float32'))))
        lat_ctxt.axis = AxisTypeEnum.LAT
        lat_ctxt.uom = 'degree_north'
        lat_ctxt.fill_value = -9999
        lat_ctxt_id = self.dataset_management.create_parameter_context(name='lat', parameter_context=lat_ctxt.dump(), parameter_type='quantity<float32>', unit_of_measure=lat_ctxt.uom)

        lon_ctxt = ParameterContext('lon', param_type=ConstantType(QuantityType(value_encoding=np.dtype('float32'))))
        lon_ctxt.axis = AxisTypeEnum.LON
        lon_ctxt.uom = 'degree_east'
        lon_ctxt.fill_value = -9999
        lon_ctxt_id = self.dataset_management.create_parameter_context(name='lon', parameter_context=lon_ctxt.dump(), parameter_type='quantity<float32>', unit_of_measure=lon_ctxt.uom)


        temp_ctxt = ParameterContext('TEMPWAT_L0', param_type=QuantityType(value_encoding=np.dtype('float32')))
        temp_ctxt.uom = 'deg_C'
        temp_ctxt.fill_value = -9999
        temp_ctxt_id = self.dataset_management.create_parameter_context(name='TEMPWAT_L0', parameter_context=temp_ctxt.dump(), parameter_type='quantity<float32>', unit_of_measure=temp_ctxt.uom)

        # Conductivity - values expected to be the decimal results of conversion from hex
        cond_ctxt = ParameterContext('CONDWAT_L0', param_type=QuantityType(value_encoding=np.dtype('float32')))
        cond_ctxt.uom = 'S m-1'
        cond_ctxt.fill_value = -9999
        cond_ctxt_id = self.dataset_management.create_parameter_context(name='CONDWAT_L0', parameter_context=cond_ctxt.dump(), parameter_type='quantity<float32>', unit_of_measure=cond_ctxt.uom)

        # Pressure - values expected to be the decimal results of conversion from hex
        press_ctxt = ParameterContext('PRESWAT_L0', param_type=QuantityType(value_encoding=np.dtype('float32')))
        press_ctxt.uom = 'dbar'
        press_ctxt.fill_value = -9999
        press_ctxt_id = self.dataset_management.create_parameter_context(name='PRESWAT_L0', parameter_context=press_ctxt.dump(), parameter_type='quantity<float32>', unit_of_measure=press_ctxt.uom)


        context_ids = [t_ctxt_id, lat_ctxt_id, lon_ctxt_id, temp_ctxt_id, cond_ctxt_id, press_ctxt_id]

        pdict_id = self.dataset_management.create_parameter_dictionary('L0 SBE37', parameter_context_ids=context_ids, temporal_context='time')

        return pdict_id


    def _L1_pdict(self):
        pdict_id = self._L0_pdict()
        param_context_ids = self.dataset_management.read_parameter_contexts(pdict_id,id_only=True)


        # TEMPWAT_L1 = (TEMPWAT_L0 / 10000) - 10
        tl1_func = '(TEMPWAT_L0 / 10000) - 10'
        tl1_pmap = {'TEMPWAT_L0':'TEMPWAT_L0'}
        func = NumexprFunction('TEMPWAT_L1', tl1_func, tl1_pmap)
        tempL1_ctxt = ParameterContext('TEMPWAT_L1', param_type=ParameterFunctionType(function=func), variability=VariabilityEnum.TEMPORAL)
        tempL1_ctxt.uom = 'deg_C'

        tempL1_ctxt_id = self.dataset_management.create_parameter_context(name=tempL1_ctxt.name, parameter_context=tempL1_ctxt.dump(), parameter_type='pfunc', unit_of_measure=tempL1_ctxt.uom)
        param_context_ids.append(tempL1_ctxt_id)

        # CONDWAT_L1 = (CONDWAT_L0 / 100000) - 0.5
        cl1_func = '(CONDWAT_L0 / 100000) - 0.5'
        cl1_pmap = {'CONDWAT_L0':'CONDWAT_L0'}
        func = NumexprFunction('CONDWAT_L1', cl1_func, cl1_pmap)
        condL1_ctxt = ParameterContext('CONDWAT_L1', param_type=ParameterFunctionType(function=func), variability=VariabilityEnum.TEMPORAL)
        condL1_ctxt.uom = 'S m-1'
        condL1_ctxt_id = self.dataset_management.create_parameter_context(name=condL1_ctxt.name, parameter_context=condL1_ctxt.dump(), parameter_type='pfunc', unit_of_measure=condL1_ctxt.uom)
        param_context_ids.append(condL1_ctxt_id)
                

        # Equation uses p_range, which is a calibration coefficient - Fixing to 679.34040721
        #   PRESWAT_L1 = (PRESWAT_L0 * p_range / (0.85 * 65536)) - (0.05 * p_range)
        pl1_func = '(PRESWAT_L0 * 679.34040721 / (0.85 * 65536)) - (0.05 * 679.34040721)'
        pl1_pmap = {'PRESWAT_L0':'PRESWAT_L0'}
        func = NumexprFunction('PRESWAT_L1', pl1_func, pl1_pmap)
        presL1_ctxt = ParameterContext('PRESWAT_L1', param_type=ParameterFunctionType(function=func), variability=VariabilityEnum.TEMPORAL)
        presL1_ctxt.uom = 'dbar'
        presL1_ctxt_id = self.dataset_management.create_parameter_context(name=presL1_ctxt.name, parameter_context=presL1_ctxt.dump(), parameter_type='pfunc', unit_of_measure=presL1_ctxt.uom)
        param_context_ids.append(presL1_ctxt_id)
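
        # Worked example of the L1 conversions above (sanity check):
        #   TEMPWAT_L0 = 280000 -> (280000 / 10000) - 10   = 18.0 deg_C
        #   CONDWAT_L0 = 100000 -> (100000 / 100000) - 0.5 = 0.5  S m-1
        #   PRESWAT_L0 = 3000   -> (3000 * 679.34... / (0.85 * 65536))
        #                          - (0.05 * 679.34...)    ~ 2.62 dbar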

        # Density & practical salinity calculated using the Gibbs Seawater library - available via python-gsw project:
        #       https://code.google.com/p/python-gsw/ & http://pypi.python.org/pypi/gsw/3.0.1

        # PRACSAL = gsw.SP_from_C((CONDWAT_L1 * 10), TEMPWAT_L1, PRESWAT_L1)
        owner = 'gsw'
        sal_func = 'SP_from_C'
        sal_arglist = [NumexprFunction('CONDWAT_L1*10', 'C*10', {'C':'CONDWAT_L1'}), 'TEMPWAT_L1', 'PRESWAT_L1']
        sal_kwargmap = None
        func = PythonFunction('PRACSAL', owner, sal_func, sal_arglist, sal_kwargmap)
        sal_ctxt = ParameterContext('PRACSAL', param_type=ParameterFunctionType(func), variability=VariabilityEnum.TEMPORAL)
        sal_ctxt.uom = 'g kg-1'

        sal_ctxt_id = self.dataset_management.create_parameter_context(name=sal_ctxt.name, parameter_context=sal_ctxt.dump(), parameter_type='pfunc', unit_of_measure=sal_ctxt.uom)
        param_context_ids.append(sal_ctxt_id)

        # absolute_salinity = gsw.SA_from_SP(PRACSAL, PRESWAT_L1, longitude, latitude)
        # conservative_temperature = gsw.CT_from_t(absolute_salinity, TEMPWAT_L1, PRESWAT_L1)
        # DENSITY = gsw.rho(absolute_salinity, conservative_temperature, PRESWAT_L1)
        owner = 'gsw'
        abs_sal_func = PythonFunction('abs_sal', owner, 'SA_from_SP', ['PRACSAL', 'PRESWAT_L1', 'lon','lat'], None)
        #abs_sal_func = PythonFunction('abs_sal', owner, 'SA_from_SP', ['lon','lat'], None)
        cons_temp_func = PythonFunction('cons_temp', owner, 'CT_from_t', [abs_sal_func, 'TEMPWAT_L1', 'PRESWAT_L1'], None)
        dens_func = PythonFunction('DENSITY', owner, 'rho', [abs_sal_func, cons_temp_func, 'PRESWAT_L1'], None)
        dens_ctxt = ParameterContext('DENSITY', param_type=ParameterFunctionType(dens_func), variability=VariabilityEnum.TEMPORAL)
        dens_ctxt.uom = 'kg m-3'

        dens_ctxt_id = self.dataset_management.create_parameter_context(name=dens_ctxt.name, parameter_context=dens_ctxt.dump(), parameter_type='pfunc', unit_of_measure=dens_ctxt.uom)
        param_context_ids.append(dens_ctxt_id)

        pdict_id = self.dataset_management.create_parameter_dictionary('L1_SBE37', parameter_context_ids=param_context_ids, temporal_context='time')
        return pdict_id

    def _data_product(self, name, stream_def, exchange_pt):
        tdom, sdom = time_series_domain()
        dp_obj = DataProduct(name=name, description='blah', spatial_domain=sdom.dump(), temporal_domain=tdom.dump())
        dp_id = self.data_product_management.create_data_product(dp_obj, stream_def, exchange_pt)
        return dp_id

    def _data_process(self, proc_def_id, input_products, output_product, stream_def):
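        # Helper flow: bind the output stream definition to the DPD, create the
        # data process over the given products, associate a stand-in DataProducer
        # (no real instrument behind it), then activate the process.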
        fake_producer = DataProducer(name='fake_producer')

        fake_producer_id, _  = self.container.resource_registry.create(fake_producer)

        self.data_process_management.assign_stream_definition_to_data_process_definition(stream_def,proc_def_id,binding='output')
        
        data_process_id = self.data_process_management.create_data_process(proc_def_id, input_products, {'output':output_product})
        self.container.resource_registry.create_association(subject=data_process_id, predicate=PRED.hasDataProducer, object=fake_producer_id)

        self.data_process_management.activate_data_process(data_process_id)

    def _fake_producer(self):
        if not hasattr(self, 'producer'):
            self.fake_producer_id,_ = self.container.resource_registry.create(DataProducer(name='fake_producer'))
        return self.fake_producer_id

    def _publisher(self, data_product_id):
        stream_ids, _ = self.container.resource_registry.find_objects(subject=data_product_id, predicate=PRED.hasStream, id_only=True)
        stream_id = stream_ids[0]

        route = self.pubsub_management.read_stream_route(stream_id)

        publisher = StandaloneStreamPublisher(stream_id, route)
        return publisher
    
    def _get_param_vals(self, name, slice_, dims):
        shp = utils.slice_shape(slice_, dims)
        def _getarr(vmin, shp, vmax=None):
            if vmax is None:
                # ndarray.fill() returns None, so fill in place and return the array
                arr = np.empty(shp)
                arr.fill(vmin)
                return arr
            return np.arange(vmin, vmax, (vmax - vmin) / int(utils.prod(shp)), dtype='float32').reshape(shp)

        if name == 'LAT':
            ret = np.empty(shp)
            ret.fill(45)
        elif name == 'LON':
            ret = np.empty(shp)
            ret.fill(-71)
        elif name == 'TEMPWAT_L0':
            ret = _getarr(280000, shp, 350000)
        elif name == 'CONDWAT_L0':
            ret = _getarr(100000, shp, 750000)
        elif name == 'PRESWAT_L0':
            ret = _getarr(3000, shp, 10000)
        elif name in self.value_classes: # Non-L0 parameters
            ret = self.value_classes[name][:]
        else:
            return np.zeros(shp)

        return ret
    
    def _setup_streams(self, exchange_pt1, exchange_pt2, available_fields_in=[], available_fields_out=[]):
        proc_def_id = self._create_proc_def()

        incoming_pdict_id = self._L0_pdict()
        outgoing_pdict_id = self._L1_pdict()
        
        incoming_stream_def_id = self.pubsub_management.create_stream_definition('L0_stream_def', parameter_dictionary_id=incoming_pdict_id, available_fields=available_fields_in)
        outgoing_stream_def_id = self.pubsub_management.create_stream_definition('L1_stream_def', parameter_dictionary_id=outgoing_pdict_id, available_fields=available_fields_out)

        L0_data_product_id = self._data_product('L0_SBE37', incoming_stream_def_id, exchange_pt1)
        L1_data_product_id = self._data_product('L1_SBE37', outgoing_stream_def_id, exchange_pt2)

        self._data_process(proc_def_id, [L0_data_product_id], L1_data_product_id, outgoing_stream_def_id)
        
        stream_ids, _ = self.container.resource_registry.find_objects(L0_data_product_id, PRED.hasStream, None, True)
        stream_id_in = stream_ids[0]

        stream_ids, _ = self.container.resource_registry.find_objects(L1_data_product_id, PRED.hasStream, None, True)
        stream_id_out = stream_ids[0]
        
        stream_route_in = self.pubsub_management.read_stream_route(stream_id_in)
        stream_route_out = self.pubsub_management.read_stream_route(stream_id_out)

        return (stream_id_in,stream_id_out,stream_route_in,stream_route_out,incoming_stream_def_id,outgoing_stream_def_id)
    
    def _validate_transforms(self, rdt_in, rdt_out):
        #passthrus
        self.assertTrue(np.allclose(rdt_in['time'], rdt_out['time']))
        self.assertTrue(np.allclose(rdt_in['lat'], rdt_out['lat']))
        self.assertTrue(np.allclose(rdt_in['lon'], rdt_out['lon']))
        self.assertTrue(np.allclose(rdt_in['TEMPWAT_L0'], rdt_out['TEMPWAT_L0']))
        self.assertTrue(np.allclose(rdt_in['CONDWAT_L0'], rdt_out['CONDWAT_L0']))
        self.assertTrue(np.allclose(rdt_in['PRESWAT_L0'], rdt_out['PRESWAT_L0']))
        # TEMPWAT_L1 = (TEMPWAT_L0 / 10000) - 10
        t1 = (rdt_out['TEMPWAT_L0'] / 10000) - 10
        self.assertTrue(np.allclose(rdt_out['TEMPWAT_L1'], t1))
        # CONDWAT_L1 = (CONDWAT_L0 / 100000) - 0.5
        c1 = (rdt_out['CONDWAT_L0'] / 100000) - 0.5
        self.assertTrue(np.allclose(rdt_out['CONDWAT_L1'], c1))
        # Equation uses p_range, which is a calibration coefficient - Fixing to 679.34040721
        #   PRESWAT_L1 = (PRESWAT_L0 * p_range / (0.85 * 65536)) - (0.05 * p_range)
        p1 = (rdt_out['PRESWAT_L0'] * 679.34040721 / (0.85 * 65536)) - (0.05 * 679.34040721)
        self.assertTrue(np.allclose(rdt_out['PRESWAT_L1'], p1))
        # PRACSAL = gsw.SP_from_C((CONDWAT_L1 * 10), TEMPWAT_L1, PRESWAT_L1)
        ps = gsw.SP_from_C((rdt_out['CONDWAT_L1'] * 10.), rdt_out['TEMPWAT_L1'], rdt_out['PRESWAT_L1'])
        self.assertTrue(np.allclose(rdt_out['PRACSAL'], ps))
        # absolute_salinity = gsw.SA_from_SP(PRACSAL, PRESWAT_L1, longitude, latitude)
        # conservative_temperature = gsw.CT_from_t(absolute_salinity, TEMPWAT_L1, PRESWAT_L1)
        # DENSITY = gsw.rho(absolute_salinity, conservative_temperature, PRESWAT_L1)
        abs_sal = gsw.SA_from_SP(rdt_out['PRACSAL'], rdt_out['PRESWAT_L1'], rdt_out['lon'], rdt_out['lat'])
        cons_temp = gsw.CT_from_t(abs_sal, rdt_out['TEMPWAT_L1'], rdt_out['PRESWAT_L1'])
        rho = gsw.rho(abs_sal, cons_temp, rdt_out['PRESWAT_L1'])
        self.assertTrue(np.allclose(rdt_out['DENSITY'], rho))
    
    def test_execute_transform(self):
        available_fields_in = ['time', 'lat', 'lon', 'TEMPWAT_L0', 'CONDWAT_L0', 'PRESWAT_L0']
        available_fields_out = ['time', 'lat', 'lon', 'TEMPWAT_L0', 'CONDWAT_L0', 'PRESWAT_L0', 'TEMPWAT_L1','CONDWAT_L1','PRESWAT_L1','PRACSAL', 'DENSITY']
        exchange_pt1 = 'xp1'
        exchange_pt2 = 'xp2'
        stream_id_in,stream_id_out,stream_route_in,stream_route_out,stream_def_in_id,stream_def_out_id = self._setup_streams(exchange_pt1, exchange_pt2, available_fields_in, available_fields_out)

        rdt_in = RecordDictionaryTool(stream_definition_id=stream_def_in_id)
        dt = 20
        rdt_in['time'] = np.arange(dt)
        rdt_in['lat'] = [40.992469] * dt
        rdt_in['lon'] = [-71.727069] * dt
        rdt_in['TEMPWAT_L0'] = self._get_param_vals('TEMPWAT_L0', slice(None), (dt,))
        rdt_in['CONDWAT_L0'] = self._get_param_vals('CONDWAT_L0', slice(None), (dt,))
        rdt_in['PRESWAT_L0'] = self._get_param_vals('PRESWAT_L0', slice(None), (dt,))
        
        msg = rdt_in.to_granule()
        #pid = self.container.spawn_process('transform_stream','ion.processes.data.transforms.transform_prime','TransformPrime',{'process':{'routes':{(stream_id_in, stream_id_out):None},'stream_id':stream_id_out}})
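        # Assumed config layout for TransformPrime: 'routes' maps (input stream,
        # output stream) pairs to an optional mapping, 'queue_name' is the queue
        # the transform subscribes on, and 'publish_streams' names the publisher
        # attributes created on the process (one per output stream id).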
        config = {'process':{'routes':{(stream_id_in, stream_id_out):None},'queue_name':exchange_pt1, 'publish_streams':{str(stream_id_out):stream_id_out}, 'process_type':'stream_process'}}
        pid = self.container.spawn_process('transform_stream','ion.processes.data.transforms.transform_prime','TransformPrime',config)
        rdt_out = self.container.proc_manager.procs[pid]._execute_transform(msg, (stream_id_in,stream_id_out))
        #need below to wrap result in a param val object
        rdt_out = RecordDictionaryTool.load_from_granule(rdt_out.to_granule())
        for k,v in rdt_out.iteritems():
            self.assertEqual(len(v), dt)        
        
        self._validate_transforms(rdt_in, rdt_out)
        self.container.proc_manager.terminate_process(pid)
    
    def test_transform_prime_no_available_fields(self):
        available_fields_in = []
        available_fields_out = []
        exchange_pt1 = 'xp1'
        exchange_pt2 = 'xp2'
        stream_id_in,stream_id_out,stream_route_in,stream_route_out,stream_def_in_id,stream_def_out_id = self._setup_streams(exchange_pt1, exchange_pt2, available_fields_in, available_fields_out)
        
        #launch transform
        config = {'process':{'routes':{(stream_id_in, stream_id_out):None},'queue_name':exchange_pt1, 'publish_streams':{str(stream_id_out):stream_id_out}, 'process_type':'stream_process'}}
        pid = self.container.spawn_process('transform_stream','ion.processes.data.transforms.transform_prime','TransformPrime',config)
        
        #create publisher
        publisher = StandaloneStreamPublisher(stream_id_in, stream_route_in)
        self.container.proc_manager.procs[pid].subscriber.xn.bind(stream_route_in.routing_key, publisher.xp)

        #data
        rdt_in = RecordDictionaryTool(stream_definition_id=stream_def_in_id)
        dt = 20
        rdt_in['time'] = np.arange(dt)
        rdt_in['lat'] = [40.992469] * dt
        rdt_in['lon'] = [-71.727069] * dt
        rdt_in['TEMPWAT_L0'] = self._get_param_vals('TEMPWAT_L0', slice(None), (dt,))
        rdt_in['CONDWAT_L0'] = self._get_param_vals('CONDWAT_L0', slice(None), (dt,))
        rdt_in['PRESWAT_L0'] = self._get_param_vals('PRESWAT_L0', slice(None), (dt,))
        msg = rdt_in.to_granule()
        #publish granule to transform and have transform publish it to subscriber
        
        #validate transformed data
        e = gevent.event.Event()
        def cb(msg, sr, sid):
            self.assertEqual(sid, stream_id_out)
            rdt_out = RecordDictionaryTool.load_from_granule(msg)
            self.assertEquals(set([k for k,v in rdt_out.iteritems()]), set(available_fields_out))
            for k,v in rdt_out.iteritems():
                self.assertEquals(rdt_out[k], None)
            e.set()

        sub = StandaloneStreamSubscriber('stream_subscriber', cb)
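        # 'publish_streams' in the config above is expected to expose a publisher
        # attribute on the transform process named after the output stream id,
        # which is why getattr(proc, stream_id_out) resolves to that publisher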
        sub.xn.bind(stream_route_out.routing_key, getattr(self.container.proc_manager.procs[pid], stream_id_out).xp)
        self.addCleanup(sub.stop)
        sub.start()
        
        #publish msg to transform
        publisher.publish(msg)
        
        #wait to receive msg
        self.assertTrue(e.wait(4))

        #self.container.proc_manager.terminate_process(pid)

    def test_transform_prime(self):
        available_fields_in = ['time', 'lat', 'lon', 'TEMPWAT_L0', 'CONDWAT_L0', 'PRESWAT_L0']
        available_fields_out = ['time', 'lat', 'lon', 'TEMPWAT_L0', 'CONDWAT_L0', 'PRESWAT_L0', 'TEMPWAT_L1','CONDWAT_L1','PRESWAT_L1','PRACSAL', 'DENSITY']
        exchange_pt1 = 'xp1'
        exchange_pt2 = 'xp2'
        stream_id_in,stream_id_out,stream_route_in,stream_route_out,stream_def_in_id,stream_def_out_id = self._setup_streams(exchange_pt1, exchange_pt2, available_fields_in, available_fields_out)
        
        #launch transform
        config = {'process':{'routes':{(stream_id_in, stream_id_out):None},'queue_name':exchange_pt1, 'publish_streams':{str(stream_id_out):stream_id_out}, 'process_type':'stream_process'}}
        pid = self.container.spawn_process('transform_stream','ion.processes.data.transforms.transform_prime','TransformPrime',config)
        
        #create publisher
        publisher = StandaloneStreamPublisher(stream_id_in, stream_route_in)
        self.container.proc_manager.procs[pid].subscriber.xn.bind(stream_route_in.routing_key, publisher.xp)

        #data
        rdt_in = RecordDictionaryTool(stream_definition_id=stream_def_in_id)
        dt = 20
        rdt_in['time'] = np.arange(dt)
        rdt_in['lat'] = [40.992469] * dt
        rdt_in['lon'] = [-71.727069] * dt
        rdt_in['TEMPWAT_L0'] = self._get_param_vals('TEMPWAT_L0', slice(None), (dt,))
        rdt_in['CONDWAT_L0'] = self._get_param_vals('CONDWAT_L0', slice(None), (dt,))
        rdt_in['PRESWAT_L0'] = self._get_param_vals('PRESWAT_L0', slice(None), (dt,))
        msg = rdt_in.to_granule()
        #publish granule to transform and have transform publish it to subscriber
        
        #validate transformed data
        e = gevent.event.Event()
        def cb(msg, sr, sid):
            self.assertEqual(sid, stream_id_out)
            rdt_out = RecordDictionaryTool.load_from_granule(msg)
            self.assertEquals(set([k for k,v in rdt_out.iteritems()]), set(available_fields_out))
            self._validate_transforms(rdt_in, rdt_out)
            e.set()

        sub = StandaloneStreamSubscriber('stream_subscriber', cb)
        sub.xn.bind(stream_route_out.routing_key, getattr(self.container.proc_manager.procs[pid], stream_id_out).xp)
        self.addCleanup(sub.stop)
        sub.start()
        
        #publish msg to transform
        publisher.publish(msg)
        
        #wait to receive msg
        self.assertTrue(e.wait(4))


class TestDataProductManagementServiceIntegration(IonIntegrationTestCase):

    def setUp(self):
        # Start container
        #print 'instantiating container'
        self._start_container()

        self.container.start_rel_from_url('res/deploy/r2deploy.yml')

        self.dpsc_cli           = DataProductManagementServiceClient()
        self.rrclient           = ResourceRegistryServiceClient()
        self.damsclient         = DataAcquisitionManagementServiceClient()
        self.pubsubcli          = PubsubManagementServiceClient()
        self.ingestclient       = IngestionManagementServiceClient()
        self.process_dispatcher = ProcessDispatcherServiceClient()
        self.dataset_management = DatasetManagementServiceClient()
        self.unsc               = UserNotificationServiceClient()
        self.data_retriever     = DataRetrieverServiceClient()
        self.identcli           = IdentityManagementServiceClient()

        #------------------------------------------
        # Create the environment
        #------------------------------------------

        self.stream_def_id = self.pubsubcli.create_stream_definition(name='SBE37_CDM')

        self.process_definitions  = {}
        ingestion_worker_definition = ProcessDefinition(name='ingestion worker')
        ingestion_worker_definition.executable = {
            'module':'ion.processes.data.ingestion.science_granule_ingestion_worker',
            'class' :'ScienceGranuleIngestionWorker'
        }
        process_definition_id = self.process_dispatcher.create_process_definition(process_definition=ingestion_worker_definition)
        self.process_definitions['ingestion_worker'] = process_definition_id

        self.pids = []
        self.exchange_points = []
        self.exchange_names = []

        #------------------------------------------------------------------------------------------------
        # First launch the ingestors
        #------------------------------------------------------------------------------------------------
        self.exchange_space       = 'science_granule_ingestion'
        self.exchange_point       = 'science_data'
        config = DotDict()
        config.process.datastore_name = 'datasets'
        config.process.queue_name = self.exchange_space
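        # the ingestion worker consumes granules from 'queue_name' and is assumed
        # to persist them into the 'datasets' datastore configured above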

        self.exchange_names.append(self.exchange_space)
        self.exchange_points.append(self.exchange_point)

        pid = self.process_dispatcher.schedule_process(self.process_definitions['ingestion_worker'],configuration=config)
        log.debug("the ingestion worker process id: %s", pid)
        self.pids.append(pid)

        self.addCleanup(self.cleaning_up)

    def cleaning_up(self):
        log.debug("number of pids to be terminated: %s", len(self.pids))
        for pid in self.pids:
            try:
                self.process_dispatcher.cancel_process(pid)
                log.debug("Terminated the process: %s", pid)
            except:
                log.debug("could not terminate the process id: %s" % pid)
        IngestionManagementIntTest.clean_subscriptions()

        for xn in self.exchange_names:
            xni = self.container.ex_manager.create_xn_queue(xn)
            xni.delete()
        for xp in self.exchange_points:
            xpi = self.container.ex_manager.create_xp(xp)
            xpi.delete()

    def get_datastore(self, dataset_id):
        dataset = self.dataset_management.read_dataset(dataset_id)
        datastore_name = dataset.datastore_name
        datastore = self.container.datastore_manager.get_datastore(datastore_name, DataStore.DS_PROFILE.SCIDATA)
        return datastore


    @attr('EXT')
    @attr('PREP')
    def test_create_data_product(self):

        #------------------------------------------------------------------------------------------------
        # create a stream definition for the data from the ctd simulator
        #------------------------------------------------------------------------------------------------
        parameter_dictionary = self.dataset_management.read_parameter_dictionary_by_name('ctd_parsed_param_dict')
        ctd_stream_def_id = self.pubsubcli.create_stream_definition(name='Simulated CTD data', parameter_dictionary_id=parameter_dictionary._id)
        log.debug("Created stream def id %s" % ctd_stream_def_id)

        #------------------------------------------------------------------------------------------------
        # test creating a new data product w/o a stream definition
        #------------------------------------------------------------------------------------------------




        dp_obj = IonObject(RT.DataProduct,
            name='DP1',
            description='some new dp')

        dp_obj.geospatial_bounds.geospatial_latitude_limit_north = 10.0
        dp_obj.geospatial_bounds.geospatial_latitude_limit_south = -10.0
        dp_obj.geospatial_bounds.geospatial_longitude_limit_east = 10.0
        dp_obj.geospatial_bounds.geospatial_longitude_limit_west = -10.0
        dp_obj.ooi_product_name = "PRODNAME"

        #------------------------------------------------------------------------------------------------
        # Create a set of ParameterContext objects to define the parameters in the coverage, add each to the ParameterDictionary
        #------------------------------------------------------------------------------------------------

        dp_id = self.dpsc_cli.create_data_product( data_product= dp_obj,
                                            stream_definition_id=ctd_stream_def_id)
        # Assert that the data product has an associated stream at this stage
        stream_ids, _ = self.rrclient.find_objects(dp_id, PRED.hasStream, RT.Stream, True)
        self.assertNotEquals(len(stream_ids), 0)

        # Assert that the data product has an associated stream def at this stage
        stream_def_ids, _ = self.rrclient.find_objects(dp_id, PRED.hasStreamDefinition, RT.StreamDefinition, True)
        self.assertNotEquals(len(stream_def_ids), 0)

        self.dpsc_cli.activate_data_product_persistence(dp_id)

        dp_obj = self.dpsc_cli.read_data_product(dp_id)
        self.assertIsNotNone(dp_obj)
        self.assertEquals(dp_obj.geospatial_point_center.lat, 0.0)
        log.debug('Created data product %s', dp_obj)
        #------------------------------------------------------------------------------------------------
        # test creating a new data product with  a stream definition
        #------------------------------------------------------------------------------------------------
        log.debug('Creating new data product with a stream definition')
        dp_obj = IonObject(RT.DataProduct,
            name='DP2',
            description='some new dp')

        dp_id2 = self.dpsc_cli.create_data_product(dp_obj, ctd_stream_def_id)
        self.dpsc_cli.activate_data_product_persistence(dp_id2)
        log.debug('new dp_id = %s' % dp_id2)

        #------------------------------------------------------------------------------------------------
        #make sure data product is associated with stream def
        #------------------------------------------------------------------------------------------------
        streamdefs = []
        streams, _ = self.rrclient.find_objects(dp_id2, PRED.hasStream, RT.Stream, True)
        for s in streams:
            log.debug("Checking stream %s" % s)
            sdefs, _ = self.rrclient.find_objects(s, PRED.hasStreamDefinition, RT.StreamDefinition, True)
            for sd in sdefs:
                log.debug("Checking streamdef %s" % sd)
                streamdefs.append(sd)
        self.assertIn(ctd_stream_def_id, streamdefs)

        group_names = self.dpsc_cli.get_data_product_group_list()
        self.assertIn("PRODNAME", group_names)


        #----------------------------------------------------------------------------------------
        # Create users then notifications to this data product for each user
        #----------------------------------------------------------------------------------------

        # user_1
        user_1 = UserInfo()
        user_1.name = 'user_1'
        user_1.contact.email = '*****@*****.**'

        # user_2
        user_2 = UserInfo()
        user_2.name = 'user_2'
        user_2.contact.email = '*****@*****.**'
        #user1 is a complete user
        self.subject = "/DC=org/DC=cilogon/C=US/O=ProtectNetwork/CN=Roger Unwin A254"
        actor_identity_obj = IonObject("ActorIdentity", {"name": self.subject})
        actor_id = self.identcli.create_actor_identity(actor_identity_obj)

        user_credentials_obj = IonObject("UserCredentials", {"name": self.subject})
        self.identcli.register_user_credentials(actor_id, user_credentials_obj)
        user_id_1 = self.identcli.create_user_info(actor_id, user_1)
        user_id_2, _ = self.rrclient.create(user_2)

        delivery_config1a = IonObject(OT.DeliveryConfiguration, email='*****@*****.**', mode=DeliveryModeEnum.EMAIL, frequency=NotificationFrequencyEnum.BATCH)
        delivery_config1b = IonObject(OT.DeliveryConfiguration, email='*****@*****.**', mode=DeliveryModeEnum.EMAIL, frequency=NotificationFrequencyEnum.BATCH)
        notification_request_1 = NotificationRequest(   name = "notification_1",
            origin=dp_id,
            origin_type="type_1",
            event_type=OT.ResourceLifecycleEvent,
            disabled_by_system = False,
            delivery_configurations=[delivery_config1a, delivery_config1b])

        delivery_config2a = IonObject(OT.DeliveryConfiguration, email='*****@*****.**', mode=DeliveryModeEnum.EMAIL, frequency=NotificationFrequencyEnum.BATCH)
        delivery_config2b = IonObject(OT.DeliveryConfiguration, email='*****@*****.**', mode=DeliveryModeEnum.EMAIL, frequency=NotificationFrequencyEnum.BATCH)
        notification_request_2 = NotificationRequest(   name = "notification_2",
            origin=dp_id,
            origin_type="type_2",
            disabled_by_system = False,
            event_type=OT.DetectionEvent,
            delivery_configurations=[delivery_config2a, delivery_config2b])

        notification_request_1_id = self.unsc.create_notification(notification=notification_request_1, user_id=user_id_1)
        notification_request_2_id = self.unsc.create_notification(notification=notification_request_2, user_id=user_id_2)
        self.unsc.delete_notification(notification_request_1_id)



        # test reading a non-existent data product
        log.debug('reading non-existent data product')

        with self.assertRaises(NotFound):
            dp_obj = self.dpsc_cli.read_data_product('some_fake_id')

        # update a data product (tests read also)
        log.debug('Updating data product')
        # first get the existing dp object
        dp_obj = self.dpsc_cli.read_data_product(dp_id)

        # now tweak the object
        dp_obj.description = 'the very first dp'
        dp_obj.geospatial_bounds.geospatial_latitude_limit_north = 20.0
        dp_obj.geospatial_bounds.geospatial_latitude_limit_south = -20.0
        dp_obj.geospatial_bounds.geospatial_longitude_limit_east = 20.0
        dp_obj.geospatial_bounds.geospatial_longitude_limit_west = -20.0
        # now write the dp back to the registry
        update_result = self.dpsc_cli.update_data_product(dp_obj)


        # now get the dp back to see if it was updated
        dp_obj = self.dpsc_cli.read_data_product(dp_id)
        self.assertEquals(dp_obj.description,'the very first dp')
        self.assertEquals(dp_obj.geospatial_point_center.lat, 0.0)
        log.debug('Updated data product %s', dp_obj)

        #test extension
        extended_product = self.dpsc_cli.get_data_product_extension(dp_id)
        #validate that there is one active and one retired user notification for this data product
        self.assertEqual(1, len(extended_product.computed.active_user_subscriptions.value))
        self.assertEqual(1, len(extended_product.computed.past_user_subscriptions.value))

        self.assertEqual(dp_id, extended_product._id)
        self.assertEqual(ComputedValueAvailability.PROVIDED,
                         extended_product.computed.product_download_size_estimated.status)
        self.assertEqual(0, extended_product.computed.product_download_size_estimated.value)

        self.assertEqual(ComputedValueAvailability.PROVIDED,
                         extended_product.computed.parameters.status)
        #log.debug("test_create_data_product: parameters %s" % extended_product.computed.parameters.value)


        def ion_object_encoder(obj):
            return obj.__dict__


        #test prepare for create
        data_product_data = self.dpsc_cli.prepare_data_product_support()

        #print simplejson.dumps(data_product_data, default=ion_object_encoder, indent= 2)

        self.assertEqual(data_product_data._id, "")
        self.assertEqual(data_product_data.type_, OT.DataProductPrepareSupport)
        self.assertEqual(len(data_product_data.associations['StreamDefinition'].resources), 2)
        self.assertEqual(len(data_product_data.associations['Dataset'].resources), 0)
        self.assertEqual(len(data_product_data.associations['StreamDefinition'].associated_resources), 0)
        self.assertEqual(len(data_product_data.associations['Dataset'].associated_resources), 0)

        #test prepare for update
        data_product_data = self.dpsc_cli.prepare_data_product_support(dp_id)

        #print simplejson.dumps(data_product_data, default=ion_object_encoder, indent= 2)

        self.assertEqual(data_product_data._id, dp_id)
        self.assertEqual(data_product_data.type_, OT.DataProductPrepareSupport)
        self.assertEqual(len(data_product_data.associations['StreamDefinition'].resources), 2)

        self.assertEqual(len(data_product_data.associations['Dataset'].resources), 1)

        self.assertEqual(len(data_product_data.associations['StreamDefinition'].associated_resources), 1)
        self.assertEqual(data_product_data.associations['StreamDefinition'].associated_resources[0].s, dp_id)

        self.assertEqual(len(data_product_data.associations['Dataset'].associated_resources), 1)
        self.assertEqual(data_product_data.associations['Dataset'].associated_resources[0].s, dp_id)

        # now 'delete' the data product
        log.debug("deleting data product: %s" % dp_id)
        self.dpsc_cli.delete_data_product(dp_id)

        # Assert that there are no associated streams leftover after deleting the data product
        stream_ids, assoc_ids = self.rrclient.find_objects(dp_id, PRED.hasStream, RT.Stream, True)
        self.assertEquals(len(stream_ids), 0)
        self.assertEquals(len(assoc_ids), 0)

        self.dpsc_cli.force_delete_data_product(dp_id)

        # now try to get the deleted dp object
        with self.assertRaises(NotFound):
            dp_obj = self.dpsc_cli.read_data_product(dp_id)

        # Get the events corresponding to the data product
        ret = self.unsc.get_recent_events(resource_id=dp_id)
        events = ret.value

        for event in events:
            log.debug("event time: %s" % event.ts_created)

        self.assertTrue(len(events) > 0)

    def test_data_product_stream_def(self):
        pdict_id = self.dataset_management.read_parameter_dictionary_by_name('ctd_parsed_param_dict', id_only=True)
        ctd_stream_def_id = self.pubsubcli.create_stream_definition(name='Simulated CTD data', parameter_dictionary_id=pdict_id)


        dp_obj = IonObject(RT.DataProduct,
            name='DP1',
            description='some new dp')
        dp_id = self.dpsc_cli.create_data_product(data_product= dp_obj,
            stream_definition_id=ctd_stream_def_id)

        stream_def_id = self.dpsc_cli.get_data_product_stream_definition(dp_id)
        self.assertEquals(ctd_stream_def_id, stream_def_id)


    def test_derived_data_product(self):
        pdict_id = self.dataset_management.read_parameter_dictionary_by_name('ctd_parsed_param_dict', id_only=True)
        ctd_stream_def_id = self.pubsubcli.create_stream_definition(name='ctd parsed', parameter_dictionary_id=pdict_id)
        self.addCleanup(self.pubsubcli.delete_stream_definition, ctd_stream_def_id)


        dp = DataProduct(name='Instrument DP')
        dp_id = self.dpsc_cli.create_data_product(dp, stream_definition_id=ctd_stream_def_id)
        self.addCleanup(self.dpsc_cli.force_delete_data_product, dp_id)

        self.dpsc_cli.activate_data_product_persistence(dp_id)
        self.addCleanup(self.dpsc_cli.suspend_data_product_persistence, dp_id)


        dataset_ids, _ = self.rrclient.find_objects(subject=dp_id, predicate=PRED.hasDataset, id_only=True)
        if not dataset_ids:
            raise NotFound("Data Product %s dataset  does not exist" % str(dp_id))
        dataset_id = dataset_ids[0]
        
        # Make the derived data product
        simple_stream_def_id = self.pubsubcli.create_stream_definition(name='TEMPWAT stream def', parameter_dictionary_id=pdict_id, available_fields=['time','temp'])
        tempwat_dp = DataProduct(name='TEMPWAT', category=DataProductTypeEnum.DERIVED)
        tempwat_dp_id = self.dpsc_cli.create_data_product(tempwat_dp, stream_definition_id=simple_stream_def_id, parent_data_product_id=dp_id)
        self.addCleanup(self.dpsc_cli.delete_data_product, tempwat_dp_id)
        # Check that the streams associated with the data product are persisted with
        stream_ids, _ =  self.rrclient.find_objects(dp_id,PRED.hasStream,RT.Stream,True)
        for stream_id in stream_ids:
            self.assertTrue(self.ingestclient.is_persisted(stream_id))

        stream_id = stream_ids[0]
        route = self.pubsubcli.read_stream_route(stream_id=stream_id)

        rdt = RecordDictionaryTool(stream_definition_id=ctd_stream_def_id)
        rdt['time'] = np.arange(20)
        rdt['temp'] = np.arange(20)
        rdt['pressure'] = np.arange(20)

        publisher = StandaloneStreamPublisher(stream_id,route)
        
        dataset_modified = Event()
        def cb(*args, **kwargs):
            dataset_modified.set()
        es = EventSubscriber(event_type=OT.DatasetModified, callback=cb, origin=dataset_id, auto_delete=True)
        es.start()
        self.addCleanup(es.stop)
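        # DatasetModified signals that the published granule has been ingested
        # into the dataset, so the retrieval below is expected to see the new data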

        publisher.publish(rdt.to_granule())

        self.assertTrue(dataset_modified.wait(30))

        tempwat_dataset_ids, _ = self.rrclient.find_objects(tempwat_dp_id, PRED.hasDataset, id_only=True)
        tempwat_dataset_id = tempwat_dataset_ids[0]
        granule = self.data_retriever.retrieve(tempwat_dataset_id, delivery_format=simple_stream_def_id)
        rdt = RecordDictionaryTool.load_from_granule(granule)
        np.testing.assert_array_equal(rdt['time'], np.arange(20))
        self.assertEquals(set(rdt.fields), set(['time','temp']))


    def test_activate_suspend_data_product(self):

        #------------------------------------------------------------------------------------------------
        # create a stream definition for the data from the ctd simulator
        #------------------------------------------------------------------------------------------------
        pdict_id = self.dataset_management.read_parameter_dictionary_by_name('ctd_parsed_param_dict', id_only=True)
        ctd_stream_def_id = self.pubsubcli.create_stream_definition(name='Simulated CTD data', parameter_dictionary_id=pdict_id)
        log.debug("Created stream def id %s" % ctd_stream_def_id)

        #------------------------------------------------------------------------------------------------
        # test creating a new data product w/o a stream definition
        #------------------------------------------------------------------------------------------------
        # Construct temporal and spatial Coordinate Reference System objects

        dp_obj = IonObject(RT.DataProduct,
            name='DP1',
            description='some new dp')

        log.debug("Created an IonObject for a data product: %s" % dp_obj)

        #------------------------------------------------------------------------------------------------
        # Create a set of ParameterContext objects to define the parameters in the coverage, add each to the ParameterDictionary
        #------------------------------------------------------------------------------------------------

        dp_id = self.dpsc_cli.create_data_product(data_product= dp_obj,
            stream_definition_id=ctd_stream_def_id)

        #------------------------------------------------------------------------------------------------
        # Subscribe to persist events
        #------------------------------------------------------------------------------------------------
        queue = gevent.queue.Queue()

        def info_event_received(message, headers):
            queue.put(message)

        es = EventSubscriber(event_type=OT.InformationContentStatusEvent, callback=info_event_received, origin=dp_id, auto_delete=True)
        es.start()
        self.addCleanup(es.stop)


        #------------------------------------------------------------------------------------------------
        # test activate and suspend data product persistence
        #------------------------------------------------------------------------------------------------
        self.dpsc_cli.activate_data_product_persistence(dp_id)
        
        dp_obj = self.dpsc_cli.read_data_product(dp_id)
        self.assertIsNotNone(dp_obj)

        dataset_ids, _ = self.rrclient.find_objects(subject=dp_id, predicate=PRED.hasDataset, id_only=True)
        if not dataset_ids:
            raise NotFound("Data Product %s dataset  does not exist" % str(dp_id))
        dataset_id = dataset_ids[0]


        # Check that the streams associated with the data product are persisted with
        stream_ids, _ =  self.rrclient.find_objects(dp_id,PRED.hasStream,RT.Stream,True)
        for stream_id in stream_ids:
            self.assertTrue(self.ingestclient.is_persisted(stream_id))

        stream_id = stream_ids[0]
        route = self.pubsubcli.read_stream_route(stream_id=stream_id)

        rdt = RecordDictionaryTool(stream_definition_id=ctd_stream_def_id)
        rdt['time'] = np.arange(20)
        rdt['temp'] = np.arange(20)

        publisher = StandaloneStreamPublisher(stream_id,route)
        
        dataset_modified = Event()
        def cb(*args, **kwargs):
            dataset_modified.set()
        es = EventSubscriber(event_type=OT.DatasetModified, callback=cb, origin=dataset_id, auto_delete=True)
        es.start()
        self.addCleanup(es.stop)

        publisher.publish(rdt.to_granule())

        self.assertTrue(dataset_modified.wait(30))

        #--------------------------------------------------------------------------------
        # Now get the data in one chunk using an RPC call to retrieve
        #--------------------------------------------------------------------------------

        replay_data = self.data_retriever.retrieve(dataset_ids[0])
        self.assertIsInstance(replay_data, Granule)

        log.debug("The data retriever was able to replay the dataset that was attached to the data product "
                  "we wanted to be persisted. Therefore the data product was indeed persisted with "
                  "otherwise we could not have retrieved its dataset using the data retriever. Therefore "
                  "this demonstration shows that L4-CI-SA-RQ-267 is satisfied: 'Data product management shall persist data products'")

        data_product_object = self.rrclient.read(dp_id)
        self.assertEquals(data_product_object.name,'DP1')
        self.assertEquals(data_product_object.description,'some new dp')

        log.debug("Towards L4-CI-SA-RQ-308: 'Data product management shall persist data product metadata'. "
                  " Attributes in create for the data product obj, name= '%s', description='%s', match those of object from the "
                  "resource registry, name='%s', desc='%s'" % (dp_obj.name, dp_obj.description,data_product_object.name,
                                                           data_product_object.description))

        #------------------------------------------------------------------------------------------------
        # test suspend data product persistence
        #------------------------------------------------------------------------------------------------
        self.dpsc_cli.suspend_data_product_persistence(dp_id)


        dataset_modified.clear()

        rdt['time'] = np.arange(20,40)

        publisher.publish(rdt.to_granule())
        self.assertFalse(dataset_modified.wait(2))

        self.dpsc_cli.activate_data_product_persistence(dp_id)
        dataset_modified.clear()

        publisher.publish(rdt.to_granule())
        self.assertTrue(dataset_modified.wait(30))

        granule = self.data_retriever.retrieve(dataset_id)
        rdt = RecordDictionaryTool.load_from_granule(granule)
        np.testing.assert_array_almost_equal(rdt['time'], np.arange(40))


        dataset_ids, _ = self.rrclient.find_objects(dp_id, PRED.hasDataset, id_only=True)
        self.assertEquals(len(dataset_ids), 1)

        self.dpsc_cli.suspend_data_product_persistence(dp_id)
        self.dpsc_cli.force_delete_data_product(dp_id)
        # now try to get the deleted dp object

        with self.assertRaises(NotFound):
            dp_obj = self.rrclient.read(dp_id)


        info_event_counter = 0
        runtime = 0
        starttime = time.time()
        caught_events = []

        #check that the four InfoStatusEvents were received
        while info_event_counter < 4 and runtime < 60 :
            a = queue.get(timeout=60)
            caught_events.append(a)
            info_event_counter += 1
            runtime = time.time() - starttime

        self.assertEquals(info_event_counter, 4)
Example #30
class TestTransformWorkerSubscriptions(IonIntegrationTestCase):
    def setUp(self):
        self._start_container()
        self.container.start_rel_from_url('res/deploy/r2deploy.yml')
        self.dataset_management_client = DatasetManagementServiceClient(
            node=self.container.node)
        self.pubsub_client = PubsubManagementServiceClient(
            node=self.container.node)
        self.dataproductclient = DataProductManagementServiceClient(
            node=self.container.node)
        self.dataprocessclient = DataProcessManagementServiceClient(
            node=self.container.node)
        self.processdispatchclient = ProcessDispatcherServiceClient(
            node=self.container.node)
        self.damsclient = DataAcquisitionManagementServiceClient(
            node=self.container.node)
        self.rrclient = ResourceRegistryServiceClient(node=self.container.node)

        self.wait_time = CFG.get_safe('endpoint.receive.timeout', 10)

    @attr('LOCOINT')
    @unittest.skipIf(os.getenv('CEI_LAUNCH_TEST', False),
                     'Skip test while in CEI LAUNCH mode')
    def test_multi_subscriptions(self):
        self.dp_list = []
        self.event1_verified = Event()
        self.event2_verified = Event()

        self.parameter_dict_id = self.dataset_management_client.read_parameter_dictionary_by_name(
            name='ctd_parsed_param_dict', id_only=True)

        # create the StreamDefinition
        self.stream_def_id = self.pubsub_client.create_stream_definition(
            name='stream_def', parameter_dictionary_id=self.parameter_dict_id)
        self.addCleanup(self.pubsub_client.delete_stream_definition,
                        self.stream_def_id)

        # create the DataProduct
        input_dp_obj = IonObject(RT.DataProduct,
                                 name='input_data_product_one',
                                 description='input test stream one')
        self.input_dp_one_id = self.dataproductclient.create_data_product(
            data_product=input_dp_obj, stream_definition_id=self.stream_def_id)

        input_dp_obj = IonObject(RT.DataProduct,
                                 name='input_data_product_two',
                                 description='input test stream two')
        self.input_dp_two_id = self.dataproductclient.create_data_product(
            data_product=input_dp_obj, stream_definition_id=self.stream_def_id)

        #retrieve the Stream for this data product
        stream_ids, assoc_ids = self.rrclient.find_objects(
            self.input_dp_one_id, PRED.hasStream, RT.Stream, True)
        self.stream_one_id = stream_ids[0]

        stream_ids, assoc_ids = self.rrclient.find_objects(
            self.input_dp_two_id, PRED.hasStream, RT.Stream, True)
        self.stream_two_id = stream_ids[0]

        dpd_id = self.create_data_process_definition()
        dp1_func_output_dp_id, dp2_func_output_dp_id = self.create_output_data_products()
        first_dp_id = self.create_data_process_one(dpd_id,
                                                   dp1_func_output_dp_id)

        second_dp_id = self.create_data_process_two(dpd_id,
                                                    self.input_dp_two_id,
                                                    dp2_func_output_dp_id)

        #retrieve subscription from data process
        subscription_objs, _ = self.rrclient.find_objects(
            subject=first_dp_id,
            predicate=PRED.hasSubscription,
            object_type=RT.Subscription,
            id_only=False)
        log.debug('test_transform_worker subscription_obj:  %s',
                  subscription_objs[0])

        #create subscription to stream ONE, create data process and publish granule on stream ONE

        #create a queue to catch the published granules of stream ONE
        self.subscription_one_id = self.pubsub_client.create_subscription(
            name='parsed_subscription_one',
            stream_ids=[self.stream_one_id],
            exchange_name=subscription_objs[0].exchange_name)
        self.addCleanup(self.pubsub_client.delete_subscription,
                        self.subscription_one_id)

        self.pubsub_client.activate_subscription(self.subscription_one_id)
        self.addCleanup(self.pubsub_client.deactivate_subscription,
                        self.subscription_one_id)

        stream_route_one = self.pubsub_client.read_stream_route(
            self.stream_one_id)
        self.publisher_one = StandaloneStreamPublisher(
            stream_id=self.stream_one_id, stream_route=stream_route_one)

        self.start_event_listener()

        #data process 1 adds conductivity + pressure and puts the result in salinity
        rdt = RecordDictionaryTool(stream_definition_id=self.stream_def_id)
        rdt['time'] = [0]  # time should always come first
        rdt['conductivity'] = [1]
        rdt['pressure'] = [2]
        rdt['salinity'] = [8]

        self.publisher_one.publish(msg=rdt.to_granule(),
                                   stream_id=self.stream_one_id)

        #retrieve subscription from data process
        subscription_objs, _ = self.rrclient.find_objects(
            subject=second_dp_id,
            predicate=PRED.hasSubscription,
            object_type=RT.Subscription,
            id_only=False)
        log.debug('test_transform_worker subscription_obj:  %s',
                  subscription_objs[0])

        #create subscription to streams ONE and TWO, move the TWO subscription, create data process and publish granule on stream TWO

        #create a queue to catch the published granules of stream TWO
        self.subscription_two_id = self.pubsub_client.create_subscription(
            name='parsed_subscription_one_two',
            stream_ids=[self.stream_two_id],
            exchange_name=subscription_objs[0].exchange_name)
        self.addCleanup(self.pubsub_client.delete_subscription,
                        self.subscription_two_id)

        self.pubsub_client.activate_subscription(self.subscription_two_id)
        self.addCleanup(self.pubsub_client.deactivate_subscription,
                        self.subscription_two_id)

        stream_route_two = self.pubsub_client.read_stream_route(
            self.stream_two_id)
        self.publisher_two = StandaloneStreamPublisher(
            stream_id=self.stream_two_id, stream_route=stream_route_two)

        #data process 1 adds conductivity + pressure and puts the result in salinity
        rdt = RecordDictionaryTool(stream_definition_id=self.stream_def_id)
        rdt['time'] = [0]  # time should always come first
        rdt['conductivity'] = [1]
        rdt['pressure'] = [2]
        rdt['salinity'] = [8]

        self.publisher_one.publish(msg=rdt.to_granule(),
                                   stream_id=self.stream_one_id)

        #data process 2 adds salinity + pressure and puts the result in conductivity
        rdt = RecordDictionaryTool(stream_definition_id=self.stream_def_id)
        rdt['time'] = [0]  # time should always come first
        rdt['conductivity'] = [22]
        rdt['pressure'] = [4]
        rdt['salinity'] = [1]

        self.publisher_two.publish(msg=rdt.to_granule(),
                                   stream_id=self.stream_two_id)

        self.assertTrue(self.event2_verified.wait(self.wait_time))
        self.assertTrue(self.event1_verified.wait(self.wait_time))

    @attr('LOCOINT')
    @unittest.skipIf(os.getenv('CEI_LAUNCH_TEST', False),
                     'Skip test while in CEI LAUNCH mode')
    def test_two_transforms_inline(self):
        self.dp_list = []
        self.event1_verified = Event()
        self.event2_verified = Event()

        self.parameter_dict_id = self.dataset_management_client.read_parameter_dictionary_by_name(
            name='ctd_parsed_param_dict', id_only=True)

        # create the StreamDefinition
        self.stream_def_id = self.pubsub_client.create_stream_definition(
            name='stream_def', parameter_dictionary_id=self.parameter_dict_id)
        self.addCleanup(self.pubsub_client.delete_stream_definition,
                        self.stream_def_id)

        # create the DataProduct
        input_dp_obj = IonObject(RT.DataProduct,
                                 name='input_data_product_one',
                                 description='input test stream one')
        self.input_dp_one_id = self.dataproductclient.create_data_product(
            data_product=input_dp_obj, stream_definition_id=self.stream_def_id)

        dpd_id = self.create_data_process_definition()
        dp1_func_output_dp_id, dp2_func_output_dp_id = self.create_output_data_products()

        first_dp_id = self.create_data_process_one(dpd_id,
                                                   dp1_func_output_dp_id)
        second_dp_id = self.create_data_process_two(dpd_id,
                                                    dp1_func_output_dp_id,
                                                    dp2_func_output_dp_id)

        #retrieve subscription from data process one
        subscription_objs, _ = self.rrclient.find_objects(
            subject=first_dp_id,
            predicate=PRED.hasSubscription,
            object_type=RT.Subscription,
            id_only=False)
        log.debug('test_transform_worker subscription_obj:  %s',
                  subscription_objs[0])

        #retrieve the Streams for these data products
        stream_ids, assoc_ids = self.rrclient.find_objects(
            self.input_dp_one_id, PRED.hasStream, RT.Stream, True)
        self.stream_one_id = stream_ids[0]
        #the input to data process two is the output from data process one
        stream_ids, assoc_ids = self.rrclient.find_objects(
            dp1_func_output_dp_id, PRED.hasStream, RT.Stream, True)
        self.stream_two_id = stream_ids[0]

        # Run provenance on the output dataproduct of the second data process to see all the links
        # are as expected
        output_data_product_provenance = self.dataproductclient.get_data_product_provenance(
            dp2_func_output_dp_id)

        # Basic check: the provenance graph should contain 3 entries - the input data
        # product and the two transform output data products.
        self.assertEquals(len(output_data_product_provenance), 3)
        # confirm that the linking from the output dataproduct to input dataproduct is correct
        self.assertIn(dp1_func_output_dp_id,
                      output_data_product_provenance[dp2_func_output_dp_id]['parents'])
        self.assertIn(self.input_dp_one_id,
                      output_data_product_provenance[dp1_func_output_dp_id]['parents'])
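        # For orientation (an assumed shape, inferred from the assertions above rather
        # than documented here): get_data_product_provenance() returns a dict keyed by
        # data product id, each entry listing its parent data products, roughly
        #
        #   {dp2_func_output_dp_id: {'parents': [dp1_func_output_dp_id]},
        #    dp1_func_output_dp_id: {'parents': [self.input_dp_one_id]},
        #    self.input_dp_one_id:  {'parents': []}}
        #
        # which is why three entries and the two parent links above are expected.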

        #create subscription to stream ONE, create data process and publish granule on stream ONE

        #create a queue to catch the published granules of stream ONE
        subscription_id = self.pubsub_client.create_subscription(
            name='parsed_subscription',
            stream_ids=[self.stream_one_id, self.stream_two_id],
            exchange_name=subscription_objs[0].exchange_name)
        self.addCleanup(self.pubsub_client.delete_subscription,
                        subscription_id)

        self.pubsub_client.activate_subscription(subscription_id)
        self.addCleanup(self.pubsub_client.deactivate_subscription,
                        subscription_id)

        stream_route_one = self.pubsub_client.read_stream_route(
            self.stream_one_id)
        self.publisher_one = StandaloneStreamPublisher(
            stream_id=self.stream_one_id, stream_route=stream_route_one)

        #retrieve subscription from data process
        subscription_objs, _ = self.rrclient.find_objects(
            subject=second_dp_id,
            predicate=PRED.hasSubscription,
            object_type=RT.Subscription,
            id_only=False)
        log.debug('test_transform_worker subscription_obj:  %s',
                  subscription_objs[0])

        #data process 1 adds conductivity + pressure and puts the result in salinity
        #data process 2 adds salinity + pressure and puts the result in conductivity

        self.start_event_listener()

        rdt = RecordDictionaryTool(stream_definition_id=self.stream_def_id)
        rdt['time'] = [0]  # time should always come first
        rdt['conductivity'] = [1]
        rdt['pressure'] = [2]
        rdt['salinity'] = [8]

        self.publisher_one.publish(msg=rdt.to_granule(),
                                   stream_id=self.stream_one_id)

        self.assertTrue(self.event2_verified.wait(self.wait_time))
        self.assertTrue(self.event1_verified.wait(self.wait_time))

    def create_data_process_definition(self):

        #two data processes using one transform and one DPD

        # Set up DPD and DP #2 - array add function
        tf_obj = IonObject(
            RT.TransformFunction,
            name='add_array_func',
            description='adds values in an array',
            function='add_arrays',
            module="ion_example.add_arrays",
            arguments=['arr1', 'arr2'],
            function_type=TransformFunctionType.TRANSFORM,
            uri=
            'http://sddevrepo.oceanobservatories.org/releases/ion_example-0.1-py2.7.egg'
        )
        add_array_func_id, rev = self.rrclient.create(tf_obj)

        dpd_obj = IonObject(
            RT.DataProcessDefinition,
            name='add_arrays',
            description='adds the values of two arrays',
            data_process_type=DataProcessTypeEnum.TRANSFORM_PROCESS,
        )
        add_array_dpd_id = self.dataprocessclient.create_data_process_definition(
            data_process_definition=dpd_obj, function_id=add_array_func_id)
        self.dataprocessclient.assign_stream_definition_to_data_process_definition(
            self.stream_def_id, add_array_dpd_id, binding='add_array_func')

        return add_array_dpd_id
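
    # A minimal sketch (an assumption, not part of this listing) of the transform
    # function the definition above points at: ion_example.add_arrays.add_arrays is
    # expected to sum its two mapped inputs element-wise, e.g.
    #
    #   def add_arrays(arr1, arr2):
    #       return arr1 + arr2   # numpy arrays add element-wise
    #
    # so data process one maps conductivity=[1] + pressure=[2] to salinity=[3], and
    # data process two maps salinity=[1] + pressure=[4] to conductivity=[5], the values
    # asserted in validate_output_granule() below.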

    def create_data_process_one(self, data_process_definition_id,
                                output_dataproduct):

        # Create the data process
        #data process 1 adds conductivity + pressure and puts the result in salinity
        argument_map = {"arr1": "conductivity", "arr2": "pressure"}
        output_param = "salinity"
        dp1_data_process_id = self.dataprocessclient.create_data_process(
            data_process_definition_id=data_process_definition_id,
            inputs=[self.input_dp_one_id],
            outputs=[output_dataproduct],
            argument_map=argument_map,
            out_param_name=output_param)
        self.damsclient.register_process(dp1_data_process_id)
        self.addCleanup(self.dataprocessclient.delete_data_process,
                        dp1_data_process_id)
        self.dp_list.append(dp1_data_process_id)

        return dp1_data_process_id

    def create_data_process_two(self, data_process_definition_id,
                                input_dataproduct, output_dataproduct):

        # Create the data process
        #data process 2 adds salinity + pressure and puts the result in conductivity
        argument_map = {'arr1': 'salinity', 'arr2': 'pressure'}
        output_param = 'conductivity'
        dp2_func_data_process_id = self.dataprocessclient.create_data_process(
            data_process_definition_id=data_process_definition_id,
            inputs=[input_dataproduct],
            outputs=[output_dataproduct],
            argument_map=argument_map,
            out_param_name=output_param)
        self.damsclient.register_process(dp2_func_data_process_id)
        self.addCleanup(self.dataprocessclient.delete_data_process,
                        dp2_func_data_process_id)
        self.dp_list.append(dp2_func_data_process_id)

        return dp2_func_data_process_id

    def create_output_data_products(self):

        dp1_outgoing_stream_id = self.pubsub_client.create_stream_definition(
            name='dp1_stream', parameter_dictionary_id=self.parameter_dict_id)

        dp1_output_dp_obj = IonObject(RT.DataProduct,
                                      name='data_process1_data_product',
                                      description='output of add array func')

        dp1_func_output_dp_id = self.dataproductclient.create_data_product(
            dp1_output_dp_obj, dp1_outgoing_stream_id)
        self.addCleanup(self.dataproductclient.delete_data_product,
                        dp1_func_output_dp_id)
        # Retrieve the id of the OUTPUT stream from the out Data Product and add to granule logger
        stream_ids, _ = self.rrclient.find_objects(dp1_func_output_dp_id,
                                                   PRED.hasStream, None, True)
        self._output_stream_one_id = stream_ids[0]

        dp2_func_outgoing_stream_id = self.pubsub_client.create_stream_definition(
            name='dp2_stream', parameter_dictionary_id=self.parameter_dict_id)

        dp2_func_output_dp_obj = IonObject(
            RT.DataProduct,
            name='data_process2_data_product',
            description='output of add array func')

        dp2_func_output_dp_id = self.dataproductclient.create_data_product(
            dp2_func_output_dp_obj, dp2_func_outgoing_stream_id)
        self.addCleanup(self.dataproductclient.delete_data_product,
                        dp2_func_output_dp_id)
        # Retrieve the id of the OUTPUT stream from the out Data Product and add to granule logger
        stream_ids, _ = self.rrclient.find_objects(dp2_func_output_dp_id,
                                                   PRED.hasStream, None, True)
        self._output_stream_two_id = stream_ids[0]

        subscription_id = self.pubsub_client.create_subscription(
            'validator',
            data_product_ids=[dp1_func_output_dp_id, dp2_func_output_dp_id])
        self.addCleanup(self.pubsub_client.delete_subscription,
                        subscription_id)

        def on_granule(msg, route, stream_id):
            log.debug('recv_packet stream_id: %s route: %s   msg: %s',
                      stream_id, route, msg)
            self.validate_output_granule(msg, route, stream_id)

        validator = StandaloneStreamSubscriber('validator',
                                               callback=on_granule)
        validator.start()
        self.addCleanup(validator.stop)

        self.pubsub_client.activate_subscription(subscription_id)
        self.addCleanup(self.pubsub_client.deactivate_subscription,
                        subscription_id)

        return dp1_func_output_dp_id, dp2_func_output_dp_id

    def validate_event(self, *args, **kwargs):
        """
        This method is a callback function for receiving DataProcessStatusEvent.
        """
        data_process_event = args[0]
        log.debug("DataProcessStatusEvent: %s",
                  str(data_process_event.__dict__))

        #if data process already created, check origin
        if 'data process assigned to transform worker' not in data_process_event.description:
            self.assertIn(data_process_event.origin, self.dp_list)

    def validate_output_granule(self, msg, route, stream_id):
        self.assertIn(stream_id,
                      [self._output_stream_one_id, self._output_stream_two_id])

        rdt = RecordDictionaryTool.load_from_granule(msg)
        log.debug('validate_output_granule  stream_id: %s', stream_id)

        if stream_id == self._output_stream_one_id:
            sal_val = rdt['salinity']
            log.debug('validate_output_granule  sal_val: %s', sal_val)
            np.testing.assert_array_equal(sal_val, np.array([3]))
            self.event1_verified.set()
        else:
            cond_val = rdt['conductivity']
            log.debug('validate_output_granule  cond_val: %s', cond_val)
            np.testing.assert_array_equal(cond_val, np.array([5]))
            self.event2_verified.set()

    def start_event_listener(self):

        es = EventSubscriber(event_type=OT.DataProcessStatusEvent,
                             callback=self.validate_event)
        es.start()

        self.addCleanup(es.stop)
Beispiel #31
class CtdbpTransformsIntTest(IonIntegrationTestCase):
    def setUp(self):
        super(CtdbpTransformsIntTest, self).setUp()

        self._start_container()
        self.container.start_rel_from_url('res/deploy/r2deploy.yml')

        self.pubsub = PubsubManagementServiceClient()
        self.process_dispatcher = ProcessDispatcherServiceClient()
        self.dataset_management = DatasetManagementServiceClient()
        self.data_process_management = DataProcessManagementServiceClient()
        self.dataproduct_management = DataProductManagementServiceClient()
        self.resource_registry = ResourceRegistryServiceClient()

        # This is for the time values inside the packets going into the transform
        self.i = 0

        # Cleanup of queue created by the subscriber

    def _get_new_ctd_packet(self, stream_definition_id, length):

        rdt = RecordDictionaryTool(stream_definition_id=stream_definition_id)
        rdt['time'] = numpy.arange(self.i, self.i + length)

        for field in rdt:
            if isinstance(
                    rdt._pdict.get_context(field).param_type, QuantityType):
                rdt[field] = numpy.array(
                    [random.uniform(0.0, 75.0) for i in xrange(length)])

        g = rdt.to_granule()
        self.i += length

        return g

    def _create_input_param_dict_for_test(self, parameter_dict_name=''):

        pdict = ParameterDictionary()

        t_ctxt = ParameterContext(
            'time',
            param_type=QuantityType(value_encoding=numpy.dtype('float64')))
        t_ctxt.axis = AxisTypeEnum.TIME
        t_ctxt.uom = 'seconds since 01-01-1900'
        pdict.add_context(t_ctxt)

        cond_ctxt = ParameterContext(
            'conductivity',
            param_type=QuantityType(value_encoding=numpy.dtype('float32')))
        cond_ctxt.uom = ''
        pdict.add_context(cond_ctxt)

        pres_ctxt = ParameterContext(
            'pressure',
            param_type=QuantityType(value_encoding=numpy.dtype('float32')))
        pres_ctxt.uom = ''
        pdict.add_context(pres_ctxt)

        temp_ctxt = ParameterContext(
            'temperature',
            param_type=QuantityType(value_encoding=numpy.dtype('float32')))
        temp_ctxt.uom = ''
        pdict.add_context(temp_ctxt)

        dens_ctxt = ParameterContext(
            'density',
            param_type=QuantityType(value_encoding=numpy.dtype('float32')))
        dens_ctxt.uom = ''
        pdict.add_context(dens_ctxt)

        sal_ctxt = ParameterContext(
            'salinity',
            param_type=QuantityType(value_encoding=numpy.dtype('float32')))
        sal_ctxt.uom = ''
        pdict.add_context(sal_ctxt)

        # register each ParameterContext with dataset management so a ParameterDictionary
        # resource (and from it the data product's stream) can be created below
        pc_list = []
        for pc_k, pc in pdict.iteritems():
            ctxt_id = self.dataset_management.create_parameter_context(
                pc_k, pc[1].dump())  # pc[1] is the ParameterContext object
            pc_list.append(ctxt_id)
            self.addCleanup(self.dataset_management.delete_parameter_context,
                            ctxt_id)

        pdict_id = self.dataset_management.create_parameter_dictionary(
            parameter_dict_name, pc_list)
        self.addCleanup(self.dataset_management.delete_parameter_dictionary,
                        pdict_id)

        return pdict_id

    def test_ctdbp_L0_all(self):
        """
        Test packets processed by the ctdbp_L0_all transform
        """

        #----------- Data Process Definition --------------------------------

        dpd_obj = IonObject(
            RT.DataProcessDefinition,
            name='CTDBP_L0_all',
            description=
            'Take parsed stream and put the C, T and P into three separate L0 streams.',
            module='ion.processes.data.transforms.ctdbp.ctdbp_L0',
            class_name='CTDBP_L0_all')

        dprocdef_id = self.data_process_management.create_data_process_definition(
            dpd_obj)
        self.addCleanup(
            self.data_process_management.delete_data_process_definition,
            dprocdef_id)

        log.debug("created data process definition: id = %s", dprocdef_id)

        #----------- Data Products --------------------------------

        # Construct temporal and spatial Coordinate Reference System objects
        tdom, sdom = time_series_domain()

        sdom = sdom.dump()
        tdom = tdom.dump()

        input_param_dict = self._create_input_param_dict_for_test(
            parameter_dict_name='fictitious_ctdp_param_dict')

        # Get the stream definition for the stream using the parameter dictionary
        #        input_param_dict = self.dataset_management.read_parameter_dictionary_by_name('ctdbp_cdef_sample', id_only=True)
        input_stream_def_dict = self.pubsub.create_stream_definition(
            name='parsed', parameter_dictionary_id=input_param_dict)
        self.addCleanup(self.pubsub.delete_stream_definition,
                        input_stream_def_dict)

        log.debug("Got the parsed parameter dictionary: id: %s",
                  input_param_dict)
        log.debug("Got the stream def for parsed input: %s",
                  input_stream_def_dict)

        # Input data product
        parsed_stream_dp_obj = IonObject(
            RT.DataProduct,
            name='parsed_stream',
            description='Parsed stream input to CTBP L0 transform',
            temporal_domain=tdom,
            spatial_domain=sdom)

        input_dp_id = self.dataproduct_management.create_data_product(
            data_product=parsed_stream_dp_obj,
            stream_definition_id=input_stream_def_dict)
        self.addCleanup(self.dataproduct_management.delete_data_product,
                        input_dp_id)

        # output data product
        L0_stream_dp_obj = IonObject(
            RT.DataProduct,
            name='L0_stream',
            description='L0_stream output of CTBP L0 transform',
            temporal_domain=tdom,
            spatial_domain=sdom)

        L0_stream_dp_id = self.dataproduct_management.create_data_product(
            data_product=L0_stream_dp_obj,
            stream_definition_id=input_stream_def_dict)
        self.addCleanup(self.dataproduct_management.delete_data_product,
                        L0_stream_dp_id)

        # The output data product must be named "L0_stream": when the data process is
        # launched, that name becomes the key under config.process.publish_streams in the
        # launch configuration (see the illustrative note further below).
        out_stream_ids, _ = self.resource_registry.find_objects(
            L0_stream_dp_id, PRED.hasStream, RT.Stream, True)
        self.assertTrue(len(out_stream_ids))
        output_stream_id = out_stream_ids[0]
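
        # Illustrative assumption (the config is not built explicitly in this test):
        # when the data process below is launched, the output stream is expected to be
        # keyed by the output data product name, roughly
        #
        #   config.process.publish_streams = {'L0_stream': output_stream_id}
        #
        # which is why the output data product above is named 'L0_stream'.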

        dproc_id = self.data_process_management.create_data_process(
            data_process_definition_id=dprocdef_id,
            in_data_product_ids=[input_dp_id],
            out_data_product_ids=[L0_stream_dp_id],
            configuration=None)

        self.addCleanup(self.data_process_management.delete_data_process,
                        dproc_id)

        log.debug("Created a data process for ctdbp_L0. id: %s", dproc_id)

        # Activate the data process
        self.data_process_management.activate_data_process(dproc_id)
        self.addCleanup(self.data_process_management.deactivate_data_process,
                        dproc_id)

        #----------- Find the stream that is associated with the input data product when it was created by create_data_product() --------------------------------

        stream_ids, _ = self.resource_registry.find_objects(
            input_dp_id, PRED.hasStream, RT.Stream, True)
        self.assertTrue(len(stream_ids))

        input_stream_id = stream_ids[0]
        stream_route = self.pubsub.read_stream_route(input_stream_id)

        log.debug("The input stream for the L0 transform: %s", input_stream_id)

        #----------- Create a subscriber that will listen to the transform's output --------------------------------

        ar = gevent.event.AsyncResult()

        def subscriber(m, r, s):
            ar.set(m)

        sub = StandaloneStreamSubscriber(exchange_name='sub',
                                         callback=subscriber)

        sub_id = self.pubsub.create_subscription('subscriber_to_transform',
                                                 stream_ids=[output_stream_id],
                                                 exchange_name='sub')
        self.addCleanup(self.pubsub.delete_subscription, sub_id)

        self.pubsub.activate_subscription(sub_id)
        self.addCleanup(self.pubsub.deactivate_subscription, sub_id)

        sub.start()
        self.addCleanup(sub.stop)

        #----------- Publish on that stream so that the transform can receive it --------------------------------

        pub = StandaloneStreamPublisher(input_stream_id, stream_route)
        publish_granule = self._get_new_ctd_packet(
            stream_definition_id=input_stream_def_dict, length=5)

        pub.publish(publish_granule)

        log.debug("Published the following granule: %s", publish_granule)

        granule_from_transform = ar.get(timeout=20)

        log.debug("Got the following granule from the transform: %s",
                  granule_from_transform)

        # Check that the granule published by the L0 transform has the right properties
        self._check_granule_from_transform(granule_from_transform)

    def _check_granule_from_transform(self, granule):
        """
        An internal method to check if a granule has the right properties
        """

        pass
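        # A plausible implementation (an assumption, not part of this listing) would
        # load the granule and confirm the parsed fields survived the L0 transform, e.g.
        #
        #   rdt = RecordDictionaryTool.load_from_granule(granule)
        #   self.assertIn('time', rdt.fields)
        #   self.assertEqual(len(rdt['time']), 5)   # matches the length=5 packet published above
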
class TestDataProductManagementServiceIntegration(IonIntegrationTestCase):

    def setUp(self):
        # Start container
        #print 'instantiating container'
        self._start_container()

        self.container.start_rel_from_url('res/deploy/r2deploy.yml')

        self.dpsc_cli           = DataProductManagementServiceClient()
        self.rrclient           = ResourceRegistryServiceClient()
        self.damsclient         = DataAcquisitionManagementServiceClient()
        self.pubsubcli          = PubsubManagementServiceClient()
        self.ingestclient       = IngestionManagementServiceClient()
        self.process_dispatcher = ProcessDispatcherServiceClient()
        self.dataset_management = DatasetManagementServiceClient()
        self.unsc               = UserNotificationServiceClient()
        self.data_retriever     = DataRetrieverServiceClient()

        #------------------------------------------
        # Create the environment
        #------------------------------------------

        datastore_name = CACHE_DATASTORE_NAME
        self.db = self.container.datastore_manager.get_datastore(datastore_name)
        self.stream_def_id = self.pubsubcli.create_stream_definition(name='SBE37_CDM')

        self.process_definitions  = {}
        ingestion_worker_definition = ProcessDefinition(name='ingestion worker')
        ingestion_worker_definition.executable = {
            'module':'ion.processes.data.ingestion.science_granule_ingestion_worker',
            'class' :'ScienceGranuleIngestionWorker'
        }
        process_definition_id = self.process_dispatcher.create_process_definition(process_definition=ingestion_worker_definition)
        self.process_definitions['ingestion_worker'] = process_definition_id

        self.pids = []
        self.exchange_points = []
        self.exchange_names = []

        #------------------------------------------------------------------------------------------------
        # First launch the ingestors
        #------------------------------------------------------------------------------------------------
        self.exchange_space       = 'science_granule_ingestion'
        self.exchange_point       = 'science_data'
        config = DotDict()
        config.process.datastore_name = 'datasets'
        config.process.queue_name = self.exchange_space

        self.exchange_names.append(self.exchange_space)
        self.exchange_points.append(self.exchange_point)

        pid = self.process_dispatcher.schedule_process(self.process_definitions['ingestion_worker'],configuration=config)
        log.debug("the ingestion worker process id: %s", pid)
        self.pids.append(pid)

        self.addCleanup(self.cleaning_up)

    def cleaning_up(self):
        for pid in self.pids:
            log.debug("number of pids to be terminated: %s", len(self.pids))
            try:
                self.process_dispatcher.cancel_process(pid)
                log.debug("Terminated the process: %s", pid)
            except Exception:
                log.debug("could not terminate the process id: %s", pid)
        IngestionManagementIntTest.clean_subscriptions()

        for xn in self.exchange_names:
            xni = self.container.ex_manager.create_xn_queue(xn)
            xni.delete()
        for xp in self.exchange_points:
            xpi = self.container.ex_manager.create_xp(xp)
            xpi.delete()

    def get_datastore(self, dataset_id):
        dataset = self.dataset_management.read_dataset(dataset_id)
        datastore_name = dataset.datastore_name
        datastore = self.container.datastore_manager.get_datastore(datastore_name, DataStore.DS_PROFILE.SCIDATA)
        return datastore


    @attr('EXT')
    @attr('PREP')
    def test_create_data_product(self):

        #------------------------------------------------------------------------------------------------
        # create a stream definition for the data from the ctd simulator
        #------------------------------------------------------------------------------------------------
        parameter_dictionary = self.dataset_management.read_parameter_dictionary_by_name('ctd_parsed_param_dict')
        ctd_stream_def_id = self.pubsubcli.create_stream_definition(name='Simulated CTD data', parameter_dictionary_id=parameter_dictionary._id)
        log.debug("Created stream def id %s" % ctd_stream_def_id)

        #------------------------------------------------------------------------------------------------
        # test creating a new data product w/o a stream definition
        #------------------------------------------------------------------------------------------------

        # Generic time-series data domain creation
        tdom, sdom = time_series_domain()



        dp_obj = IonObject(RT.DataProduct,
            name='DP1',
            description='some new dp',
            temporal_domain = tdom.dump(), 
            spatial_domain = sdom.dump())

        dp_obj.geospatial_bounds.geospatial_latitude_limit_north = 10.0
        dp_obj.geospatial_bounds.geospatial_latitude_limit_south = -10.0
        dp_obj.geospatial_bounds.geospatial_longitude_limit_east = 10.0
        dp_obj.geospatial_bounds.geospatial_longitude_limit_west = -10.0
        dp_obj.ooi_product_name = "PRODNAME"

        #------------------------------------------------------------------------------------------------
        # Create a set of ParameterContext objects to define the parameters in the coverage, add each to the ParameterDictionary
        #------------------------------------------------------------------------------------------------

        dp_id = self.dpsc_cli.create_data_product( data_product= dp_obj,
                                            stream_definition_id=ctd_stream_def_id)
        # Assert that the data product has an associated stream at this stage
        stream_ids, _ = self.rrclient.find_objects(dp_id, PRED.hasStream, RT.Stream, True)
        self.assertNotEquals(len(stream_ids), 0)

        # Assert that the data product has an associated stream def at this stage
        stream_def_ids, _ = self.rrclient.find_objects(dp_id, PRED.hasStreamDefinition, RT.StreamDefinition, True)
        self.assertNotEquals(len(stream_def_ids), 0)

        self.dpsc_cli.activate_data_product_persistence(dp_id)

        dp_obj = self.dpsc_cli.read_data_product(dp_id)
        self.assertIsNotNone(dp_obj)
        self.assertEquals(dp_obj.geospatial_point_center.lat, 0.0)
        log.debug('Created data product %s', dp_obj)
        #------------------------------------------------------------------------------------------------
        # test creating a new data product with  a stream definition
        #------------------------------------------------------------------------------------------------
        log.debug('Creating new data product with a stream definition')
        dp_obj = IonObject(RT.DataProduct,
            name='DP2',
            description='some new dp',
            temporal_domain = tdom.dump(),
            spatial_domain = sdom.dump())

        dp_id2 = self.dpsc_cli.create_data_product(dp_obj, ctd_stream_def_id)
        self.dpsc_cli.activate_data_product_persistence(dp_id2)
        log.debug('new dp_id = %s' % dp_id2)

        #------------------------------------------------------------------------------------------------
        #make sure data product is associated with stream def
        #------------------------------------------------------------------------------------------------
        streamdefs = []
        streams, _ = self.rrclient.find_objects(dp_id2, PRED.hasStream, RT.Stream, True)
        for s in streams:
            log.debug("Checking stream %s" % s)
            sdefs, _ = self.rrclient.find_objects(s, PRED.hasStreamDefinition, RT.StreamDefinition, True)
            for sd in sdefs:
                log.debug("Checking streamdef %s" % sd)
                streamdefs.append(sd)
        self.assertIn(ctd_stream_def_id, streamdefs)

        group_names = self.dpsc_cli.get_data_product_group_list()
        self.assertIn("PRODNAME", group_names)


        # test reading a non-existent data product
        log.debug('reading non-existent data product')

        with self.assertRaises(NotFound):
            dp_obj = self.dpsc_cli.read_data_product('some_fake_id')

        # update a data product (tests read also)
        log.debug('Updating data product')
        # first get the existing dp object
        dp_obj = self.dpsc_cli.read_data_product(dp_id)

        # now tweak the object
        dp_obj.description = 'the very first dp'
        dp_obj.geospatial_bounds.geospatial_latitude_limit_north = 20.0
        dp_obj.geospatial_bounds.geospatial_latitude_limit_south = -20.0
        dp_obj.geospatial_bounds.geospatial_longitude_limit_east = 20.0
        dp_obj.geospatial_bounds.geospatial_longitude_limit_west = -20.0
        # now write the dp back to the registry
        update_result = self.dpsc_cli.update_data_product(dp_obj)


        # now get the dp back to see if it was updated
        dp_obj = self.dpsc_cli.read_data_product(dp_id)
        self.assertEquals(dp_obj.description,'the very first dp')
        self.assertEquals(dp_obj.geospatial_point_center.lat, 0.0)
        log.debug('Updated data product %s', dp_obj)

        #test extension
        extended_product = self.dpsc_cli.get_data_product_extension(dp_id)
        self.assertEqual(dp_id, extended_product._id)
        self.assertEqual(ComputedValueAvailability.PROVIDED,
                         extended_product.computed.product_download_size_estimated.status)
        self.assertEqual(0, extended_product.computed.product_download_size_estimated.value)

        self.assertEqual(ComputedValueAvailability.PROVIDED,
                         extended_product.computed.parameters.status)
        #log.debug("test_create_data_product: parameters %s" % extended_product.computed.parameters.value)


        def ion_object_encoder(obj):
            return obj.__dict__


        #test prepare for create
        data_product_data = self.dpsc_cli.prepare_data_product_support()

        #print simplejson.dumps(data_product_data, default=ion_object_encoder, indent= 2)

        self.assertEqual(data_product_data._id, "")
        self.assertEqual(data_product_data.type_, OT.DataProductPrepareSupport)
        self.assertEqual(len(data_product_data.associations['StreamDefinition'].resources), 2)
        self.assertEqual(len(data_product_data.associations['Dataset'].resources), 0)
        self.assertEqual(len(data_product_data.associations['StreamDefinition'].associated_resources), 0)
        self.assertEqual(len(data_product_data.associations['Dataset'].associated_resources), 0)

        #test prepare for update
        data_product_data = self.dpsc_cli.prepare_data_product_support(dp_id)

        #print simplejson.dumps(data_product_data, default=ion_object_encoder, indent= 2)

        self.assertEqual(data_product_data._id, dp_id)
        self.assertEqual(data_product_data.type_, OT.DataProductPrepareSupport)
        self.assertEqual(len(data_product_data.associations['StreamDefinition'].resources), 2)

        self.assertEqual(len(data_product_data.associations['Dataset'].resources), 1)

        self.assertEqual(len(data_product_data.associations['StreamDefinition'].associated_resources), 1)
        self.assertEqual(data_product_data.associations['StreamDefinition'].associated_resources[0].s, dp_id)

        self.assertEqual(len(data_product_data.associations['Dataset'].associated_resources), 1)
        self.assertEqual(data_product_data.associations['Dataset'].associated_resources[0].s, dp_id)

        # now 'delete' the data product
        log.debug("deleting data product: %s" % dp_id)
        self.dpsc_cli.delete_data_product(dp_id)

        # Assert that there are no associated streams leftover after deleting the data product
        stream_ids, assoc_ids = self.rrclient.find_objects(dp_id, PRED.hasStream, RT.Stream, True)
        self.assertEquals(len(stream_ids), 0)
        self.assertEquals(len(assoc_ids), 0)

        self.dpsc_cli.force_delete_data_product(dp_id)

        # now try to get the deleted dp object
        with self.assertRaises(NotFound):
            dp_obj = self.dpsc_cli.read_data_product(dp_id)

        # Get the events corresponding to the data product
        ret = self.unsc.get_recent_events(resource_id=dp_id)
        events = ret.value

        for event in events:
            log.debug("event time: %s" % event.ts_created)

        self.assertTrue(len(events) > 0)

    def test_data_product_stream_def(self):
        pdict_id = self.dataset_management.read_parameter_dictionary_by_name('ctd_parsed_param_dict', id_only=True)
        ctd_stream_def_id = self.pubsubcli.create_stream_definition(name='Simulated CTD data', parameter_dictionary_id=pdict_id)

        tdom, sdom = time_series_domain()

        sdom = sdom.dump()
        tdom = tdom.dump()

        dp_obj = IonObject(RT.DataProduct,
            name='DP1',
            description='some new dp',
            temporal_domain = tdom,
            spatial_domain = sdom)
        dp_id = self.dpsc_cli.create_data_product(data_product= dp_obj,
            stream_definition_id=ctd_stream_def_id)

        stream_def_id = self.dpsc_cli.get_data_product_stream_definition(dp_id)
        self.assertEquals(ctd_stream_def_id, stream_def_id)


    def test_derived_data_product(self):
        pdict_id = self.dataset_management.read_parameter_dictionary_by_name('ctd_parsed_param_dict', id_only=True)
        ctd_stream_def_id = self.pubsubcli.create_stream_definition(name='ctd parsed', parameter_dictionary_id=pdict_id)
        self.addCleanup(self.pubsubcli.delete_stream_definition, ctd_stream_def_id)

        tdom, sdom = time_series_domain()

        dp = DataProduct(name='Instrument DP', temporal_domain=tdom.dump(), spatial_domain=sdom.dump())
        dp_id = self.dpsc_cli.create_data_product(dp, stream_definition_id=ctd_stream_def_id)
        self.addCleanup(self.dpsc_cli.force_delete_data_product, dp_id)

        self.dpsc_cli.activate_data_product_persistence(dp_id)
        self.addCleanup(self.dpsc_cli.suspend_data_product_persistence, dp_id)


        dataset_ids, _ = self.rrclient.find_objects(subject=dp_id, predicate=PRED.hasDataset, id_only=True)
        if not dataset_ids:
            raise NotFound("Data Product %s dataset  does not exist" % str(dp_id))
        dataset_id = dataset_ids[0]
        
        # Make the derived data product
        simple_stream_def_id = self.pubsubcli.create_stream_definition(name='TEMPWAT stream def', parameter_dictionary_id=pdict_id, available_fields=['time','temp'])
        tempwat_dp = DataProduct(name='TEMPWAT')
        tempwat_dp_id = self.dpsc_cli.create_data_product(tempwat_dp, stream_definition_id=simple_stream_def_id, parent_data_product_id=dp_id)
        self.addCleanup(self.dpsc_cli.delete_data_product, tempwat_dp_id)
        # Check that the streams associated with the data product are persisted with
        stream_ids, _ =  self.rrclient.find_objects(dp_id,PRED.hasStream,RT.Stream,True)
        for stream_id in stream_ids:
            self.assertTrue(self.ingestclient.is_persisted(stream_id))

        stream_id = stream_ids[0]
        route = self.pubsubcli.read_stream_route(stream_id=stream_id)

        rdt = RecordDictionaryTool(stream_definition_id=ctd_stream_def_id)
        rdt['time'] = np.arange(20)
        rdt['temp'] = np.arange(20)
        rdt['pressure'] = np.arange(20)

        publisher = StandaloneStreamPublisher(stream_id,route)
        
        dataset_modified = Event()
        def cb(*args, **kwargs):
            dataset_modified.set()
        es = EventSubscriber(event_type=OT.DatasetModified, callback=cb, origin=dataset_id, auto_delete=True)
        es.start()
        self.addCleanup(es.stop)

        publisher.publish(rdt.to_granule())

        self.assertTrue(dataset_modified.wait(30))

        tempwat_dataset_ids, _ = self.rrclient.find_objects(tempwat_dp_id, PRED.hasDataset, id_only=True)
        tempwat_dataset_id = tempwat_dataset_ids[0]
        granule = self.data_retriever.retrieve(tempwat_dataset_id, delivery_format=simple_stream_def_id)
        rdt = RecordDictionaryTool.load_from_granule(granule)
        np.testing.assert_array_equal(rdt['time'], np.arange(20))
        self.assertEquals(set(rdt.fields), set(['time','temp']))


    def test_activate_suspend_data_product(self):

        #------------------------------------------------------------------------------------------------
        # create a stream definition for the data from the ctd simulator
        #------------------------------------------------------------------------------------------------
        pdict_id = self.dataset_management.read_parameter_dictionary_by_name('ctd_parsed_param_dict', id_only=True)
        ctd_stream_def_id = self.pubsubcli.create_stream_definition(name='Simulated CTD data', parameter_dictionary_id=pdict_id)
        log.debug("Created stream def id %s" % ctd_stream_def_id)

        #------------------------------------------------------------------------------------------------
        # test creating a new data product w/o a stream definition
        #------------------------------------------------------------------------------------------------
        # Construct temporal and spatial Coordinate Reference System objects
        tdom, sdom = time_series_domain()

        sdom = sdom.dump()
        tdom = tdom.dump()

        dp_obj = IonObject(RT.DataProduct,
            name='DP1',
            description='some new dp',
            temporal_domain = tdom,
            spatial_domain = sdom)

        log.debug("Created an IonObject for a data product: %s" % dp_obj)

        #------------------------------------------------------------------------------------------------
        # Create a set of ParameterContext objects to define the parameters in the coverage, add each to the ParameterDictionary
        #------------------------------------------------------------------------------------------------

        dp_id = self.dpsc_cli.create_data_product(data_product= dp_obj,
            stream_definition_id=ctd_stream_def_id)

        #------------------------------------------------------------------------------------------------
        # test activate and suspend data product persistence
        #------------------------------------------------------------------------------------------------
        self.dpsc_cli.activate_data_product_persistence(dp_id)
        
        dp_obj = self.dpsc_cli.read_data_product(dp_id)
        self.assertIsNotNone(dp_obj)

        dataset_ids, _ = self.rrclient.find_objects(subject=dp_id, predicate=PRED.hasDataset, id_only=True)
        if not dataset_ids:
            raise NotFound("Data Product %s dataset  does not exist" % str(dp_id))
        dataset_id = dataset_ids[0]


        # Check that the streams associated with the data product are persisted with
        stream_ids, _ =  self.rrclient.find_objects(dp_id,PRED.hasStream,RT.Stream,True)
        for stream_id in stream_ids:
            self.assertTrue(self.ingestclient.is_persisted(stream_id))

        stream_id = stream_ids[0]
        route = self.pubsubcli.read_stream_route(stream_id=stream_id)

        rdt = RecordDictionaryTool(stream_definition_id=ctd_stream_def_id)
        rdt['time'] = np.arange(20)
        rdt['temp'] = np.arange(20)

        publisher = StandaloneStreamPublisher(stream_id,route)
        
        dataset_modified = Event()
        def cb(*args, **kwargs):
            dataset_modified.set()
        es = EventSubscriber(event_type=OT.DatasetModified, callback=cb, origin=dataset_id, auto_delete=True)
        es.start()
        self.addCleanup(es.stop)

        publisher.publish(rdt.to_granule())

        self.assertTrue(dataset_modified.wait(30))

        #--------------------------------------------------------------------------------
        # Now get the data in one chunk using an RPC call to start_retrieve
        #--------------------------------------------------------------------------------

        replay_data = self.data_retriever.retrieve(dataset_ids[0])
        self.assertIsInstance(replay_data, Granule)

        log.debug("The data retriever was able to replay the dataset that was attached to the data product "
                  "we wanted to be persisted. Therefore the data product was indeed persisted with "
                  "otherwise we could not have retrieved its dataset using the data retriever. Therefore "
                  "this demonstration shows that L4-CI-SA-RQ-267 is satisfied: 'Data product management shall persist data products'")

        data_product_object = self.rrclient.read(dp_id)
        self.assertEquals(data_product_object.name,'DP1')
        self.assertEquals(data_product_object.description,'some new dp')

        log.debug("Towards L4-CI-SA-RQ-308: 'Data product management shall persist data product metadata'. "
                  " Attributes in create for the data product obj, name= '%s', description='%s', match those of object from the "
                  "resource registry, name='%s', desc='%s'" % (dp_obj.name, dp_obj.description,data_product_object.name,
                                                           data_product_object.description))

        #------------------------------------------------------------------------------------------------
        # test suspend data product persistence
        #------------------------------------------------------------------------------------------------
        self.dpsc_cli.suspend_data_product_persistence(dp_id)


        dataset_modified.clear()

        rdt['time'] = np.arange(20,40)

        publisher.publish(rdt.to_granule())
        self.assertFalse(dataset_modified.wait(2))

        self.dpsc_cli.activate_data_product_persistence(dp_id)
        dataset_modified.clear()

        publisher.publish(rdt.to_granule())
        self.assertTrue(dataset_modified.wait(30))

        granule = self.data_retriever.retrieve(dataset_id)
        rdt = RecordDictionaryTool.load_from_granule(granule)
        np.testing.assert_array_almost_equal(rdt['time'], np.arange(40))


        dataset_ids, _ = self.rrclient.find_objects(dp_id, PRED.hasDataset, id_only=True)
        self.assertEquals(len(dataset_ids), 1)

        self.dpsc_cli.suspend_data_product_persistence(dp_id)
        self.dpsc_cli.force_delete_data_product(dp_id)
        # now try to get the deleted dp object

        with self.assertRaises(NotFound):
            dp_obj = self.rrclient.read(dp_id)

    def test_lookup_values(self):
        ph = ParameterHelper(self.dataset_management, self.addCleanup)
        pdict_id = ph.create_lookups()
        stream_def_id = self.pubsubcli.create_stream_definition('lookup', parameter_dictionary_id=pdict_id)
        self.addCleanup(self.pubsubcli.delete_stream_definition, stream_def_id)

        data_product = DataProduct(name='lookup data product')
        tdom, sdom = time_series_domain()
        data_product.temporal_domain = tdom.dump()
        data_product.spatial_domain = sdom.dump()

        data_product_id = self.dpsc_cli.create_data_product(data_product, stream_definition_id=stream_def_id)
        self.addCleanup(self.dpsc_cli.delete_data_product, data_product_id)
        data_producer = DataProducer(name='producer')
        data_producer.producer_context = DataProcessProducerContext()
        data_producer.producer_context.configuration['qc_keys'] = ['offset_document']
        data_producer_id, _ = self.rrclient.create(data_producer)
        self.addCleanup(self.rrclient.delete, data_producer_id)
        assoc,_ = self.rrclient.create_association(subject=data_product_id, object=data_producer_id, predicate=PRED.hasDataProducer)
        self.addCleanup(self.rrclient.delete_association, assoc)

        document_keys = self.damsclient.list_qc_references(data_product_id)
            
        self.assertEquals(document_keys, ['offset_document'])
        svm = StoredValueManager(self.container)
        svm.stored_value_cas('offset_document', {'offset_a':2.0})
        self.dpsc_cli.activate_data_product_persistence(data_product_id)
        dataset_ids, _ = self.rrclient.find_objects(subject=data_product_id, predicate=PRED.hasDataset, id_only=True)
        dataset_id = dataset_ids[0]

        dataset_monitor = DatasetMonitor(dataset_id)
        self.addCleanup(dataset_monitor.stop)

        rdt = RecordDictionaryTool(stream_definition_id=stream_def_id)
        rdt['time'] = [0]
        rdt['temp'] = [20.]
        granule = rdt.to_granule()

        stream_ids, _ = self.rrclient.find_objects(subject=data_product_id, predicate=PRED.hasStream, id_only=True)
        stream_id = stream_ids[0]
        route = self.pubsubcli.read_stream_route(stream_id=stream_id)

        publisher = StandaloneStreamPublisher(stream_id, route)
        publisher.publish(granule)

        self.assertTrue(dataset_monitor.event.wait(10))

        granule = self.data_retriever.retrieve(dataset_id)
        rdt2 = RecordDictionaryTool.load_from_granule(granule)
        np.testing.assert_array_equal(rdt['temp'], rdt2['temp'])
        np.testing.assert_array_almost_equal(rdt2['calibrated'], np.array([22.0]))
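        # Expected value under the assumed lookup relationship calibrated = temp + offset_a
        # (wired up by ParameterHelper.create_lookups): 20.0 + 2.0 from 'offset_document'
        # gives 22.0 here; after the reference update below installs offset_a=3.0, the
        # second sample is expected to come out as 20.0 + 3.0 = 23.0.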


        svm.stored_value_cas('updated_document', {'offset_a':3.0})
        dataset_monitor = DatasetMonitor(dataset_id)
        self.addCleanup(dataset_monitor.stop)
        ep = EventPublisher(event_type=OT.ExternalReferencesUpdatedEvent)
        ep.publish_event(origin=data_product_id, reference_keys=['updated_document'])

        rdt = RecordDictionaryTool(stream_definition_id=stream_def_id)
        rdt['time'] = [1]
        rdt['temp'] = [20.]
        granule = rdt.to_granule()
        gevent.sleep(2) # Yield so that the event goes through
        publisher.publish(granule)
        self.assertTrue(dataset_monitor.event.wait(10))

        granule = self.data_retriever.retrieve(dataset_id)
        rdt2 = RecordDictionaryTool.load_from_granule(granule)
        np.testing.assert_array_equal(rdt2['temp'],np.array([20.,20.]))
        np.testing.assert_array_almost_equal(rdt2['calibrated'], np.array([22.0,23.0]))

class BulkIngestBase(object):

    def setUp(self):
        self._start_container()
        self.container.start_rel_from_url('res/deploy/r2deploy.yml')

        self.pubsub_management    = PubsubManagementServiceClient()
        self.dataset_management   = DatasetManagementServiceClient()
        self.data_product_management = DataProductManagementServiceClient()
        self.data_acquisition_management = DataAcquisitionManagementServiceClient()
        self.data_retriever = DataRetrieverServiceClient()
        self.process_dispatch_client = ProcessDispatcherServiceClient(node=self.container.node)
        self.resource_registry       = self.container.resource_registry
        self.context_ids = self.build_param_contexts()
        self.setup_resources()

    def build_param_contexts(self):
        raise NotImplementedError('build_param_contexts must be implemented in child classes')

    def create_external_dataset(self):
        raise NotImplementedError('create_external_dataset must be implemented in child classes')

    def get_dvr_config(self):
        raise NotImplementedError('get_dvr_config must be implemented in child classes')

    def get_retrieve_client(self, dataset_id=''):
        raise NotImplementedError('get_retrieve_client must be implemented in child classes')

    def test_data_ingest(self):
        self.pdict_id = self.create_parameter_dict(self.name)
        self.stream_def_id = self.create_stream_def(self.name, self.pdict_id)
        self.data_product_id = self.create_data_product(self.name, self.description, self.stream_def_id)
        self.dataset_id = self.get_dataset_id(self.data_product_id)
        self.stream_id, self.route = self.get_stream_id_and_route(self.data_product_id)
        self.external_dataset_id = self.create_external_dataset()
        self.data_producer_id = self.register_external_dataset(self.external_dataset_id)
        self.start_agent()
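
    # A minimal sketch (not part of the original source) of what a concrete
    # subclass might look like; the class name, EDA_* values and driver module
    # below are hypothetical placeholders:
    #
    #   class ExampleBulkIngestTest(BulkIngestBase, IonIntegrationTestCase):
    #       name = 'example_dataset'
    #       description = 'example bulk ingest test'
    #       EDA_NAME = 'example_dataset_agent'
    #       EDA_MOD  = 'ion.agents.data.example_dataset_agent'   # hypothetical module
    #       EDA_CLS  = 'ExampleDatasetAgent'                     # hypothetical class
    #
    #       def setup_resources(self):
    #           pass
    #
    #       def build_param_contexts(self):
    #           return []   # ids of the ParameterContext resources for the stream
    #
    #       def create_external_dataset(self):
    #           return ''   # id of the ExternalDataset resource to ingest from
    #
    #       def get_dvr_config(self):
    #           return {}   # driver configuration passed to the dataset agent
    #
    #       def get_retrieve_client(self, dataset_id=''):
    #           pass        # retrieve and validate the ingested data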

    def create_parameter_dict(self, name=''):
        return self.dataset_management.create_parameter_dictionary(name=name, parameter_context_ids=self.context_ids, temporal_context='time')

    def create_stream_def(self, name='', pdict_id=''):
        return self.pubsub_management.create_stream_definition(name=name, parameter_dictionary_id=pdict_id)

    def create_data_product(self, name='', description='', stream_def_id=''):
        tdom, sdom = time_series_domain()
        tdom = tdom.dump()
        sdom = sdom.dump()
        dp_obj = DataProduct(
            name=name,
            description=description,
            processing_level_code='Parsed_Canonical',
            temporal_domain=tdom,
            spatial_domain=sdom)

        data_product_id = self.data_product_management.create_data_product(data_product=dp_obj, stream_definition_id=stream_def_id)
        self.data_product_management.activate_data_product_persistence(data_product_id)
        return data_product_id

    def register_external_dataset(self, external_dataset_id=''):
        return self.data_acquisition_management.register_external_data_set(external_dataset_id=external_dataset_id)

    def get_dataset_id(self, data_product_id=''):
        dataset_ids, assocs = self.resource_registry.find_objects(subject=data_product_id, predicate='hasDataset', id_only=True)
        return dataset_ids[0]

    def get_stream_id_and_route(self, data_product_id):
        stream_ids, _ = self.resource_registry.find_objects(data_product_id, PRED.hasStream, RT.Stream, id_only=True)
        stream_id = stream_ids[0]
        route = self.pubsub_management.read_stream_route(stream_id)
        #self.create_logger(self.name, stream_id)
        return stream_id, route

    def start_agent(self):
        agent_config = {
            'driver_config': self.get_dvr_config(),
            'stream_config': {},
            'agent': {'resource_id': self.external_dataset_id},
            'test_mode': True
        }

        _ia_pid = self.container.spawn_process(
            name=self.EDA_NAME,
            module=self.EDA_MOD,
            cls=self.EDA_CLS,
            config=agent_config)

        self._ia_client = ResourceAgentClient(self.external_dataset_id, process=FakeProcess())

        # Step the dataset agent through INITIALIZE -> GO_ACTIVE -> RUN, then start
        # autosampling on the driver so ingestion begins.
        cmd = AgentCommand(command=ResourceAgentEvent.INITIALIZE)
        self._ia_client.execute_agent(cmd)
        cmd = AgentCommand(command=ResourceAgentEvent.GO_ACTIVE)
        self._ia_client.execute_agent(cmd)
        cmd = AgentCommand(command=ResourceAgentEvent.RUN)
        self._ia_client.execute_agent(cmd)
        cmd = AgentCommand(command=DriverEvent.START_AUTOSAMPLE)
        self._ia_client.execute_resource(command=cmd)

        self.start_listener(self.dataset_id)

    def stop_agent(self):
        cmd = AgentCommand(command=DriverEvent.STOP_AUTOSAMPLE)
        self._ia_client.execute_resource(cmd)

        cmd = AgentCommand(command=ResourceAgentEvent.RESET)
        self._ia_client.execute_agent(cmd)

    def start_listener(self, dataset_id=''):
        dataset_modified = Event()
        #callback to use retrieve to get data from the coverage
        def cb(*args, **kwargs):
            self.get_retrieve_client(dataset_id=dataset_id)

        #callback to keep execution going once dataset has been fully ingested
        def cb2(*args, **kwargs):
            dataset_modified.set()

        es = EventSubscriber(event_type=OT.DatasetModified, callback=cb, origin=dataset_id)
        es.start()

        es2 = EventSubscriber(event_type=OT.DeviceCommonLifecycleEvent, callback=cb2, origin='BaseDataHandler._acquire_sample')
        es2.start()

        self.addCleanup(es.stop)
        self.addCleanup(es2.stop)

        #let it go for up to 120 seconds, then stop the agent and reset it
        dataset_modified.wait(120)
        self.stop_agent()

    def create_logger(self, name, stream_id=''):

        # logger process
        producer_definition = ProcessDefinition(name=name+'_logger')
        producer_definition.executable = {
            'module':'ion.processes.data.stream_granule_logger',
            'class':'StreamGranuleLogger'
        }

        logger_procdef_id = self.process_dispatch_client.create_process_definition(process_definition=producer_definition)
        configuration = {
            'process':{
                'stream_id':stream_id,
                }
        }
        pid = self.process_dispatch_client.schedule_process(process_definition_id=logger_procdef_id, configuration=configuration)

        return pid
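

# TestOmsLaunch builds an RSN platform hierarchy from the OMS network definition,
# registers the corresponding sites, devices and agents, and launches the base
# PlatformAgent to verify that data samples and events flow end to end.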
class TestOmsLaunch(IonIntegrationTestCase):

    def setUp(self):
        self._start_container()

        self.container.start_rel_from_url('res/deploy/r2deploy.yml')

        self.rrclient = ResourceRegistryServiceClient(node=self.container.node)
        self.omsclient = ObservatoryManagementServiceClient(node=self.container.node)
        self.imsclient = InstrumentManagementServiceClient(node=self.container.node)
        self.damsclient = DataAcquisitionManagementServiceClient(node=self.container.node)
        self.dpclient = DataProductManagementServiceClient(node=self.container.node)
        self.pubsubcli = PubsubManagementServiceClient(node=self.container.node)
        self.processdispatchclient = ProcessDispatcherServiceClient(node=self.container.node)
        self.dataprocessclient = DataProcessManagementServiceClient(node=self.container.node)
        self.dataset_management = DatasetManagementServiceClient()


        # Use the network definition provided by RSN OMS directly.
        rsn_oms = CIOMSClientFactory.create_instance(DVR_CONFIG['oms_uri'])
        self._network_definition = RsnOmsUtil.build_network_definition(rsn_oms)
        # get serialized version for the configuration:
        self._network_definition_ser = NetworkUtil.serialize_network_definition(self._network_definition)
        if log.isEnabledFor(logging.DEBUG):
            log.debug("NetworkDefinition serialization:\n%s", self._network_definition_ser)



        self.platformModel_id = None

        self.all_platforms = {}
        self.agent_streamconfig_map = {}

        self._async_data_result = AsyncResult()
        self._data_subscribers = []
        self._samples_received = []
        self.addCleanup(self._stop_data_subscribers)

        self._async_event_result = AsyncResult()
        self._event_subscribers = []
        self._events_received = []
        self.addCleanup(self._stop_event_subscribers)
        self._start_event_subscriber()

        self._set_up_DataProduct_obj()
        self._set_up_PlatformModel_obj()

    def _set_up_DataProduct_obj(self):
        # Create data product object to be used for each of the platform log streams
        tdom, sdom = time_series_domain()
        sdom = sdom.dump()
        tdom = tdom.dump()

        self.pdict_id = self.dataset_management.read_parameter_dictionary_by_name('platform_eng_parsed', id_only=True)
        self.platform_eng_stream_def_id = self.pubsubcli.create_stream_definition(
            name='platform_eng', parameter_dictionary_id=self.pdict_id)
        self.dp_obj = IonObject(RT.DataProduct,
            name='platform_eng data',
            description='platform_eng test',
            temporal_domain = tdom,
            spatial_domain = sdom)

    def _set_up_PlatformModel_obj(self):
        # Create PlatformModel
        platformModel_obj = IonObject(RT.PlatformModel,
            name='RSNPlatformModel',
            description="RSNPlatformModel")
        try:
            self.platformModel_id = self.imsclient.create_platform_model(platformModel_obj)
        except BadRequest as ex:
            self.fail("failed to create new PLatformModel: %s" %ex)
        log.debug( 'new PlatformModel id = %s', self.platformModel_id)

    def _traverse(self, pnode, platform_id, parent_platform_objs=None):
        """
        Recursive routine that repeatedly calls _prepare_platform to build
        the object dictionary for each platform.

        @param pnode PlatformNode
        @param platform_id ID of the platform to be visited
        @param parent_platform_objs dict of objects associated to parent
                        platform, if any.

        @retval the dict returned by _prepare_platform at this level.
        """

        log.info("Starting _traverse for %r", platform_id)

        plat_objs = self._prepare_platform(pnode, platform_id, parent_platform_objs)

        self.all_platforms[platform_id] = plat_objs

        # now, traverse the children:
        for sub_pnode in pnode.subplatforms.itervalues():
            subplatform_id = sub_pnode.platform_id
            self._traverse(sub_pnode, subplatform_id, plat_objs)

        return plat_objs

    def _prepare_platform(self, pnode, platform_id, parent_platform_objs):
        """
        This routine generalizes the manual construction originally done in
        test_oms_launch.py. It is called by the recursive _traverse method so
        all platforms starting from a given base platform are prepared.

        Note: For simplicity in this test, sites are organized in the same
        hierarchical way as the platforms themselves.

        @param pnode PlatformNode
        @param platform_id ID of the platform to be visited
        @param parent_platform_objs dict of objects associated to parent
                        platform, if any.

        @retval a dict of associated objects similar to those in
                test_oms_launch
        """

        site__obj = IonObject(RT.PlatformSite,
            name='%s_PlatformSite' % platform_id,
            description='%s_PlatformSite platform site' % platform_id)

        site_id = self.omsclient.create_platform_site(site__obj)

        if parent_platform_objs:
            # establish hasSite association with the parent
            self.rrclient.create_association(
                subject=parent_platform_objs['site_id'],
                predicate=PRED.hasSite,
                object=site_id)

        # prepare platform attributes and ports:
        monitor_attribute_objs, monitor_attribute_dicts = self._prepare_platform_attributes(pnode, platform_id)

        port_objs, port_dicts = self._prepare_platform_ports(pnode, platform_id)

        device__obj = IonObject(RT.PlatformDevice,
            name='%s_PlatformDevice' % platform_id,
            description='%s_PlatformDevice platform device' % platform_id,
            #                        ports=port_objs,
            #                        platform_monitor_attributes = monitor_attribute_objs
        )

        device__dict = dict(ports=port_dicts,
            platform_monitor_attributes=monitor_attribute_dicts)

        self.device_id = self.imsclient.create_platform_device(device__obj)

        self.imsclient.assign_platform_model_to_platform_device(self.platformModel_id, self.device_id)
        self.rrclient.create_association(subject=site_id, predicate=PRED.hasDevice, object=self.device_id)
        self.damsclient.register_instrument(instrument_id=self.device_id)


        if parent_platform_objs:
            # establish hasDevice association with the parent
            self.rrclient.create_association(
                subject=parent_platform_objs['device_id'],
                predicate=PRED.hasDevice,
                object=self.device_id)

        agent__obj = IonObject(RT.PlatformAgent,
            name='%s_PlatformAgent' % platform_id,
            description='%s_PlatformAgent platform agent' % platform_id)

        agent_id = self.imsclient.create_platform_agent(agent__obj)

        if parent_platform_objs:
            # add this platform_id to parent's children:
            parent_platform_objs['children'].append(platform_id)


        self.imsclient.assign_platform_model_to_platform_agent(self.platformModel_id, agent_id)

        #        agent_instance_obj = IonObject(RT.PlatformAgentInstance,
        #                                name='%s_PlatformAgentInstance' % platform_id,
        #                                description="%s_PlatformAgentInstance" % platform_id)
        #
        #        agent_instance_id = self.imsclient.create_platform_agent_instance(
        #                            agent_instance_obj, agent_id, device_id)

        plat_objs = {
            'platform_id':        platform_id,
            'site__obj':          site__obj,
            'site_id':            site_id,
            'device__obj':        device__obj,
            'device_id':          self.device_id,
            'agent__obj':         agent__obj,
            'agent_id':           agent_id,
            #            'agent_instance_obj': agent_instance_obj,
            #            'agent_instance_id':  agent_instance_id,
            'children':           []
        }

        log.info("plat_objs for platform_id %r = %s", platform_id, str(plat_objs))

        stream_config = self._create_stream_config(plat_objs)
        self.agent_streamconfig_map[platform_id] = stream_config
        #        self.agent_streamconfig_map[platform_id] = None
        #        self._start_data_subscriber(agent_instance_id, stream_config)

        return plat_objs

    def _prepare_platform_attributes(self, pnode, platform_id):
        """
        Returns the list of PlatformMonitorAttributes objects corresponding to
        the attributes associated to the given platform.
        """
        # TODO complete the clean-up of this method
        ret_infos = dict((n, a.defn) for (n, a) in pnode.attrs.iteritems())

        monitor_attribute_objs = []
        monitor_attribute_dicts = []
        for attrName, attrDfn in ret_infos.iteritems():
            log.debug("platform_id=%r: preparing attribute=%r", platform_id, attrName)

            monitor_rate = attrDfn['monitorCycleSeconds']
            units =        attrDfn['units']

            plat_attr_obj = IonObject(OT.PlatformMonitorAttributes,
                id=attrName,
                monitor_rate=monitor_rate,
                units=units)

            plat_attr_dict = dict(id=attrName,
                monitor_rate=monitor_rate,
                units=units)

            monitor_attribute_objs.append(plat_attr_obj)
            monitor_attribute_dicts.append(plat_attr_dict)

        return monitor_attribute_objs, monitor_attribute_dicts

    def _prepare_platform_ports(self, pnode, platform_id):
        """
        Returns the list of PlatformPort objects corresponding to the ports
        associated to the given platform.
        """
        # TODO complete the clean-up of this method

        port_objs = []
        port_dicts = []
        for port_id, network in pnode.ports.iteritems():
            log.debug("platform_id=%r: preparing port=%r network=%s",
                      platform_id, port_id, network)

            #
            # Note: the name "IP" address has been changed to "network" address
            # in the CI-OMS interface spec.
            #
            plat_port_obj = IonObject(OT.PlatformPort,
                                      port_id=port_id,
                                      ip_address=network)

            plat_port_dict = dict(port_id=port_id,
                                  network=network)

            port_objs.append(plat_port_obj)

            port_dicts.append(plat_port_dict)

        return port_objs, port_dicts

    def _create_stream_config(self, plat_objs):

        platform_id = plat_objs['platform_id']
        device_id =   plat_objs['device_id']


        #create the log data product
        self.dp_obj.name = '%s platform_eng data' % platform_id
        self.data_product_id = self.dpclient.create_data_product(data_product=self.dp_obj, stream_definition_id=self.platform_eng_stream_def_id)
        self.damsclient.assign_data_product(input_resource_id=device_id, data_product_id=self.data_product_id)
        # Retrieve the id of the output stream of the data product
        stream_ids, _ = self.rrclient.find_objects(self.data_product_id, PRED.hasStream, None, True)

        stream_config = self._build_stream_config(stream_ids[0])
        return stream_config

    def _build_stream_config(self, stream_id=''):

        platform_eng_dictionary = DatasetManagementService.get_parameter_dictionary_by_name('platform_eng_parsed')

        # look up the stream definition associated with this stream
        stream_def_ids, _ = self.rrclient.find_objects(stream_id,
            PRED.hasStreamDefinition,
            RT.StreamDefinition,
            True)


        stream_route = self.pubsubcli.read_stream_route(stream_id=stream_id)
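        # Assemble the stream configuration handed to the platform agent; the
        # parameter dictionary is included in serialized (dumped) form.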
        stream_config = {'routing_key' : stream_route.routing_key,
                         'stream_id' : stream_id,
                         'stream_definition_ref' : stream_def_ids[0],
                         'exchange_point' : stream_route.exchange_point,
                         'parameter_dictionary':platform_eng_dictionary.dump()}

        return stream_config

    def _set_platform_agent_instances(self):
        """
        Once most of the objs/defs associated with all platforms are in
        place, this method creates and associates the PlatformAgentInstance
        elements.
        """

        self.platform_configs = {}
        for platform_id, plat_objs in self.all_platforms.iteritems():
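            # Two variants of the same configuration are built: PLATFORM_CONFIG,
            # embedded in the agent instance below, has agent_streamconfig_map set
            # to None, while self.platform_configs keeps the full map for use by
            # the test itself.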

            PLATFORM_CONFIG  = {
                'platform_id':             platform_id,

                'agent_streamconfig_map':  None, #self.agent_streamconfig_map,

                'driver_config':           DVR_CONFIG,

                'network_definition' :     self._network_definition_ser
                }

            self.platform_configs[platform_id] = {
                'platform_id':             platform_id,

                'agent_streamconfig_map':  self.agent_streamconfig_map,

                'driver_config':           DVR_CONFIG,

                'network_definition' :     self._network_definition_ser
                }

            agent_config = {
                'platform_config': PLATFORM_CONFIG,
                }

            self.stream_id = self.agent_streamconfig_map[platform_id]['stream_id']

            #            import pprint
            #            print '============== platform id within unit test: %s ===========' % platform_id
            #            pprint.pprint(agent_config)
            #agent_config['platform_config']['agent_streamconfig_map'] = None

            agent_instance_obj = IonObject(RT.PlatformAgentInstance,
                name='%s_PlatformAgentInstance' % platform_id,
                description="%s_PlatformAgentInstance" % platform_id,
                agent_config=agent_config)

            agent_id = plat_objs['agent_id']
            device_id = plat_objs['device_id']
            agent_instance_id = self.imsclient.create_platform_agent_instance(
                agent_instance_obj, agent_id, device_id)

            plat_objs['agent_instance_obj'] = agent_instance_obj
            plat_objs['agent_instance_id']  = agent_instance_id


            stream_config = self.agent_streamconfig_map[platform_id]
            self._start_data_subscriber(agent_instance_id, stream_config)


    def _start_data_subscriber(self, stream_name, stream_config):
        """
        Starts data subscriber for the given stream_name and stream_config
        """

        def consume_data(message, stream_route, stream_id):
            # A callback for processing subscribed-to data.
            log.info('Subscriber received data message: %s.', str(message))
            self._samples_received.append(message)
            self._async_data_result.set()

        log.info('_start_data_subscriber stream_name=%r', stream_name)

        # Note: the most recently assigned self.stream_id is used here rather than
        # stream_config['stream_id'] from the argument.
        stream_id = self.stream_id  # stream_config['stream_id']

        # Create subscription for the stream
        exchange_name = '%s_queue' % stream_name
        self.container.ex_manager.create_xn_queue(exchange_name).purge()
        sub = StandaloneStreamSubscriber(exchange_name, consume_data)
        sub.start()
        self._data_subscribers.append(sub)
        sub_id = self.pubsubcli.create_subscription(name=exchange_name, stream_ids=[stream_id])
        self.pubsubcli.activate_subscription(sub_id)
        sub.subscription_id = sub_id

    def _stop_data_subscribers(self):
        """
        Stop the data subscribers on cleanup.
        """
        try:
            for sub in self._data_subscribers:
                if hasattr(sub, 'subscription_id'):
                    try:
                        self.pubsubcli.deactivate_subscription(sub.subscription_id)
                    except:
                        pass
                    self.pubsubcli.delete_subscription(sub.subscription_id)
                sub.stop()
        finally:
            self._data_subscribers = []

    def _start_event_subscriber(self, event_type="DeviceEvent", sub_type="platform_event"):
        """
        Starts event subscriber for events of given event_type ("DeviceEvent"
        by default) and given sub_type ("platform_event" by default).
        """

        def consume_event(evt, *args, **kwargs):
            # A callback for consuming events.
            log.info('Event subscriber received evt: %s.', str(evt))
            self._events_received.append(evt)
            self._async_event_result.set(evt)

        sub = EventSubscriber(event_type=event_type,
            sub_type=sub_type,
            callback=consume_event)

        sub.start()
        log.info("registered event subscriber for event_type=%r, sub_type=%r",
            event_type, sub_type)

        self._event_subscribers.append(sub)
        sub._ready_event.wait(timeout=EVENT_TIMEOUT)

    def _stop_event_subscribers(self):
        """
        Stops the event subscribers on cleanup.
        """
        try:
            for sub in self._event_subscribers:
                if hasattr(sub, 'subscription_id'):
                    try:
                        self.pubsubcli.deactivate_subscription(sub.subscription_id)
                    except:
                        pass
                    self.pubsubcli.delete_subscription(sub.subscription_id)
                sub.stop()
        finally:
            self._event_subscribers = []

    @skip("IMS does't net implement topology")
    def test_hierarchy(self):
        self._create_launch_verify(BASE_PLATFORM_ID)

    @skip("Needs alignment with recent IMS changes")
    def test_single_platform(self):
        self._create_launch_verify('LJ01D')

    def _create_launch_verify(self, base_platform_id):
        # and trigger the traversal of the branch rooted at that base platform
        # to create corresponding ION objects and configuration dictionaries:

        pnode = self._network_definition.pnodes[base_platform_id]
        base_platform_objs = self._traverse(pnode, base_platform_id)

        # now that most of the topology information is there, add the
        # PlatformAgentInstance elements
        self._set_platform_agent_instances()

        base_platform_config = self.platform_configs[base_platform_id]

        log.info("base_platform_id = %r", base_platform_id)


        #-------------------------------------------------------------------------------------
        # Create Data Process Definition and Data Process for the eng stream monitor process
        #-------------------------------------------------------------------------------------
        dpd_obj = IonObject(RT.DataProcessDefinition,
            name='DemoStreamAlertTransform',
            description='For testing EventTriggeredTransform_B',
            module='ion.processes.data.transforms.event_alert_transform',
            class_name='DemoStreamAlertTransform')
        self.platform_dprocdef_id = self.dataprocessclient.create_data_process_definition(dpd_obj)

        #THERE SHOULD BE NO STREAMDEF REQUIRED HERE.
        platform_streamdef_id = self.pubsubcli.create_stream_definition(name='platform_eng_parsed', parameter_dictionary_id=self.pdict_id)
        self.dataprocessclient.assign_stream_definition_to_data_process_definition(platform_streamdef_id, self.platform_dprocdef_id, binding='output' )

        config = {
            'process':{
                'timer_interval': 5,
                'queue_name': 'a_queue',
                'variable_name': 'input_voltage',
                'time_field_name': 'preferred_timestamp',
                'valid_values': [-100, 100],
                'timer_origin': 'Interval Timer'
            }
        }
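
        # The settings above are consumed by DemoStreamAlertTransform; presumably
        # each 'input_voltage' sample is checked against the valid_values range
        # [-100, 100] on a 5-second timer (an informal reading of the field names).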


        platform_data_process_id = self.dataprocessclient.create_data_process(self.platform_dprocdef_id, [self.data_product_id], {}, config)
        self.dataprocessclient.activate_data_process(platform_data_process_id)
        self.addCleanup(self.dataprocessclient.delete_data_process, platform_data_process_id)

        #-------------------------------
        # Launch Base Platform AgentInstance, connect to the resource agent client
        #-------------------------------

        agent_instance_id = base_platform_objs['agent_instance_id']
        log.debug("about to call imsclient.start_platform_agent_instance with id=%s", agent_instance_id)
        pid = self.imsclient.start_platform_agent_instance(platform_agent_instance_id=agent_instance_id)
        log.debug("start_platform_agent_instance returned pid=%s", pid)

        #wait for start
        instance_obj = self.imsclient.read_platform_agent_instance(agent_instance_id)
        gate = ProcessStateGate(self.processdispatchclient.read_process,
            instance_obj.agent_process_id,
            ProcessStateEnum.RUNNING)
        self.assertTrue(gate.await(90), "The platform agent instance did not spawn in 90 seconds")

        agent_instance_obj = self.imsclient.read_platform_agent_instance(agent_instance_id)
        log.debug('test_oms_create_and_launch: Platform agent instance obj: %s', str(agent_instance_obj))

        # Start a resource agent client to talk with the platform agent.
        self._pa_client = ResourceAgentClient('paclient', name=agent_instance_obj.agent_process_id,  process=FakeProcess())
        log.debug(" test_oms_create_and_launch:: got pa client %s", str(self._pa_client))

        log.debug("base_platform_config =\n%s", base_platform_config)

        # ping_agent can be issued before INITIALIZE
        retval = self._pa_client.ping_agent(timeout=TIMEOUT)
        log.debug( 'Base Platform ping_agent = %s', str(retval) )

        # issue INITIALIZE command to the base platform, which will launch the
        # creation of the whole platform hierarchy rooted at base_platform_config['platform_id']
        #        cmd = AgentCommand(command=PlatformAgentEvent.INITIALIZE, kwargs=dict(plat_config=base_platform_config))
        cmd = AgentCommand(command=PlatformAgentEvent.INITIALIZE)
        retval = self._pa_client.execute_agent(cmd, timeout=TIMEOUT)
        log.debug( 'Base Platform INITIALIZE = %s', str(retval) )


        # GO_ACTIVE
        cmd = AgentCommand(command=PlatformAgentEvent.GO_ACTIVE)
        retval = self._pa_client.execute_agent(cmd, timeout=TIMEOUT)
        log.debug( 'Base Platform GO_ACTIVE = %s', str(retval) )

        # RUN:
        cmd = AgentCommand(command=PlatformAgentEvent.RUN)
        retval = self._pa_client.execute_agent(cmd, timeout=TIMEOUT)
        log.debug( 'Base Platform RUN = %s', str(retval) )

        # START_MONITORING:
        cmd = AgentCommand(command=PlatformAgentEvent.START_MONITORING)
        retval = self._pa_client.execute_agent(cmd, timeout=TIMEOUT)
        log.debug( 'Base Platform START_MONITORING = %s', str(retval) )

        # wait for data sample
        # just wait for at least one -- see consume_data above
        log.info("waiting for reception of a data sample...")
        self._async_data_result.get(timeout=DATA_TIMEOUT)
        self.assertTrue(len(self._samples_received) >= 1)

        log.info("waiting a bit more for reception of more data samples...")
        sleep(15)
        log.info("Got data samples: %d", len(self._samples_received))


        # wait for event
        # just wait for at least one event -- see consume_event above
        log.info("waiting for reception of an event...")
        self._async_event_result.get(timeout=EVENT_TIMEOUT)
        log.info("Received events: %s", len(self._events_received))

        # get the extended platform, which will include platform aggregate status fields
        extended_platform = self.imsclient.get_platform_device_extension(self.device_id)
#        log.debug( 'test_single_platform   extended_platform: %s', str(extended_platform) )
#        log.debug( 'test_single_platform   power_status_roll_up: %s', str(extended_platform.computed.power_status_roll_up.value) )
#        log.debug( 'test_single_platform   comms_status_roll_up: %s', str(extended_platform.computed.communications_status_roll_up.value) )

        # STOP_MONITORING:
        cmd = AgentCommand(command=PlatformAgentEvent.STOP_MONITORING)
        retval = self._pa_client.execute_agent(cmd, timeout=TIMEOUT)
        log.debug( 'Base Platform STOP_MONITORING = %s', str(retval) )

        # GO_INACTIVE
        cmd = AgentCommand(command=PlatformAgentEvent.GO_INACTIVE)
        retval = self._pa_client.execute_agent(cmd, timeout=TIMEOUT)
        log.debug( 'Base Platform GO_INACTIVE = %s', str(retval) )

        # RESET: Resets the base platform agent, which includes termination of
        # its sub-platforms processes:
        cmd = AgentCommand(command=PlatformAgentEvent.RESET)
        retval = self._pa_client.execute_agent(cmd, timeout=TIMEOUT)
        log.debug( 'Base Platform RESET = %s', str(retval) )



        #-------------------------------
        # Stop Base Platform AgentInstance
        #-------------------------------
        self.imsclient.stop_platform_agent_instance(platform_agent_instance_id=agent_instance_id)