Example #1
    def test_serialize_compatability(self):
        ph = ParameterHelper(self.dataset_management, self.addCleanup)
        pdict_id = ph.create_extended_parsed()

        stream_def_id = self.pubsub_management.create_stream_definition('ctd extended', parameter_dictionary_id=pdict_id)
        self.addCleanup(self.pubsub_management.delete_stream_definition, stream_def_id)

        stream_id, route = self.pubsub_management.create_stream('ctd1', 'xp1', stream_definition_id=stream_def_id)
        self.addCleanup(self.pubsub_management.delete_stream, stream_id)

        sub_id = self.pubsub_management.create_subscription('sub1', stream_ids=[stream_id])
        self.addCleanup(self.pubsub_management.delete_subscription, sub_id)
        self.pubsub_management.activate_subscription(sub_id)
        self.addCleanup(self.pubsub_management.deactivate_subscription, sub_id)

        verified = Event()
        def verifier(msg, route, stream_id):
            for k,v in msg.record_dictionary.iteritems():
                if v is not None:
                    self.assertIsInstance(v, np.ndarray)
            rdt = RecordDictionaryTool.load_from_granule(msg)
            for k,v in rdt.iteritems():
                self.assertIsInstance(rdt[k], np.ndarray)
                self.assertIsInstance(v, np.ndarray)
            verified.set()

        subscriber = StandaloneStreamSubscriber('sub1', callback=verifier)
        subscriber.start()
        self.addCleanup(subscriber.stop)

        publisher = StandaloneStreamPublisher(stream_id,route)
        rdt = RecordDictionaryTool(stream_definition_id=stream_def_id)
        ph.fill_rdt(rdt,10)
        publisher.publish(rdt.to_granule())
        self.assertTrue(verified.wait(60))
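
The check in the verifier above, isolated: every non-None value in a record dictionary that has crossed the wire should arrive as a NumPy array. A minimal stand-in using plain NumPy (RecordDictionaryTool itself needs a running ION container, so a plain dict fills in here):

    import numpy as np

    # Simulated record dictionary after a granule round trip; None marks an
    # unset parameter, everything else must be an ndarray.
    record_dictionary = {'temp': np.array([10.0]), 'pressure': None}
    for k, v in record_dictionary.items():
        if v is not None:
            assert isinstance(v, np.ndarray)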
Example #2
    def test_granule_publish(self):
        log.debug("test_granule_publish ")
        self.loggerpids = []


        #retrieve the param dict from the repository
        pdict_id = self.dataset_management.read_parameter_dictionary_by_name('ctd_parsed_param_dict',id_only=True)
        stream_definition_id = self.pubsubclient.create_stream_definition('parsed stream', parameter_dictionary_id=pdict_id)


        tdom, sdom = time_series_domain()

        dp_obj = IonObject(RT.DataProduct,
            name=str(uuid.uuid4()),
            description='ctd stream test',
            temporal_domain = tdom.dump(),
            spatial_domain = sdom.dump())

        data_product_id1 = self.dpclient.create_data_product(data_product=dp_obj, stream_definition_id=stream_definition_id)


        # Retrieve the id of the output stream of the out Data Product
        stream_ids, _ = self.rrclient.find_objects(data_product_id1, PRED.hasStream, None, True)
        log.debug( 'test_granule_publish: Data product streams1 = %s', stream_ids)

        pid = self.create_logger('ctd_parsed', stream_ids[0] )
        self.loggerpids.append(pid)

        rdt = RecordDictionaryTool(stream_definition_id=stream_definition_id)

        #create the publisher from the stream route
        stream_route = self.pubsubclient.read_stream_route(stream_ids[0])
        publisher = StandaloneStreamPublisher(stream_ids[0], stream_route)

        # this is one sample from the ctd driver
        tomato = {"driver_timestamp": 3555971105.1268806, "instrument_id": "ABC-123", "pkt_format_id": "JSON_Data", "pkt_version": 1, "preferred_timestamp": "driver_timestamp", "quality_flag": "ok", "stream_name": "parsed", "values": [{"value": 22.9304, "value_id": "temp"}, {"value": 51.57381, "value_id": "conductivity"}, {"value": 915.551, "value_id": "pressure"}]}

        for value in tomato['values']:
            log.debug("test_granule_publish: Looping tomato values  key: %s  val: %s", value['value_id'], value['value'])

            if value['value_id'] in rdt:
                rdt[value['value_id']] = numpy.array([value['value']])
                log.debug("test_granule_publish: Added data item %s  val: %s", value['value_id'], value['value'])

        g = rdt.to_granule()

        publisher.publish(g)

        gevent.sleep(3)

        for pid in self.loggerpids:
            self.processdispatchclient.cancel_process(pid)
  
        #--------------------------------------------------------------------------------
        # Cleanup data products
        #--------------------------------------------------------------------------------
        dp_ids, _ = self.rrclient.find_resources(restype=RT.DataProduct, id_only=True)

        for dp_id in dp_ids:
            self.dataproductclient.delete_data_product(dp_id)
Example #3
    def test_data_product_subscription(self):
        pdict_id = self.dataset_management.read_parameter_dictionary_by_name('ctd_parsed_param_dict', id_only=True)
        stream_def_id = self.pubsub_management.create_stream_definition('ctd parsed', parameter_dictionary_id=pdict_id)
        self.addCleanup(self.pubsub_management.delete_stream_definition, stream_def_id)

        tdom, sdom = time_series_domain()
        dp = DataProduct(name='ctd parsed')
        dp.spatial_domain = sdom.dump()
        dp.temporal_domain = tdom.dump()

        data_product_id = self.data_product_management.create_data_product(data_product=dp, stream_definition_id=stream_def_id)
        self.addCleanup(self.data_product_management.delete_data_product, data_product_id)

        subscription_id = self.pubsub_management.create_subscription('validator', data_product_ids=[data_product_id])
        self.addCleanup(self.pubsub_management.delete_subscription, subscription_id)

        validated = Event()
        def validation(msg, route, stream_id):
            validated.set()

        stream_ids, _ = self.resource_registry.find_objects(subject=data_product_id, predicate=PRED.hasStream, id_only=True)
        dp_stream_id = stream_ids.pop()

        validator = StandaloneStreamSubscriber('validator', callback=validation)
        validator.start()
        self.addCleanup(validator.stop)

        self.pubsub_management.activate_subscription(subscription_id)
        self.addCleanup(self.pubsub_management.deactivate_subscription, subscription_id)

        route = self.pubsub_management.read_stream_route(dp_stream_id)

        publisher = StandaloneStreamPublisher(dp_stream_id, route)
        publisher.publish('hi')
        self.assertTrue(validated.wait(10))
Example #4
    def publish_to_data_product(self, data_product_id):
        stream_ids, _ = self.resource_registry.find_objects(subject=data_product_id, predicate=PRED.hasStream, id_only=True)
        self.assertTrue(len(stream_ids))
        stream_id = stream_ids.pop()
        route = self.pubsub_management.read_stream_route(stream_id)
        stream_definition = self.pubsub_management.read_stream_definition(stream_id=stream_id)
        stream_def_id = stream_definition._id
        publisher = StandaloneStreamPublisher(stream_id, route)
        rdt = RecordDictionaryTool(stream_definition_id=stream_def_id)
        now = time.time()
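        # 2208988800 is the offset in seconds between the NTP epoch (1900-01-01)
        # and the Unix epoch (1970-01-01); leap seconds are ignored, hence "loose"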
        ntp_now = now + 2208988800 # Do not use in production, this is a loose translation

        rdt['internal_timestamp'] = [ntp_now]
        rdt['temp'] = [300000]
        rdt['preferred_timestamp'] = ['driver_timestamp']
        rdt['time'] = [ntp_now]
        rdt['port_timestamp'] = [ntp_now]
        rdt['quality_flag'] = [None]
        rdt['lat'] = [45]
        rdt['conductivity'] = [4341400]
        rdt['driver_timestamp'] = [ntp_now]
        rdt['lon'] = [-71]
        rdt['pressure'] = [256.8]

        granule = rdt.to_granule()
        publisher.publish(granule)
Example #5
    def test_execute_advanced_transform(self):
        # Runs a transform across L0-L2 with stream definitions including available fields
        streams = self.setup_advanced_transform()
        in_stream_id, in_stream_def_id = streams[0]
        out_stream_id, out_stream_defs_id = streams[1]

        validation_event = Event()
        def validator(msg, route, stream_id):
            rdt = RecordDictionaryTool.load_from_granule(msg)
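            # 'rho' is assumed here to be the transform's computed seawater
            # density output, derived from the L0 inputs published below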
            if not np.allclose(rdt['rho'], np.array([1001.0055034])):
                return
            validation_event.set()

        self.setup_validator(validator)

        in_route = self.pubsub_management.read_stream_route(in_stream_id)
        publisher = StandaloneStreamPublisher(in_stream_id, in_route)

        outbound_rdt = RecordDictionaryTool(stream_definition_id=in_stream_def_id)
        outbound_rdt['time'] = [0]
        outbound_rdt['TEMPWAT_L0'] = [280000]
        outbound_rdt['CONDWAT_L0'] = [100000]
        outbound_rdt['PRESWAT_L0'] = [2789]

        outbound_rdt['lat'] = [45]
        outbound_rdt['lon'] = [-71]

        outbound_granule = outbound_rdt.to_granule()

        publisher.publish(outbound_granule)

        self.assertTrue(validation_event.wait(2))
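
Why the validator uses np.allclose rather than exact equality: 'rho' is a floating-point product of the transform, so the comparison must tolerate rounding error. Illustrated with plain NumPy:

    import numpy as np

    computed = np.array([1001.00550340001])   # e.g. a transform output
    expected = np.array([1001.0055034])
    assert np.allclose(computed, expected)         # tolerant comparison passes
    assert not np.array_equal(computed, expected)  # bitwise equality does not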
Example #6
    def test_qc_events(self):
        ph = ParameterHelper(self.dataset_management, self.addCleanup)
        pdict_id = ph.create_qc_pdict()
        stream_def_id = self.pubsub_management.create_stream_definition('qc stream def', parameter_dictionary_id=pdict_id)
        self.addCleanup(self.pubsub_management.delete_stream_definition, stream_def_id)

        stream_id, route = self.pubsub_management.create_stream('qc stream', exchange_point=self.exchange_point_name, stream_definition_id=stream_def_id)
        self.addCleanup(self.pubsub_management.delete_stream, stream_id)

        ingestion_config_id = self.get_ingestion_config()
        dataset_id = self.create_dataset(pdict_id)
        config = DotDict()

        self.ingestion_management.persist_data_stream(stream_id=stream_id, ingestion_configuration_id=ingestion_config_id, dataset_id=dataset_id, config=config)
        self.addCleanup(self.ingestion_management.unpersist_data_stream, stream_id, ingestion_config_id)

        publisher = StandaloneStreamPublisher(stream_id, route)
        rdt = RecordDictionaryTool(stream_definition_id=stream_def_id)
        rdt['time'] = np.arange(10)
        rdt['temp'] = np.arange(10) * 3

        verified = Event()
        def verification(event, *args, **kwargs):
            self.assertEquals(event.qc_parameter, 'temp_qc')
            self.assertEquals(event.temporal_value, 7)
            verified.set()

        es = EventSubscriber(event_type=OT.ParameterQCEvent, origin=dataset_id, callback=verification, auto_delete=True)
        es.start()
        self.addCleanup(es.stop)

        publisher.publish(rdt.to_granule())
        self.assertTrue(verified.wait(10))
Example #7
    def _publish_to_transform(self, stream_id = '', stream_route = None):

        pub = StandaloneStreamPublisher(stream_id, stream_route)
        publish_granule = self._get_new_ctd_L0_packet(stream_definition_id=self.in_stream_def_id_for_L0, length = 5)
        pub.publish(publish_granule)

        log.debug("Published the following granule: %s", publish_granule)
Example #8
    def test_execute_advanced_transform(self):
        # Runs a transform across L0-L2 with stream definitions including available fields
        streams = self.setup_advanced_transform()
        in_stream_id, in_stream_def_id = streams[0]
        out_stream_id, out_stream_defs_id = streams[1]

        validation_event = Event()

        def validator(msg, route, stream_id):
            rdt = RecordDictionaryTool.load_from_granule(msg)
            if not np.allclose(rdt['rho'], np.array([1001.0055034])):
                return
            validation_event.set()

        self.setup_validator(validator)

        in_route = self.pubsub_management.read_stream_route(in_stream_id)
        publisher = StandaloneStreamPublisher(in_stream_id, in_route)

        outbound_rdt = RecordDictionaryTool(
            stream_definition_id=in_stream_def_id)
        outbound_rdt['time'] = [0]
        outbound_rdt['TEMPWAT_L0'] = [280000]
        outbound_rdt['CONDWAT_L0'] = [100000]
        outbound_rdt['PRESWAT_L0'] = [2789]

        outbound_rdt['lat'] = [45]
        outbound_rdt['lon'] = [-71]

        outbound_granule = outbound_rdt.to_granule()

        publisher.publish(outbound_granule)

        self.assertTrue(validation_event.wait(2))
Example #9
    def test_derived_data_product(self):
        pdict_id = self.dataset_management.read_parameter_dictionary_by_name('ctd_parsed_param_dict', id_only=True)
        ctd_stream_def_id = self.pubsubcli.create_stream_definition(name='ctd parsed', parameter_dictionary_id=pdict_id)
        self.addCleanup(self.pubsubcli.delete_stream_definition, ctd_stream_def_id)

        tdom, sdom = time_series_domain()

        dp = DataProduct(name='Instrument DP', temporal_domain=tdom.dump(), spatial_domain=sdom.dump())
        dp_id = self.dpsc_cli.create_data_product(dp, stream_definition_id=ctd_stream_def_id)
        self.addCleanup(self.dpsc_cli.force_delete_data_product, dp_id)

        self.dpsc_cli.activate_data_product_persistence(dp_id)
        self.addCleanup(self.dpsc_cli.suspend_data_product_persistence, dp_id)


        dataset_ids, _ = self.rrclient.find_objects(subject=dp_id, predicate=PRED.hasDataset, id_only=True)
        if not dataset_ids:
            raise NotFound("Data Product %s dataset  does not exist" % str(dp_id))
        dataset_id = dataset_ids[0]
        
        # Make the derived data product
        simple_stream_def_id = self.pubsubcli.create_stream_definition(name='TEMPWAT stream def', parameter_dictionary_id=pdict_id, available_fields=['time','temp'])
        tempwat_dp = DataProduct(name='TEMPWAT')
        tempwat_dp_id = self.dpsc_cli.create_data_product(tempwat_dp, stream_definition_id=simple_stream_def_id, parent_data_product_id=dp_id)
        self.addCleanup(self.dpsc_cli.delete_data_product, tempwat_dp_id)
        self.dpsc_cli.activate_data_product_persistence(tempwat_dp_id)
        self.addCleanup(self.dpsc_cli.suspend_data_product_persistence, tempwat_dp_id)
        # Check that the streams associated with the data product are persisted with
        stream_ids, _ =  self.rrclient.find_objects(dp_id,PRED.hasStream,RT.Stream,True)
        for stream_id in stream_ids:
            self.assertTrue(self.ingestclient.is_persisted(stream_id))

        stream_id = stream_ids[0]
        route = self.pubsubcli.read_stream_route(stream_id=stream_id)

        rdt = RecordDictionaryTool(stream_definition_id=ctd_stream_def_id)
        rdt['time'] = np.arange(20)
        rdt['temp'] = np.arange(20)
        rdt['pressure'] = np.arange(20)

        publisher = StandaloneStreamPublisher(stream_id,route)
        
        dataset_modified = Event()
        def cb(*args, **kwargs):
            dataset_modified.set()
        es = EventSubscriber(event_type=OT.DatasetModified, callback=cb, origin=dataset_id, auto_delete=True)
        es.start()
        self.addCleanup(es.stop)

        publisher.publish(rdt.to_granule())

        self.assertTrue(dataset_modified.wait(30))

        tempwat_dataset_ids, _ = self.rrclient.find_objects(tempwat_dp_id, PRED.hasDataset, id_only=True)
        tempwat_dataset_id = tempwat_dataset_ids[0]
        granule = self.data_retriever.retrieve(tempwat_dataset_id, delivery_format=simple_stream_def_id)
        rdt = RecordDictionaryTool.load_from_granule(granule)
        np.testing.assert_array_equal(rdt['time'], np.arange(20))
        self.assertEquals(set(rdt.fields), set(['time','temp']))
Example #10
    def publish_rdt_to_data_product(cls, data_product_id, rdt, connection_id='', connection_index=''):
        resource_registry = Container.instance.resource_registry
        pubsub_management = PubsubManagementServiceClient()
        stream_ids, _ = resource_registry.find_objects(data_product_id, 'hasStream', id_only=True)
        stream_id = stream_ids[0]
        route = pubsub_management.read_stream_route(stream_id)
        publisher = StandaloneStreamPublisher(stream_id, route)
        publisher.publish(rdt.to_granule(connection_id=connection_id, connection_index=connection_index))
Example #11
    def publish_and_wait(self, dataset_id, granule):
        stream_ids, _ = self.resource_registry.find_objects(dataset_id, PRED.hasStream, id_only=True)
        stream_id = stream_ids[0]
        route = self.pubsub_management.read_stream_route(stream_id)
        publisher = StandaloneStreamPublisher(stream_id, route)
        dataset_monitor = DatasetMonitor(dataset_id)
        publisher.publish(granule)
        self.assertTrue(dataset_monitor.event.wait(10))
Example #12
    def test_granule(self):
        
        pdict_id = self.dataset_management.read_parameter_dictionary_by_name('ctd_parsed_param_dict', id_only=True)
        stream_def_id = self.pubsub_management.create_stream_definition('ctd', parameter_dictionary_id=pdict_id, stream_configuration={'reference_designator':"GA03FLMA-RI001-13-CTDMOG999"})
        pdict = DatasetManagementService.get_parameter_dictionary_by_name('ctd_parsed_param_dict')
        self.addCleanup(self.pubsub_management.delete_stream_definition,stream_def_id)

        stream_id, route = self.pubsub_management.create_stream('ctd_stream', 'xp1', stream_definition_id=stream_def_id)
        self.addCleanup(self.pubsub_management.delete_stream,stream_id)
        publisher = StandaloneStreamPublisher(stream_id, route)

        subscriber = StandaloneStreamSubscriber('sub', self.verify_incoming)
        subscriber.start()
        self.addCleanup(subscriber.stop)

        subscription_id = self.pubsub_management.create_subscription('sub', stream_ids=[stream_id])
        self.pubsub_management.activate_subscription(subscription_id)


        rdt = RecordDictionaryTool(stream_definition_id=stream_def_id)
        rdt['time'] = np.arange(10)
        rdt['temp'] = np.random.randn(10) * 10 + 30
        rdt['pressure'] = [20] * 10

        self.assertEquals(set(pdict.keys()), set(rdt.fields))
        self.assertEquals(pdict.temporal_parameter_name, rdt.temporal_parameter)

        self.assertEquals(rdt._stream_config['reference_designator'],"GA03FLMA-RI001-13-CTDMOG999")

        self.rdt = rdt
        self.data_producer_id = 'data_producer'
        self.provider_metadata_update = {1:1}

        publisher.publish(rdt.to_granule(data_producer_id='data_producer', provider_metadata_update={1:1}))

        self.assertTrue(self.event.wait(10))
        
        self.pubsub_management.deactivate_subscription(subscription_id)
        self.pubsub_management.delete_subscription(subscription_id)
        
        rdt = RecordDictionaryTool(stream_definition_id=stream_def_id)
        rdt['time'] = np.array([None,None,None])
        self.assertTrue(rdt['time'] is None)
        
        rdt['time'] = np.array([None, 1, 2])
        self.assertEquals(rdt['time'][0], rdt.fill_value('time'))


        stream_def_obj = self.pubsub_management.read_stream_definition(stream_def_id)
        rdt = RecordDictionaryTool(stream_definition=stream_def_obj)
        rdt['time'] = np.arange(20)
        rdt['temp'] = np.arange(20)


        granule = rdt.to_granule()
        rdt = RecordDictionaryTool.load_from_granule(granule)
        np.testing.assert_array_equal(rdt['time'], np.arange(20))
        np.testing.assert_array_equal(rdt['temp'], np.arange(20))
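
The None-handling asserted in the middle of this test, restated: an all-None assignment leaves the parameter unset, while a partially-None array stores the parameter's fill value in the None slots. A NumPy-only analogue, with the fill value assumed to be -9999 for illustration:

    import numpy as np

    fill = -9999  # assumed fill value; the real one comes from the parameter context
    raw = [None, 1, 2]
    filled = np.array([fill if x is None else x for x in raw])
    assert filled[0] == fill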
Example #13
    def publish_hifi(self,stream_id,stream_route,offset=0):
        pub = StandaloneStreamPublisher(stream_id, stream_route)

        stream_def = self.pubsub_management.read_stream_definition(stream_id=stream_id)
        stream_def_id = stream_def._id
        rdt = RecordDictionaryTool(stream_definition_id=stream_def_id)
        rdt['time'] = np.arange(10) + (offset * 10)
        rdt['temp'] = np.arange(10) + (offset * 10)
        pub.publish(rdt.to_granule())
Example #14
    def test_granule_publish(self):
        log.debug("test_granule_publish ")
        self.loggerpids = []


        #retrieve the param dict from the repository
        pdict_id = self.dataset_management.read_parameter_dictionary_by_name('ctd_parsed_param_dict',id_only=True)
        stream_definition_id = self.pubsubclient.create_stream_definition('parsed stream', parameter_dictionary_id=pdict_id)



        dp_obj = IonObject(RT.DataProduct,
            name=str(uuid.uuid4()),
            description='ctd stream test')

        data_product_id1 = self.dpclient.create_data_product(data_product=dp_obj, stream_definition_id=stream_definition_id)


        # Retrieve the id of the output stream of the out Data Product
        stream_ids, _ = self.rrclient.find_objects(data_product_id1, PRED.hasStream, None, True)
        log.debug( 'test_granule_publish: Data product streams1 = %s', stream_ids)

        pid = self.create_logger('ctd_parsed', stream_ids[0] )
        self.loggerpids.append(pid)

        rdt = RecordDictionaryTool(stream_definition_id=stream_definition_id)

        #create the publisher from the stream route
        stream_route = self.pubsubclient.read_stream_route(stream_ids[0])
        publisher = StandaloneStreamPublisher(stream_ids[0], stream_route)

        # this is one sample from the ctd driver
        tomato = {"driver_timestamp": 3555971105.1268806, "instrument_id": "ABC-123", "pkt_format_id": "JSON_Data", "pkt_version": 1, "preferred_timestamp": "driver_timestamp", "quality_flag": "ok", "stream_name": "parsed", "values": [{"value": 22.9304, "value_id": "temp"}, {"value": 51.57381, "value_id": "conductivity"}, {"value": 915.551, "value_id": "pressure"}]}

        for value in tomato['values']:
            log.debug("test_granule_publish: Looping tomato values  key: %s  val: %s", value['value_id'], value['value'])

            if value['value_id'] in rdt:
                rdt[value['value_id']] = numpy.array([value['value']])
                log.debug("test_granule_publish: Added data item %s  val: %s", value['value_id'], value['value'])

        g = rdt.to_granule()

        publisher.publish(g)

        gevent.sleep(3)

        for pid in self.loggerpids:
            self.processdispatchclient.cancel_process(pid)
  
        #--------------------------------------------------------------------------------
        # Cleanup data products
        #--------------------------------------------------------------------------------
        dp_ids, _ = self.rrclient.find_resources(restype=RT.DataProduct, id_only=True)

        for dp_id in dp_ids:
            self.dataproductclient.delete_data_product(dp_id)
Example #15
    def publish_and_wait(self, dataset_id, granule):
        stream_ids, _ = self.resource_registry.find_objects(dataset_id, PRED.hasStream, id_only=True)
        stream_id = stream_ids[0]
        route = self.pubsub_management.read_stream_route(stream_id)
        publisher = StandaloneStreamPublisher(stream_id, route)
        dataset_monitor = DatasetMonitor(dataset_id)
        self.addCleanup(dataset_monitor.stop)
        publisher.publish(granule)
        self.assertTrue(dataset_monitor.wait())
Example #16
    def test_coverage_transform(self):
        ph = ParameterHelper(self.dataset_management, self.addCleanup)
        pdict_id = ph.create_parsed()
        stream_def_id = self.pubsub_management.create_stream_definition(
            'ctd parsed', parameter_dictionary_id=pdict_id)
        self.addCleanup(self.pubsub_management.delete_stream_definition,
                        stream_def_id)

        stream_id, route = self.pubsub_management.create_stream(
            'example',
            exchange_point=self.exchange_point_name,
            stream_definition_id=stream_def_id)
        self.addCleanup(self.pubsub_management.delete_stream, stream_id)

        ingestion_config_id = self.get_ingestion_config()
        dataset_id = self.create_dataset(pdict_id)

        self.ingestion_management.persist_data_stream(
            stream_id=stream_id,
            ingestion_configuration_id=ingestion_config_id,
            dataset_id=dataset_id)
        self.addCleanup(self.ingestion_management.unpersist_data_stream,
                        stream_id, ingestion_config_id)
        publisher = StandaloneStreamPublisher(stream_id, route)

        rdt = ph.get_rdt(stream_def_id)
        ph.fill_parsed_rdt(rdt)

        dataset_monitor = DatasetMonitor(dataset_id)
        self.addCleanup(dataset_monitor.stop)

        publisher.publish(rdt.to_granule())
        self.assertTrue(dataset_monitor.event.wait(30))

        replay_granule = self.data_retriever.retrieve(dataset_id)
        rdt_out = RecordDictionaryTool.load_from_granule(replay_granule)

        np.testing.assert_array_almost_equal(rdt_out['time'], rdt['time'])
        np.testing.assert_array_almost_equal(rdt_out['temp'], rdt['temp'])

        np.testing.assert_array_almost_equal(rdt_out['conductivity_L1'],
                                             np.array([42.914]))
        np.testing.assert_array_almost_equal(rdt_out['temp_L1'],
                                             np.array([20.]))
        np.testing.assert_array_almost_equal(rdt_out['pressure_L1'],
                                             np.array([3.068]))
        np.testing.assert_array_almost_equal(
            rdt_out['density'], np.array([1021.7144739593881],
                                         dtype='float32'))
        np.testing.assert_array_almost_equal(
            rdt_out['salinity'], np.array([30.935132729668283],
                                          dtype='float32'))
Example #17
    def _publish_granules(self, stream_id=None, stream_route=None, values=None, number=None, length=None):

        pub = StandaloneStreamPublisher(stream_id, stream_route)

        stream_def = self.pubsub_management.read_stream_definition(stream_id=stream_id)
        stream_def_id = stream_def._id
        rdt = RecordDictionaryTool(stream_definition_id=stream_def_id)

        for i in xrange(number):
            rdt['input_voltage'] = values
            rdt['preferred_timestamp'] = numpy.array([random.uniform(0,1000)  for l in xrange(length)])
            g = rdt.to_granule()
            pub.publish(g)
Example #18
    def publish_hifi(self,stream_id,stream_route,offset=0):
        '''
        Publish deterministic data
        '''

        pub = StandaloneStreamPublisher(stream_id, stream_route)

        stream_def = self.pubsub_management.read_stream_definition(stream_id=stream_id)
        stream_def_id = stream_def._id
        rdt = RecordDictionaryTool(stream_definition_id=stream_def_id)
        rdt['time'] = np.arange(10) + (offset * 10)
        rdt['temp'] = np.arange(10) + (offset * 10)
        pub.publish(rdt.to_granule())
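
The offset arithmetic above makes repeated calls produce one contiguous, deterministic series: offset 0 publishes time/temp 0..9, offset 1 publishes 10..19, and so on, which is what lets replay tests assert against np.arange(N). Checked in isolation:

    import numpy as np

    chunks = [np.arange(10) + offset * 10 for offset in range(3)]
    assert np.array_equal(np.concatenate(chunks), np.arange(30))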
Example #19
    def test_move_activated_subscription(self):

        stream_id, route = self.pubsub_management.create_stream(
            name='test_stream', exchange_point='test_xp')
        #--------------------------------------------------------------------------------
        # Test moving after activate
        #--------------------------------------------------------------------------------

        subscription_id = self.pubsub_management.create_subscription(
            'first_queue', stream_ids=[stream_id])
        self.pubsub_management.activate_subscription(subscription_id)

        xn_ids, _ = self.resource_registry.find_resources(
            restype=RT.ExchangeName, name='first_queue', id_only=True)
        subjects, _ = self.resource_registry.find_subjects(
            object=subscription_id,
            predicate=PRED.hasSubscription,
            id_only=True)
        self.assertEquals(xn_ids[0], subjects[0])

        self.verified = Event()

        def verify(m, r, s):
            self.assertEquals(m, 'verified')
            self.verified.set()

        subscriber = StandaloneStreamSubscriber('second_queue', verify)
        subscriber.start()

        self.pubsub_management.move_subscription(subscription_id,
                                                 exchange_name='second_queue')

        xn_ids, _ = self.resource_registry.find_resources(
            restype=RT.ExchangeName, name='second_queue', id_only=True)
        subjects, _ = self.resource_registry.find_subjects(
            object=subscription_id,
            predicate=PRED.hasSubscription,
            id_only=True)

        self.assertEquals(len(subjects), 1)
        self.assertEquals(subjects[0], xn_ids[0])

        publisher = StandaloneStreamPublisher(stream_id, route)
        publisher.publish('verified')

        self.assertTrue(self.verified.wait(2))

        self.pubsub_management.deactivate_subscription(subscription_id)

        self.pubsub_management.delete_subscription(subscription_id)
        self.pubsub_management.delete_stream(stream_id)
Example #20
    def write_to_data_product(self, data_product_id):

        dataset_ids, _ = self.resource_registry.find_objects(data_product_id,
                                                             'hasDataset',
                                                             id_only=True)
        dataset_id = dataset_ids.pop()

        stream_ids, _ = self.resource_registry.find_objects(data_product_id,
                                                            'hasStream',
                                                            id_only=True)
        stream_id = stream_ids.pop()
        stream_def_ids, _ = self.resource_registry.find_objects(
            stream_id, 'hasStreamDefinition', id_only=True)
        stream_def_id = stream_def_ids.pop()

        route = self.pubsub_management.read_stream_route(stream_id)

        rdt = RecordDictionaryTool(stream_definition_id=stream_def_id)

        time_param = rdt._pdict.temporal_parameter_name
        if time_param is None:
            print '%s has no temporal parameter' % self.resource_registry.read(
                data_product_id).name
            return
        rdt[time_param] = np.arange(40)

        for field in rdt.fields:
            if field == rdt._pdict.temporal_parameter_name:
                continue
            rdt[field] = self.fill_values(
                rdt._pdict.get_context(field).param_type, 40)

        publisher = StandaloneStreamPublisher(stream_id, route)
        publisher.publish(rdt.to_granule())

        self.wait_until_we_have_enough_granules(dataset_id, 40)

        granule = self.data_retriever.retrieve(dataset_id)
        rdt_out = RecordDictionaryTool.load_from_granule(granule)

        bad = []

        for field in rdt.fields:
            if not np.array_equal(rdt[field], rdt_out[field]):
                print '%s' % field
                print '%s != %s' % (rdt[field], rdt_out[field])
                bad.append(field)

        return bad
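
The round-trip comparison at the end of write_to_data_product, reduced to its core: publish known arrays, retrieve them, and collect any field whose array no longer matches. A standalone analogue with plain dicts standing in for the record dictionaries:

    import numpy as np

    published = {'temp': np.arange(40), 'lat': np.arange(40)}
    retrieved = {'temp': np.arange(40), 'lat': np.zeros(40)}  # 'lat' corrupted
    bad = [f for f in published if not np.array_equal(published[f], retrieved[f])]
    assert bad == ['lat']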
Example #21
    def test_demux(self):
        self.stream0, self.route0 = self.pubsub_client.create_stream(
            'stream0', exchange_point='test')
        self.stream1, self.route1 = self.pubsub_client.create_stream(
            'stream1', exchange_point='main_data')
        self.stream2, self.route2 = self.pubsub_client.create_stream(
            'stream2', exchange_point='alt_data')

        self.r_stream1 = gevent.event.Event()
        self.r_stream2 = gevent.event.Event()

        def process(msg, stream_route, stream_id):
            if stream_id == self.stream1:
                self.r_stream1.set()
            elif stream_id == self.stream2:
                self.r_stream2.set()

        self.container.spawn_process(
            'demuxer', 'ion.processes.data.transforms.mux', 'DemuxTransform',
            {'process': {
                'out_streams': [self.stream1, self.stream2]
            }}, 'demuxer_pid')
        self.queue_cleanup.append('demuxer_pid')

        sub1 = StandaloneStreamSubscriber('sub1', process)
        sub2 = StandaloneStreamSubscriber('sub2', process)
        sub1.xn.bind(self.route1.routing_key,
                     self.container.ex_manager.create_xp('main_data'))
        sub2.xn.bind(self.route2.routing_key,
                     self.container.ex_manager.create_xp('alt_data'))
        sub1.start()
        sub2.start()

        self.queue_cleanup.append(sub1.xn)
        self.queue_cleanup.append(sub2.xn)
        xn = self.container.ex_manager.create_xn_queue('demuxer_pid')
        xn.bind(
            self.route0.routing_key,
            self.container.ex_manager.create_xp(self.route0.exchange_point))
        domino = StandaloneStreamPublisher(self.stream0, self.route0)
        domino.publish('test')

        self.assertTrue(self.r_stream1.wait(2))
        self.assertTrue(self.r_stream2.wait(2))

        self.container.proc_manager.terminate_process('demuxer_pid')

        sub1.stop()
        sub2.stop()
Example #22
    def set_configuration(self, config):
        """
        Expect configuration to have:
        - parser module/class
        - directory, wildcard to find data files
        - optional timestamp of last granule
        - optional poll rate
        - publish info
        """
        log.warn("DRIVER: set_configuration")
        log.error("Log level: %s", log.getEffectiveLevel())
        log.debug('using configuration: %s', config)
        self.config = config
        self.max_records = get_safe(config, 'max_records', 100)
        self.stream_config = self.CFG.get('stream_config', {})
        # Both branches of the original if/elif were identical, so take the
        # first (or only) configured stream in every case.
        stream_cfg = self.stream_config.values()[0]

        stream_id = stream_cfg['stream_id']
        stream_route = IonObject(OT.StreamRoute,
                                 routing_key=stream_cfg['routing_key'],
                                 exchange_point=stream_cfg['exchange_point'])
        param_dict = stream_cfg['stream_def_dict']['parameter_dictionary']
        self.publisher = StandaloneStreamPublisher(stream_id=stream_id,
                                                   stream_route=stream_route)
        self.parameter_dictionary = ParameterDictionary.load(param_dict)
        self.time_field = self.parameter_dictionary.get_temporal_context()
        self.latest_granule_time = get_safe(config, 'last_time', 0)
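
A hypothetical configuration shape matching the docstring and the lookups above (the key names under the stream entry follow the code; every value is illustrative only):

    config = {
        'max_records': 50,   # read via get_safe(config, 'max_records', 100)
        'last_time': 0,      # optional timestamp of the last granule
    }
    stream_config = {
        'parsed': {
            'stream_id': 'abc123',
            'routing_key': 'stream.parsed',
            'exchange_point': 'science_data',
            'stream_def_dict': {'parameter_dictionary': {}},  # serialized ParameterDictionary
        }
    }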
Example #23
    def test_transform_prime_no_available_fields(self):
        available_fields_in = []
        available_fields_out = []
        exchange_pt1 = 'xp1'
        exchange_pt2 = 'xp2'
        stream_id_in, stream_id_out, stream_route_in, stream_route_out, stream_def_in_id, stream_def_out_id = \
            self._setup_streams(exchange_pt1, exchange_pt2, available_fields_in, available_fields_out)
        
        #launch transform
        config = {'process': {'routes': {(stream_id_in, stream_id_out): None},
                              'queue_name': exchange_pt1,
                              'publish_streams': {str(stream_id_out): stream_id_out},
                              'process_type': 'stream_process'}}
        pid = self.container.spawn_process('transform_stream','ion.processes.data.transforms.transform_prime','TransformPrime',config)
        
        #create publish
        publisher = StandaloneStreamPublisher(stream_id_in, stream_route_in)
        self.container.proc_manager.procs[pid].subscriber.xn.bind(stream_route_in.routing_key, publisher.xp)

        #data
        rdt_in = RecordDictionaryTool(stream_definition_id=stream_def_in_id)
        dt = 20
        rdt_in['time'] = np.arange(dt)
        rdt_in['lat'] = [40.992469] * dt
        rdt_in['lon'] = [-71.727069] * dt
        rdt_in['TEMPWAT_L0'] = self._get_param_vals('TEMPWAT_L0', slice(None), (dt,))
        rdt_in['CONDWAT_L0'] = self._get_param_vals('CONDWAT_L0', slice(None), (dt,))
        rdt_in['PRESWAT_L0'] = self._get_param_vals('PRESWAT_L0', slice(None), (dt,))
        msg = rdt_in.to_granule()
        #publish granule to transform and have transform publish it to subscriber
        
        #validate transformed data
        e = gevent.event.Event()
        def cb(msg, sr, sid):
            self.assertEqual(sid, stream_id_out)
            rdt_out = RecordDictionaryTool.load_from_granule(msg)
            self.assertEquals(set([k for k,v in rdt_out.iteritems()]), set(available_fields_out))
            for k,v in rdt_out.iteritems():
                self.assertEquals(rdt_out[k], None)
            e.set()

        sub = StandaloneStreamSubscriber('stream_subscriber', cb)
        sub.xn.bind(stream_route_out.routing_key, getattr(self.container.proc_manager.procs[pid], stream_id_out).xp)
        self.addCleanup(sub.stop)
        sub.start()
        
        #publish msg to transform
        publisher.publish(msg)
        
        #wait to receive msg
        self.assertTrue(e.wait(4))
Example #24
    def test_retrieve_and_transform(self):

        # Stream definition for the CTD data
        pdict_id             = self.dataset_management.read_parameter_dictionary_by_name('ctd_parsed_param_dict', id_only=True)
        stream_def_id        = self.pubsub_management.create_stream_definition('ctd data', parameter_dictionary_id=pdict_id)
        ctd_stream_id, route = self.pubsub_management.create_stream('ctd stream', 'xp1', stream_definition_id=stream_def_id)


        # Stream definition for the salinity data
        salinity_pdict_id = self.dataset_management.read_parameter_dictionary_by_name('ctd_parsed_param_dict', id_only=True)
        sal_stream_def_id = self.pubsub_management.create_stream_definition('sal data', parameter_dictionary_id=salinity_pdict_id)

        ingest_config_id = self.get_ingestion_config()
        dataset_id = self.create_dataset(pdict_id)
        #--------------------------------------------------------------------------------
        # Again with this ridiculous problem
        #--------------------------------------------------------------------------------
        self.get_datastore(dataset_id)
        self.ingestion_management.persist_data_stream(stream_id=ctd_stream_id, ingestion_configuration_id=ingest_config_id, dataset_id=dataset_id)

        rdt = RecordDictionaryTool(stream_definition_id=stream_def_id)
        rdt['time'] = np.arange(10)
        rdt['temp'] = np.random.randn(10) * 10 + 30
        rdt['conductivity'] = np.random.randn(10) * 2 + 10

        publisher = StandaloneStreamPublisher(ctd_stream_id, route)
        publisher.publish(rdt.to_granule())

        rdt['time'] = np.arange(10,20)

        publisher.publish(rdt.to_granule())


        self.wait_until_we_have_enough_granules(dataset_id, 2)

        granule = self.data_retriever.retrieve(dataset_id,
                                               None,
                                               None,
                                               'ion.processes.data.transforms.ctd.ctd_L2_salinity',
                                               'CTDL2SalinityTransformAlgorithm',
                                               kwargs=dict(params=sal_stream_def_id))
        rdt = RecordDictionaryTool.load_from_granule(granule)
        for i in rdt['salinity']:
            self.assertNotEquals(i,0)
Example #25
    def write_to_data_product(self,data_product_id):

        dataset_ids, _ = self.resource_registry.find_objects(data_product_id, 'hasDataset', id_only=True)
        dataset_id = dataset_ids.pop()

        stream_ids , _ = self.resource_registry.find_objects(data_product_id, 'hasStream', id_only=True)
        stream_id = stream_ids.pop()
        stream_def_ids, _ = self.resource_registry.find_objects(stream_id, 'hasStreamDefinition', id_only=True)
        stream_def_id = stream_def_ids.pop()

        route = self.pubsub_management.read_stream_route(stream_id)

        rdt = RecordDictionaryTool(stream_definition_id=stream_def_id)

        time_param = rdt._pdict.temporal_parameter_name
        if time_param is None:
            print '%s has no temporal parameter' % self.resource_registry.read(data_product_id).name 
            return
        rdt[time_param] = np.arange(40)


        for field in rdt.fields:
            if field == rdt._pdict.temporal_parameter_name:
                continue
            rdt[field] = self.fill_values(rdt._pdict.get_context(field).param_type,40)

        publisher = StandaloneStreamPublisher(stream_id, route)
        publisher.publish(rdt.to_granule())

        self.wait_until_we_have_enough_granules(dataset_id,40)


        granule = self.data_retriever.retrieve(dataset_id)
        rdt_out = RecordDictionaryTool.load_from_granule(granule)

        bad = []

        for field in rdt.fields:
            if not np.array_equal(rdt[field], rdt_out[field]):
                print '%s' % field
                print '%s != %s' % (rdt[field], rdt_out[field])
                bad.append(field)

        return bad
Example #26
    def test_move_activated_subscription(self):

        stream_id, route = self.pubsub_management.create_stream(name="test_stream", exchange_point="test_xp")
        # --------------------------------------------------------------------------------
        # Test moving after activate
        # --------------------------------------------------------------------------------

        subscription_id = self.pubsub_management.create_subscription("first_queue", stream_ids=[stream_id])
        self.pubsub_management.activate_subscription(subscription_id)

        xn_ids, _ = self.resource_registry.find_resources(restype=RT.ExchangeName, name="first_queue", id_only=True)
        subjects, _ = self.resource_registry.find_subjects(
            object=subscription_id, predicate=PRED.hasSubscription, id_only=True
        )
        self.assertEquals(xn_ids[0], subjects[0])

        self.verified = Event()

        def verify(m, r, s):
            self.assertEquals(m, "verified")
            self.verified.set()

        subscriber = StandaloneStreamSubscriber("second_queue", verify)
        subscriber.start()

        self.pubsub_management.move_subscription(subscription_id, exchange_name="second_queue")

        xn_ids, _ = self.resource_registry.find_resources(restype=RT.ExchangeName, name="second_queue", id_only=True)
        subjects, _ = self.resource_registry.find_subjects(
            object=subscription_id, predicate=PRED.hasSubscription, id_only=True
        )

        self.assertEquals(len(subjects), 1)
        self.assertEquals(subjects[0], xn_ids[0])

        publisher = StandaloneStreamPublisher(stream_id, route)
        publisher.publish("verified")

        self.assertTrue(self.verified.wait(2))

        self.pubsub_management.deactivate_subscription(subscription_id)

        self.pubsub_management.delete_subscription(subscription_id)
        self.pubsub_management.delete_stream(stream_id)
Example #27
    def test_event_transform_worker(self):
        self.data_process_objs = []
        self._output_stream_ids = []
        self.event_verified = Event()


        # test that a data process (type: data-product-in / event-out) can be defined and launched.
        # verify that event fields are correctly populated


        self.parameter_dict_id = self.dataset_management_client.read_parameter_dictionary_by_name(name='ctd_parsed_param_dict', id_only=True)

        # create the StreamDefinition
        self.stream_def_id = self.pubsub_client.create_stream_definition(name='stream_def', parameter_dictionary_id=self.parameter_dict_id)
        self.addCleanup(self.pubsub_client.delete_stream_definition, self.stream_def_id)

        # create the DataProduct
        input_dp_obj = IonObject(  RT.DataProduct, name='input_data_product', description='input test stream',
                                             temporal_domain = self.time_dom.dump(),  spatial_domain = self.spatial_dom.dump())
        self.input_dp_id = self.dataproductclient.create_data_product(data_product=input_dp_obj,  stream_definition_id=self.stream_def_id)

        # retrieve the Stream for this data product
        stream_ids, assoc_ids = self.rrclient.find_objects(self.input_dp_id, PRED.hasStream, RT.Stream, True)
        self.stream_id = stream_ids[0]

        # create the DPD and two DPs
        self.event_data_process_id = self.create_event_data_processes()

        # retrieve subscription from data process
        subscription_objs, _ = self.rrclient.find_objects(subject=self.event_data_process_id, predicate=PRED.hasSubscription, object_type=RT.Subscription, id_only=False)
        log.debug('test_event_transform_worker subscription_obj:  %s', subscription_objs[0])

        # create a queue to catch the published granules
        self.subscription_id = self.pubsub_client.create_subscription(name='parsed_subscription', stream_ids=[self.stream_id], exchange_name=subscription_objs[0].exchange_name)
        self.addCleanup(self.pubsub_client.delete_subscription, self.subscription_id)

        self.pubsub_client.activate_subscription(self.subscription_id)
        self.addCleanup(self.pubsub_client.deactivate_subscription, self.subscription_id)

        stream_route = self.pubsub_client.read_stream_route(self.stream_id)
        self.publisher = StandaloneStreamPublisher(stream_id=self.stream_id, stream_route=stream_route )

        self.start_event_transform_listener()

        self.data_modified = Event()

        rdt = RecordDictionaryTool(stream_definition_id=self.stream_def_id)
        rdt['time']         = [0] # time should always come first
        rdt['conductivity'] = [1]
        rdt['pressure']     = [2]
        rdt['salinity']     = [8]

        self.publisher.publish(rdt.to_granule())

        self.assertTrue(self.event_verified.wait(self.wait_time))
Example #28
    def test_activation_and_deactivation(self):
        stream_id, route = self.pubsub_management.create_stream(
            'stream1', 'xp1')
        subscription_id = self.pubsub_management.create_subscription(
            'sub1', stream_ids=[stream_id])

        self.check1 = Event()

        def verifier(m, r, s):
            self.check1.set()

        subscriber = StandaloneStreamSubscriber('sub1', verifier)
        subscriber.start()

        publisher = StandaloneStreamPublisher(stream_id, route)
        publisher.publish('should not receive')

        self.assertFalse(self.check1.wait(0.25))

        self.pubsub_management.activate_subscription(subscription_id)

        publisher.publish('should receive')
        self.assertTrue(self.check1.wait(2))

        self.check1.clear()
        self.assertFalse(self.check1.is_set())

        self.pubsub_management.deactivate_subscription(subscription_id)

        publisher.publish('should not receive')
        self.assertFalse(self.check1.wait(0.5))

        self.pubsub_management.activate_subscription(subscription_id)

        publisher.publish('should receive')
        self.assertTrue(self.check1.wait(2))

        subscriber.stop()

        self.pubsub_management.deactivate_subscription(subscription_id)
        self.pubsub_management.delete_subscription(subscription_id)
        self.pubsub_management.delete_stream(stream_id)
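
Example #28 leans on gevent Event semantics: wait(timeout) returns falsy when the event is unset, set() releases waiters, and clear() re-arms the event for the next round. In isolation:

    from gevent.event import Event

    e = Event()
    assert not e.wait(0.01)  # times out while unset
    e.set()
    assert e.wait(0.01)      # returns immediately once set
    e.clear()
    assert not e.is_set()    # re-armed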
Example #29
    def test_retrieve_and_transform(self):
        # Make a simple dataset and start ingestion, pretty standard stuff.
        ctd_stream_id, route, stream_def_id, dataset_id = self.make_simple_dataset()
        self.start_ingestion(ctd_stream_id, dataset_id)

        # Stream definition for the salinity data
        salinity_pdict_id = self.dataset_management.read_parameter_dictionary_by_name(
            "ctd_parsed_param_dict", id_only=True
        )
        sal_stream_def_id = self.pubsub_management.create_stream_definition(
            "sal data", parameter_dictionary_id=salinity_pdict_id
        )

        rdt = RecordDictionaryTool(stream_definition_id=stream_def_id)
        rdt["time"] = np.arange(10)
        rdt["temp"] = np.random.randn(10) * 10 + 30
        rdt["conductivity"] = np.random.randn(10) * 2 + 10
        rdt["pressure"] = np.random.randn(10) * 1 + 12

        publisher = StandaloneStreamPublisher(ctd_stream_id, route)
        publisher.publish(rdt.to_granule())

        rdt["time"] = np.arange(10, 20)

        publisher.publish(rdt.to_granule())

        self.wait_until_we_have_enough_granules(dataset_id, 20)

        granule = self.data_retriever.retrieve(
            dataset_id,
            None,
            None,
            "ion.processes.data.transforms.ctd.ctd_L2_salinity",
            "CTDL2SalinityTransformAlgorithm",
            kwargs=dict(params=sal_stream_def_id),
        )
        rdt = RecordDictionaryTool.load_from_granule(granule)
        for i in rdt["salinity"]:
            self.assertNotEquals(i, 0)
        self.streams.append(ctd_stream_id)
        self.stop_ingestion(ctd_stream_id)
Example #30
    def _publish_granules(self, stream_id=None, stream_route=None, values=None, number=None):

        pub = StandaloneStreamPublisher(stream_id, stream_route)

        stream_def = self.pubsub_management.read_stream_definition(stream_id=stream_id)
        stream_def_id = stream_def._id
        rdt = RecordDictionaryTool(stream_definition_id=stream_def_id)

        times = numpy.array([number  for l in xrange(self.length)])

        for i in xrange(number):
            rdt['input_voltage'] = values
            rdt['preferred_timestamp'] = ['time' for l in xrange(len(times))]
            rdt['time'] = times

            g = rdt.to_granule()
            g.data_producer_id = 'instrument_1'

            log.debug("granule #%s published by instrument:: %s" % ( number,g))

            pub.publish(g)
Example #31
    def _publish_granules(self, stream_id=None, stream_route=None, values=None, number=None):

        pub = StandaloneStreamPublisher(stream_id, stream_route)

        stream_def = self.pubsub_management.read_stream_definition(stream_id=stream_id)
        stream_def_id = stream_def._id
        rdt = RecordDictionaryTool(stream_definition_id=stream_def_id)

        times = numpy.array([number  for l in xrange(self.length)])

        for i in xrange(number):
            rdt['input_voltage'] = values
            rdt['preferred_timestamp'] = ['time' for l in xrange(len(times))]
            rdt['time'] = times

            g = rdt.to_granule()
            g.data_producer_id = 'instrument_1'

            log.debug("granule #%s published by instrument:: %s" % ( number,g))

            pub.publish(g)
Example #32
    def test_transform_worker(self):
        self.loggerpids = []
        self.data_process_objs = []
        self._output_stream_ids = []

        self.start_transform_worker()

        self.parameter_dict_id = self.dataset_management_client.read_parameter_dictionary_by_name(name='ctd_parsed_param_dict', id_only=True)

        # create the StreamDefinition
        self.stream_def_id = self.pubsub_client.create_stream_definition(name='stream_def', parameter_dictionary_id=self.parameter_dict_id)
        self.addCleanup(self.pubsub_client.delete_stream_definition, self.stream_def_id)

        # create the DataProduct
        input_dp_obj = IonObject(  RT.DataProduct, name='input_data_product', description='input test stream',
                                             temporal_domain = self.time_dom.dump(),  spatial_domain = self.spatial_dom.dump())
        self.input_dp_id = self.dataproductclient.create_data_product(data_product=input_dp_obj,  stream_definition_id=self.stream_def_id)

        #retrieve the Stream for this data product
        stream_ids, assoc_ids = self.rrclient.find_objects(self.input_dp_id, PRED.hasStream, RT.Stream, True)
        self.stream_id = stream_ids[0]

        #create a queue to catch the published granules
        self.subscription_id = self.pubsub_client.create_subscription(name='parsed_subscription', stream_ids=[self.stream_id], exchange_name='parsed_subscription')
        self.addCleanup(self.pubsub_client.delete_subscription, self.subscription_id)

        self.pubsub_client.activate_subscription(self.subscription_id)
        self.addCleanup(self.pubsub_client.deactivate_subscription, self.subscription_id)

        stream_route = self.pubsub_client.read_stream_route(self.stream_id)
        self.publisher = StandaloneStreamPublisher(stream_id=self.stream_id, stream_route=stream_route )

        self.start_event_listener()

        self.dp_list = self.create_data_processes()

        self.data_modified = Event()
        self.data_modified.wait(5)

        rdt = RecordDictionaryTool(stream_definition_id=self.stream_def_id)
        rdt['time']         = [0] # time should always come first
        rdt['conductivity'] = [1]
        rdt['pressure']     = [2]
        rdt['salinity']     = [8]

        self.publisher.publish(rdt.to_granule())


        self.data_modified.wait(5)

        # Cleanup processes
        for pid in self.loggerpids:
            self.processdispatchclient.cancel_process(pid)
Example #33
    def test_ingestion_pause(self):
        ctd_stream_id, route, stream_def_id, dataset_id = self.make_simple_dataset()
        ingestion_config_id = self.get_ingestion_config()
        self.start_ingestion(ctd_stream_id, dataset_id)
        self.addCleanup(self.stop_ingestion, ctd_stream_id)

        rdt = RecordDictionaryTool(stream_definition_id=stream_def_id)
        rdt['time'] = np.arange(10)

        publisher = StandaloneStreamPublisher(ctd_stream_id, route)
        monitor = DatasetMonitor(dataset_id)
        self.addCleanup(monitor.stop)
        publisher.publish(rdt.to_granule())
        self.assertTrue(monitor.wait())
        granule = self.data_retriever.retrieve(dataset_id)


        self.ingestion_management.pause_data_stream(ctd_stream_id, ingestion_config_id)

        monitor.event.clear()
        rdt['time'] = np.arange(10,20)
        publisher.publish(rdt.to_granule())
        self.assertFalse(monitor.event.wait(1))

        self.ingestion_management.resume_data_stream(ctd_stream_id, ingestion_config_id)

        self.assertTrue(monitor.wait())

        granule = self.data_retriever.retrieve(dataset_id)
        rdt2 = RecordDictionaryTool.load_from_granule(granule)
        np.testing.assert_array_almost_equal(rdt2['time'], np.arange(20))
Example #34
    def test_demux(self):
        self.stream0, self.route0 = self.pubsub_client.create_stream('stream0', exchange_point='test')
        self.stream1, self.route1 = self.pubsub_client.create_stream('stream1', exchange_point='main_data')
        self.stream2, self.route2 = self.pubsub_client.create_stream('stream2', exchange_point='alt_data')
        
        self.r_stream1 = gevent.event.Event()
        self.r_stream2 = gevent.event.Event()

        def process(msg, stream_route, stream_id):
            if stream_id == self.stream1:
                self.r_stream1.set()
            elif stream_id == self.stream2:
                self.r_stream2.set()
        
        self.container.spawn_process('demuxer', 'ion.processes.data.transforms.mux', 'DemuxTransform', {'process':{'out_streams':[self.stream1, self.stream2]}}, 'demuxer_pid')
        self.queue_cleanup.append('demuxer_pid') 
        
        sub1 = StandaloneStreamSubscriber('sub1', process)
        sub2 = StandaloneStreamSubscriber('sub2', process)
        sub1.xn.bind(self.route1.routing_key, self.container.ex_manager.create_xp('main_data'))
        sub2.xn.bind(self.route2.routing_key, self.container.ex_manager.create_xp('alt_data'))
        sub1.start()
        sub2.start()

        self.queue_cleanup.append(sub1.xn)
        self.queue_cleanup.append(sub2.xn)
        xn = self.container.ex_manager.create_xn_queue('demuxer_pid')
        xn.bind(self.route0.routing_key, self.container.ex_manager.create_xp(self.route0.exchange_point))
        domino = StandaloneStreamPublisher(self.stream0, self.route0)
        domino.publish('test')

        self.assertTrue(self.r_stream1.wait(2))
        self.assertTrue(self.r_stream2.wait(2))

        self.container.proc_manager.terminate_process('demuxer_pid')
        
        sub1.stop()
        sub2.stop()
Example #35
    def setUp(self):
        self._start_container()
        self.container.start_rel_from_url('res/deploy/r2deploy.yml')
        self.dataset_management_client = DatasetManagementServiceClient(
            node=self.container.node)
        self.pubsub_client = PubsubManagementServiceClient(
            node=self.container.node)

        self.time_dom, self.spatial_dom = time_series_domain()

        self.parameter_dict_id = self.dataset_management_client.read_parameter_dictionary_by_name(
            name='ctd_parsed_param_dict', id_only=True)

        self.stream_def_id = self.pubsub_client.create_stream_definition(
            name='stream_def', parameter_dictionary_id=self.parameter_dict_id)
        self.addCleanup(self.pubsub_client.delete_stream_definition,
                        self.stream_def_id)

        self.stream_id, self.route_id = self.pubsub_client.create_stream(
            name='parsed_stream',
            stream_definition_id=self.stream_def_id,
            exchange_point='science_data')
        self.addCleanup(self.pubsub_client.delete_stream, self.stream_id)

        self.subscription_id = self.pubsub_client.create_subscription(
            name='parsed_subscription',
            stream_ids=[self.stream_id],
            exchange_name='parsed_subscription')
        self.addCleanup(self.pubsub_client.delete_subscription,
                        self.subscription_id)

        self.pubsub_client.activate_subscription(self.subscription_id)
        self.addCleanup(self.pubsub_client.deactivate_subscription,
                        self.subscription_id)

        self.publisher = StandaloneStreamPublisher(self.stream_id,
                                                   self.route_id)
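Note the cleanup registration order in this setUp: unittest's addCleanup runs callbacks last-in-first-out, so teardown is the exact reverse of setup (deactivate_subscription, then delete_subscription, delete_stream, and finally delete_stream_definition). A self-contained demonstration of that ordering:

import unittest

class CleanupOrderDemo(unittest.TestCase):
    def test_lifo(self):
        order = []
        self.addCleanup(order.append, 'registered first, runs last')
        self.addCleanup(order.append, 'registered last, runs first')
        # Cleanups execute after the test body in reverse registration order,
        # so 'registered last, runs first' is appended to order first.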
Example #36
    def test_execute_transform(self):
        streams = self.setup_transform()
        in_stream_id, in_stream_def_id = streams[0]
        out_stream_id, out_stream_def_id = streams[1]

        validation_event = Event()

        def validator(msg, route, stream_id):
            rdt = RecordDictionaryTool.load_from_granule(msg)
            if not np.allclose(rdt['TEMPWAT_L1'], np.array([18.])):
                return
            if not np.allclose(rdt['CONDWAT_L1'], np.array([0.5])):
                return
            if not np.allclose(rdt['PRESWAT_L1'], np.array([0.04536611])):
                return
            validation_event.set()

        self.setup_validator(validator)

        in_route = self.pubsub_management.read_stream_route(in_stream_id)
        publisher = StandaloneStreamPublisher(in_stream_id, in_route)

        outbound_rdt = RecordDictionaryTool(
            stream_definition_id=in_stream_def_id)
        outbound_rdt['time'] = [0]
        outbound_rdt['TEMPWAT_L0'] = [280000]
        outbound_rdt['CONDWAT_L0'] = [100000]
        outbound_rdt['PRESWAT_L0'] = [2789]

        outbound_rdt['lat'] = [45]
        outbound_rdt['lon'] = [-71]

        outbound_granule = outbound_rdt.to_granule()

        publisher.publish(outbound_granule)

        self.assertTrue(validation_event.wait(2))
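The expected L1 values are consistent with the usual linear CTD scalings; this is an inference from the numbers rather than something the snippet states:

# Assumed L0 -> L1 scalings; they reproduce the values the validator expects.
assert 280000 / 10000.0 - 10 == 18.0    # TEMPWAT_L0 -> TEMPWAT_L1
assert 100000 / 100000.0 - 0.5 == 0.5   # CONDWAT_L0 -> CONDWAT_L1
# PRESWAT_L1 (2789 -> ~0.0454) uses an instrument-specific conversion that is
# less obvious, so it is not reproduced here.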
Example #37
    def test_granule(self):
        
        pdict_id = self.dataset_management.read_parameter_dictionary_by_name('ctd_parsed_param_dict', id_only=True)
        stream_def_id = self.pubsub_management.create_stream_definition('ctd', parameter_dictionary_id=pdict_id)
        pdict = DatasetManagementService.get_parameter_dictionary_by_name('ctd_parsed_param_dict')

        stream_id, route = self.pubsub_management.create_stream('ctd_stream', 'xp1', stream_definition_id=stream_def_id)
        self.xps.append('xp1')
        publisher = StandaloneStreamPublisher(stream_id, route)

        subscriber = StandaloneStreamSubscriber('sub', self.verify_incoming)
        subscriber.start()

        subscription_id = self.pubsub_management.create_subscription('sub', stream_ids=[stream_id])
        self.xns.append('sub')
        self.pubsub_management.activate_subscription(subscription_id)


        rdt = RecordDictionaryTool(stream_definition_id=stream_def_id)
        rdt['time'] = np.arange(10)
        rdt['temp'] = np.random.randn(10) * 10 + 30
        rdt['pressure'] = [20] * 10

        self.assertEquals(set(pdict.keys()), set(rdt.fields))
        self.assertEquals(pdict.temporal_parameter_name, rdt.temporal_parameter)

        self.rdt = rdt
        self.data_producer_id = 'data_producer'
        self.provider_metadata_update = {1:1}

        publisher.publish(rdt.to_granule(data_producer_id='data_producer', provider_metadata_update={1:1}))

        self.assertTrue(self.event.wait(10))
        
        self.pubsub_management.deactivate_subscription(subscription_id)
        self.pubsub_management.delete_subscription(subscription_id)
Example #38
    def test_activation_and_deactivation(self):
        stream_id, route = self.pubsub_management.create_stream('stream1','xp1')
        subscription_id = self.pubsub_management.create_subscription('sub1', stream_ids=[stream_id])

        self.check1 = Event()

        def verifier(m,r,s):
            self.check1.set()


        subscriber = StandaloneStreamSubscriber('sub1',verifier)
        subscriber.start()

        publisher = StandaloneStreamPublisher(stream_id, route)
        publisher.publish('should not receive')

        self.assertFalse(self.check1.wait(0.25))

        self.pubsub_management.activate_subscription(subscription_id)

        publisher.publish('should receive')
        self.assertTrue(self.check1.wait(2))

        self.check1.clear()
        self.assertFalse(self.check1.is_set())

        self.pubsub_management.deactivate_subscription(subscription_id)

        publisher.publish('should not receive')
        self.assertFalse(self.check1.wait(0.5))

        self.pubsub_management.activate_subscription(subscription_id)

        publisher.publish('should receive')
        self.assertTrue(self.check1.wait(2))

        subscriber.stop()

        self.pubsub_management.deactivate_subscription(subscription_id)
        self.pubsub_management.delete_subscription(subscription_id)
        self.pubsub_management.delete_stream(stream_id)
Example #39
    def test_execute_transform(self):
        streams = self.setup_transform()
        in_stream_id, in_stream_def_id = streams[0]
        out_stream_id, out_stream_def_id = streams[1]


        validation_event = Event()
        def validator(msg, route, stream_id):
            rdt = RecordDictionaryTool.load_from_granule(msg)
            if not np.allclose(rdt['TEMPWAT_L1'], np.array([18.])):
                return
            if not np.allclose(rdt['CONDWAT_L1'], np.array([0.5])):
                return
            if not np.allclose(rdt['PRESWAT_L1'], np.array([0.04536611])):
                return
            validation_event.set()

        self.setup_validator(validator)

        in_route = self.pubsub_management.read_stream_route(in_stream_id)
        publisher = StandaloneStreamPublisher(in_stream_id, in_route)

        outbound_rdt = RecordDictionaryTool(stream_definition_id=in_stream_def_id)
        outbound_rdt['time'] = [0]
        outbound_rdt['TEMPWAT_L0'] = [280000]
        outbound_rdt['CONDWAT_L0'] = [100000]
        outbound_rdt['PRESWAT_L0'] = [2789]

        outbound_rdt['lat'] = [45]
        outbound_rdt['lon'] = [-71]

        outbound_granule = outbound_rdt.to_granule()

        publisher.publish(outbound_granule)

        self.assertTrue(validation_event.wait(2))
Example #40
    def test_coverage_transform(self):
        ph = ParameterHelper(self.dataset_management, self.addCleanup)
        pdict_id = ph.create_parsed()
        stream_def_id = self.pubsub_management.create_stream_definition('ctd parsed', parameter_dictionary_id=pdict_id)
        self.addCleanup(self.pubsub_management.delete_stream_definition, stream_def_id)

        stream_id, route = self.pubsub_management.create_stream('example', exchange_point=self.exchange_point_name, stream_definition_id=stream_def_id)
        self.addCleanup(self.pubsub_management.delete_stream, stream_id)

        ingestion_config_id = self.get_ingestion_config()
        dataset_id = self.create_dataset(pdict_id)

        self.ingestion_management.persist_data_stream(stream_id=stream_id, ingestion_configuration_id=ingestion_config_id, dataset_id=dataset_id)
        self.addCleanup(self.ingestion_management.unpersist_data_stream, stream_id, ingestion_config_id)
        publisher = StandaloneStreamPublisher(stream_id, route)
        
        rdt = ph.get_rdt(stream_def_id)
        ph.fill_parsed_rdt(rdt)

        dataset_monitor = DatasetMonitor(dataset_id)
        self.addCleanup(dataset_monitor.stop)

        publisher.publish(rdt.to_granule())
        self.assertTrue(dataset_monitor.wait())

        replay_granule = self.data_retriever.retrieve(dataset_id)
        rdt_out = RecordDictionaryTool.load_from_granule(replay_granule)

        np.testing.assert_array_almost_equal(rdt_out['time'], rdt['time'])
        np.testing.assert_array_almost_equal(rdt_out['temp'], rdt['temp'])

        np.testing.assert_allclose(rdt_out['conductivity_L1'], np.array([42.914]))
        np.testing.assert_allclose(rdt_out['temp_L1'], np.array([20.]))
        np.testing.assert_allclose(rdt_out['pressure_L1'], np.array([3.068]))
        np.testing.assert_allclose(rdt_out['density'], np.array([1021.7144739593881], dtype='float32'))
        np.testing.assert_allclose(rdt_out['salinity'], np.array([30.935132729668283], dtype='float32'))
Example #41
    def set_configuration(self, config):
        """
        expect configuration to have:
        - parser module/class
        - directory, wildcard to find data files
        - optional timestamp of last granule
        - optional poll rate
        - publish info
        """
        log.debug('using configuration: %s', config)
        self.config = config
        self.max_records = get_safe(config, 'max_records', 100)
        stream_id = config['stream_id']
        stream_route_param = config['stream_route']
        stream_route = IonObject(OT.StreamRoute, stream_route_param)
        self.publisher = StandaloneStreamPublisher(stream_id=stream_id, stream_route=stream_route)
        self.parameter_dictionary = ParameterDictionary.load(config['parameter_dict'])
        self.time_field = self.parameter_dictionary.get_temporal_context()
        self.latest_granule_time = get_safe(config, 'last_time', 0)
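Pieced together from the keys this method reads, a configuration for it might look like the following; every concrete value here is hypothetical:

# Hypothetical configuration; the keys mirror what set_configuration() reads above.
config = {
    'stream_id': 'abc123',                     # made-up stream id
    'stream_route': {'exchange_point': 'xp1',  # dict used to build the OT.StreamRoute
                     'routing_key': 'abc123.stream'},
    'parameter_dict': {},                      # placeholder for a dumped ParameterDictionary
    'max_records': 100,                        # optional; defaults to 100
    'last_time': 0,                            # optional timestamp of the last granule
}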
Example #42
    def setUp(self):
        self._start_container()
        self.container.start_rel_from_url('res/deploy/r2deploy.yml')
        self.dataset_management_client = DatasetManagementServiceClient(node=self.container.node)
        self.pubsub_client = PubsubManagementServiceClient(node=self.container.node)

        self.time_dom, self.spatial_dom = time_series_domain()

        self.parameter_dict_id = self.dataset_management_client.read_parameter_dictionary_by_name(name='ctd_parsed_param_dict', id_only=True)

        self.stream_def_id = self.pubsub_client.create_stream_definition(name='stream_def', parameter_dictionary_id=self.parameter_dict_id)
        self.addCleanup(self.pubsub_client.delete_stream_definition, self.stream_def_id)

        self.stream_id, self.route_id = self.pubsub_client.create_stream(name='parsed_stream', stream_definition_id=self.stream_def_id, exchange_point='science_data')
        self.addCleanup(self.pubsub_client.delete_stream, self.stream_id)

        self.subscription_id = self.pubsub_client.create_subscription(name='parsed_subscription', stream_ids=[self.stream_id], exchange_name='parsed_subscription')
        self.addCleanup(self.pubsub_client.delete_subscription, self.subscription_id)

        self.pubsub_client.activate_subscription(self.subscription_id)
        self.addCleanup(self.pubsub_client.deactivate_subscription, self.subscription_id)

        self.publisher = StandaloneStreamPublisher(self.stream_id, self.route_id)
Example #43
    def test_retrieve_and_transform(self):
        # Make a simple dataset and start ingestion, pretty standard stuff.
        ctd_stream_id, route, stream_def_id, dataset_id = self.make_simple_dataset()
        self.start_ingestion(ctd_stream_id, dataset_id)

        # Stream definition for the salinity data
        salinity_pdict_id = self.dataset_management.read_parameter_dictionary_by_name(
            'ctd_parsed_param_dict', id_only=True)
        sal_stream_def_id = self.pubsub_management.create_stream_definition(
            'sal data', parameter_dictionary_id=salinity_pdict_id)

        rdt = RecordDictionaryTool(stream_definition_id=stream_def_id)
        rdt['time'] = np.arange(10)
        rdt['temp'] = np.random.randn(10) * 10 + 30
        rdt['conductivity'] = np.random.randn(10) * 2 + 10
        rdt['pressure'] = np.random.randn(10) * 1 + 12

        publisher = StandaloneStreamPublisher(ctd_stream_id, route)
        publisher.publish(rdt.to_granule())

        rdt['time'] = np.arange(10, 20)

        publisher.publish(rdt.to_granule())

        self.wait_until_we_have_enough_granules(dataset_id, 20)

        granule = self.data_retriever.retrieve(
            dataset_id,
            None,
            None,
            'ion.processes.data.transforms.ctd.ctd_L2_salinity',
            'CTDL2SalinityTransformAlgorithm',
            kwargs=dict(params=sal_stream_def_id))
        rdt = RecordDictionaryTool.load_from_granule(granule)
        for i in rdt['salinity']:
            self.assertNotEquals(i, 0)
        self.streams.append(ctd_stream_id)
        self.stop_ingestion(ctd_stream_id)
Example #44
    def test_sparse_values(self):
        ph = ParameterHelper(self.dataset_management, self.addCleanup)
        pdict_id = ph.create_sparse()
        stream_def_id = self.pubsub_management.create_stream_definition('sparse', parameter_dictionary_id=pdict_id)
        self.addCleanup(self.pubsub_management.delete_stream_definition, stream_def_id)
        stream_id, route = self.pubsub_management.create_stream('example', exchange_point=self.exchange_point_name, stream_definition_id=stream_def_id)
        self.addCleanup(self.pubsub_management.delete_stream, stream_id)
        dataset_id = self.create_dataset(pdict_id)
        self.start_ingestion(stream_id,dataset_id)
        self.addCleanup(self.stop_ingestion, stream_id)

        # Publish the initial granule; this first one carries the sparse values,
        # setting lat to 45 and lon to -71
        ntp_now = time.time() + 2208988800
        rdt = ph.get_rdt(stream_def_id)
        rdt['time'] = [ntp_now]
        rdt['internal_timestamp'] = [ntp_now]
        rdt['temp'] = [300000]
        rdt['preferred_timestamp'] = ['driver_timestamp']
        rdt['port_timestamp'] = [ntp_now]
        rdt['quality_flag'] = ['']
        rdt['lat'] = [45]
        rdt['conductivity'] = [4341400]
        rdt['driver_timestamp'] = [ntp_now]
        rdt['lon'] = [-71]
        rdt['pressure'] = [256.8]

        publisher = StandaloneStreamPublisher(stream_id, route)
        dataset_monitor = DatasetMonitor(dataset_id)
        self.addCleanup(dataset_monitor.stop)
        publisher.publish(rdt.to_granule())
        self.assertTrue(dataset_monitor.wait())
        dataset_monitor.reset()

        replay_granule = self.data_retriever.retrieve(dataset_id)
        rdt_out = RecordDictionaryTool.load_from_granule(replay_granule)

        # Check the values and make sure they're correct
        np.testing.assert_allclose(rdt_out['time'], rdt['time'])
        np.testing.assert_allclose(rdt_out['temp'], rdt['temp'])
        np.testing.assert_allclose(rdt_out['lat'], np.array([45]))
        np.testing.assert_allclose(rdt_out['lon'], np.array([-71]))

        np.testing.assert_allclose(rdt_out['conductivity_L1'], np.array([42.914]))
        np.testing.assert_allclose(rdt_out['temp_L1'], np.array([20.]))
        np.testing.assert_allclose(rdt_out['pressure_L1'], np.array([3.068]))
        np.testing.assert_allclose(rdt_out['density'], np.array([1021.7144739593881], dtype='float32'))
        np.testing.assert_allclose(rdt_out['salinity'], np.array([30.935132729668283], dtype='float32'))


        # We're going to change the lat/lon
        rdt = ph.get_rdt(stream_def_id)
        rdt['time'] = [time.time() + 2208988800]
        rdt['lat'] = [46]
        rdt['lon'] = [-73]
        
        publisher.publish(rdt.to_granule())
        self.assertTrue(dataset_monitor.wait())
        dataset_monitor.reset()


        replay_granule = self.data_retriever.retrieve(dataset_id)
        rdt_out = RecordDictionaryTool.load_from_granule(replay_granule)

        np.testing.assert_allclose(rdt_out['time'], rdt['time'])
        
        for i in xrange(9):
            ntp_now = time.time() + 2208988800
            rdt['time'] = [ntp_now]
            rdt['internal_timestamp'] = [ntp_now]
            rdt['temp'] = [300000]
            rdt['preferred_timestamp'] = ['driver_timestamp']
            rdt['port_timestamp'] = [ntp_now]
            rdt['quality_flag'] = [None]
            rdt['conductivity'] = [4341400]
            rdt['driver_timestamp'] = [ntp_now]
            rdt['pressure'] = [256.8]

            publisher.publish(rdt.to_granule())
            self.assertTrue(dataset_monitor.wait())
            dataset_monitor.reset()

        replay_granule = self.data_retriever.retrieve(dataset_id)
        rdt_out = RecordDictionaryTool.load_from_granule(replay_granule)

        np.testing.assert_allclose(rdt_out['pressure'], np.array([256.8] * 10))
        np.testing.assert_allclose(rdt_out['lat'], np.array([45] + [46] * 9))
        np.testing.assert_allclose(rdt_out['lon'], np.array([-71] + [-73] * 9))
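As in the coverage-transform example above, the asserted L1 values follow simple linear calibrations of the raw inputs; the scalings below are inferred from the numbers, not stated in the snippet:

# Inferred linear calibrations that reproduce the asserted L1 values.
assert abs(300000 / 10000.0 - 10 - 20.0) < 1e-9       # temp -> temp_L1
assert abs(4341400 / 100000.0 - 0.5 - 42.914) < 1e-9  # conductivity -> conductivity_L1
assert abs(256.8 / 100.0 + 0.5 - 3.068) < 1e-9        # pressure -> pressure_L1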
Example #45
    def publish_on_stream(self, stream_id, msg):
        stream = self.pubsub_management.read_stream(stream_id)
        stream_route = stream.stream_route
        publisher = StandaloneStreamPublisher(stream_id=stream_id,
                                              stream_route=stream_route)
        publisher.publish(msg)
Example #46
class TestTransformWorkerSubscriptions(IonIntegrationTestCase):
    def setUp(self):
        self._start_container()
        self.container.start_rel_from_url('res/deploy/r2deploy.yml')
        self.dataset_management_client = DatasetManagementServiceClient(
            node=self.container.node)
        self.pubsub_client = PubsubManagementServiceClient(
            node=self.container.node)
        self.dataproductclient = DataProductManagementServiceClient(
            node=self.container.node)
        self.dataprocessclient = DataProcessManagementServiceClient(
            node=self.container.node)
        self.processdispatchclient = ProcessDispatcherServiceClient(
            node=self.container.node)
        self.damsclient = DataAcquisitionManagementServiceClient(
            node=self.container.node)
        self.rrclient = ResourceRegistryServiceClient(node=self.container.node)

        self.wait_time = CFG.get_safe('endpoint.receive.timeout', 10)

    @attr('LOCOINT')
    @unittest.skipIf(os.getenv('CEI_LAUNCH_TEST', False),
                     'Skip test while in CEI LAUNCH mode')
    def test_multi_subscriptions(self):
        self.dp_list = []
        self.event1_verified = Event()
        self.event2_verified = Event()

        self.parameter_dict_id = self.dataset_management_client.read_parameter_dictionary_by_name(
            name='ctd_parsed_param_dict', id_only=True)

        # create the StreamDefinition
        self.stream_def_id = self.pubsub_client.create_stream_definition(
            name='stream_def', parameter_dictionary_id=self.parameter_dict_id)
        self.addCleanup(self.pubsub_client.delete_stream_definition,
                        self.stream_def_id)

        # create the DataProduct
        input_dp_obj = IonObject(RT.DataProduct,
                                 name='input_data_product_one',
                                 description='input test stream one')
        self.input_dp_one_id = self.dataproductclient.create_data_product(
            data_product=input_dp_obj, stream_definition_id=self.stream_def_id)

        input_dp_obj = IonObject(RT.DataProduct,
                                 name='input_data_product_two',
                                 description='input test stream two')
        self.input_dp_two_id = self.dataproductclient.create_data_product(
            data_product=input_dp_obj, stream_definition_id=self.stream_def_id)

        #retrieve the Stream for each input data product
        stream_ids, assoc_ids = self.rrclient.find_objects(
            self.input_dp_one_id, PRED.hasStream, RT.Stream, True)
        self.stream_one_id = stream_ids[0]

        stream_ids, assoc_ids = self.rrclient.find_objects(
            self.input_dp_two_id, PRED.hasStream, RT.Stream, True)
        self.stream_two_id = stream_ids[0]

        dpd_id = self.create_data_process_definition()
        dp1_func_output_dp_id, dp2_func_output_dp_id = self.create_output_data_products()
        first_dp_id = self.create_data_process_one(dpd_id,
                                                   dp1_func_output_dp_id)

        second_dp_id = self.create_data_process_two(dpd_id,
                                                    self.input_dp_two_id,
                                                    dp2_func_output_dp_id)

        #retrieve subscription from data process
        subscription_objs, _ = self.rrclient.find_objects(
            subject=first_dp_id,
            predicate=PRED.hasSubscription,
            object_type=RT.Subscription,
            id_only=False)
        log.debug('test_transform_worker subscription_obj:  %s',
                  subscription_objs[0])

        #create subscription to stream ONE, create data process and publish granule on stream ONE

        #create a queue to catch the published granules of stream ONE
        self.subscription_one_id = self.pubsub_client.create_subscription(
            name='parsed_subscription_one',
            stream_ids=[self.stream_one_id],
            exchange_name=subscription_objs[0].exchange_name)
        self.addCleanup(self.pubsub_client.delete_subscription,
                        self.subscription_one_id)

        self.pubsub_client.activate_subscription(self.subscription_one_id)
        self.addCleanup(self.pubsub_client.deactivate_subscription,
                        self.subscription_one_id)

        stream_route_one = self.pubsub_client.read_stream_route(
            self.stream_one_id)
        self.publisher_one = StandaloneStreamPublisher(
            stream_id=self.stream_one_id, stream_route=stream_route_one)

        self.start_event_listener()

        #data process 1 adds conductivity + pressure and puts the result in salinity
        rdt = RecordDictionaryTool(stream_definition_id=self.stream_def_id)
        rdt['time'] = [0]  # time should always come first
        rdt['conductivity'] = [1]
        rdt['pressure'] = [2]
        rdt['salinity'] = [8]

        self.publisher_one.publish(msg=rdt.to_granule(),
                                   stream_id=self.stream_one_id)

        #retrieve subscription from data process
        subscription_objs, _ = self.rrclient.find_objects(
            subject=second_dp_id,
            predicate=PRED.hasSubscription,
            object_type=RT.Subscription,
            id_only=False)
        log.debug('test_transform_worker subscription_obj:  %s',
                  subscription_objs[0])

        #create subscription to streams ONE and TWO, move the TWO subscription, create data process and publish granule on stream TWO

        #create a queue to catch the published granules of stream TWO
        self.subscription_two_id = self.pubsub_client.create_subscription(
            name='parsed_subscription_one_two',
            stream_ids=[self.stream_two_id],
            exchange_name=subscription_objs[0].exchange_name)
        self.addCleanup(self.pubsub_client.delete_subscription,
                        self.subscription_two_id)

        self.pubsub_client.activate_subscription(self.subscription_two_id)
        self.addCleanup(self.pubsub_client.deactivate_subscription,
                        self.subscription_two_id)

        stream_route_two = self.pubsub_client.read_stream_route(
            self.stream_two_id)
        self.publisher_two = StandaloneStreamPublisher(
            stream_id=self.stream_two_id, stream_route=stream_route_two)

        #data process 1 adds conductivity + pressure and puts the result in salinity
        rdt = RecordDictionaryTool(stream_definition_id=self.stream_def_id)
        rdt['time'] = [0]  # time should always come first
        rdt['conductivity'] = [1]
        rdt['pressure'] = [2]
        rdt['salinity'] = [8]

        self.publisher_one.publish(msg=rdt.to_granule(),
                                   stream_id=self.stream_one_id)

        #data process 2 adds salinity + pressure and puts the result in conductivity
        rdt = RecordDictionaryTool(stream_definition_id=self.stream_def_id)
        rdt['time'] = [0]  # time should always come first
        rdt['conductivity'] = [22]
        rdt['pressure'] = [4]
        rdt['salinity'] = [1]

        self.publisher_two.publish(msg=rdt.to_granule(),
                                   stream_id=self.stream_two_id)

        self.assertTrue(self.event2_verified.wait(self.wait_time))
        self.assertTrue(self.event1_verified.wait(self.wait_time))

    @attr('LOCOINT')
    @unittest.skipIf(os.getenv('CEI_LAUNCH_TEST', False),
                     'Skip test while in CEI LAUNCH mode')
    def test_two_transforms_inline(self):
        self.dp_list = []
        self.event1_verified = Event()
        self.event2_verified = Event()

        self.parameter_dict_id = self.dataset_management_client.read_parameter_dictionary_by_name(
            name='ctd_parsed_param_dict', id_only=True)

        # create the StreamDefinition
        self.stream_def_id = self.pubsub_client.create_stream_definition(
            name='stream_def', parameter_dictionary_id=self.parameter_dict_id)
        self.addCleanup(self.pubsub_client.delete_stream_definition,
                        self.stream_def_id)

        # create the DataProduct
        input_dp_obj = IonObject(RT.DataProduct,
                                 name='input_data_product_one',
                                 description='input test stream one')
        self.input_dp_one_id = self.dataproductclient.create_data_product(
            data_product=input_dp_obj, stream_definition_id=self.stream_def_id)

        dpd_id = self.create_data_process_definition()
        dp1_func_output_dp_id, dp2_func_output_dp_id = self.create_output_data_products()

        first_dp_id = self.create_data_process_one(dpd_id,
                                                   dp1_func_output_dp_id)
        second_dp_id = self.create_data_process_two(dpd_id,
                                                    dp1_func_output_dp_id,
                                                    dp2_func_output_dp_id)

        #retrieve subscription from data process one
        subscription_objs, _ = self.rrclient.find_objects(
            subject=first_dp_id,
            predicate=PRED.hasSubscription,
            object_type=RT.Subscription,
            id_only=False)
        log.debug('test_transform_worker subscription_obj:  %s',
                  subscription_objs[0])

        #retrieve the Streams for these data products
        stream_ids, assoc_ids = self.rrclient.find_objects(
            self.input_dp_one_id, PRED.hasStream, RT.Stream, True)
        self.stream_one_id = stream_ids[0]
        #the input to data process two is the output from data process one
        stream_ids, assoc_ids = self.rrclient.find_objects(
            dp1_func_output_dp_id, PRED.hasStream, RT.Stream, True)
        self.stream_two_id = stream_ids[0]

        # Run provenance on the output dataproduct of the second data process to see all the links
        # are as expected
        output_data_product_provenance = self.dataproductclient.get_data_product_provenance(
            dp2_func_output_dp_id)

        # Do a basic check on the provenance graph: expect 3 entries, the
        # output product plus its two ancestors
        self.assertEquals(len(output_data_product_provenance), 3)
        # confirm that the linking from the output dataproduct to input dataproduct is correct
        self.assertTrue(
            dp1_func_output_dp_id in
            output_data_product_provenance[dp2_func_output_dp_id]['parents'])
        self.assertTrue(
            self.input_dp_one_id in
            output_data_product_provenance[dp1_func_output_dp_id]['parents'])

        #create subscription to stream ONE, create data process and publish granule on stream ONE

        #create a queue to catch the published granules of stream ONE
        subscription_id = self.pubsub_client.create_subscription(
            name='parsed_subscription',
            stream_ids=[self.stream_one_id, self.stream_two_id],
            exchange_name=subscription_objs[0].exchange_name)
        self.addCleanup(self.pubsub_client.delete_subscription,
                        subscription_id)

        self.pubsub_client.activate_subscription(subscription_id)
        self.addCleanup(self.pubsub_client.deactivate_subscription,
                        subscription_id)

        stream_route_one = self.pubsub_client.read_stream_route(
            self.stream_one_id)
        self.publisher_one = StandaloneStreamPublisher(
            stream_id=self.stream_one_id, stream_route=stream_route_one)

        #retrieve subscription from data process
        subscription_objs, _ = self.rrclient.find_objects(
            subject=second_dp_id,
            predicate=PRED.hasSubscription,
            object_type=RT.Subscription,
            id_only=False)
        log.debug('test_transform_worker subscription_obj:  %s',
                  subscription_objs[0])

        #data process 1 adds conductivity + pressure and puts the result in salinity
        #data process 2 adds salinity + pressure and puts the result in conductivity

        self.start_event_listener()

        rdt = RecordDictionaryTool(stream_definition_id=self.stream_def_id)
        rdt['time'] = [0]  # time should always come first
        rdt['conductivity'] = [1]
        rdt['pressure'] = [2]
        rdt['salinity'] = [8]

        self.publisher_one.publish(msg=rdt.to_granule(),
                                   stream_id=self.stream_one_id)

        self.assertTrue(self.event2_verified.wait(self.wait_time))
        self.assertTrue(self.event1_verified.wait(self.wait_time))

    def create_data_process_definition(self):

        #two data processes using one transform and one DPD

        # Set up DPD and DP #2 - array add function
        tf_obj = IonObject(
            RT.TransformFunction,
            name='add_array_func',
            description='adds values in an array',
            function='add_arrays',
            module="ion_example.add_arrays",
            arguments=['arr1', 'arr2'],
            function_type=TransformFunctionType.TRANSFORM,
            uri='http://sddevrepo.oceanobservatories.org/releases/ion_example-0.1-py2.7.egg')
        add_array_func_id, rev = self.rrclient.create(tf_obj)

        dpd_obj = IonObject(
            RT.DataProcessDefinition,
            name='add_arrays',
            description='adds the values of two arrays',
            data_process_type=DataProcessTypeEnum.TRANSFORM_PROCESS,
        )
        add_array_dpd_id = self.dataprocessclient.create_data_process_definition(
            data_process_definition=dpd_obj, function_id=add_array_func_id)
        self.dataprocessclient.assign_stream_definition_to_data_process_definition(
            self.stream_def_id, add_array_dpd_id, binding='add_array_func')

        return add_array_dpd_id

    def create_data_process_one(self, data_process_definition_id,
                                output_dataproduct):

        # Create the data process
        #data process 1 adds conductivity + pressure and puts the result in salinity
        argument_map = {"arr1": "conductivity", "arr2": "pressure"}
        output_param = "salinity"
        dp1_data_process_id = self.dataprocessclient.create_data_process(
            data_process_definition_id=data_process_definition_id,
            inputs=[self.input_dp_one_id],
            outputs=[output_dataproduct],
            argument_map=argument_map,
            out_param_name=output_param)
        self.damsclient.register_process(dp1_data_process_id)
        self.addCleanup(self.dataprocessclient.delete_data_process,
                        dp1_data_process_id)
        self.dp_list.append(dp1_data_process_id)

        return dp1_data_process_id

    def create_data_process_two(self, data_process_definition_id,
                                input_dataproduct, output_dataproduct):

        # Create the data process
        #data process 2 adds salinity + pressure and puts the result in conductivity
        argument_map = {'arr1': 'salinity', 'arr2': 'pressure'}
        output_param = 'conductivity'
        dp2_func_data_process_id = self.dataprocessclient.create_data_process(
            data_process_definition_id=data_process_definition_id,
            inputs=[input_dataproduct],
            outputs=[output_dataproduct],
            argument_map=argument_map,
            out_param_name=output_param)
        self.damsclient.register_process(dp2_func_data_process_id)
        self.addCleanup(self.dataprocessclient.delete_data_process,
                        dp2_func_data_process_id)
        self.dp_list.append(dp2_func_data_process_id)

        return dp2_func_data_process_id

    def create_output_data_products(self):

        dp1_outgoing_stream_id = self.pubsub_client.create_stream_definition(
            name='dp1_stream', parameter_dictionary_id=self.parameter_dict_id)

        dp1_output_dp_obj = IonObject(RT.DataProduct,
                                      name='data_process1_data_product',
                                      description='output of add array func')

        dp1_func_output_dp_id = self.dataproductclient.create_data_product(
            dp1_output_dp_obj, dp1_outgoing_stream_id)
        self.addCleanup(self.dataproductclient.delete_data_product,
                        dp1_func_output_dp_id)
        # Retrieve the id of the OUTPUT stream from the out Data Product and add to granule logger
        stream_ids, _ = self.rrclient.find_objects(dp1_func_output_dp_id,
                                                   PRED.hasStream, None, True)
        self._output_stream_one_id = stream_ids[0]

        dp2_func_outgoing_stream_id = self.pubsub_client.create_stream_definition(
            name='dp2_stream', parameter_dictionary_id=self.parameter_dict_id)

        dp2_func_output_dp_obj = IonObject(
            RT.DataProduct,
            name='data_process2_data_product',
            description='output of add array func')

        dp2_func_output_dp_id = self.dataproductclient.create_data_product(
            dp2_func_output_dp_obj, dp2_func_outgoing_stream_id)
        self.addCleanup(self.dataproductclient.delete_data_product,
                        dp2_func_output_dp_id)
        # Retrieve the id of the OUTPUT stream from the out Data Product and add to granule logger
        stream_ids, _ = self.rrclient.find_objects(dp2_func_output_dp_id,
                                                   PRED.hasStream, None, True)
        self._output_stream_two_id = stream_ids[0]

        subscription_id = self.pubsub_client.create_subscription(
            'validator',
            data_product_ids=[dp1_func_output_dp_id, dp2_func_output_dp_id])
        self.addCleanup(self.pubsub_client.delete_subscription,
                        subscription_id)

        def on_granule(msg, route, stream_id):
            log.debug('recv_packet stream_id: %s route: %s   msg: %s',
                      stream_id, route, msg)
            self.validate_output_granule(msg, route, stream_id)

        validator = StandaloneStreamSubscriber('validator',
                                               callback=on_granule)
        validator.start()
        self.addCleanup(validator.stop)

        self.pubsub_client.activate_subscription(subscription_id)
        self.addCleanup(self.pubsub_client.deactivate_subscription,
                        subscription_id)

        return dp1_func_output_dp_id, dp2_func_output_dp_id

    def validate_event(self, *args, **kwargs):
        """
        This method is a callback function for receiving DataProcessStatusEvent.
        """
        data_process_event = args[0]
        log.debug("DataProcessStatusEvent: %s",
                  str(data_process_event.__dict__))

        #if the data process has already been created, check the origin
        if 'data process assigned to transform worker' not in data_process_event.description:
            self.assertIn(data_process_event.origin, self.dp_list)

    def validate_output_granule(self, msg, route, stream_id):
        self.assertTrue(
            stream_id in
            [self._output_stream_one_id, self._output_stream_two_id])

        rdt = RecordDictionaryTool.load_from_granule(msg)
        log.debug('validate_output_granule  stream_id: %s', stream_id)

        if stream_id == self._output_stream_one_id:
            sal_val = rdt['salinity']
            log.debug('validate_output_granule  sal_val: %s', sal_val)
            np.testing.assert_array_equal(sal_val, np.array([3]))
            self.event1_verified.set()
        else:
            cond_val = rdt['conductivity']
            log.debug('validate_output_granule  cond_val: %s', cond_val)
            np.testing.assert_array_equal(cond_val, np.array([5]))
            self.event2_verified.set()

    def start_event_listener(self):

        es = EventSubscriber(event_type=OT.DataProcessStatusEvent,
                             callback=self.validate_event)
        es.start()

        self.addCleanup(es.stop)
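The add_arrays transform function registered in create_data_process_definition lives in an external egg (ion_example.add_arrays), so its body is not part of this listing. Given the comments ("adds conductivity + pressure and puts the result in salinity") and the validated outputs (1 + 2 -> 3 and 1 + 4 -> 5), it is presumably an element-wise sum, along these lines:

import numpy as np

# Presumed body of ion_example.add_arrays.add_arrays (not shown in this listing):
# element-wise sum of the two arrays named in argument_map.
def add_arrays(arr1, arr2):
    return np.asarray(arr1) + np.asarray(arr2)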
Example #47
    def test_stream_processing(self):
        #--------------------------------------------------------------------------------
        #Test that streams are processed by the transforms according to a provided algorithm
        #--------------------------------------------------------------------------------

        #todo: In this simple implementation we check that the stream message contains
        #todo(contd) the word PUBLISH and a VALUE=<number> token whose number is below a threshold

        #todo: later on we will use more complex algorithms to make this prototype more powerful

        #-------------------------------------------------------------------------------------
        # Start a subscriber to listen for an alert event from the Stream Alert Transform
        #-------------------------------------------------------------------------------------

        queue = gevent.queue.Queue()

        def event_received(message, headers):
            queue.put(message)

        event_subscriber = EventSubscriber(origin="StreamAlertTransform",
            event_type="DeviceEvent",
            callback=event_received)

        event_subscriber.start()
        self.addCleanup(event_subscriber.stop)

        #-------------------------------------------------------------------------------------
        # The configuration for the Stream Alert Transform... set up the event types to listen to
        #-------------------------------------------------------------------------------------
        config = {
            'process':{
                'queue_name': 'a_queue',
                'value': 10,
                'event_type':'DeviceEvent'
            }
        }

        #-------------------------------------------------------------------------------------
        # Create the process
        #-------------------------------------------------------------------------------------
        pid = TransformPrototypeIntTest.create_process(name='transform_data_process',
            module='ion.processes.data.transforms.event_alert_transform',
            class_name='StreamAlertTransform',
            configuration=config)
        self.addCleanup(self.process_dispatcher.cancel_process, pid)
        self.assertIsNotNone(pid)

        #-------------------------------------------------------------------------------------
        # Publish streams and make assertions about alerts
        #-------------------------------------------------------------------------------------
        exchange_name = 'a_queue'
        exchange_point = 'test_exchange'
        routing_key = 'stream_id.stream'
        stream_route = StreamRoute(exchange_point, routing_key)

        xn = self.container.ex_manager.create_xn_queue(exchange_name)
        xp = self.container.ex_manager.create_xp(exchange_point)
        xn.bind('stream_id.stream', xp)

        pub = StandaloneStreamPublisher('stream_id', stream_route)

        message = "A dummy example message containing the word PUBLISH, and with VALUE = 5 . This message" +\
                  " will trigger an alert event from the StreamAlertTransform because the value provided is "\
                  "less than 10 that was passed in through the config."

        pub.publish(message)

        event = queue.get(timeout=10)
        self.assertEquals(event.type_, "DeviceEvent")
        self.assertEquals(event.origin, "StreamAlertTransform")
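Per the todo comments at the top of this test, the alert predicate is simple: fire when the message contains the word PUBLISH and carries a VALUE=<number> below the configured threshold. A standalone sketch of that check (the function name and regex are assumptions, not the StreamAlertTransform source):

import re

# Assumed alert predicate mirroring the behavior the test describes: alert only
# if 'PUBLISH' appears and the VALUE = <n> token is below the threshold.
def should_alert(message, threshold=10):
    if 'PUBLISH' not in message:
        return False
    match = re.search(r'VALUE\s*=\s*(\d+)', message)
    return match is not None and int(match.group(1)) < threshold

The message published in the test contains both 'PUBLISH' and 'VALUE = 5', and 5 < 10, so the transform raises the DeviceEvent that the queue receives.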
Example #48
    def test_multi_subscriptions(self):
        self.dp_list = []
        self.event1_verified = Event()
        self.event2_verified = Event()

        self.parameter_dict_id = self.dataset_management_client.read_parameter_dictionary_by_name(
            name='ctd_parsed_param_dict', id_only=True)

        # create the StreamDefinition
        self.stream_def_id = self.pubsub_client.create_stream_definition(
            name='stream_def', parameter_dictionary_id=self.parameter_dict_id)
        self.addCleanup(self.pubsub_client.delete_stream_definition,
                        self.stream_def_id)

        # create the DataProduct
        input_dp_obj = IonObject(RT.DataProduct,
                                 name='input_data_product_one',
                                 description='input test stream one')
        self.input_dp_one_id = self.dataproductclient.create_data_product(
            data_product=input_dp_obj, stream_definition_id=self.stream_def_id)

        input_dp_obj = IonObject(RT.DataProduct,
                                 name='input_data_product_two',
                                 description='input test stream two')
        self.input_dp_two_id = self.dataproductclient.create_data_product(
            data_product=input_dp_obj, stream_definition_id=self.stream_def_id)

        #retrieve the Stream for each input data product
        stream_ids, assoc_ids = self.rrclient.find_objects(
            self.input_dp_one_id, PRED.hasStream, RT.Stream, True)
        self.stream_one_id = stream_ids[0]

        stream_ids, assoc_ids = self.rrclient.find_objects(
            self.input_dp_two_id, PRED.hasStream, RT.Stream, True)
        self.stream_two_id = stream_ids[0]

        dpd_id = self.create_data_process_definition()
        dp1_func_output_dp_id, dp2_func_output_dp_id = self.create_output_data_products()
        first_dp_id = self.create_data_process_one(dpd_id,
                                                   dp1_func_output_dp_id)

        second_dp_id = self.create_data_process_two(dpd_id,
                                                    self.input_dp_two_id,
                                                    dp2_func_output_dp_id)

        #retrieve subscription from data process
        subscription_objs, _ = self.rrclient.find_objects(
            subject=first_dp_id,
            predicate=PRED.hasSubscription,
            object_type=RT.Subscription,
            id_only=False)
        log.debug('test_transform_worker subscription_obj:  %s',
                  subscription_objs[0])

        #create subscription to stream ONE, create data process and publish granule on stream ONE

        #create a queue to catch the published granules of stream ONE
        self.subscription_one_id = self.pubsub_client.create_subscription(
            name='parsed_subscription_one',
            stream_ids=[self.stream_one_id],
            exchange_name=subscription_objs[0].exchange_name)
        self.addCleanup(self.pubsub_client.delete_subscription,
                        self.subscription_one_id)

        self.pubsub_client.activate_subscription(self.subscription_one_id)
        self.addCleanup(self.pubsub_client.deactivate_subscription,
                        self.subscription_one_id)

        stream_route_one = self.pubsub_client.read_stream_route(
            self.stream_one_id)
        self.publisher_one = StandaloneStreamPublisher(
            stream_id=self.stream_one_id, stream_route=stream_route_one)

        self.start_event_listener()

        #data process 1 adds conductivity + pressure and puts the result in salinity
        rdt = RecordDictionaryTool(stream_definition_id=self.stream_def_id)
        rdt['time'] = [0]  # time should always come first
        rdt['conductivity'] = [1]
        rdt['pressure'] = [2]
        rdt['salinity'] = [8]

        self.publisher_one.publish(msg=rdt.to_granule(),
                                   stream_id=self.stream_one_id)

        #retrieve subscription from data process
        subscription_objs, _ = self.rrclient.find_objects(
            subject=second_dp_id,
            predicate=PRED.hasSubscription,
            object_type=RT.Subscription,
            id_only=False)
        log.debug('test_transform_worker subscription_obj:  %s',
                  subscription_objs[0])

        #create subscription to streams ONE and TWO, move the TWO subscription, create data process and publish granule on stream TWO

        #create a queue to catch the published granules of stream TWO
        self.subscription_two_id = self.pubsub_client.create_subscription(
            name='parsed_subscription_one_two',
            stream_ids=[self.stream_two_id],
            exchange_name=subscription_objs[0].exchange_name)
        self.addCleanup(self.pubsub_client.delete_subscription,
                        self.subscription_two_id)

        self.pubsub_client.activate_subscription(self.subscription_two_id)
        self.addCleanup(self.pubsub_client.deactivate_subscription,
                        self.subscription_two_id)

        stream_route_two = self.pubsub_client.read_stream_route(
            self.stream_two_id)
        self.publisher_two = StandaloneStreamPublisher(
            stream_id=self.stream_two_id, stream_route=stream_route_two)

        #data process 1 adds conductivity + pressure and puts the result in salinity
        rdt = RecordDictionaryTool(stream_definition_id=self.stream_def_id)
        rdt['time'] = [0]  # time should always come first
        rdt['conductivity'] = [1]
        rdt['pressure'] = [2]
        rdt['salinity'] = [8]

        self.publisher_one.publish(msg=rdt.to_granule(),
                                   stream_id=self.stream_one_id)

        #data process 2 adds salinity + pressure and puts the result in conductivity
        rdt = RecordDictionaryTool(stream_definition_id=self.stream_def_id)
        rdt['time'] = [0]  # time should always come first
        rdt['conductivity'] = [22]
        rdt['pressure'] = [4]
        rdt['salinity'] = [1]

        self.publisher_two.publish(msg=rdt.to_granule(),
                                   stream_id=self.stream_two_id)

        self.assertTrue(self.event2_verified.wait(self.wait_time))
        self.assertTrue(self.event1_verified.wait(self.wait_time))
Example #49
    def test_ctdbp_L0_all(self):
        """
        Test packets processed by the ctdbp_L0_all transform
        """

        #----------- Data Process Definition --------------------------------

        dpd_obj = IonObject(
            RT.DataProcessDefinition,
            name='CTDBP_L0_all',
            description='Take parsed stream and put the C, T and P into three separate L0 streams.',
            module='ion.processes.data.transforms.ctdbp.ctdbp_L0',
            class_name='CTDBP_L0_all')

        dprocdef_id = self.data_process_management.create_data_process_definition(
            dpd_obj)
        self.addCleanup(
            self.data_process_management.delete_data_process_definition,
            dprocdef_id)

        log.debug("created data process definition: id = %s", dprocdef_id)

        #----------- Data Products --------------------------------

        # Construct temporal and spatial Coordinate Reference System objects
        tdom, sdom = time_series_domain()

        sdom = sdom.dump()
        tdom = tdom.dump()

        input_param_dict = self._create_input_param_dict_for_test(
            parameter_dict_name='fictitious_ctdp_param_dict')

        # Get the stream definition for the stream using the parameter dictionary
        #        input_param_dict = self.dataset_management.read_parameter_dictionary_by_name('ctdbp_cdef_sample', id_only=True)
        input_stream_def_dict = self.pubsub.create_stream_definition(
            name='parsed', parameter_dictionary_id=input_param_dict)
        self.addCleanup(self.pubsub.delete_stream_definition,
                        input_stream_def_dict)

        log.debug("Got the parsed parameter dictionary: id: %s",
                  input_param_dict)
        log.debug("Got the stream def for parsed input: %s",
                  input_stream_def_dict)

        # Input data product
        parsed_stream_dp_obj = IonObject(
            RT.DataProduct,
            name='parsed_stream',
            description='Parsed stream input to CTBP L0 transform',
            temporal_domain=tdom,
            spatial_domain=sdom)

        input_dp_id = self.dataproduct_management.create_data_product(
            data_product=parsed_stream_dp_obj,
            stream_definition_id=input_stream_def_dict)
        self.addCleanup(self.dataproduct_management.delete_data_product,
                        input_dp_id)

        # output data product
        L0_stream_dp_obj = IonObject(
            RT.DataProduct,
            name='L0_stream',
            description='L0_stream output of CTBP L0 transform',
            temporal_domain=tdom,
            spatial_domain=sdom)

        L0_stream_dp_id = self.dataproduct_management.create_data_product(
            data_product=L0_stream_dp_obj,
            stream_definition_id=input_stream_def_dict)
        self.addCleanup(self.dataproduct_management.delete_data_product,
                        L0_stream_dp_id)

        # The key name here must be "L0_stream": when the data process is launched,
        # this name goes into the config as config.process.publish_streams.L0_stream
        out_stream_ids, _ = self.resource_registry.find_objects(
            L0_stream_dp_id, PRED.hasStream, RT.Stream, True)
        self.assertTrue(len(out_stream_ids))
        output_stream_id = out_stream_ids[0]

        dproc_id = self.data_process_management.create_data_process(
            data_process_definition_id=dprocdef_id,
            in_data_product_ids=[input_dp_id],
            out_data_product_ids=[L0_stream_dp_id],
            configuration=None)

        self.addCleanup(self.data_process_management.delete_data_process,
                        dproc_id)

        log.debug("Created a data process for ctdbp_L0. id: %s", dproc_id)

        # Activate the data process
        self.data_process_management.activate_data_process(dproc_id)
        self.addCleanup(self.data_process_management.deactivate_data_process,
                        dproc_id)

        #----------- Find the stream that is associated with the input data product when it was created by create_data_product() --------------------------------

        stream_ids, _ = self.resource_registry.find_objects(
            input_dp_id, PRED.hasStream, RT.Stream, True)
        self.assertTrue(len(stream_ids))

        input_stream_id = stream_ids[0]
        stream_route = self.pubsub.read_stream_route(input_stream_id)

        log.debug("The input stream for the L0 transform: %s", input_stream_id)

        #----------- Create a subscriber that will listen to the transform's output --------------------------------

        ar = gevent.event.AsyncResult()

        def subscriber(m, r, s):
            ar.set(m)

        sub = StandaloneStreamSubscriber(exchange_name='sub',
                                         callback=subscriber)

        sub_id = self.pubsub.create_subscription('subscriber_to_transform',
                                                 stream_ids=[output_stream_id],
                                                 exchange_name='sub')
        self.addCleanup(self.pubsub.delete_subscription, sub_id)

        self.pubsub.activate_subscription(sub_id)
        self.addCleanup(self.pubsub.deactivate_subscription, sub_id)

        sub.start()
        self.addCleanup(sub.stop)

        #----------- Publish on that stream so that the transform can receive it --------------------------------

        pub = StandaloneStreamPublisher(input_stream_id, stream_route)
        publish_granule = self._get_new_ctd_packet(
            stream_definition_id=input_stream_def_dict, length=5)

        pub.publish(publish_granule)

        log.debug("Published the following granule: %s", publish_granule)

        granule_from_transform = ar.get(timeout=20)

        log.debug("Got the following granule from the transform: %s",
                  granule_from_transform)

        # Check that the granule published by the L0 transform has the right properties
        self._check_granule_from_transform(granule_from_transform)
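The AsyncResult used above is the standard gevent idiom in these tests for handing a single message from the subscriber greenlet back to the test greenlet. A minimal standalone demonstration:

import gevent
from gevent.event import AsyncResult

ar = AsyncResult()

def producer():
    # In the test, the subscriber callback plays this role via ar.set(m).
    ar.set('granule')

gevent.spawn(producer)
print ar.get(timeout=5)  # blocks until set() is called, then returns 'granule'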
Example #50
    def test_two_transforms_inline(self):
        self.dp_list = []
        self.event1_verified = Event()
        self.event2_verified = Event()

        self.parameter_dict_id = self.dataset_management_client.read_parameter_dictionary_by_name(
            name='ctd_parsed_param_dict', id_only=True)

        # create the StreamDefinition
        self.stream_def_id = self.pubsub_client.create_stream_definition(
            name='stream_def', parameter_dictionary_id=self.parameter_dict_id)
        self.addCleanup(self.pubsub_client.delete_stream_definition,
                        self.stream_def_id)

        # create the DataProduct
        input_dp_obj = IonObject(RT.DataProduct,
                                 name='input_data_product_one',
                                 description='input test stream one')
        self.input_dp_one_id = self.dataproductclient.create_data_product(
            data_product=input_dp_obj, stream_definition_id=self.stream_def_id)

        dpd_id = self.create_data_process_definition()
        dp1_func_output_dp_id, dp2_func_output_dp_id = self.create_output_data_products()

        first_dp_id = self.create_data_process_one(dpd_id,
                                                   dp1_func_output_dp_id)
        second_dp_id = self.create_data_process_two(dpd_id,
                                                    dp1_func_output_dp_id,
                                                    dp2_func_output_dp_id)

        #retrieve subscription from data process one
        subscription_objs, _ = self.rrclient.find_objects(
            subject=first_dp_id,
            predicate=PRED.hasSubscription,
            object_type=RT.Subscription,
            id_only=False)
        log.debug('test_transform_worker subscription_obj:  %s',
                  subscription_objs[0])

        #retrieve the Streams for these data products
        stream_ids, assoc_ids = self.rrclient.find_objects(
            self.input_dp_one_id, PRED.hasStream, RT.Stream, True)
        self.stream_one_id = stream_ids[0]
        #the input to data process two is the output from data process one
        stream_ids, assoc_ids = self.rrclient.find_objects(
            dp1_func_output_dp_id, PRED.hasStream, RT.Stream, True)
        self.stream_two_id = stream_ids[0]

        # Run provenance on the output dataproduct of the second data process to see all the links
        # are as expected
        output_data_product_provenance = self.dataproductclient.get_data_product_provenance(
            dp2_func_output_dp_id)

        # Do a basic check on the provenance graph: expect 3 entries, the
        # output product plus its two ancestors
        self.assertEquals(len(output_data_product_provenance), 3)
        # confirm that the linking from the output dataproduct to input dataproduct is correct
        self.assertTrue(
            dp1_func_output_dp_id in
            output_data_product_provenance[dp2_func_output_dp_id]['parents'])
        self.assertTrue(
            self.input_dp_one_id in
            output_data_product_provenance[dp1_func_output_dp_id]['parents'])

        #create subscription to stream ONE, create data process and publish granule on stream ONE

        #create a queue to catch the published granules of stream ONE
        subscription_id = self.pubsub_client.create_subscription(
            name='parsed_subscription',
            stream_ids=[self.stream_one_id, self.stream_two_id],
            exchange_name=subscription_objs[0].exchange_name)
        self.addCleanup(self.pubsub_client.delete_subscription,
                        subscription_id)

        self.pubsub_client.activate_subscription(subscription_id)
        self.addCleanup(self.pubsub_client.deactivate_subscription,
                        subscription_id)

        stream_route_one = self.pubsub_client.read_stream_route(
            self.stream_one_id)
        self.publisher_one = StandaloneStreamPublisher(
            stream_id=self.stream_one_id, stream_route=stream_route_one)

        #retrieve subscription from data process
        subscription_objs, _ = self.rrclient.find_objects(
            subject=second_dp_id,
            predicate=PRED.hasSubscription,
            object_type=RT.Subscription,
            id_only=False)
        log.debug('test_transform_worker subscription_obj:  %s',
                  subscription_objs[0])

        # data process 1 adds conductivity + pressure and puts the result in salinity
        # data process 2 adds salinity + pressure and puts the result in conductivity

        self.start_event_listener()

        rdt = RecordDictionaryTool(stream_definition_id=self.stream_def_id)
        rdt['time'] = [0]  # time should always come first
        rdt['conductivity'] = [1]
        rdt['pressure'] = [2]
        rdt['salinity'] = [8]

        self.publisher_one.publish(msg=rdt.to_granule(),
                                   stream_id=self.stream_one_id)

        self.assertTrue(self.event2_verified.wait(self.wait_time))
        self.assertTrue(self.event1_verified.wait(self.wait_time))
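
A minimal sketch of the arithmetic the two chained processes are expected to perform on the record published above, assuming add_arrays simply sums its two array arguments (an assumption consistent with the 'def add_arrays(a, b)' signature asserted elsewhere in this file); the np.array([3]) salinity check in validate_output_granule matches the first step:

import numpy as np

conductivity, pressure = np.array([1]), np.array([2])
salinity = conductivity + pressure       # data process 1 -> array([3])
conductivity2 = salinity + pressure      # data process 2 (analogous second hop)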
Example No. 53
0
    def execute_acquire_sample(self, *args):
        """
        Creates a copy of self._dh_config, creates a publisher, and spawns a greenlet to perform a data acquisition cycle
        If the args[0] is a dict, any entries keyed with one of the 'PATCHABLE_CONFIG_KEYS' are used to patch the config
        Greenlet binds to BaseDataHandler._acquire_sample and passes the publisher and config
        Disallows multiple "new data" (unconstrained) requests using BaseDataHandler._semaphore lock
        Called from:
                      InstrumentAgent._handler_observatory_execute_resource
                       |-->  ExternalDataAgent._handler_streaming_execute_resource

        @parameter args First argument can be a config dictionary
        @throws IndexError if first argument is not a dictionary
        @throws ConfigurationError if required members aren't present
        @retval New ResourceAgentState (COMMAND)
        """
        log.debug('Executing acquire_sample: args = {0}'.format(args))

        # Make a copy of the config to avoid cross-contamination between requests
        config = self._dh_config.copy()

        # Patch the config if mods are passed in
        try:
            config_mods = args[0]
            if not isinstance(config_mods, dict):
                raise IndexError()

            log.debug('Configuration modifications provided: {0}'.format(config_mods))
            for k in self._params['PATCHABLE_CONFIG_KEYS']:
                p = get_safe(config_mods, k)
                if p is not None:
                    config[k] = p

        except IndexError:
            log.info('No configuration modifications were provided')

        # Verify that there is a stream_id member in the config
        stream_id = get_safe(config, 'stream_id', None)
        if not stream_id:
            raise ConfigurationError('Configuration does not contain required \'stream_id\' member')
        stream_route = get_safe(config, 'stream_route', None)

        if not stream_route:
            raise ConfigurationError('Configuration does not contain required \'stream_route\' member')

        isNew = get_safe(config, 'constraints') is None

        if isNew and not self._semaphore.acquire(blocking=False):
            log.warn('Already acquiring new data - action not duplicated')
            return

        ndc = None
        if isNew:
            # Get the NewDataCheck attachment and add its content to the config
            ext_ds_id = get_safe(config, 'external_dataset_res_id')
            if ext_ds_id:
                ndc = self._find_new_data_check_attachment(ext_ds_id)

        config['new_data_check'] = ndc

        # Create a publisher to pass into the greenlet
        publisher = StandaloneStreamPublisher(stream_id=stream_id, stream_route=stream_route)

        # Spawn a greenlet to do the data acquisition and publishing
        g = spawn(self._acquire_sample, config, publisher, self._unlock_new_data_callback, self._update_new_data_check_attachment)
        log.debug('** Spawned {0}'.format(g))
        self._glet_queue.append(g)
        return ResourceAgentState.COMMAND, None
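
A hypothetical usage sketch of the config patching above; 'handler' stands in for a BaseDataHandler instance and the key names are illustrative (the real set comes from self._params['PATCHABLE_CONFIG_KEYS']):

# A dict passed as the first argument patches matching keys in the copied
# config before the acquisition greenlet is spawned.
config_mods = {'stream_id': 'some_stream_id', 'stream_route': 'some_route'}
handler.execute_acquire_sample(config_mods)

# With no argument the IndexError path is taken: a log message notes that no
# modifications were provided and the unpatched copy of self._dh_config is used.
handler.execute_acquire_sample()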
Example No. 54
0
class TestTransformWorker(IonIntegrationTestCase):
    def setUp(self):
        self._start_container()
        self.container.start_rel_from_url('res/deploy/r2deploy.yml')
        # Instantiate a process to represent the test
        process = TransformWorkerTestProcess()

        self.dataset_management_client = DatasetManagementServiceClient(
            node=self.container.node)
        self.pubsub_client = PubsubManagementServiceClient(
            node=self.container.node)
        self.dataproductclient = DataProductManagementServiceClient(
            node=self.container.node)
        self.dataprocessclient = DataProcessManagementServiceClient(
            node=self.container.node)
        self.processdispatchclient = ProcessDispatcherServiceClient(
            node=self.container.node)
        self.damsclient = DataAcquisitionManagementServiceClient(
            node=self.container.node)
        self.rrclient = ResourceRegistryServiceClient(node=self.container.node)
        self.imsclient = InstrumentManagementServiceProcessClient(
            node=self.container.node, process=process)

        self.time_dom, self.spatial_dom = time_series_domain()

        self.ph = ParameterHelper(self.dataset_management_client,
                                  self.addCleanup)

        self.wait_time = CFG.get_safe('endpoint.receive.timeout', 10)

    def push_granule(self, data_product_id):
        '''
        Publishes a granule and verifies that it arrived in the dataset
        '''
        datasets, _ = self.rrclient.find_objects(data_product_id,
                                                 PRED.hasDataset,
                                                 id_only=True)
        dataset_monitor = DatasetMonitor(datasets[0])

        rdt = self.ph.rdt_for_data_product(data_product_id)
        self.ph.fill_parsed_rdt(rdt)
        self.ph.publish_rdt_to_data_product(data_product_id, rdt)

        assert dataset_monitor.wait()
        dataset_monitor.stop()

    @attr('LOCOINT')
    @unittest.skipIf(os.getenv('CEI_LAUNCH_TEST', False),
                     'Skip test while in CEI LAUNCH mode')
    def test_transform_worker(self):

        # test that a data process (type: data-product-in / data-product-out) can be defined and launched.
        # verify that the output granule fields are correctly populated

        # test that the input and output data products are linked to facilitate provenance

        self.dp_list = []
        self.data_process_objs = []
        self._output_stream_ids = []
        self.granule_verified = Event()
        self.worker_assigned_event_verified = Event()
        self.dp_created_event_verified = Event()
        self.heartbeat_event_verified = Event()

        self.parameter_dict_id = self.dataset_management_client.read_parameter_dictionary_by_name(
            name='ctd_parsed_param_dict', id_only=True)

        # create the StreamDefinition
        self.stream_def_id = self.pubsub_client.create_stream_definition(
            name='stream_def', parameter_dictionary_id=self.parameter_dict_id)
        self.addCleanup(self.pubsub_client.delete_stream_definition,
                        self.stream_def_id)

        # create the DataProduct that is the input to the data processes
        input_dp_obj = IonObject(RT.DataProduct,
                                 name='input_data_product',
                                 description='input test stream')
        self.input_dp_id = self.dataproductclient.create_data_product(
            data_product=input_dp_obj, stream_definition_id=self.stream_def_id)

        # retrieve the Stream for this data product
        stream_ids, assoc_ids = self.rrclient.find_objects(
            self.input_dp_id, PRED.hasStream, RT.Stream, True)
        self.stream_id = stream_ids[0]

        self.start_event_listener()

        # create the DPD, DataProcess and output DataProduct
        dataprocessdef_id, dataprocess_id, dataproduct_id = self.create_data_process()
        self.dp_list.append(dataprocess_id)

        # validate that the repository for data product algorithms persists the new resources  NEW SA-1
        # the create_data_process call created one of each
        dpd_ids, _ = self.rrclient.find_resources(
            restype=OT.DataProcessDefinition, id_only=False)
        # there will be more than one because of the DPDs that represent the PFs in the data product above
        self.assertIsNotNone(dpd_ids)
        dp_ids, _ = self.rrclient.find_resources(restype=OT.DataProcess,
                                                 id_only=False)
        # only one DataProcess, because the PFs in the data product above are not activated yet
        self.assertEqual(len(dp_ids), 1)

        # validate the name and version label  NEW SA - 2
        dataprocessdef_obj = self.dataprocessclient.read_data_process_definition(
            dataprocessdef_id)
        self.assertEqual(dataprocessdef_obj.version_label, '1.0a')
        self.assertEqual(dataprocessdef_obj.name, 'add_arrays')

        # validate that the DPD has an attachment  NEW SA - 21
        attachment_ids, assoc_ids = self.rrclient.find_objects(
            dataprocessdef_id, PRED.hasAttachment, RT.Attachment, True)
        self.assertEqual(len(attachment_ids), 1)
        attachment_obj = self.rrclient.read_attachment(attachment_ids[0])
        log.debug('attachment: %s', attachment_obj)

        # validate that the data process resource has input and output data products associated
        # L4-CI-SA-RQ-364  and NEW SA-3
        outproduct_ids, assoc_ids = self.rrclient.find_objects(
            dataprocess_id, PRED.hasOutputProduct, RT.DataProduct, True)
        self.assertEqual(len(outproduct_ids), 1)
        inproduct_ids, assoc_ids = self.rrclient.find_objects(
            dataprocess_id, PRED.hasInputProduct, RT.DataProduct, True)
        self.assertEqual(len(inproduct_ids), 1)

        # Test for provenance. Get Data product produced by the data processes
        output_data_product_id, _ = self.rrclient.find_objects(
            subject=dataprocess_id,
            object_type=RT.DataProduct,
            predicate=PRED.hasOutputProduct,
            id_only=True)

        output_data_product_provenance = self.dataproductclient.get_data_product_provenance(
            output_data_product_id[0])

        # Do a basic check that there are 2 entries in the provenance graph: the parent and the child
        self.assertEqual(len(output_data_product_provenance), 2)
        self.assertIn(self.input_dp_id,
                      output_data_product_provenance[output_data_product_id[0]]['parents'])
        self.assertEqual(
            output_data_product_provenance[output_data_product_id[0]]
            ['parents'][self.input_dp_id]['data_process_definition_id'],
            dataprocessdef_id)

        # NEW SA - 4 | Data processing shall include the appropriate data product algorithm name and version number in
        # the metadata of each output data product created by the data product algorithm.
        output_data_product_obj, _ = self.rrclient.find_objects(
            subject=dataprocess_id,
            object_type=RT.DataProduct,
            predicate=PRED.hasOutputProduct,
            id_only=False)
        self.assertIsNotNone(output_data_product_obj[0].name)
        self.assertIsNotNone(output_data_product_obj[0]._rev)

        # retrieve subscription from data process
        subscription_objs, _ = self.rrclient.find_objects(
            subject=dataprocess_id,
            predicate=PRED.hasSubscription,
            object_type=RT.Subscription,
            id_only=False)
        log.debug('test_transform_worker subscription_obj:  %s',
                  subscription_objs[0])

        # create a queue to catch the published granules
        self.subscription_id = self.pubsub_client.create_subscription(
            name='parsed_subscription',
            stream_ids=[self.stream_id],
            exchange_name=subscription_objs[0].exchange_name)
        self.addCleanup(self.pubsub_client.delete_subscription,
                        self.subscription_id)

        self.pubsub_client.activate_subscription(self.subscription_id)
        self.addCleanup(self.pubsub_client.deactivate_subscription,
                        self.subscription_id)

        stream_route = self.pubsub_client.read_stream_route(self.stream_id)
        self.publisher = StandaloneStreamPublisher(stream_id=self.stream_id,
                                                   stream_route=stream_route)

        for n in range(1, 101):
            rdt = RecordDictionaryTool(stream_definition_id=self.stream_def_id)
            rdt['time'] = [0]  # time should always come first
            rdt['conductivity'] = [1]
            rdt['pressure'] = [2]
            rdt['salinity'] = [8]

            self.publisher.publish(rdt.to_granule())

        # validate that the output granule is received and the updated value is correct
        self.assertTrue(self.granule_verified.wait(self.wait_time))

        # validate that the data process loaded into worker event is received    (L4-CI-SA-RQ-182)
        self.assertTrue(
            self.worker_assigned_event_verified.wait(self.wait_time))

        # validate that the data process create (with data product ids) event is received    (NEW SA -42)
        self.assertTrue(self.dp_created_event_verified.wait(self.wait_time))

        # validate that a data process heartbeat event is received for every hundred granules processed (L4-CI-SA-RQ-182)
        # this can take a while, so use a large wait limit
        self.assertTrue(self.heartbeat_event_verified.wait(200))

        # validate that the code of the transform function can be retrieved via inspect_data_process_definition
        src = self.dataprocessclient.inspect_data_process_definition(
            dataprocessdef_id)
        self.assertIn('def add_arrays(a, b)', src)

        # now delete the DPD and DP, then verify that the resources are retired so that the information required for provenance is still available
        self.dataprocessclient.delete_data_process(dataprocess_id)
        self.dataprocessclient.delete_data_process_definition(
            dataprocessdef_id)

        in_dp_objs, _ = self.rrclient.find_objects(
            subject=dataprocess_id,
            predicate=PRED.hasInputProduct,
            object_type=RT.DataProduct,
            id_only=True)
        self.assertIsNotNone(in_dp_objs)

        dpd_objs, _ = self.rrclient.find_subjects(
            subject_type=RT.DataProcessDefinition,
            predicate=PRED.hasDataProcess,
            object=dataprocess_id,
            id_only=True)
        self.assertIsNotNone(dpd_objs)

    @attr('LOCOINT')
    @unittest.skipIf(os.getenv('CEI_LAUNCH_TEST', False),
                     'Skip test while in CEI LAUNCH mode')
    def test_transform_worker_with_instrumentdevice(self):

        # test that a data process (type: data-product-in / data-product-out) can be defined and launched.
        # verify that the output granule fields are correctly populated

        # test that the input and output data products are linked to facilitate provenance

        self.data_process_objs = []
        self._output_stream_ids = []
        self.event_verified = Event()

        # Create CTD Parsed as the initial data product
        # create a stream definition for the data from the ctd simulator
        self.parameter_dict_id = self.dataset_management_client.read_parameter_dictionary_by_name(
            'ctd_parsed_param_dict', id_only=True)
        self.stream_def_id = self.pubsub_client.create_stream_definition(
            name='stream_def', parameter_dictionary_id=self.parameter_dict_id)

        # create the DataProduct that is the input to the data processes
        input_dp_obj = IonObject(RT.DataProduct,
                                 name='input_data_product',
                                 description='input test stream')
        self.input_dp_id = self.dataproductclient.create_data_product(
            data_product=input_dp_obj, stream_definition_id=self.stream_def_id)

        # retrieve the Stream for this data product
        stream_ids, assoc_ids = self.rrclient.find_objects(
            self.input_dp_id, PRED.hasStream, RT.Stream, True)
        self.stream_id = stream_ids[0]

        log.debug('new ctd_parsed_data_product_id = %s', self.input_dp_id)

        # only ever need one device for testing purposes.
        instDevice_obj, _ = self.rrclient.find_resources(
            restype=RT.InstrumentDevice, name='test_ctd_device')
        if instDevice_obj:
            instDevice_id = instDevice_obj[0]._id
        else:
            instDevice_obj = IonObject(RT.InstrumentDevice,
                                       name='test_ctd_device',
                                       description="test_ctd_device",
                                       serial_number="12345")
            instDevice_id = self.imsclient.create_instrument_device(
                instrument_device=instDevice_obj)

        self.damsclient.assign_data_product(input_resource_id=instDevice_id,
                                            data_product_id=self.input_dp_id)

        # create the DPD, DataProcess and output DataProduct
        dataprocessdef_id, dataprocess_id, dataproduct_id = self.create_data_process()

        self.addCleanup(self.dataprocessclient.delete_data_process,
                        dataprocess_id)
        self.addCleanup(self.dataprocessclient.delete_data_process_definition,
                        dataprocessdef_id)

        # Test for provenance. Get Data product produced by the data processes
        output_data_product_id, _ = self.rrclient.find_objects(
            subject=dataprocess_id,
            object_type=RT.DataProduct,
            predicate=PRED.hasOutputProduct,
            id_only=True)

        output_data_product_provenance = self.dataproductclient.get_data_product_provenance(
            output_data_product_id[0])

        # Do a basic check that there are 3 entries in the provenance graph: the child, the parent,
        # and the instrument device that produced the parent
        self.assertEqual(len(output_data_product_provenance), 3)
        self.assertIn(self.input_dp_id,
                      output_data_product_provenance[output_data_product_id[0]]['parents'])
        self.assertIn(instDevice_id,
                      output_data_product_provenance[self.input_dp_id]['parents'])
        self.assertEqual(
            output_data_product_provenance[instDevice_id]['type'],
            'InstrumentDevice')

    @attr('LOCOINT')
    @unittest.skipIf(os.getenv('CEI_LAUNCH_TEST', False),
                     'Skip test while in CEI LAUNCH mode')
    def test_transform_worker_with_platformdevice(self):

        # test that a data process (type: data-product-in / data-product-out) can be defined and launched.
        # verify that the output granule fields are correctly populated

        # test that the input and output data products are linked to facilitate provenance

        self.data_process_objs = []
        self._output_stream_ids = []
        self.event_verified = Event()

        # Create CTD Parsed as the initial data product
        # create a stream definition for the data from the ctd simulator
        self.parameter_dict_id = self.dataset_management_client.read_parameter_dictionary_by_name(
            'ctd_parsed_param_dict', id_only=True)
        self.stream_def_id = self.pubsub_client.create_stream_definition(
            name='stream_def', parameter_dictionary_id=self.parameter_dict_id)

        # create the DataProduct that is the input to the data processes
        input_dp_obj = IonObject(RT.DataProduct,
                                 name='input_data_product',
                                 description='input test stream')
        self.input_dp_id = self.dataproductclient.create_data_product(
            data_product=input_dp_obj, stream_definition_id=self.stream_def_id)

        # retrieve the Stream for this data product
        stream_ids, assoc_ids = self.rrclient.find_objects(
            self.input_dp_id, PRED.hasStream, RT.Stream, True)
        self.stream_id = stream_ids[0]

        log.debug('new ctd_parsed_data_product_id = %s', self.input_dp_id)

        # only ever need one device for testing purposes.
        platform_device_obj, _ = self.rrclient.find_resources(
            restype=RT.PlatformDevice, name='TestPlatform')
        if platform_device_obj:
            platform_device_id = platform_device_obj[0]._id
        else:
            platform_device_obj = IonObject(RT.PlatformDevice,
                                            name='TestPlatform',
                                            description="TestPlatform",
                                            serial_number="12345")
            platform_device_id = self.imsclient.create_platform_device(
                platform_device=platform_device_obj)

        self.damsclient.assign_data_product(
            input_resource_id=platform_device_id,
            data_product_id=self.input_dp_id)

        # create the DPD, DataProcess and output DataProduct
        dataprocessdef_id, dataprocess_id, dataproduct_id = self.create_data_process()
        self.addCleanup(self.dataprocessclient.delete_data_process,
                        dataprocess_id)
        self.addCleanup(self.dataprocessclient.delete_data_process_definition,
                        dataprocessdef_id)

        # Test for provenance. Get Data product produced by the data processes
        output_data_product_id, _ = self.rrclient.find_objects(
            subject=dataprocess_id,
            object_type=RT.DataProduct,
            predicate=PRED.hasOutputProduct,
            id_only=True)

        output_data_product_provenance = self.dataproductclient.get_data_product_provenance(
            output_data_product_id[0])

        # Do a basic check that there are 3 entries in the provenance graph: the child, the parent,
        # and the platform device that produced the parent
        self.assertEqual(len(output_data_product_provenance), 3)
        self.assertIn(self.input_dp_id,
                      output_data_product_provenance[output_data_product_id[0]]['parents'])
        self.assertIn(platform_device_id,
                      output_data_product_provenance[self.input_dp_id]['parents'])
        self.assertEqual(
            output_data_product_provenance[platform_device_id]['type'],
            'PlatformDevice')

    @attr('LOCOINT')
    @unittest.skipIf(os.getenv('CEI_LAUNCH_TEST', False),
                     'Skip test while in CEI LAUNCH mode')
    def test_event_transform_worker(self):
        self.data_process_objs = []
        self._output_stream_ids = []
        self.event_verified = Event()

        # test that a data process (type: data-product-in / event-out) can be defined and launched.
        # verify that event fields are correctly populated

        self.parameter_dict_id = self.dataset_management_client.read_parameter_dictionary_by_name(
            name='ctd_parsed_param_dict', id_only=True)

        # create the StreamDefinition
        self.stream_def_id = self.pubsub_client.create_stream_definition(
            name='stream_def', parameter_dictionary_id=self.parameter_dict_id)
        self.addCleanup(self.pubsub_client.delete_stream_definition,
                        self.stream_def_id)

        # create the DataProduct
        input_dp_obj = IonObject(RT.DataProduct,
                                 name='input_data_product',
                                 description='input test stream')
        self.input_dp_id = self.dataproductclient.create_data_product(
            data_product=input_dp_obj, stream_definition_id=self.stream_def_id)

        # retrieve the Stream for this data product
        stream_ids, assoc_ids = self.rrclient.find_objects(
            self.input_dp_id, PRED.hasStream, RT.Stream, True)
        self.stream_id = stream_ids[0]

        # create the DPD and the DP
        self.event_data_process_id = self.create_event_data_processes()

        # retrieve subscription from data process
        subscription_objs, _ = self.rrclient.find_objects(
            subject=self.event_data_process_id,
            predicate=PRED.hasSubscription,
            object_type=RT.Subscription,
            id_only=False)
        log.debug('test_event_transform_worker subscription_obj:  %s',
                  subscription_objs[0])

        # create a queue to catch the published granules
        self.subscription_id = self.pubsub_client.create_subscription(
            name='parsed_subscription',
            stream_ids=[self.stream_id],
            exchange_name=subscription_objs[0].exchange_name)
        self.addCleanup(self.pubsub_client.delete_subscription,
                        self.subscription_id)

        self.pubsub_client.activate_subscription(self.subscription_id)
        self.addCleanup(self.pubsub_client.deactivate_subscription,
                        self.subscription_id)

        stream_route = self.pubsub_client.read_stream_route(self.stream_id)
        self.publisher = StandaloneStreamPublisher(stream_id=self.stream_id,
                                                   stream_route=stream_route)

        self.start_event_transform_listener()

        self.data_modified = Event()

        rdt = RecordDictionaryTool(stream_definition_id=self.stream_def_id)
        rdt['time'] = [0]  # time should always come first
        rdt['conductivity'] = [1]
        rdt['pressure'] = [2]
        rdt['salinity'] = [8]

        self.publisher.publish(rdt.to_granule())

        self.assertTrue(self.event_verified.wait(self.wait_time))

    @attr('LOCOINT')
    @unittest.skipIf(os.getenv('CEI_LAUNCH_TEST', False),
                     'Skip test while in CEI LAUNCH mode')
    def test_bad_argument_map(self):
        self._output_stream_ids = []

        # test that a data process (type: data-product-in / data-product-out) parameter mapping is validated during
        # data process creation, and that the correct exception is raised for both invalid input and output mappings

        self.parameter_dict_id = self.dataset_management_client.read_parameter_dictionary_by_name(
            name='ctd_parsed_param_dict', id_only=True)

        # create the StreamDefinition
        self.stream_def_id = self.pubsub_client.create_stream_definition(
            name='stream_def', parameter_dictionary_id=self.parameter_dict_id)
        self.addCleanup(self.pubsub_client.delete_stream_definition,
                        self.stream_def_id)

        # create the DataProduct that is the input to the data processes
        input_dp_obj = IonObject(RT.DataProduct,
                                 name='input_data_product',
                                 description='input test stream')
        self.input_dp_id = self.dataproductclient.create_data_product(
            data_product=input_dp_obj, stream_definition_id=self.stream_def_id)

        # two data processes using one transform and one DPD

        dp1_func_output_dp_id = self.create_output_data_product()

        # set up the DPD and DP - array add function
        tf_obj = IonObject(
            RT.TransformFunction,
            name='add_array_func',
            description='adds values in an array',
            function='add_arrays',
            module="ion_example.add_arrays",
            arguments=['arr1', 'arr2'],
            function_type=TransformFunctionType.TRANSFORM,
            uri='http://sddevrepo.oceanobservatories.org/releases/ion_example-0.1-py2.7.egg')
        add_array_func_id, rev = self.rrclient.create(tf_obj)

        dpd_obj = IonObject(
            RT.DataProcessDefinition,
            name='add_arrays',
            description='adds the values of two arrays',
            data_process_type=DataProcessTypeEnum.TRANSFORM_PROCESS)
        add_array_dpd_id = self.dataprocessclient.create_data_process_definition(
            data_process_definition=dpd_obj, function_id=add_array_func_id)
        self.dataprocessclient.assign_stream_definition_to_data_process_definition(
            self.stream_def_id, add_array_dpd_id, binding='add_array_func')

        # create the data process with invalid argument map
        argument_map = {"arr1": "foo", "arr2": "bar"}
        output_param = "salinity"
        with self.assertRaises(BadRequest) as cm:
            dp1_data_process_id = self.dataprocessclient.create_data_process(
                data_process_definition_id=add_array_dpd_id,
                inputs=[self.input_dp_id],
                outputs=[dp1_func_output_dp_id],
                argument_map=argument_map,
                out_param_name=output_param)

        ex = cm.exception
        log.debug('exception raised: %s', ex)
        self.assertEqual(
            ex.message,
            "Input data product does not contain the parameters defined in argument map"
        )

        # create the data process with invalid output parameter name
        argument_map = {"arr1": "conductivity", "arr2": "pressure"}
        output_param = "foo"
        with self.assertRaises(BadRequest) as cm:
            dp1_data_process_id = self.dataprocessclient.create_data_process(
                data_process_definition_id=add_array_dpd_id,
                inputs=[self.input_dp_id],
                outputs=[dp1_func_output_dp_id],
                argument_map=argument_map,
                out_param_name=output_param)

        ex = cm.exception
        log.debug('exception raised: %s', ex)
        self.assertEqual(
            ex.message,
            "Output data product does not contain the output parameter name provided"
        )

    def create_event_data_processes(self):

        # one data process using one transform function and one DPD
        argument_map = {"a": "salinity"}

        # set up the DPD and DP - salinity validation function
        tf_obj = IonObject(
            RT.TransformFunction,
            name='validate_salinity_array',
            description='validate_salinity_array',
            function='validate_salinity_array',
            module="ion.processes.data.transforms.test.test_transform_worker",
            arguments=['a'],
            function_type=TransformFunctionType.TRANSFORM)

        add_array_func_id, rev = self.rrclient.create(tf_obj)

        dpd_obj = IonObject(
            RT.DataProcessDefinition,
            name='validate_salinity_array',
            description='validate_salinity_array',
            data_process_type=DataProcessTypeEnum.TRANSFORM_PROCESS,
        )
        add_array_dpd_id = self.dataprocessclient.create_data_process_definition(
            data_process_definition=dpd_obj, function_id=add_array_func_id)
        self.dataprocessclient.assign_stream_definition_to_data_process_definition(
            self.stream_def_id,
            add_array_dpd_id,
            binding='validate_salinity_array')

        # create the data process
        dp1_data_process_id = self.dataprocessclient.create_data_process(
            data_process_definition_id=add_array_dpd_id,
            inputs=[self.input_dp_id],
            outputs=None,
            argument_map=argument_map)
        self.damsclient.register_process(dp1_data_process_id)
        self.addCleanup(self.dataprocessclient.delete_data_process,
                        dp1_data_process_id)

        return dp1_data_process_id
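
    # Given the assertions in validate_transform_event below (the event origin
    # equals the stream id and the event values carry this data process id), the
    # validate_salinity_array transform presumably publishes a
    # DeviceStatusAlertEvent roughly as in this hypothetical sketch (the real
    # function lives in ion.processes.data.transforms.test.test_transform_worker):
    #
    #     from pyon.event.event import EventPublisher
    #     EventPublisher(event_type=OT.DeviceStatusAlertEvent).publish_event(
    #         origin=stream_id, values=[data_process_id])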

    def create_data_process(self):

        # one data process using one transform function and one DPD

        dp1_func_output_dp_id = self.create_output_data_product()
        argument_map = {"arr1": "conductivity", "arr2": "pressure"}
        output_param = "salinity"

        # set up the DPD and DP - array add function
        tf_obj = IonObject(
            RT.TransformFunction,
            name='add_array_func',
            description='adds values in an array',
            function='add_arrays',
            module="ion_example.add_arrays",
            arguments=['arr1', 'arr2'],
            function_type=TransformFunctionType.TRANSFORM,
            uri='http://sddevrepo.oceanobservatories.org/releases/ion_example-0.1-py2.7.egg')
        add_array_func_id, rev = self.rrclient.create(tf_obj)

        dpd_obj = IonObject(
            RT.DataProcessDefinition,
            name='add_arrays',
            description='adds the values of two arrays',
            data_process_type=DataProcessTypeEnum.TRANSFORM_PROCESS,
            version_label='1.0a')
        add_array_dpd_id = self.dataprocessclient.create_data_process_definition(
            data_process_definition=dpd_obj, function_id=add_array_func_id)
        self.dataprocessclient.assign_stream_definition_to_data_process_definition(
            self.stream_def_id, add_array_dpd_id, binding='add_array_func')

        # create the data process
        dp1_data_process_id = self.dataprocessclient.create_data_process(
            data_process_definition_id=add_array_dpd_id,
            inputs=[self.input_dp_id],
            outputs=[dp1_func_output_dp_id],
            argument_map=argument_map,
            out_param_name=output_param)
        self.damsclient.register_process(dp1_data_process_id)
        #self.addCleanup(self.dataprocessclient.delete_data_process, dp1_data_process_id)

        # add an attachment object to this DPD to test new SA-21
        import msgpack
        attachment_content = 'foo bar'
        attachment_obj = IonObject(RT.Attachment,
                                   name='test_attachment',
                                   attachment_type=AttachmentType.ASCII,
                                   content_type='text/plain',
                                   content=msgpack.packb(attachment_content))
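        # Note: the content is msgpack-encoded; a reader would presumably
        # recover the original string with msgpack.unpackb(attachment_obj.content).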
        att_id = self.rrclient.create_attachment(add_array_dpd_id,
                                                 attachment_obj)
        self.addCleanup(self.rrclient.delete_attachment, att_id)

        return add_array_dpd_id, dp1_data_process_id, dp1_func_output_dp_id

    def create_output_data_product(self):
        dp1_outgoing_stream_id = self.pubsub_client.create_stream_definition(
            name='dp1_stream', parameter_dictionary_id=self.parameter_dict_id)

        dp1_output_dp_obj = IonObject(RT.DataProduct,
                                      name='data_process1_data_product',
                                      description='output of add array func')

        dp1_func_output_dp_id = self.dataproductclient.create_data_product(
            dp1_output_dp_obj, dp1_outgoing_stream_id)
        self.addCleanup(self.dataproductclient.delete_data_product,
                        dp1_func_output_dp_id)
        # retrieve the id of the OUTPUT stream of the output Data Product and track it for validation
        stream_ids, _ = self.rrclient.find_objects(dp1_func_output_dp_id,
                                                   PRED.hasStream, None, True)
        self._output_stream_ids.append(stream_ids[0])

        subscription_id = self.pubsub_client.create_subscription(
            'validator', data_product_ids=[dp1_func_output_dp_id])
        self.addCleanup(self.pubsub_client.delete_subscription,
                        subscription_id)

        def on_granule(msg, route, stream_id):
            log.debug('recv_packet stream_id: %s route: %s   msg: %s',
                      stream_id, route, msg)
            self.validate_output_granule(msg, route, stream_id)
            self.granule_verified.set()

        validator = StandaloneStreamSubscriber('validator',
                                               callback=on_granule)
        validator.start()
        self.addCleanup(validator.stop)

        self.pubsub_client.activate_subscription(subscription_id)
        self.addCleanup(self.pubsub_client.deactivate_subscription,
                        subscription_id)

        return dp1_func_output_dp_id

    def validate_event(self, *args, **kwargs):
        """
        This method is a callback function for receiving DataProcessStatusEvent.
        """
        data_process_event = args[0]
        log.debug("DataProcessStatusEvent: %s",
                  str(data_process_event.__dict__))

        # if data process already created, check origin
        if self.dp_list:
            self.assertIn(data_process_event.origin, self.dp_list)

            # if this is a heartbeat event then 100 granules have been processed
            if 'data process status update.' in data_process_event.description:
                self.heartbeat_event_verified.set()

        else:
            # otherwise check for the worker-assignment or creation event

            if 'Data process assigned to transform worker' in data_process_event.description:
                self.worker_assigned_event_verified.set()
            elif 'Data process created for data product' in data_process_event.description:
                self.dp_created_event_verified.set()

    def validate_output_granule(self, msg, route, stream_id):
        self.assertIn(stream_id, self._output_stream_ids)

        rdt = RecordDictionaryTool.load_from_granule(msg)
        log.debug('validate_output_granule  rdt: %s', rdt)
        sal_val = rdt['salinity']
        np.testing.assert_array_equal(sal_val, np.array([3]))

    def start_event_listener(self):

        es = EventSubscriber(event_type=OT.DataProcessStatusEvent,
                             callback=self.validate_event)
        es.start()

        self.addCleanup(es.stop)

    def validate_transform_event(self, *args, **kwargs):
        """
        This method is a callback function for receiving DeviceStatusAlertEvent.
        """
        status_alert_event = args[0]

        np.testing.assert_array_equal(status_alert_event.origin,
                                      self.stream_id)
        np.testing.assert_array_equal(status_alert_event.values,
                                      np.array([self.event_data_process_id]))
        log.debug("DeviceStatusAlertEvent: %s",
                  str(status_alert_event.__dict__))
        self.event_verified.set()

    def start_event_transform_listener(self):
        es = EventSubscriber(event_type=OT.DeviceStatusAlertEvent,
                             callback=self.validate_transform_event)
        es.start()

        self.addCleanup(es.stop)

    def test_download(self):
        egg_url = 'http://sddevrepo.oceanobservatories.org/releases/ion_example-0.1-py2.7.egg'
        egg_path = TransformWorker.download_egg(egg_url)

        import pkg_resources
        pkg_resources.working_set.add_entry(egg_path)

        from ion_example.add_arrays import add_arrays

        a = add_arrays(1, 2)
        self.assertEqual(a, 3)
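
For context, a minimal sketch of what the add_arrays function inside the downloaded egg presumably looks like, given the 'def add_arrays(a, b)' signature asserted via inspect_data_process_definition and the add_arrays(1, 2) == 3 check above:

def add_arrays(a, b):
    """Element-wise sum of two array-like (or scalar) inputs."""
    return a + b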
Example No. 55
0
    def test_ctd_L2_salinity(self):
        '''
        Test that packets are processed by the ctd_L2_salinity transform
        '''

        #---------------------------------------------------------------------------------------------
        # Launch a ctd transform
        #---------------------------------------------------------------------------------------------
        # Create the process definition
        process_definition = ProcessDefinition(
            name='SalinityTransform',
            description='For testing SalinityTransform')
        process_definition.executable['module'] = 'ion.processes.data.transforms.ctd.ctd_L2_salinity'
        process_definition.executable['class'] = 'SalinityTransform'
        ctd_transform_proc_def_id = self.process_dispatcher.create_process_definition(process_definition=process_definition)

        # Build the config
        config = DotDict()
        config.process.queue_name = self.exchange_name
        config.process.exchange_point = self.exchange_point

        pdict_id = self.dataset_management.read_parameter_dictionary_by_name('ctd_parsed_param_dict', id_only=True)
        stream_def_id = self.pubsub.create_stream_definition('sal_stream_def', parameter_dictionary_id=pdict_id)
        sal_stream_id, _ = self.pubsub.create_stream('test_salinity', stream_definition_id=stream_def_id,
            exchange_point='science_data')

        config.process.publish_streams.salinity = sal_stream_id

        # Schedule the process
        self.process_dispatcher.schedule_process(process_definition_id=ctd_transform_proc_def_id, configuration=config)

        #---------------------------------------------------------------------------------------------
        # Create a subscriber that will receive the salinity granule from the ctd transform
        #---------------------------------------------------------------------------------------------

        ar_sal = gevent.event.AsyncResult()
        def subscriber3(m, r, s):
            ar_sal.set(m)
        sub_sal = StandaloneStreamSubscriber('sub_sal', subscriber3)
        self.addCleanup(sub_sal.stop)

        sub_sal_id = self.pubsub.create_subscription('subscription_sal',
            stream_ids=[sal_stream_id],
            exchange_name='sub_sal')

        self.pubsub.activate_subscription(sub_sal_id)

        self.queue_cleanup.append(sub_sal.xn.queue)

        sub_sal.start()

        #------------------------------------------------------------------------------------------------------
        # Use a StandaloneStreamPublisher to publish a packet that can be then picked up by a ctd transform
        #------------------------------------------------------------------------------------------------------

        # Do all the routing stuff for the publishing
        routing_key = 'stream_id.stream'
        stream_route = StreamRoute(self.exchange_point, routing_key)

        xn = self.container.ex_manager.create_xn_queue(self.exchange_name)
        xp = self.container.ex_manager.create_xp(self.exchange_point)
        xn.bind('stream_id.stream', xp)

        pub = StandaloneStreamPublisher('stream_id', stream_route)

        # Build a packet that can be published
        self.px_ctd = SimpleCtdPublisher()
        publish_granule = self._get_new_ctd_packet(stream_definition_id=stream_def_id, length=5)

        # Publish the packet
        pub.publish(publish_granule)

        #------------------------------------------------------------------------------------------------------
        # Make assertions about whether the ctd transform executed its algorithm and published the correct
        # granules
        #------------------------------------------------------------------------------------------------------

        # Get the granule that is published by the ctd transform post processing
        result = ar_sal.get(timeout=10)
        self.assertTrue(isinstance(result, Granule))

        rdt = RecordDictionaryTool.load_from_granule(result)
        self.assertIn('salinity', rdt)

        self.check_salinity_algorithm_execution(publish_granule, result)
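
The SalinityTransform under test lives in ion.processes.data.transforms.ctd.ctd_L2_salinity; as a rough standalone sketch of the kind of computation it performs, the TEOS-10 gsw toolbox (an assumed stand-in, not the transform's actual implementation) derives practical salinity from the CTD triplet:

import gsw  # assumed stand-in for the transform's own algorithm

def practical_salinity(conductivity, temperature, pressure):
    # gsw.SP_from_C expects conductivity in mS/cm, temperature in deg C and
    # sea pressure in dbar; it returns practical salinity on the PSS-78 scale.
    return gsw.SP_from_C(conductivity, temperature, pressure)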
Example No. 56
0
    def test_sparse_values(self):
        ph = ParameterHelper(self.dataset_management, self.addCleanup)
        pdict_id = ph.create_sparse()
        stream_def_id = self.pubsub_management.create_stream_definition(
            'sparse', parameter_dictionary_id=pdict_id)
        self.addCleanup(self.pubsub_management.delete_stream_definition,
                        stream_def_id)
        stream_id, route = self.pubsub_management.create_stream(
            'example',
            exchange_point=self.exchange_point_name,
            stream_definition_id=stream_def_id)
        self.addCleanup(self.pubsub_management.delete_stream, stream_id)
        dataset_id = self.create_dataset(pdict_id)
        self.start_ingestion(stream_id, dataset_id)
        self.addCleanup(self.stop_ingestion, stream_id)

        ntp_now = time.time() + 2208988800  # convert the Unix epoch to the NTP epoch (1900-01-01)
        rdt = ph.get_rdt(stream_def_id)
        rdt['time'] = [ntp_now]
        rdt['internal_timestamp'] = [ntp_now]
        rdt['temp'] = [300000]
        rdt['preferred_timestamp'] = ['driver_timestamp']
        rdt['port_timestamp'] = [ntp_now]
        rdt['quality_flag'] = [None]
        rdt['lat'] = [45]
        rdt['conductivity'] = [4341400]
        rdt['driver_timestamp'] = [ntp_now]
        rdt['lon'] = [-71]
        rdt['pressure'] = [256.8]

        publisher = StandaloneStreamPublisher(stream_id, route)
        dataset_monitor = DatasetMonitor(dataset_id)
        self.addCleanup(dataset_monitor.stop)
        publisher.publish(rdt.to_granule())
        self.assertTrue(dataset_monitor.event.wait(30))
        dataset_monitor.event.clear()

        replay_granule = self.data_retriever.retrieve(dataset_id)
        rdt_out = RecordDictionaryTool.load_from_granule(replay_granule)

        np.testing.assert_array_almost_equal(rdt_out['time'], rdt['time'])
        np.testing.assert_array_almost_equal(rdt_out['temp'], rdt['temp'])
        np.testing.assert_array_almost_equal(rdt_out['lat'], np.array([45]))
        np.testing.assert_array_almost_equal(rdt_out['lon'], np.array([-71]))

        np.testing.assert_array_almost_equal(rdt_out['conductivity_L1'],
                                             np.array([42.914]))
        np.testing.assert_array_almost_equal(rdt_out['temp_L1'],
                                             np.array([20.]))
        np.testing.assert_array_almost_equal(rdt_out['pressure_L1'],
                                             np.array([3.068]))
        np.testing.assert_array_almost_equal(
            rdt_out['density'], np.array([1021.7144739593881],
                                         dtype='float32'))
        np.testing.assert_array_almost_equal(
            rdt_out['salinity'], np.array([30.935132729668283],
                                          dtype='float32'))

        rdt = ph.get_rdt(stream_def_id)
        rdt['lat'] = [46]
        rdt['lon'] = [-73]

        publisher.publish(rdt.to_granule())
        self.assertTrue(dataset_monitor.event.wait(30))
        dataset_monitor.event.clear()

        rdt = ph.get_rdt(stream_def_id)
        rdt['lat'] = [1000]
        rdt['lon'] = [3]

        publisher.publish(rdt.to_granule())

        rdt = ph.get_rdt(stream_def_id)
        rdt['time'] = [ntp_now]
        rdt['internal_timestamp'] = [ntp_now]
        rdt['temp'] = [300000]
        rdt['preferred_timestamp'] = ['driver_timestamp']
        rdt['port_timestamp'] = [ntp_now]
        rdt['quality_flag'] = [None]
        rdt['conductivity'] = [4341400]
        rdt['driver_timestamp'] = [ntp_now]
        rdt['pressure'] = [256.8]

        publisher.publish(rdt.to_granule())
        self.assertTrue(dataset_monitor.event.wait(30))
        dataset_monitor.event.clear()

        replay_granule = self.data_retriever.retrieve(dataset_id)
        rdt_out = RecordDictionaryTool.load_from_granule(replay_granule)
        np.testing.assert_array_almost_equal(rdt_out['lat'], np.array([45,
                                                                       46]))
        np.testing.assert_array_almost_equal(rdt_out['lon'],
                                             np.array([-71, -73]))
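
A quick check of the 2208988800-second offset used above to convert Unix time to NTP time (the NTP epoch starts at 1900-01-01, the Unix epoch at 1970-01-01):

from datetime import datetime

offset = (datetime(1970, 1, 1) - datetime(1900, 1, 1)).total_seconds()
assert offset == 2208988800  # 25567 days * 86400 seconds/day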