예제 #1
0
    def on_start(self):
        """Subscribe to resource-modified events and cache the EOI settings.

        Creates and starts one ``ResourceModifiedEvent`` subscriber for
        ``RT.DataSource`` origins and one for ``RT.ExternalDataProvider``
        origins, then stores the resource registry handle, the
        ``eoi.meta.use_eoi_services`` flag and the importer service address
        (``host:port``) on the instance. Finally records the result of
        ``check_for_importer_service``.
        """
        subscriptions = (
            ('data_source_subscriber', RT.DataSource,
             self._register_data_source),
            ('provider_subscriber', RT.ExternalDataProvider,
             self._register_provider),
        )
        for attr_name, origin_type, handler in subscriptions:
            setattr(self, attr_name,
                    EventSubscriber(event_type=OT.ResourceModifiedEvent,
                                    origin_type=origin_type,
                                    callback=handler))

        # NOTE(review): both subscribers are started before the attributes
        # below are assigned; if the callbacks read them, an early event could
        # see a partially initialised instance -- confirm against the
        # callbacks.
        self.data_source_subscriber.start()
        self.provider_subscriber.start()

        self.rr = self.container.resource_registry

        self.using_eoi_services = CFG.get_safe('eoi.meta.use_eoi_services',
                                               False)

        importer_host = CFG.get_safe('eoi.importer_service.server',
                                     "localhost")
        importer_port = CFG.get_safe('eoi.importer_service.port', 8844)
        self.server = importer_host + ":" + str(importer_port)

        log.info("Using geoservices=" + str(self.using_eoi_services))
        if not self.using_eoi_services:
            log.warn("not using geoservices...")

        available = self.check_for_importer_service()
        self.importer_service_available = available
        if not available:
            log.warn("not using importer service...")
예제 #2
0
    def check_global_range(self):
        """Publish the fixture RDT and verify the global-range QC output.

        Runs the shared ``TestQCFunctions.check_global_range`` setup, then
        subscribes to ``ParameterQCEvent`` for the data product, publishes
        ``self.rdt`` and checks both the stored ``tempwat_glblrng_qc`` values
        and that a QC event with the expected temporal values was received.
        """
        TestQCFunctions.check_global_range(self)
        self.init_check()

        qc_event_seen = Event()

        def on_qc_event(event, *args, **kwargs):
            # Only signal the test once the reported times have been checked.
            self.assertEquals(event.temporal_values, [0.0, 7.0])
            qc_event_seen.set()

        qc_subscriber = EventSubscriber(
            event_type=OT.ParameterQCEvent,
            origin=self.dp_id,
            callback=on_qc_event,
            auto_delete=True)
        qc_subscriber.start()
        self.addCleanup(qc_subscriber.stop)

        self.ph.publish_rdt_to_data_product(self.dp_id, self.rdt)
        self.dataset_monitor.event.wait(10)

        granule = self.data_retriever.retrieve(self.dataset_id)
        rdt = RecordDictionaryTool.load_from_granule(granule)
        expected_flags = [0, 1, 1, 1, 1, 1, 1, 0]
        np.testing.assert_array_almost_equal(rdt['tempwat_glblrng_qc'],
                                             expected_flags)
        self.assertTrue(qc_event_seen.wait(10))
예제 #3
0
    def test_pub_on_different_origins(self):
        """A subscriber without an origin filter sees every origin.

        Publishes three ``ResourceEvent`` events with different origins and
        checks that all three are delivered, in publication order.
        """
        all_received = event.AsyncResult()
        received = queue.Queue()
        self.count = 0

        def on_event(*args, **kwargs):
            self.count += 1
            received.put(args[0])
            # Release the test once the third event has arrived.
            if self.count == 3:
                all_received.set()

        sub = EventSubscriber(event_type="ResourceEvent", callback=on_event)
        pub = EventPublisher(event_type="ResourceEvent")

        self._listen(sub)

        for origin, description in (("one", "1"), ("two", "2"),
                                    ("three", "3")):
            pub.publish_event(origin=origin, description=description)

        all_received.get(timeout=5)

        res = [received.get(timeout=5) for _ in xrange(self.count)]

        self.assertEquals(len(res), 3)
        for delivered, expected in zip(res, ("1", "2", "3")):
            self.assertEquals(delivered.description, expected)
예제 #4
0
    def check_localrange(self):
        """Publish the fixture RDT and verify the local-range QC output.

        Runs the shared ``TestQCFunctions.check_localrange`` setup, then
        subscribes to ``ParameterQCEvent`` for the data product, publishes
        ``self.rdt`` and checks both the stored ``tempwat_loclrng_qc`` values
        and that a matching QC event with the expected temporal values was
        received.
        """
        log.info('check_localrange')
        TestQCFunctions.check_localrange(self)
        self.init_check()

        qc_event_seen = Event()

        def on_qc_event(event, *args, **kwargs):
            times = event.temporal_values
            # Events for other QC parameters are ignored.
            if event.qc_parameter != 'tempwat_loclrng_qc':
                return
            expected_times = np.array([
                3580144708.7555027, 3580144709.7555027, 3580144710.7555027,
                3580144711.7555027, 3580144712.7555027
            ])
            np.testing.assert_array_equal(times, expected_times)
            qc_event_seen.set()

        qc_subscriber = EventSubscriber(
            event_type=OT.ParameterQCEvent,
            origin=self.dp_id,
            callback=on_qc_event,
            auto_delete=True)
        qc_subscriber.start()
        self.addCleanup(qc_subscriber.stop)

        self.ph.publish_rdt_to_data_product(self.dp_id, self.rdt)
        self.dataset_monitor.event.wait(10)

        granule = self.data_retriever.retrieve(self.dataset_id)
        rdt = RecordDictionaryTool.load_from_granule(granule)
        expected_flags = [1, 1, 1, 1, 1, 0, 0, 0, 0, 0]
        np.testing.assert_array_almost_equal(rdt['tempwat_loclrng_qc'],
                                             expected_flags)
        self.assertTrue(qc_event_seen.wait(10))
    def process_execution(self, temp_vector, qc_params, bad_times):
        """Launch the QC process against a fresh dataset and await one flag.

        :param temp_vector: passed through to ``make_large_dataset``.
        :param qc_params: QC parameter names (without their leading
            ``<prefix>_``) the launched process is configured with; events
            for other parameters are ignored.
        :param bad_times: expected length of ``temporal_values`` on a
            matching ``ParameterQCEvent``.
        :raises AssertionError: if no matching event arrives within 120 s.
        """
        interval_key = uuid4().hex
        data_product_id = self.make_large_dataset(temp_vector)
        flagged = Queue()

        def on_qc_event(event, *args, **kwargs):
            # Drop everything up to and including the first underscore; what
            # remains is compared against the configured QC parameters.
            qc_name = event.qc_parameter.partition('_')[2]
            if qc_name not in qc_params:
                # Not one of the QC parameters this run cares about.
                return
            self.assertEquals(len(event.temporal_values), bad_times)
            flagged.put(1)

        qc_subscriber = EventSubscriber(
            event_type=OT.ParameterQCEvent,
            origin=data_product_id,
            callback=on_qc_event,
            auto_delete=True)
        qc_subscriber.start()
        self.addCleanup(qc_subscriber.stop)

        config = DotDict()
        config.process.interval_key = interval_key
        config.process.qc_params = qc_params
        self.sync_launch(config)

        # The process is running now; trigger it with a timer event whose
        # origin is the interval key it was configured with.
        timer_publisher = EventPublisher(event_type='TimerEvent')
        timer_publisher.publish_event(origin=interval_key)

        try:
            flagged.get(timeout=120)
        except Empty:
            raise AssertionError('QC was not flagged in time')
예제 #6
0
    def on_start(self): #pragma no cover
        """Start the worker with an explicitly managed stream subscriber.

        ``TransformStreamProcess.on_start`` is called directly so that no
        subscriber is created implicitly; the ``StreamSubscriber`` and its
        lock are built here instead. After ``BaseIngestionWorker.on_start``
        the method registers an RPC listening endpoint, creates the
        ``DatasetModified`` publisher and the stored-value manager, reads the
        lookup configuration, registers a subscriber for
        ``ExternalReferencesUpdatedEvent`` and finally calls
        ``start_listener``.
        """
        # -- Explicit on_start ------------------------------------------------
        # Bypass TransformStreamListener and go straight to the stream process
        # base so the thread and subscriber for ingestion are managed here.
        TransformStreamProcess.on_start(self)

        self.queue_name = self.CFG.get_safe('process.queue_name', self.id)
        self.subscriber = StreamSubscriber(
            process=self,
            exchange_name=self.queue_name,
            callback=self.receive_callback)
        self.thread_lock = RLock()

        # -- Normal on_start from here on --------------------------------------
        BaseIngestionWorker.on_start(self)
        proc_manager = self.container.proc_manager
        self._rpc_server = proc_manager._create_listening_endpoint(
            from_name=self.id, process=self)
        self.add_endpoint(self._rpc_server)

        self.event_publisher = EventPublisher(OT.DatasetModified)
        self.stored_value_manager = StoredValueManager(self.container)

        self.lookup_docs = self.CFG.get_safe('process.lookup_docs', [])
        self.input_product = self.CFG.get_safe('process.input_product', '')
        self.new_lookups = Queue()
        self.lookup_monitor = EventSubscriber(
            event_type=OT.ExternalReferencesUpdatedEvent,
            callback=self._add_lookups,
            auto_delete=True)
        self.add_endpoint(self.lookup_monitor)
        self.connection_id = ''
        self.connection_index = None

        self.start_listener()
예제 #7
0
    def test_pub_sub_exception_event_origin(self):
        """An exception event carries the origin given to the decorator.

        A function decorated with ``handle_stream_exception("specific")``
        raises ``CorruptionError``; a subscriber filtered on that origin must
        receive exactly one ``ExceptionEvent`` describing it.
        """
        delivered = event.AsyncResult()
        self.count = 0

        def on_exception_event(*args, **kwargs):
            self.count += 1
            delivered.set(args[0])

        sub = EventSubscriber(
            event_type="ExceptionEvent",
            callback=on_exception_event,
            origin="specific")
        self._listen(sub)

        @handle_stream_exception("specific")
        def _test_origin():
            raise CorruptionError()

        _test_origin()

        exception_event = delivered.get(timeout=5)

        self.assertEquals(self.count, 1)
        expected_type = "<class 'pyon.core.exception.CorruptionError'>"
        self.assertEquals(exception_event.exception_type, expected_type)
        self.assertEquals(exception_event.origin, "specific")
예제 #8
0
    def test_base_subscriber_as_catchall(self):
        """A subscriber created without an event type receives all events.

        Publishes one ``ResourceEvent`` and one ``ContainerLifecycleEvent``
        and checks that both are delivered, in publication order.
        """
        both_received = event.AsyncResult()
        received = queue.Queue()
        self.count = 0

        def on_event(*args, **kwargs):
            self.count += 1
            received.put(args[0])
            # Release the test once the second event has arrived.
            if self.count == 2:
                both_received.set()

        sub = EventSubscriber(callback=on_event)
        resource_pub = EventPublisher(event_type="ResourceEvent")
        lifecycle_pub = EventPublisher(event_type="ContainerLifecycleEvent")

        self._listen(sub)

        resource_pub.publish_event(origin="some", description="1")
        lifecycle_pub.publish_event(origin="other", description="2")

        both_received.get(timeout=5)

        res = [received.get(timeout=5) for _ in xrange(self.count)]

        self.assertEquals(len(res), 2)
        for delivered, expected in zip(res, ("1", "2")):
            self.assertEquals(delivered.description, expected)
예제 #9
0
    def test_pub_on_different_subtypes(self):
        """Only events with matching event type AND sub type are delivered.

        The subscriber listens for ``ResourceModifiedEvent`` with sub type
        ``st1``. Four events are published; the one with sub type ``st2`` and
        the one of type ``ContainerLifecycleEvent`` must be filtered out,
        leaving two deliveries.
        """
        ar = event.AsyncResult()
        gq = queue.Queue()
        self.count = 0

        def cb(event, *args, **kwargs):
            self.count += 1
            gq.put(event)
            # The last published event is marked "end"; it releases the test.
            if event.description == "end":
                ar.set()

        sub = EventSubscriber(event_type="ResourceModifiedEvent",
                              sub_type="st1",
                              callback=cb)
        sub.start()
        try:
            pub1 = EventPublisher(event_type="ResourceModifiedEvent")
            pub2 = EventPublisher(event_type="ContainerLifecycleEvent")

            pub1.publish_event(origin="two", sub_type="st2", description="2")
            pub2.publish_event(origin="three", sub_type="st1",
                               description="3")
            pub1.publish_event(origin="one", sub_type="st1", description="1")
            pub1.publish_event(origin="four", sub_type="st1",
                               description="end")

            ar.get(timeout=5)
        finally:
            # Stop the subscriber even when publishing fails or the wait
            # times out, so a failing run does not leave a listener behind.
            sub.stop()

        res = [gq.get(timeout=5) for _ in xrange(self.count)]

        self.assertEquals(len(res), 2)
        self.assertEquals(res[0].description, "1")
예제 #10
0
    def start(self):
        """Start container statistics and the management request listener.

        Installs and starts a ``ContainerStatsManager`` on the container,
        creates the ``ContainerManagementResult`` publisher and the
        ``ContainerManagementRequest`` subscriber, then marks the instance as
        running and starts the subscriber while holding ``self.lock``.
        """
        stats_manager = ContainerStatsManager(self.container)
        self.container.stats_mgr = stats_manager
        stats_manager.start()

        # Publisher for results, subscriber for incoming requests.
        self.sender = EventPublisher(event_type="ContainerManagementResult")
        self.receiver = EventSubscriber(
            event_type="ContainerManagementRequest",
            callback=self._receive_event)

        with self.lock:
            self.running = True
            self.receiver.start()

        log.debug('Container ready for container management requests')
예제 #11
0
    def test_event_subscriber_auto_delete(self):
        """The ``auto_delete`` argument is propagated to the channel.

        Checks that the value is stored on the subscriber and, after
        ``initialize``, exposed as ``queue_auto_delete`` on its channel.
        """
        node = Mock()

        def _discard(*args, **kwargs):
            return None

        subscriber = EventSubscriber(
            event_type="ProcessLifecycleEvent",
            callback=_discard,
            auto_delete=sentinel.auto_delete,
            node=node)
        self.assertEquals(subscriber._auto_delete, sentinel.auto_delete)

        # Rather than patching the whole listener machinery, stub out
        # _setup_listener and call initialize directly; it creates the
        # channel for us.
        subscriber._setup_listener = Mock()
        subscriber.initialize(sentinel.binding)

        self.assertEquals(subscriber._chan.queue_auto_delete,
                          sentinel.auto_delete)
예제 #12
0
 def on_start(self):
     '''
     Process initialization

     Sets up the receive timeout, the resource registry handle and the
     event queue, and only then spawns the ``thread_loop`` worker and
     starts the ``ResetQCEvent`` subscriber.
     '''
     # Initialise all instance state BEFORE starting anything concurrent.
     # Previously the worker thread and the subscriber were started first,
     # so either could run before these attributes existed.
     # NOTE(review): presumably thread_loop / receive_event use
     # self.event_queue and self.timeout -- confirm against their bodies.
     self.timeout = self.CFG.get_safe('endpoint.receive.timeout', 10)
     self.resource_registry = self.container.resource_registry
     self.event_queue = Queue()

     self._thread = self._process.thread_manager.spawn(self.thread_loop)
     self._event_subscriber = EventSubscriber(
         event_type=OT.ResetQCEvent,
         callback=self.receive_event,
         auto_delete=True)  # TODO Correct event types
     self._event_subscriber.start()
예제 #13
0
    def test_pub_sub_exception_event(self):
        """Each stream exception is published as an ``ExceptionEvent``.

        Three functions decorated with ``handle_stream_exception()`` raise
        ``FilesystemError``, ``StreamingError`` and ``CorruptionError`` in
        turn; a subscriber on origin ``stream_exception`` must receive one
        event per exception, in that order.
        """
        all_received = event.AsyncResult()
        received = queue.Queue()
        self.count = 0

        def on_exception_event(*args, **kwargs):
            self.count += 1
            received.put(args[0])
            # Release the test once the third event has arrived.
            if self.count == 3:
                all_received.set()

        sub = EventSubscriber(
            event_type="ExceptionEvent",
            callback=on_exception_event,
            origin="stream_exception")
        self._listen(sub)

        # File system error
        @handle_stream_exception()
        def _raise_filesystem_error():
            raise FilesystemError()

        _raise_filesystem_error()

        # Streaming error
        @handle_stream_exception()
        def _raise_streaming_error():
            raise StreamingError()

        _raise_streaming_error()

        # Corruption error
        @handle_stream_exception()
        def _raise_corruption_error():
            raise CorruptionError()

        _raise_corruption_error()

        all_received.get(timeout=5)
        res = [received.get(timeout=5) for _ in xrange(self.count)]

        expected_types = (
            "<class 'pyon.core.exception.FilesystemError'>",
            "<class 'pyon.core.exception.StreamingError'>",
            "<class 'pyon.core.exception.CorruptionError'>",
        )
        for idx, expected in enumerate(expected_types):
            self.assertEquals(res[idx].exception_type, expected)
        self.assertEquals(res[2].origin, "stream_exception")
예제 #14
0
    def on_start(self):
        """Spawn the persister loop and subscribe to all events.

        The greenlet runs ``_persister_loop`` with ``persist_interval``; the
        subscriber uses the ``EventSubscriber.ALL_EVENTS`` pattern on the
        named queue ``event_persister`` (created with ``auto_delete=False``)
        and delivers to ``_on_event``.
        """
        # Persister greenlet
        interval = self.persist_interval
        self._persist_greenlet = spawn(self._persister_loop, interval)
        log.debug(
            'EventPersister persist greenlet started in "%s" (interval %s)',
            self.__class__.__name__, interval)

        # Event subscription
        subscriber = EventSubscriber(
            pattern=EventSubscriber.ALL_EVENTS,
            callback=self._on_event,
            queue_name="event_persister",
            auto_delete=False)
        self.event_sub = subscriber
        subscriber.start()
예제 #15
0
 def on_start(self):
     """Read the process configuration and subscribe to the interval timer.

     Reads ``process.interval_key`` and ``process.qc_params`` from the
     process configuration, validates that an interval key was supplied,
     and registers a ``TimerEvent`` subscriber (origin = interval key,
     callback = ``_event_callback``) as a process endpoint. Also caches
     the resource registry handle and
     ``service.qc_processing.run_interval`` (default 24).
     """
     SimpleProcess.on_start(self)
     self.data_retriever = DataRetrieverServiceProcessClient(process=self)
     self.interval_key = self.CFG.get_safe('process.interval_key', None)
     self.qc_params = self.CFG.get_safe('process.qc_params', [])
     # Presumably raises when the key is None -- confirm against
     # validate_is_not_none. The message previously read "paunch".
     validate_is_not_none(
         self.interval_key,
         'An interval key is necessary to launch this process')
     self.event_subscriber = EventSubscriber(event_type=OT.TimerEvent,
                                             origin=self.interval_key,
                                             callback=self._event_callback,
                                             auto_delete=True)
     self.add_endpoint(self.event_subscriber)
     self.resource_registry = self.container.resource_registry
     self.run_interval = self.CFG.get_safe(
         'service.qc_processing.run_interval', 24)
예제 #16
0
 def on_start(self):
     """Set up clients, configuration and the lookup-update subscriber.

     After the base ``TransformDataProcess.on_start`` this creates the
     pubsub management client and stored-value manager, reads the input
     and output product ids and the lookup documents from the process
     configuration, and starts a subscriber that feeds
     ``ExternalReferencesUpdatedEvent`` events to ``_add_lookups``.
     """
     TransformDataProcess.on_start(self)
     self.pubsub_management = PubsubManagementServiceProcessClient(
         process=self)
     self.stored_values = StoredValueManager(self.container)

     read_cfg = self.CFG.get_safe
     self.input_data_product_ids = read_cfg('process.input_products', [])
     self.output_data_product_ids = read_cfg('process.output_products', [])
     self.lookup_docs = read_cfg('process.lookup_docs', [])

     self.new_lookups = Queue()
     lookup_monitor = EventSubscriber(
         event_type=OT.ExternalReferencesUpdatedEvent,
         callback=self._add_lookups,
         auto_delete=True)
     self.lookup_monitor = lookup_monitor
     lookup_monitor.start()
예제 #17
0
    def start(self):
        """Initialise the governance controller from the container config.

        Reads the enabled flag and the org settings from ``CFG``, creates the
        resource registry and policy management clients, and -- only when
        governance is enabled -- initialises from the governance config,
        starts a ``PolicyEvent`` subscriber and takes a policy snapshot.
        """
        log.debug("GovernanceController starting ...")
        self._CFG = CFG

        self.enabled = CFG.get_safe(
            'interceptor.interceptors.governance.config.enabled', False)
        if not self.enabled:
            log.warn("GovernanceInterceptor disabled by configuration")
        self.policy_event_subscriber = None

        # By default a container is not an Org boundary and belongs to the
        # system root Org (which itself defaults to 'ION').
        root_org_name = CFG.get_safe('system.root_org', 'ION')
        self._is_container_org_boundary = CFG.get_safe(
            'container.org_boundary', False)
        self._container_org_name = CFG.get_safe('container.org_name',
                                                root_org_name)
        self._container_org_id = None
        self._system_root_org_name = root_org_name

        self._is_root_org_container = (
            self._container_org_name == self._system_root_org_name)

        self.system_actor_id = None
        self.system_actor_user_header = None

        self.rr_client = ResourceRegistryServiceProcessClient(
            process=self.container)
        self.policy_client = PolicyManagementServiceProcessClient(
            process=self.container)

        if not self.enabled:
            return

        config = CFG.get_safe('interceptor.interceptors.governance.config')
        self.initialize_from_config(config)

        policy_subscriber = EventSubscriber(
            event_type=OT.PolicyEvent, callback=self.policy_event_callback)
        self.policy_event_subscriber = policy_subscriber
        policy_subscriber.start()

        self._policy_snapshot = self._get_policy_snapshot()
        self._log_policy_update("start_governance_ctrl",
                                message="Container start")
예제 #18
0
    def make_large_dataset(self, temp_vector):
        """Create a data product and publish three populated RDTs to it.

        Waits (up to 10 s each) for three ``DatasetModified`` events on the
        new dataset before returning. The original author's note describes
        the resulting dataset as 27 hours of data.

        :param temp_vector: passed through to ``populate_vectors``.
        :returns: the id of the created data product.
        :raises AssertionError: if the dataset events do not arrive in time.
        """
        batch_count = 3
        modified_events = Queue()

        ph = ParameterHelper(self.dataset_management, self.addCleanup)
        data_product_id, dataset_id, stream_def_id = self.make_data_product()

        def on_dataset_modified(*args, **kwargs):
            modified_events.put(1)

        dataset_subscriber = EventSubscriber(
            event_type=OT.DatasetModified,
            origin=dataset_id,
            auto_delete=True,
            callback=on_dataset_modified)
        dataset_subscriber.start()
        self.addCleanup(dataset_subscriber.stop)

        batches = self.populate_vectors(stream_def_id, batch_count,
                                        temp_vector)
        for rdt in batches:
            ph.publish_rdt_to_data_product(data_product_id, rdt)

        # One DatasetModified event is expected per published batch.
        try:
            for _ in xrange(batch_count):
                modified_events.get(timeout=10)
        except Empty:
            raise AssertionError('Failed to populate dataset in time')

        return data_product_id
예제 #19
0
    def test_subscriber_listening_for_specific_origin(self):
        """A subscriber with an origin filter only sees that origin.

        Four ``ResourceEvent`` events are published, one of them with origin
        ``specific``; exactly that one must be delivered.
        """
        delivered = event.AsyncResult()
        self.count = 0

        def on_event(*args, **kwargs):
            self.count += 1
            delivered.set(args[0])

        sub = EventSubscriber(
            event_type="ResourceEvent",
            origin="specific",
            callback=on_event)
        pub = EventPublisher(event_type="ResourceEvent")

        self._listen(sub)

        published = (
            ("notspecific", "1"),
            ("notspecific", "2"),
            ("specific", "3"),
            ("notspecific", "4"),
        )
        for origin, description in published:
            pub.publish_event(origin=origin, description=description)

        evmsg = delivered.get(timeout=5)
        self.assertEquals(self.count, 1)
        self.assertEquals(evmsg.description, "3")
예제 #20
0
    def test_pub_on_different_subsubtypes(self):
        """Dotted sub types are matched per segment, including ``*``.

        Four subscribers listen for ``ResourceModifiedEvent`` with the sub
        type patterns ``st1.*``, ``st1.a``, ``*.a`` and ``st1``. A fixed
        series of events is published and each subscriber's delivery count
        and first delivered description are checked.

        Unlike the earlier version, the test waits for the "end" event on
        ALL four subscribers (the fourth was previously not awaited although
        its count is asserted) and always stops the subscribers.
        """
        patterns = ("st1.*", "st1.a", "*.a", "st1")
        res_list = [
            DotDict(ar=event.AsyncResult(), gq=queue.Queue(), count=0)
            for _ in patterns
        ]

        def cb_gen(num):
            def cb(event, *args, **kwargs):
                res_list[num].count += 1
                res_list[num].gq.put(event)
                # Every subscriber receives one event described as "end".
                if event.description == "end":
                    res_list[num].ar.set()

            return cb

        # (origin, sub_type, description), in publication order.
        published = (
            ("one", "st1.a", "1"),
            ("two", "st1", "2"),
            ("three", "st1.b", "3"),
            ("four", "st2.a", "4"),
            ("five", "st2", "5"),
            ("six", "a", "6"),
            ("seven", "", "7"),
            ("end", "st1.a", "end"),
            ("end", "st1", "end"),
        )

        subs = []
        try:
            for num, pattern in enumerate(patterns):
                sub = EventSubscriber(event_type="ResourceModifiedEvent",
                                      sub_type=pattern,
                                      callback=cb_gen(num))
                sub.start()
                subs.append(sub)

            pub1 = EventPublisher(event_type="ResourceModifiedEvent")
            for origin, sub_type, description in published:
                pub1.publish_event(origin=origin,
                                   sub_type=sub_type,
                                   description=description)

            # Wait until every subscriber has seen its "end" event.
            for entry in res_list:
                entry.ar.get(timeout=5)
        finally:
            # Stop whatever was started, even on failure or timeout.
            for sub in subs:
                sub.stop()

        for entry in res_list:
            entry.res = [entry.gq.get(timeout=5) for _ in xrange(entry.count)]

        # (expected delivery count, expected first description) per pattern.
        expectations = ((3, "1"), (2, "1"), (3, "1"), (2, "2"))
        for entry, (expected_count, first) in zip(res_list, expectations):
            self.assertEquals(len(entry.res), expected_count)
            self.assertEquals(entry.res[0].description, first)
예제 #21
0
    def test_pub_and_sub(self):
        """Round-trip events and check ``publish_event_object`` validation.

        Publishes one event via ``publish_event`` and one via
        ``publish_event_object``, verifies that three malformed event objects
        are rejected with ``BadRequest``, and finally checks that the two
        valid events were delivered with a recent ``ts_created``.
        """
        both_received = event.AsyncResult()
        received = queue.Queue()
        self.count = 0

        def on_event(*args, **kwargs):
            self.count += 1
            received.put(args[0])
            # Release the test once the second event has arrived.
            if self.count == 2:
                both_received.set()

        sub = EventSubscriber(
            event_type="ResourceEvent",
            callback=on_event,
            origin="specific")
        pub = EventPublisher(event_type="ResourceEvent")

        self._listen(sub)
        pub.publish_event(origin="specific", description="hello")

        event_obj = bootstrap.IonObject(
            'ResourceEvent', origin='specific', description='more testing')
        self.assertEqual(event_obj, pub.publish_event_object(event_obj))

        # ts_created that is not a valid timestamp
        with self.assertRaises(BadRequest) as ctx:
            event_obj = bootstrap.IonObject(
                'ResourceEvent',
                origin='specific',
                description='more testing',
                ts_created='2423')
            pub.publish_event_object(event_obj)
        self.assertIn('The ts_created value is not a valid timestamp',
                      ctx.exception.message)

        # ts_created that is too far in the past
        with self.assertRaises(BadRequest) as ctx:
            event_obj = bootstrap.IonObject(
                'ResourceEvent',
                origin='specific',
                description='more testing',
                ts_created='1000494978462')
            pub.publish_event_object(event_obj)
        self.assertIn('This ts_created value is too old',
                      ctx.exception.message)

        # event object that already carries an _id
        with self.assertRaises(BadRequest) as ctx:
            event_obj = bootstrap.IonObject(
                'ResourceEvent', origin='specific', description='more testing')
            event_obj._id = '343434'
            pub.publish_event_object(event_obj)
        self.assertIn('The event object cannot contain a _id field',
                      ctx.exception.message)

        both_received.get(timeout=5)

        res = [received.get(timeout=5) for _ in xrange(self.count)]

        self.assertEquals(len(res), self.count)
        for idx, expected in enumerate(("hello", "more testing")):
            delivered = res[idx]
            self.assertEquals(delivered.description, expected)
            self.assertAlmostEquals(int(delivered.ts_created),
                                    int(get_ion_ts()),
                                    delta=5000)
예제 #22
0
    def test_qc_interval_integration(self):
        """Run the QC process from an interval timer and await QC events.

        Outline:
          1. build a dataset from one batch of ``[41] + [39] * (x - 1)``
          2. launch the process configured for ``glblrng_qc``
          3. create an interval timer whose events use the interval key
          4. wait for two ``ParameterQCEvent`` events, each reporting a
             single temporal value
        """
        # ---------------------------------------------------------------------
        # Build the dataset
        # ---------------------------------------------------------------------
        dp_id, dataset_id, stream_def_id = self.make_data_product()
        ph = ParameterHelper(self.dataset_management, self.addCleanup)
        monitor = DatasetMonitor(dataset_id)
        self.addCleanup(monitor.stop)

        def temp_vector(x):
            return [41] + [39] * (x - 1)

        for rdt in self.populate_vectors(stream_def_id, 1, temp_vector):
            ph.publish_rdt_to_data_product(dp_id, rdt)
        # Allow up to 10 seconds for the dataset to be populated.
        self.assertTrue(monitor.event.wait(10))

        # ---------------------------------------------------------------------
        # Launch the process
        # ---------------------------------------------------------------------
        interval_key = uuid4().hex
        config = DotDict()
        config.process.interval_key = interval_key
        # The other QC parameters are covered by other tests.
        config.process.qc_params = ['glblrng_qc']
        self.sync_launch(config)

        qc_events = Queue()

        def callback(event, *args, **kwargs):
            self.assertEquals(len(event.temporal_values), 1)
            qc_events.put(1)

        qc_subscriber = EventSubscriber(
            event_type=OT.ParameterQCEvent,
            origin=dp_id,
            callback=callback,
            auto_delete=True)
        qc_subscriber.start()
        self.addCleanup(qc_subscriber.stop)

        # ---------------------------------------------------------------------
        # Set up the scheduler
        # ---------------------------------------------------------------------
        timer_id = self.scheduler_service.create_interval_timer(
            start_time=time.time(),
            end_time=time.time() + 13,
            interval=5,
            event_origin=interval_key)

        # ---------------------------------------------------------------------
        # Collect the events
        # ---------------------------------------------------------------------
        try:
            for _ in xrange(2):
                qc_events.get(timeout=10)
        except Empty:
            raise AssertionError('QC Events not raised')
예제 #23
0
    def test_derived_data_product(self):
        """A derived data product exposes only its stream definition fields.

        Creates a parent data product on the ``ctd_parsed_param_dict``
        dictionary, activates persistence, creates a derived ``TEMPWAT``
        product restricted to ``time`` and ``temp``, publishes one granule to
        the parent stream and then retrieves the derived dataset to check its
        fields and time values.
        """
        pdict_id = self.dataset_management.read_parameter_dictionary_by_name(
            'ctd_parsed_param_dict', id_only=True)
        ctd_stream_def_id = self.pubsubcli.create_stream_definition(
            name='ctd parsed', parameter_dictionary_id=pdict_id)
        self.addCleanup(self.pubsubcli.delete_stream_definition,
                        ctd_stream_def_id)

        # Parent (instrument) data product
        parent_dp = DataProduct(name='Instrument DP')
        dp_id = self.dpsc_cli.create_data_product(
            parent_dp, stream_definition_id=ctd_stream_def_id)
        self.addCleanup(self.dpsc_cli.force_delete_data_product, dp_id)

        self.dpsc_cli.activate_data_product_persistence(dp_id)
        self.addCleanup(self.dpsc_cli.suspend_data_product_persistence, dp_id)

        dataset_ids, _ = self.rrclient.find_objects(
            subject=dp_id, predicate=PRED.hasDataset, id_only=True)
        if not dataset_ids:
            raise NotFound("Data Product %s dataset  does not exist" %
                           str(dp_id))
        dataset_id = dataset_ids[0]

        # Derived data product
        simple_stream_def_id = self.pubsubcli.create_stream_definition(
            name='TEMPWAT stream def',
            parameter_dictionary_id=pdict_id,
            available_fields=['time', 'temp'])
        tempwat_dp = DataProduct(name='TEMPWAT',
                                 category=DataProductTypeEnum.DERIVED)
        tempwat_dp_id = self.dpsc_cli.create_data_product(
            tempwat_dp,
            stream_definition_id=simple_stream_def_id,
            parent_data_product_id=dp_id)
        self.addCleanup(self.dpsc_cli.delete_data_product, tempwat_dp_id)

        # Every stream of the parent data product must be persisted.
        stream_ids, _ = self.rrclient.find_objects(dp_id, PRED.hasStream,
                                                   RT.Stream, True)
        for stream_id in stream_ids:
            self.assertTrue(self.ingestclient.is_persisted(stream_id))

        stream_id = stream_ids[0]
        route = self.pubsubcli.read_stream_route(stream_id=stream_id)

        rdt = RecordDictionaryTool(stream_definition_id=ctd_stream_def_id)
        for field in ('time', 'temp', 'pressure'):
            rdt[field] = np.arange(20)

        publisher = StandaloneStreamPublisher(stream_id, route)

        dataset_modified = Event()

        def on_dataset_modified(*args, **kwargs):
            dataset_modified.set()

        modified_subscriber = EventSubscriber(
            event_type=OT.DatasetModified,
            callback=on_dataset_modified,
            origin=dataset_id,
            auto_delete=True)
        modified_subscriber.start()
        self.addCleanup(modified_subscriber.stop)

        publisher.publish(rdt.to_granule())

        self.assertTrue(dataset_modified.wait(30))

        tempwat_dataset_ids, _ = self.rrclient.find_objects(
            tempwat_dp_id, PRED.hasDataset, id_only=True)
        tempwat_dataset_id = tempwat_dataset_ids[0]
        granule = self.data_retriever.retrieve(
            tempwat_dataset_id, delivery_format=simple_stream_def_id)
        rdt = RecordDictionaryTool.load_from_granule(granule)
        np.testing.assert_array_equal(rdt['time'], np.arange(20))
        self.assertEquals(set(rdt.fields), set(['time', 'temp']))
예제 #24
0
    def test_activate_suspend_data_product(self):
        # Integration test for data product persistence control. It activates,
        # suspends and re-activates persistence for a data product, checks that
        # a published granule triggers a DatasetModified event only while
        # persistence is active, force-deletes the product, and finally counts
        # the InformationContentStatusEvents received for the product.

        #------------------------------------------------------------------------------------------------
        # create a stream definition for the data from the ctd simulator
        #------------------------------------------------------------------------------------------------
        pdict_id = self.dataset_management.read_parameter_dictionary_by_name(
            'ctd_parsed_param_dict', id_only=True)
        ctd_stream_def_id = self.pubsubcli.create_stream_definition(
            name='Simulated CTD data', parameter_dictionary_id=pdict_id)
        log.debug("Created stream def id %s" % ctd_stream_def_id)

        #------------------------------------------------------------------------------------------------
        # build the data product resource object
        #------------------------------------------------------------------------------------------------
        dp_obj = IonObject(RT.DataProduct,
                           name='DP1',
                           description='some new dp')

        log.debug("Created an IonObject for a data product: %s" % dp_obj)

        #------------------------------------------------------------------------------------------------
        # create the data product, bound to the CTD stream definition
        #------------------------------------------------------------------------------------------------
        dp_id = self.dpsc_cli.create_data_product(
            data_product=dp_obj, stream_definition_id=ctd_stream_def_id)

        #------------------------------------------------------------------------------------------------
        # Subscribe to persist events
        #------------------------------------------------------------------------------------------------
        # Every InformationContentStatusEvent for this product is queued here
        # and counted at the very end of the test.
        queue = gevent.queue.Queue()

        def info_event_received(message, headers):
            queue.put(message)

        es = EventSubscriber(event_type=OT.InformationContentStatusEvent,
                             callback=info_event_received,
                             origin=dp_id,
                             auto_delete=True)
        es.start()
        self.addCleanup(es.stop)

        #------------------------------------------------------------------------------------------------
        # test activate and suspend data product persistence
        #------------------------------------------------------------------------------------------------
        self.dpsc_cli.activate_data_product_persistence(dp_id)

        dp_obj = self.dpsc_cli.read_data_product(dp_id)
        self.assertIsNotNone(dp_obj)

        dataset_ids, _ = self.rrclient.find_objects(subject=dp_id,
                                                    predicate=PRED.hasDataset,
                                                    id_only=True)
        if not dataset_ids:
            raise NotFound("Data Product %s dataset  does not exist" %
                           str(dp_id))
        dataset_id = dataset_ids[0]

        # Check that every stream associated with the data product is
        # reported as persisted by the ingestion client.
        stream_ids, _ = self.rrclient.find_objects(dp_id, PRED.hasStream,
                                                   RT.Stream, True)
        for stream_id in stream_ids:
            self.assertTrue(self.ingestclient.is_persisted(stream_id))

        stream_id = stream_ids[0]
        route = self.pubsubcli.read_stream_route(stream_id=stream_id)

        # First granule: 20 samples with time values 0..19.
        rdt = RecordDictionaryTool(stream_definition_id=ctd_stream_def_id)
        rdt['time'] = np.arange(20)
        rdt['temp'] = np.arange(20)

        publisher = StandaloneStreamPublisher(stream_id, route)

        # Set by the DatasetModified subscriber below; cleared before each
        # publish whose effect (or lack of effect) is being checked.
        dataset_modified = Event()

        def cb(*args, **kwargs):
            dataset_modified.set()

        es = EventSubscriber(event_type=OT.DatasetModified,
                             callback=cb,
                             origin=dataset_id,
                             auto_delete=True)
        es.start()
        self.addCleanup(es.stop)

        publisher.publish(rdt.to_granule())

        self.assertTrue(dataset_modified.wait(30))

        #--------------------------------------------------------------------------------
        # Now get the data back in one call through the data retriever
        #--------------------------------------------------------------------------------

        replay_data = self.data_retriever.retrieve(dataset_id)
        self.assertIsInstance(replay_data, Granule)

        log.debug(
            "The data retriever was able to replay the dataset that was attached to the data product "
            "we wanted to be persisted. Therefore the data product was indeed persisted with "
            "otherwise we could not have retrieved its dataset using the data retriever. Therefore "
            "this demonstration shows that L4-CI-SA-RQ-267 is satisfied: 'Data product management shall persist data products'"
        )

        data_product_object = self.rrclient.read(dp_id)
        self.assertEqual(data_product_object.name, 'DP1')
        self.assertEqual(data_product_object.description, 'some new dp')

        log.debug(
            "Towards L4-CI-SA-RQ-308: 'Data product management shall persist data product metadata'. "
            " Attributes in create for the data product obj, name= '%s', description='%s', match those of object from the "
            "resource registry, name='%s', desc='%s'" %
            (dp_obj.name, dp_obj.description, data_product_object.name,
             data_product_object.description))

        #------------------------------------------------------------------------------------------------
        # test suspend data product persistence
        #------------------------------------------------------------------------------------------------
        self.dpsc_cli.suspend_data_product_persistence(dp_id)

        dataset_modified.clear()

        # Second granule: time values 20..39. While persistence is suspended
        # no DatasetModified event is expected within the 2 second window.
        rdt['time'] = np.arange(20, 40)

        publisher.publish(rdt.to_granule())
        self.assertFalse(dataset_modified.wait(2))

        # Re-activate and publish the same granule again; this time the
        # dataset must be modified.
        self.dpsc_cli.activate_data_product_persistence(dp_id)
        dataset_modified.clear()

        publisher.publish(rdt.to_granule())
        self.assertTrue(dataset_modified.wait(30))

        # The dataset is expected to hold time values 0..39.
        granule = self.data_retriever.retrieve(dataset_id)
        rdt = RecordDictionaryTool.load_from_granule(granule)
        np.testing.assert_array_almost_equal(rdt['time'], np.arange(40))

        # Re-activating must not have attached a second dataset.
        dataset_ids, _ = self.rrclient.find_objects(dp_id,
                                                    PRED.hasDataset,
                                                    id_only=True)
        self.assertEqual(len(dataset_ids), 1)

        self.dpsc_cli.suspend_data_product_persistence(dp_id)
        self.dpsc_cli.force_delete_data_product(dp_id)

        # The deleted data product must no longer be readable from the
        # resource registry.
        with self.assertRaises(NotFound):
            self.rrclient.read(dp_id)

        # Check that the four InformationContentStatusEvents were received.
        # NOTE(review): presumably one event per activate/suspend call made
        # above (two of each) -- confirm against the service implementation.
        #
        # One deadline bounds the whole wait, and a timeout on the queue ends
        # the loop instead of escaping as gevent.queue.Empty, so a shortfall
        # is reported by the count assertion below.
        deadline = time.time() + 60
        caught_events = []
        while len(caught_events) < 4:
            remaining = deadline - time.time()
            if remaining <= 0:
                break
            try:
                caught_events.append(queue.get(timeout=remaining))
            except gevent.queue.Empty:
                break

        info_event_counter = len(caught_events)
        self.assertEqual(info_event_counter, 4)