def test_coverage_ingest(self):
        stream_id = self.pubsub_management.create_stream()
        dataset_id = self.create_dataset()
        # Work around a known bug: touch the datastore so it exists before ingestion starts
        self.get_datastore(dataset_id)
        ingestion_config_id = self.get_ingestion_config()
        self.ingestion_management.persist_data_stream(
            stream_id=stream_id, ingestion_configuration_id=ingestion_config_id, dataset_id=dataset_id
        )

        black_box = CoverageCraft()
        black_box.rdt["time"] = np.arange(20)
        black_box.rdt["temp"] = np.random.random(20) * 10
        black_box.sync_with_granule()
        granule = black_box.to_granule()

        publisher = SimpleStreamPublisher.new_publisher(self.container, self.exchange_point_name, stream_id)
        publisher.publish(granule)

        self.wait_until_we_have_enough_granules(dataset_id, 1)

        coverage = DatasetManagementService._get_coverage(dataset_id)

        black_box = CoverageCraft(coverage)
        black_box.sync_rdt_with_coverage()
        comp = black_box.rdt["time"] == np.arange(20)
        self.assertTrue(comp.all())

        black_box = CoverageCraft()
        black_box.rdt["time"] = np.arange(20) + 20
        black_box.rdt["temp"] = np.random.random(20) * 10
        black_box.sync_with_granule()
        granule = black_box.to_granule()

        publisher.publish(granule)

        self.wait_until_we_have_enough_granules(dataset_id, 2)

        coverage = DatasetManagementService._get_coverage(dataset_id)

        black_box = CoverageCraft(coverage)
        black_box.sync_rdt_with_coverage()
        comp = black_box.rdt["time"] == np.arange(40)
        self.assertTrue(comp.all())

        granule = self.data_retriever.retrieve(dataset_id)

        black_box = CoverageCraft()
        black_box.sync_rdt_with_granule(granule)
        comp = black_box.rdt["time"] == np.arange(40)
        self.assertTrue(comp.all())
Example #2
 def execute_retrieve(self):
     '''
     execute_retrieve Executes a retrieval and returns the result 
     as a value in lieu of publishing it on a stream
     '''
     coverage = None
     try:
         coverage = DatasetManagementService._get_coverage(self.dataset_id,
                                                           mode='r')
         if coverage.num_timesteps == 0:
             log.info('Reading from an empty coverage')
             rdt = RecordDictionaryTool(
                 param_dictionary=coverage.parameter_dictionary)
         else:
             rdt = self._coverage_to_granule(coverage=coverage,
                                             start_time=self.start_time,
                                             end_time=self.end_time,
                                             stride_time=self.stride_time,
                                             parameters=self.parameters,
                                             tdoa=self.tdoa)
     except:
         log.exception('Problems reading from the coverage')
         raise BadRequest('Problems reading from the coverage')
     finally:
         if coverage is not None:
             coverage.close(timeout=5)
     return rdt.to_granule()
    def get_last_granule(cls, container, dataset_id):
        dsm_cli = DatasetManagementServiceClient()
        dataset = dsm_cli.read_dataset(dataset_id)
        cc = container
        datastore_name = dataset.datastore_name
        view_name = dataset.view_name
        
        datastore = cc.datastore_manager.get_datastore(datastore_name, DataStore.DS_PROFILE.SCIDATA)

        opts = dict(
            start_key = [dataset_id, {}],
            end_key   = [dataset_id, 0], 
            descending = True,
            limit = 1,
            include_docs = True
        )

        results = datastore.query_view(view_name,opts=opts)
        if not results:
            raise NotFound('A granule could not be located.')
        if results[0] is None:
            raise NotFound('A granule could not be located.')
        doc = results[0].get('doc')
        if doc is None:
            return None

        ts = float(doc.get('ts_create',0))

        coverage = DatasetManagementService._get_coverage(dataset_id)
        
        black_box = CoverageCraft(coverage)
        black_box.sync_rdt_with_coverage(start_time=ts,end_time=None)
        granule = black_box.to_granule()

        return granule
Example #4
 def get_coverage(cls, data_product_id):
     '''
     Memoization (LRU) of _get_coverage
     '''
     if not data_product_id:
         return
     try:
         result, ts = cls._coverages.pop(data_product_id)
         if (time.time() - ts) > cls.CACHE_EXPIRATION:
             result.close()
             raise KeyError(data_product_id)
     except KeyError:
         resource_registry = Container.instance.resource_registry
         dataset_ids, _ = resource_registry.find_objects(data_product_id,
                                                         PRED.hasDataset,
                                                         id_only=True)
         if not dataset_ids:
             return None
         dataset_id = dataset_ids[0]
         result = DatasetManagementService._get_coverage(dataset_id,
                                                         mode='r')
         if result is None:
             return None
         result.value_caching = False
         ts = time.time()
         if len(cls._coverages) >= cls.CACHE_LIMIT:
             # Evict and close the least recently used coverage
             _key, (old_cov, _old_ts) = cls._coverages.popitem(0)
             old_cov.close(timeout=5)
     # Key the cache entry by data product id so cache hits and misses use the same key
     cls._coverages[data_product_id] = result, ts
     return result
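The accessor above combines two cache policies: an entry expires CACHE_EXPIRATION seconds after it was opened, and the cache is capped at CACHE_LIMIT entries, closing and evicting the oldest entry first. A minimal, self-contained sketch of that pattern follows; the ExpiringLRU name, the opener/closer callables, and the limits are illustrative placeholders, not part of the DatasetManagementService API.

import time
from collections import OrderedDict

class ExpiringLRU(object):
    '''Size-capped cache whose entries also expire after max_age seconds.'''
    def __init__(self, opener, closer, limit=5, max_age=30):
        self._entries = OrderedDict()   # key -> (value, timestamp)
        self._opener = opener           # called on a cache miss
        self._closer = closer           # called when a value expires or is evicted
        self._limit = limit
        self._max_age = max_age

    def get(self, key):
        try:
            value, ts = self._entries.pop(key)
            if (time.time() - ts) > self._max_age:
                self._closer(value)
                raise KeyError(key)
        except KeyError:
            if len(self._entries) >= self._limit:
                _, (old_value, _) = self._entries.popitem(last=False)
                self._closer(old_value)
            value, ts = self._opener(key), time.time()
        # Re-inserting moves the key back to the most recently used position
        self._entries[key] = (value, ts)
        return value

Repeated get(key) calls within max_age reuse the already-open value, mirroring how the memoized coverage accessors in these examples avoid reopening a coverage for every request.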
Example #6
    def get_last_granule(cls, container, dataset_id):
        dsm_cli = DatasetManagementServiceClient()
        dataset = dsm_cli.read_dataset(dataset_id)
        cc = container
        datastore_name = dataset.datastore_name
        view_name = dataset.view_name
        
        datastore = cc.datastore_manager.get_datastore(datastore_name, DataStore.DS_PROFILE.SCIDATA)

        opts = dict(
            start_key = [dataset_id, {}],
            end_key   = [dataset_id, 0], 
            descending = True,
            limit = 1,
            include_docs = True
        )

        results = datastore.query_view(view_name,opts=opts)
        if not results:
            raise NotFound('A granule could not be located.')
        if results[0] is None:
            raise NotFound('A granule could not be located.')
        doc = results[0].get('doc')
        if doc is None:
            return None

        ts = float(doc.get('ts_create',0))

        coverage = DatasetManagementService._get_coverage(dataset_id)

        rdt = cls._coverage_to_granule(coverage,tdoa=slice(cls.get_relative_time(coverage,ts),None))
        coverage.close(timeout=5)
        return rdt.to_granule()
 def get_last_values(cls, dataset_id):
     coverage = DatasetManagementService._get_coverage(dataset_id)
     
     black_box = CoverageCraft(coverage)
     black_box.sync_rdt_with_coverage(tdoa=slice(-1,None))
     granule = black_box.to_granule()
     return granule
    def apply_to_dataset(self, dataset, calibration_update):
        cov = DatasetManagementService._get_coverage(dataset, mode='r+')
        try:
            self.set_sparse_values(cov, calibration_update)
            self.publish_calibration_event(dataset, calibration_update.keys())

        finally:
            cov.close()
    def test_coverage_types(self):
        # Make a simple dataset and start ingestion, pretty standard stuff.
        ctd_stream_id, route, stream_def_id, dataset_id = self.make_simple_dataset()
        cov = DatasetManagementService._get_coverage(dataset_id=dataset_id)
        self.assertIsInstance(cov, ViewCoverage)

        cov = DatasetManagementService._get_simplex_coverage(dataset_id=dataset_id)
        self.assertIsInstance(cov, SimplexCoverage)
Example #12
    def test_thorough_gap_analysis(self):
        dataset_id = self.test_ingestion_gap_analysis()
        vcov = DatasetManagementService._get_coverage(dataset_id)

        self.assertIsInstance(vcov, ViewCoverage)
        ccov = vcov.reference_coverage

        self.assertIsInstance(ccov, ComplexCoverage)
        self.assertEquals(len(ccov._reference_covs), 3)
    def test_empty_coverage_time(self):

        stream_id, route, stream_def_id, dataset_id = self.make_simple_dataset()
        coverage = DatasetManagementService._get_coverage(dataset_id)
        temporal_bounds = self.dataset_management.dataset_temporal_bounds(
            dataset_id)
        self.assertEquals([coverage.get_parameter_context('time').fill_value] * 2, temporal_bounds)
Example #15
 def get_last_values(cls, dataset_id, number_of_points):
     coverage = DatasetManagementService._get_coverage(dataset_id,mode='r')
     if coverage.num_timesteps < number_of_points:
         if coverage.num_timesteps == 0:
             rdt = RecordDictionaryTool(param_dictionary=coverage.parameter_dictionary)
             return rdt.to_granule()
         number_of_points = coverage.num_timesteps
     rdt = cls._coverage_to_granule(coverage,tdoa=slice(-number_of_points,None))
     coverage.close(timeout=5)
     
     return rdt.to_granule()
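In get_last_values, tdoa=slice(-number_of_points, None) just selects the trailing timesteps along the time axis once the coverage has at least that many. A quick illustration of the slice itself (plain NumPy, no coverage objects involved):

import numpy as np

time_axis = np.arange(100)          # stand-in for the coverage's timesteps
number_of_points = 5
tdoa = slice(-number_of_points, None)
print(time_axis[tdoa])              # -> [95 96 97 98 99], the last five timesteps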
 def execute_retrieve(self):
     '''
     execute_retrieve Executes a retrieval and returns the result 
     as a value in lieu of publishing it on a stream
     '''
     coverage = DatasetManagementService._get_coverage(self.dataset_id)
     crafter = CoverageCraft(coverage)
     #@todo: add bounds checking to ensure the dataset being retrieved is not too large
     crafter.sync_rdt_with_coverage(start_time=self.start_time,end_time=self.end_time,parameters=self.parameters)
     granule = crafter.to_granule()
     return granule
    def get_read_only_coverage(self, dataset_id):
        if not self._context_managed:
            print 'Warning: Coverages will remain open until they are closed or go out of scope - ' \
                  'be sure to close coverage instances when you are finished working with them'

        if dataset_id in self._ro_covs:
            return self._ro_covs[dataset_id]
        else:
            self._ro_covs[dataset_id] = DatasetManagementService._get_coverage(dataset_id, mode='r')

        return self._ro_covs[dataset_id]
Example #19
 def _replay(self):
     coverage = DatasetManagementService._get_coverage(self.dataset_id)
     rdt = self._coverage_to_granule(coverage, self.start_time, self.end_time, self.stride_time, self.parameters, self.stream_def_id)
     elements = len(rdt)
     
     for i in xrange(elements / self.publish_limit):
         outgoing = RecordDictionaryTool(stream_definition_id=self.stream_def_id)
         fields = self.parameters or outgoing.fields
         for field in fields:
             outgoing[field] = rdt[field][(i*self.publish_limit) : ((i+1)*self.publish_limit)]
         yield outgoing
     coverage.close(timeout=5)
     return 
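The _replay generator above pages the retrieved RecordDictionaryTool out in publish_limit-sized slices. Note that xrange(elements / self.publish_limit) floors the division, so a trailing batch smaller than publish_limit is never yielded as written. The sketch below shows the same slicing arithmetic on a plain list, with ceiling division if the remainder should be kept; the names are illustrative only.

def paged(values, publish_limit):
    # Ceiling division keeps the trailing partial batch; use
    # len(values) // publish_limit to reproduce the floor-division behaviour above.
    batches = (len(values) + publish_limit - 1) // publish_limit
    for i in range(batches):
        yield values[i * publish_limit:(i + 1) * publish_limit]

# list(paged(list(range(10)), 4)) -> [[0, 1, 2, 3], [4, 5, 6, 7], [8, 9]]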
Example #20
 def execute_retrieve(self):
     '''
     execute_retrieve Executes a retrieval and returns the result 
     as a value in lieu of publishing it on a stream
     '''
     try: 
         coverage = DatasetManagementService._get_coverage(self.dataset_id)
         rdt = self._coverage_to_granule(coverage,self.start_time, self.end_time, self.stride_time, self.parameters,tdoa=self.tdoa)
         coverage.close(timeout=5)
     except Exception as e:
         import traceback
         traceback.print_exc()
         raise BadRequest('Problems reading from the coverage')
     return rdt.to_granule()
 def _replay(self):
     coverage = DatasetManagementService._get_coverage(self.dataset_id,mode='r')
     rdt = self._cov2granule(coverage=coverage, start_time=self.start_time, end_time=self.end_time, stride_time=self.stride_time, parameters=self.parameters, stream_def_id=self.stream_def_id)
     elements = len(rdt)
     
     for i in xrange(elements / self.publish_limit):
         outgoing = RecordDictionaryTool(stream_definition_id=self.stream_def_id)
         fields = self.parameters or outgoing.fields
         for field in fields:
             v = rdt[field]
             if v is not None:
                 outgoing[field] = v[(i*self.publish_limit) : ((i+1)*self.publish_limit)]
         yield outgoing
     coverage.close(timeout=5)
     return 
    def test_retrieve_cache(self):
        DataRetrieverService._refresh_interval = 1
        datasets = [self.make_simple_dataset() for i in xrange(10)]
        for stream_id, route, stream_def_id, dataset_id in datasets:
            coverage = DatasetManagementService._get_coverage(dataset_id)
            coverage.insert_timesteps(10)
            coverage.set_parameter_values('time', np.arange(10))
            coverage.set_parameter_values('temp', np.arange(10))

        # Verify cache hit and refresh
        dataset_ids = [i[3] for i in datasets]
        self.assertTrue(dataset_ids[0] not in DataRetrieverService._retrieve_cache)
        DataRetrieverService._get_coverage(dataset_ids[0]) # Hit the cache
        cov, age = DataRetrieverService._retrieve_cache[dataset_ids[0]]
        # Verify that it was hit and it's now in there
        self.assertTrue(dataset_ids[0] in DataRetrieverService._retrieve_cache)

        gevent.sleep(DataRetrieverService._refresh_interval + 0.2)

        DataRetrieverService._get_coverage(dataset_ids[0]) # Hit the cache
        cov, age2 = DataRetrieverService._retrieve_cache[dataset_ids[0]]
        self.assertTrue(age2 != age)

        for dataset_id in dataset_ids:
            DataRetrieverService._get_coverage(dataset_id)
        
        self.assertTrue(dataset_ids[0] not in DataRetrieverService._retrieve_cache)

        stream_id, route, stream_def, dataset_id = datasets[0]
        self.start_ingestion(stream_id, dataset_id)
        DataRetrieverService._get_coverage(dataset_id)
        
        self.assertTrue(dataset_id in DataRetrieverService._retrieve_cache)

        DataRetrieverService._refresh_interval = 100
        self.publish_hifi(stream_id,route,1)
        self.wait_until_we_have_enough_granules(dataset_id, data_size=20)
        event = gevent.event.Event()
        with gevent.Timeout(20):
            while not event.wait(0.1):
                if dataset_id not in DataRetrieverService._retrieve_cache:
                    event.set()


        self.assertTrue(event.is_set())
 def get_coverage(self, stream_id):
     '''
     Memoization (LRU) of _get_coverage
     '''
     try:
         result = self._coverages.pop(stream_id)
     except KeyError:
         dataset_id = self.get_dataset(stream_id)
         if dataset_id is None:
             return None
         result = DatasetManagementService._get_coverage(dataset_id)
         if result is None:
             return None
         if len(self._coverages) >= self.CACHE_LIMIT:
             self._coverages.popitem(0)
     self._coverages[stream_id] = result
     return result
    def get_read_only_coverage(self, dataset_id):
        if not self._context_managed:
            warn_user('Warning: Coverages will remain open until they are closed or go out of scope - '
                           'be sure to close coverage instances when you are finished working with them or call self.clean_up(ro_covs=True)')

        # Check if we already have the coverage
        if dataset_id in self._ro_covs:
            cov = self._ro_covs[dataset_id]
            # If it's not closed, return it
            if not cov.closed:
                return cov
            # Otherwise, remove it from self._ro_covs and carry on
            del self._ro_covs[dataset_id]

        self._ro_covs[dataset_id] = DatasetManagementService._get_coverage(dataset_id, mode='r')

        return self._ro_covs[dataset_id]
 def get_coverage(self, stream_id):
     """
     Memoization (LRU) of _get_coverage
     """
     try:
         result = self._coverages.pop(stream_id)
     except KeyError:
         dataset_id = self.get_dataset(stream_id)
         if dataset_id is None:
             return None
         result = DatasetManagementService._get_coverage(dataset_id, mode="a")
         if result is None:
             return None
         if len(self._coverages) >= self.CACHE_LIMIT:
             k, coverage = self._coverages.popitem(0)
             coverage.close(timeout=5)
     self._coverages[stream_id] = result
     return result
    def test_correct_time(self):

        # There are 2208988800 seconds between Jan 1 1900 and Jan 1 1970, i.e.
        #  the conversion factor between unix and NTP time
        unix_now = np.floor(time.time())
        ntp_now = unix_now + 2208988800

        unix_ago = unix_now - 20
        ntp_ago = unix_ago + 2208988800

        stream_id, route, stream_def_id, dataset_id = self.make_simple_dataset()
        coverage = DatasetManagementService._get_coverage(dataset_id)
        coverage.insert_timesteps(20)
        coverage.set_parameter_values("time", np.arange(ntp_ago, ntp_now))

        temporal_bounds = self.dataset_management.dataset_temporal_bounds(dataset_id)

        self.assertTrue(np.abs(temporal_bounds[0] - unix_ago) < 2)
        self.assertTrue(np.abs(temporal_bounds[1] - unix_now) < 2)
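test_correct_time leans on the fixed 2208988800-second offset between the NTP epoch (1900-01-01) and the Unix epoch (1970-01-01): the coverage stores NTP times while dataset_temporal_bounds is expected to report Unix times. A minimal conversion helper, purely illustrative and not part of the services shown here:

NTP_UNIX_OFFSET = 2208988800  # seconds between 1900-01-01 and 1970-01-01

def unix_to_ntp(unix_seconds):
    return unix_seconds + NTP_UNIX_OFFSET

def ntp_to_unix(ntp_seconds):
    return ntp_seconds - NTP_UNIX_OFFSET

# unix_to_ntp(0) -> 2208988800, i.e. the Unix epoch expressed as an NTP timestamp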
 def _get_coverage(cls,dataset_id):
     '''
     Memoized coverage instantiation and management
     '''
     # Cached get
     retval = None
     with cls._cache_lock:
         try:
             retval, age = cls._retrieve_cache.pop(dataset_id)
             if (time.time() - age) > cls._refresh_interval:
                 raise KeyError(dataset_id)
         except KeyError: # Cache miss
             #@TODO: Add in LRU logic (maybe some mem checking too!)
             if len(cls._retrieve_cache) > cls._cache_limit:
                 cls._retrieve_cache.popitem(0)
             retval = DatasetManagementService._get_coverage(dataset_id, mode='r') 
         age = time.time()
         cls._retrieve_cache[dataset_id] = (retval, age)
     return retval
Example #30
 def execute_retrieve(self):
     '''
     execute_retrieve Executes a retrieval and returns the result 
     as a value in lieu of publishing it on a stream
     '''
     coverage = None
     try:
         coverage = DatasetManagementService._get_coverage(self.dataset_id,mode='r')
         if coverage.num_timesteps == 0:
             log.info('Reading from an empty coverage')
             rdt = RecordDictionaryTool(param_dictionary=coverage.parameter_dictionary)
         else: 
             rdt = self._coverage_to_granule(coverage=coverage,start_time=self.start_time, end_time=self.end_time, stride_time=self.stride_time, parameters=self.parameters,tdoa=self.tdoa)
     except Exception:
         import traceback
         traceback.print_exc()
         raise BadRequest('Problems reading from the coverage')
     finally:
         if coverage is not None:
             coverage.close(timeout=5)
     return rdt.to_granule()
    def get_last_values(cls, dataset_id, number_of_points=100, delivery_format=''):
        stream_def_id = delivery_format
        cov = None
        try:
            cov = DatasetManagementService._get_coverage(dataset_id, mode='r')
            if cov.is_empty():
                rdt = RecordDictionaryTool(param_dictionary=cov.parameter_dictionary)
            else:
                time_array = cov.get_parameter_values([cov.temporal_parameter_name], sort_parameter=cov.temporal_parameter_name).get_data()
                time_array = time_array[cov.temporal_parameter_name][-number_of_points:]

                t0 = np.asscalar(time_array[0])
                t1 = np.asscalar(time_array[-1])

                data_dict = cov.get_parameter_values(time_segment=(t0, t1), fill_empty_params=True).get_data()
                rdt = cls._data_dict_to_rdt(data_dict, stream_def_id, cov)
        except:
            log.exception('Problems reading from the coverage')
            raise BadRequest('Problems reading from the coverage')
        finally:
            if cov is not None:
                cov.close(timeout=5)
        return rdt
Example #34
 def execute_retrieve(self):
     '''
     execute_retrieve Executes a retrieval and returns the result 
     as a value in lieu of publishing it on a stream
     '''
     coverage = None
     try:
         coverage = DatasetManagementService._get_coverage(self.dataset_id,mode='r')
         if coverage.is_empty():
             log.info('Reading from an empty coverage')
             rdt = RecordDictionaryTool(param_dictionary=coverage.parameter_dictionary)
         else: 
             rdt = ReplayProcess._cov2granule(coverage=coverage, 
                     start_time=self.start_time, 
                     end_time=self.end_time,
                     stride_time=self.stride_time, 
                     parameters=self.parameters, 
                     stream_def_id=self.delivery_format, 
                     tdoa=self.tdoa)
     except:
         log.exception('Problems reading from the coverage')
         raise BadRequest('Problems reading from the coverage')
     finally:
         if coverage is not None:
             coverage.close(timeout=5)
     return rdt.to_granule()
 def get_coverage(self, dataset_id):
     cov = DatasetManagementService._get_coverage(dataset_id, mode='r+')
     return cov
class DatasetManagementTest(PyonTestCase):
    def setUp(self):
        mock_clients = self._create_service_mock('dataset_management')
        self.dataset_management = DatasetManagementService()
        self.dataset_management.clients = mock_clients

        self.mock_rr_create = self.dataset_management.clients.resource_registry.create
        self.mock_rr_read = self.dataset_management.clients.resource_registry.read
        self.mock_rr_update = self.dataset_management.clients.resource_registry.update
        self.mock_rr_delete = self.dataset_management.clients.resource_registry.delete
        self.mock_rr_create_assoc = self.dataset_management.clients.resource_registry.create_association
        self.mock_rr_find_assocs = self.dataset_management.clients.resource_registry.find_associations
        self.mock_rr_delete_assoc = self.dataset_management.clients.resource_registry.delete_association

    def test_create_dataset(self):
        # mocks
        self.mock_rr_create.return_value = ('dataset_id','rev')

        # execution
        self.dataset_management._create_coverage = Mock()
        self.dataset_management._persist_coverage = Mock()
        dataset_id = self.dataset_management.create_dataset(name='123',stream_id='123',datastore_name='fake_datastore', parameter_dict=[0], spatial_domain=[0], temporal_domain=[0])


        # assertions
        self.assertEquals(dataset_id,'dataset_id')
        self.assertTrue(self.mock_rr_create.called)
        self.assertTrue(self.mock_rr_create_assoc.call_count)


    def test_create_coverage(self):
        craft = CoverageCraft
        sdom, tdom = craft.create_domains()
        sdom = sdom.dump()
        tdom = tdom.dump()
        pdict = craft.create_parameters()
        pdict = pdict.dump()

        coverage = self.dataset_management._create_coverage("doesn't matter", pdict, sdom, tdom)
        self.assertIsInstance(coverage,SimplexCoverage)

    @patch('ion.services.dm.inventory.dataset_management_service.SimplexCoverage')
    @patch('ion.services.dm.inventory.dataset_management_service.validate_is_instance')
    def test_persist_coverage(self,validation, cov_mock):
        validation = Mock()
        cov_mock.save = Mock()
        mock_bb = CoverageCraft()
        self.dataset_management._persist_coverage('dataset_id', mock_bb.coverage)


    @patch('ion.services.dm.inventory.dataset_management_service.SimplexCoverage')
    def test_get_coverage(self, cov_mock):
        cov_mock.load = Mock()
        cov_mock.load.return_value = 'test'

        retval = self.dataset_management._get_coverage('dataset_id')
        self.assertEquals(retval,'test')


    def test_update_dataset(self):
        # mocks
        mock_dataset = DotDict({'_id':'dataset_id'})


        # execution
        self.dataset_management.update_dataset(mock_dataset)


        # assertions
        self.mock_rr_update.assert_called_with(mock_dataset)

    def test_delete_dataset(self):
        # mocks
        self.mock_rr_find_assocs.return_value = ['assoc']

        # execution
        self.dataset_management.delete_dataset('123')

        # assertions
        self.mock_rr_delete.assert_called_with('123')
        self.assertTrue(self.mock_rr_delete_assoc.call_count == 1)

    def test_add_stream(self):
        self.dataset_management.add_stream('dataset_id','stream_id')
        self.assertTrue(self.mock_rr_create_assoc.call_count)
    
    def test_remove_stream(self):
        self.mock_rr_find_assocs.return_value = [0]
        self.dataset_management.remove_stream('dataset_id','stream_id')
        self.assertTrue(self.mock_rr_delete_assoc.call_count)

    def test_get_dataset_info(self):
        coverage = DotDict()
        coverage.info = 1

        self.dataset_management._get_coverage = Mock()
        self.dataset_management._get_coverage.return_value = coverage

        retval = self.dataset_management.get_dataset_info('dataset_id')
        self.assertEquals(retval,1)


    def test_get_dataset_parameters(self):
        coverage = DotDict()
        coverage.parameter_dictionary.dump = Mock()
        coverage.parameter_dictionary.dump.return_value = 1

        self.dataset_management._get_coverage = Mock()
        self.dataset_management._get_coverage.return_value = coverage

        retval = self.dataset_management.get_dataset_parameters('dataset_id')
        self.assertEquals(retval,1)
 def get_coverage(self):
     return DatasetManagementService._get_coverage(self.dataset_id, mode='r')
    def test_replay_pause(self):
        # Get a precompiled parameter dictionary with basic ctd fields
        pdict_id = self.dataset_management.read_parameter_dictionary_by_name(
            'ctd_parsed_param_dict', id_only=True)
        context_ids = self.dataset_management.read_parameter_contexts(
            pdict_id, id_only=True)

        # Add a field that supports binary data input.
        bin_context = ParameterContext('binary', param_type=ArrayType())
        context_ids.append(
            self.dataset_management.create_parameter_context(
                'binary', bin_context.dump()))
        # Add another field that supports dictionary elements.
        rec_context = ParameterContext('records', param_type=RecordType())
        context_ids.append(
            self.dataset_management.create_parameter_context(
                'records', rec_context.dump()))

        pdict_id = self.dataset_management.create_parameter_dictionary(
            'replay_pdict',
            parameter_context_ids=context_ids,
            temporal_context='time')

        stream_def_id = self.pubsub_management.create_stream_definition(
            'replay_stream', parameter_dictionary_id=pdict_id)
        replay_stream, replay_route = self.pubsub_management.create_stream(
            'replay', 'xp1', stream_definition_id=stream_def_id)
        dataset_id = self.create_dataset(pdict_id)
        scov = DatasetManagementService._get_coverage(dataset_id)

        bb = CoverageCraft(scov)
        bb.rdt['time'] = np.arange(100)
        bb.rdt['temp'] = np.random.random(100) + 30
        bb.sync_with_granule()

        DatasetManagementService._persist_coverage(
            dataset_id,
            bb.coverage)  # This invalidates it for multi-host configurations
        # Set up the subscriber to verify the data
        subscriber = StandaloneStreamSubscriber(
            self.exchange_space_name, self.validate_granule_subscription)
        xp = self.container.ex_manager.create_xp('xp1')
        self.queue_buffer.append(self.exchange_space_name)
        subscriber.start()
        subscriber.xn.bind(replay_route.routing_key, xp)

        # Set up the replay agent and the client wrapper

        # 1) Define the Replay (dataset and stream to publish on)
        self.replay_id, process_id = self.data_retriever.define_replay(
            dataset_id=dataset_id, stream_id=replay_stream)
        # 2) Make a client to interact with the process (optionally provide it a process to bind with)
        replay_client = ReplayClient(process_id)
        # 3) Start the agent (launch the process)
        self.data_retriever.start_replay_agent(self.replay_id)
        # 4) Start replaying...
        replay_client.start_replay()

        # Wait till we get some granules
        self.assertTrue(self.event.wait(5))

        # We got granules, pause the replay, clear the queue and allow the process to finish consuming
        replay_client.pause_replay()
        gevent.sleep(1)
        subscriber.xn.purge()
        self.event.clear()

        # Make sure there's no remaining messages being consumed
        self.assertFalse(self.event.wait(1))

        # Resume the replay and wait until we start getting granules again
        replay_client.resume_replay()
        self.assertTrue(self.event.wait(5))

        # Stop the replay, clear the queues
        replay_client.stop_replay()
        gevent.sleep(1)
        subscriber.xn.purge()
        self.event.clear()

        # Make sure that it did indeed stop
        self.assertFalse(self.event.wait(1))

        subscriber.stop()