def test_coverage_ingest(self):
    stream_id = self.pubsub_management.create_stream()
    dataset_id = self.create_dataset()
    # I freaking hate this bug
    self.get_datastore(dataset_id)
    ingestion_config_id = self.get_ingestion_config()
    self.ingestion_management.persist_data_stream(
        stream_id=stream_id,
        ingestion_configuration_id=ingestion_config_id,
        dataset_id=dataset_id)

    black_box = CoverageCraft()
    black_box.rdt["time"] = np.arange(20)
    black_box.rdt["temp"] = np.random.random(20) * 10
    black_box.sync_with_granule()
    granule = black_box.to_granule()

    publisher = SimpleStreamPublisher.new_publisher(self.container, self.exchange_point_name, stream_id)
    publisher.publish(granule)

    self.wait_until_we_have_enough_granules(dataset_id, 1)

    coverage = DatasetManagementService._get_coverage(dataset_id)
    black_box = CoverageCraft(coverage)
    black_box.sync_rdt_with_coverage()
    comp = black_box.rdt["time"] == np.arange(20)
    self.assertTrue(comp.all())

    black_box = CoverageCraft()
    black_box.rdt["time"] = np.arange(20) + 20
    black_box.rdt["temp"] = np.random.random(20) * 10
    black_box.sync_with_granule()
    granule = black_box.to_granule()

    publisher.publish(granule)

    self.wait_until_we_have_enough_granules(dataset_id, 2)

    coverage = DatasetManagementService._get_coverage(dataset_id)
    black_box = CoverageCraft(coverage)
    black_box.sync_rdt_with_coverage()
    comp = black_box.rdt["time"] == np.arange(40)
    self.assertTrue(comp.all())

    granule = self.data_retriever.retrieve(dataset_id)
    black_box = CoverageCraft()
    black_box.sync_rdt_with_granule(granule)
    comp = black_box.rdt["time"] == np.arange(40)
    self.assertTrue(comp.all())
def execute_retrieve(self):
    '''
    execute_retrieve Executes a retrieval and returns the result as a value in
    lieu of publishing it on a stream
    '''
    coverage = None
    try:
        coverage = DatasetManagementService._get_coverage(self.dataset_id, mode='r')
        if coverage.num_timesteps == 0:
            log.info('Reading from an empty coverage')
            rdt = RecordDictionaryTool(param_dictionary=coverage.parameter_dictionary)
        else:
            rdt = self._coverage_to_granule(coverage=coverage, start_time=self.start_time,
                                            end_time=self.end_time, stride_time=self.stride_time,
                                            parameters=self.parameters, tdoa=self.tdoa)
    except:
        log.exception('Problems reading from the coverage')
        raise BadRequest('Problems reading from the coverage')
    finally:
        if coverage is not None:  # guard: _get_coverage itself may have raised
            coverage.close(timeout=5)
    return rdt.to_granule()
def get_last_granule(cls, container, dataset_id):
    dsm_cli = DatasetManagementServiceClient()
    dataset = dsm_cli.read_dataset(dataset_id)
    cc = container
    datastore_name = dataset.datastore_name
    view_name = dataset.view_name

    datastore = cc.datastore_manager.get_datastore(datastore_name, DataStore.DS_PROFILE.SCIDATA)

    opts = dict(
        start_key=[dataset_id, {}],
        end_key=[dataset_id, 0],
        descending=True,
        limit=1,
        include_docs=True)

    results = datastore.query_view(view_name, opts=opts)
    if not results:
        raise NotFound('A granule could not be located.')
    if results[0] is None:
        raise NotFound('A granule could not be located.')
    doc = results[0].get('doc')
    if doc is None:
        return None
    ts = float(doc.get('ts_create', 0))

    coverage = DatasetManagementService._get_coverage(dataset_id)
    black_box = CoverageCraft(coverage)
    black_box.sync_rdt_with_coverage(start_time=ts, end_time=None)
    granule = black_box.to_granule()
    return granule
def get_coverage(cls, data_product_id):
    '''
    Memoization (LRU) of _get_coverage
    '''
    if not data_product_id:
        return
    try:
        result, ts = cls._coverages.pop(data_product_id)
        if (time.time() - ts) > cls.CACHE_EXPIRATION:
            result.close()
            raise KeyError(data_product_id)
    except KeyError:
        if data_product_id is None:
            return None
        resource_registry = Container.instance.resource_registry
        dataset_ids, _ = resource_registry.find_objects(data_product_id, PRED.hasDataset, id_only=True)
        if not dataset_ids:
            return None
        dataset_id = dataset_ids[0]
        result = DatasetManagementService._get_coverage(dataset_id, mode='r')
        result.value_caching = False
        ts = time.time()
        if result is None:
            return None
    if len(cls._coverages) >= cls.CACHE_LIMIT:
        key, value = cls._coverages.popitem(0)
        coverage, ts = value
        coverage.close(timeout=5)
    # Re-insert under the same key that was popped above so cache hits are possible
    cls._coverages[data_product_id] = result, ts
    return result
def get_last_granule(cls, container, dataset_id):
    dsm_cli = DatasetManagementServiceClient()
    dataset = dsm_cli.read_dataset(dataset_id)
    cc = container
    datastore_name = dataset.datastore_name
    view_name = dataset.view_name

    datastore = cc.datastore_manager.get_datastore(datastore_name, DataStore.DS_PROFILE.SCIDATA)

    opts = dict(
        start_key=[dataset_id, {}],
        end_key=[dataset_id, 0],
        descending=True,
        limit=1,
        include_docs=True)

    results = datastore.query_view(view_name, opts=opts)
    if not results:
        raise NotFound('A granule could not be located.')
    if results[0] is None:
        raise NotFound('A granule could not be located.')
    doc = results[0].get('doc')
    if doc is None:
        return None
    ts = float(doc.get('ts_create', 0))

    coverage = DatasetManagementService._get_coverage(dataset_id)
    rdt = cls._coverage_to_granule(coverage, tdoa=slice(cls.get_relative_time(coverage, ts), None))
    coverage.close(timeout=5)
    return rdt.to_granule()
def get_last_values(cls, dataset_id):
    coverage = DatasetManagementService._get_coverage(dataset_id)
    black_box = CoverageCraft(coverage)
    black_box.sync_rdt_with_coverage(tdoa=slice(-1, None))
    granule = black_box.to_granule()
    return granule
def apply_to_dataset(self, dataset, calibration_update):
    cov = DatasetManagementService._get_coverage(dataset, mode='r+')
    try:
        self.set_sparse_values(cov, calibration_update)
        self.publish_calibration_event(dataset, calibration_update.keys())
    finally:
        cov.close()
def test_coverage_types(self):
    # Make a simple dataset and start ingestion, pretty standard stuff.
    ctd_stream_id, route, stream_def_id, dataset_id = self.make_simple_dataset()

    cov = DatasetManagementService._get_coverage(dataset_id=dataset_id)
    self.assertIsInstance(cov, ViewCoverage)

    cov = DatasetManagementService._get_simplex_coverage(dataset_id=dataset_id)
    self.assertIsInstance(cov, SimplexCoverage)
def test_thorough_gap_analysis(self):
    dataset_id = self.test_ingestion_gap_analysis()
    vcov = DatasetManagementService._get_coverage(dataset_id)
    self.assertIsInstance(vcov, ViewCoverage)
    ccov = vcov.reference_coverage
    self.assertIsInstance(ccov, ComplexCoverage)
    self.assertEquals(len(ccov._reference_covs), 3)
def test_empty_coverage_time(self):
    stream_id, route, stream_def_id, dataset_id = self.make_simple_dataset()
    coverage = DatasetManagementService._get_coverage(dataset_id)
    temporal_bounds = self.dataset_management.dataset_temporal_bounds(dataset_id)
    self.assertEquals([coverage.get_parameter_context('time').fill_value] * 2, temporal_bounds)
def get_last_values(cls, dataset_id, number_of_points):
    coverage = DatasetManagementService._get_coverage(dataset_id, mode='r')
    if coverage.num_timesteps < number_of_points:
        if coverage.num_timesteps == 0:
            rdt = RecordDictionaryTool(param_dictionary=coverage.parameter_dictionary)
            coverage.close(timeout=5)  # close before the early return as well
            return rdt.to_granule()
        number_of_points = coverage.num_timesteps
    rdt = cls._coverage_to_granule(coverage, tdoa=slice(-number_of_points, None))
    coverage.close(timeout=5)
    return rdt.to_granule()
def execute_retrieve(self):
    '''
    execute_retrieve Executes a retrieval and returns the result as a value in
    lieu of publishing it on a stream
    '''
    coverage = DatasetManagementService._get_coverage(self.dataset_id)
    crafter = CoverageCraft(coverage)
    #@todo: add bounds checking to ensure the dataset being retrieved is not too large
    crafter.sync_rdt_with_coverage(start_time=self.start_time, end_time=self.end_time, parameters=self.parameters)
    granule = crafter.to_granule()
    return granule
def get_read_only_coverage(self, dataset_id):
    if not self._context_managed:
        print 'Warning: Coverages will remain open until they are closed or go out of scope - ' \
              'be sure to close coverage instances when you are finished working with them'

    if dataset_id in self._ro_covs:
        return self._ro_covs[dataset_id]
    else:
        self._ro_covs[dataset_id] = DatasetManagementService._get_coverage(dataset_id, mode='r')
    return self._ro_covs[dataset_id]
def _replay(self):
    coverage = DatasetManagementService._get_coverage(self.dataset_id)
    rdt = self._coverage_to_granule(coverage, self.start_time, self.end_time, self.stride_time,
                                    self.parameters, self.stream_def_id)
    elements = len(rdt)

    for i in xrange(elements / self.publish_limit):
        outgoing = RecordDictionaryTool(stream_definition_id=self.stream_def_id)
        fields = self.parameters or outgoing.fields
        for field in fields:
            outgoing[field] = rdt[field][(i * self.publish_limit):((i + 1) * self.publish_limit)]
        yield outgoing
    coverage.close(timeout=5)
    return
def execute_retrieve(self):
    '''
    execute_retrieve Executes a retrieval and returns the result as a value in
    lieu of publishing it on a stream
    '''
    try:
        coverage = DatasetManagementService._get_coverage(self.dataset_id)
        rdt = self._coverage_to_granule(coverage, self.start_time, self.end_time, self.stride_time,
                                        self.parameters, tdoa=self.tdoa)
        coverage.close(timeout=5)
    except Exception:
        import traceback
        traceback.print_exc()  # print_exc() takes no exception argument
        raise BadRequest('Problems reading from the coverage')
    return rdt.to_granule()
def _replay(self):
    coverage = DatasetManagementService._get_coverage(self.dataset_id, mode='r')
    rdt = self._cov2granule(coverage=coverage, start_time=self.start_time, end_time=self.end_time,
                            stride_time=self.stride_time, parameters=self.parameters,
                            stream_def_id=self.stream_def_id)
    elements = len(rdt)

    for i in xrange(elements / self.publish_limit):
        outgoing = RecordDictionaryTool(stream_definition_id=self.stream_def_id)
        fields = self.parameters or outgoing.fields
        for field in fields:
            v = rdt[field]
            if v is not None:
                outgoing[field] = v[(i * self.publish_limit):((i + 1) * self.publish_limit)]
        yield outgoing
    coverage.close(timeout=5)
    return
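# The _replay loops above publish floor(elements / publish_limit) full chunks,
# so a trailing partial chunk is silently dropped when elements is not a
# multiple of publish_limit. If that is not intended, here is a minimal
# standalone sketch of remainder-preserving chunking (illustrative only, not
# project code):
def iter_chunks(values, chunk_size):
    '''Yield consecutive slices of values, including the final partial chunk.'''
    for start in range(0, len(values), chunk_size):
        yield values[start:start + chunk_size]

# Example: list(iter_chunks(range(25), 10)) yields chunks of 10, 10 and 5 elements.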
def test_retrieve_cache(self):
    DataRetrieverService._refresh_interval = 1
    datasets = [self.make_simple_dataset() for i in xrange(10)]
    for stream_id, route, stream_def_id, dataset_id in datasets:
        coverage = DatasetManagementService._get_coverage(dataset_id)
        coverage.insert_timesteps(10)
        coverage.set_parameter_values('time', np.arange(10))
        coverage.set_parameter_values('temp', np.arange(10))

    # Verify cache hit and refresh
    dataset_ids = [i[3] for i in datasets]
    self.assertTrue(dataset_ids[0] not in DataRetrieverService._retrieve_cache)
    DataRetrieverService._get_coverage(dataset_ids[0])  # Hit the cache
    cov, age = DataRetrieverService._retrieve_cache[dataset_ids[0]]
    # Verify that it was hit and it's now in there
    self.assertTrue(dataset_ids[0] in DataRetrieverService._retrieve_cache)

    gevent.sleep(DataRetrieverService._refresh_interval + 0.2)

    DataRetrieverService._get_coverage(dataset_ids[0])  # Hit the cache
    cov, age2 = DataRetrieverService._retrieve_cache[dataset_ids[0]]
    self.assertTrue(age2 != age)

    for dataset_id in dataset_ids:
        DataRetrieverService._get_coverage(dataset_id)

    self.assertTrue(dataset_ids[0] not in DataRetrieverService._retrieve_cache)

    stream_id, route, stream_def, dataset_id = datasets[0]
    self.start_ingestion(stream_id, dataset_id)
    DataRetrieverService._get_coverage(dataset_id)

    self.assertTrue(dataset_id in DataRetrieverService._retrieve_cache)

    DataRetrieverService._refresh_interval = 100
    self.publish_hifi(stream_id, route, 1)
    self.wait_until_we_have_enough_granules(dataset_id, data_size=20)

    event = gevent.event.Event()
    with gevent.Timeout(20):
        while not event.wait(0.1):
            if dataset_id not in DataRetrieverService._retrieve_cache:
                event.set()

    self.assertTrue(event.is_set())
def get_coverage(self, stream_id):
    '''
    Memoization (LRU) of _get_coverage
    '''
    try:
        result = self._coverages.pop(stream_id)
    except KeyError:
        dataset_id = self.get_dataset(stream_id)
        if dataset_id is None:
            return None
        result = DatasetManagementService._get_coverage(dataset_id)
        if result is None:
            return None
    if len(self._coverages) >= self.CACHE_LIMIT:
        self._coverages.popitem(0)
    self._coverages[stream_id] = result
    return result
def get_read_only_coverage(self, dataset_id):
    if not self._context_managed:
        warn_user('Warning: Coverages will remain open until they are closed or go out of scope - '
                  'be sure to close coverage instances when you are finished working with them or call self.clean_up(ro_covs=True)')
    # Check if we already have the coverage
    if dataset_id in self._ro_covs:
        cov = self._ro_covs[dataset_id]
        # If it's not closed, return it
        if not cov.closed:
            return cov
        # Otherwise, remove it from self._ro_covs and carry on
        del self._ro_covs[dataset_id]

    self._ro_covs[dataset_id] = DatasetManagementService._get_coverage(dataset_id, mode='r')
    return self._ro_covs[dataset_id]
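# The warning above exists because callers can forget to close coverages that
# get_read_only_coverage hands out. A minimal sketch (not project code) of a
# context-manager wrapper that guarantees the close; it only assumes the
# returned object has close(), as the snippets above do. The names 'helper',
# 'dataset_helper' and 'do_something' are illustrative.
from contextlib import contextmanager

@contextmanager
def read_only_coverage(helper, dataset_id):
    cov = helper.get_read_only_coverage(dataset_id)
    try:
        yield cov
    finally:
        cov.close(timeout=5)

# Usage sketch:
#   with read_only_coverage(dataset_helper, dataset_id) as cov:
#       do_something(cov)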
def get_coverage(self, stream_id):
    '''
    Memoization (LRU) of _get_coverage
    '''
    try:
        result = self._coverages.pop(stream_id)
    except KeyError:
        dataset_id = self.get_dataset(stream_id)
        if dataset_id is None:
            return None
        result = DatasetManagementService._get_coverage(dataset_id, mode='a')
        if result is None:
            return None
    if len(self._coverages) >= self.CACHE_LIMIT:
        k, coverage = self._coverages.popitem(0)
        coverage.close(timeout=5)
    self._coverages[stream_id] = result
    return result
def test_correct_time(self):
    # There are 2208988800 seconds between Jan 1 1900 and Jan 1 1970, i.e.
    # the conversion factor between unix and NTP time
    unix_now = np.floor(time.time())
    ntp_now = unix_now + 2208988800

    unix_ago = unix_now - 20
    ntp_ago = unix_ago + 2208988800

    stream_id, route, stream_def_id, dataset_id = self.make_simple_dataset()
    coverage = DatasetManagementService._get_coverage(dataset_id)
    coverage.insert_timesteps(20)
    coverage.set_parameter_values("time", np.arange(ntp_ago, ntp_now))

    temporal_bounds = self.dataset_management.dataset_temporal_bounds(dataset_id)
    self.assertTrue(np.abs(temporal_bounds[0] - unix_ago) < 2)
    self.assertTrue(np.abs(temporal_bounds[1] - unix_now) < 2)
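# A standalone sketch of the unix <-> NTP conversion that test_correct_time
# relies on; the 2208988800-second offset is taken from the test itself, the
# function names are illustrative only.
import time

NTP_UNIX_OFFSET = 2208988800  # seconds between 1900-01-01 and 1970-01-01

def unix_to_ntp(unix_seconds):
    # NTP timestamps count seconds from the 1900 epoch
    return unix_seconds + NTP_UNIX_OFFSET

def ntp_to_unix(ntp_seconds):
    return ntp_seconds - NTP_UNIX_OFFSET

# e.g. ntp_to_unix(unix_to_ntp(time.time())) round-trips to the current unix time.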
def _get_coverage(cls, dataset_id):
    '''
    Memoized coverage instantiation and management
    '''
    # Cached get
    retval = None
    with cls._cache_lock:
        try:
            retval, age = cls._retrieve_cache.pop(dataset_id)
            if (time.time() - age) > cls._refresh_interval:
                raise KeyError(dataset_id)
        except KeyError:
            # Cache miss
            #@TODO: Add in LRU logic (maybe some mem checking too!)
            if len(cls._retrieve_cache) > cls._cache_limit:
                cls._retrieve_cache.popitem(0)
            retval = DatasetManagementService._get_coverage(dataset_id, mode='r')
            age = time.time()
        cls._retrieve_cache[dataset_id] = (retval, age)
    return retval
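# The @TODO in _get_coverage above asks for real LRU logic. A minimal
# standalone sketch (not project code) of an LRU cache with age-based refresh,
# using only collections.OrderedDict; CACHE_LIMIT, REFRESH_INTERVAL and the
# build() callable are illustrative assumptions.
import time
from collections import OrderedDict

CACHE_LIMIT = 5
REFRESH_INTERVAL = 30  # seconds

_cache = OrderedDict()

def lru_get(key, build):
    '''Return a cached value for key, rebuilding it via build() on a miss or expiry.'''
    try:
        value, age = _cache.pop(key)
        if (time.time() - age) > REFRESH_INTERVAL:
            raise KeyError(key)
    except KeyError:
        if len(_cache) >= CACHE_LIMIT:
            _cache.popitem(last=False)  # evict the oldest (least recently used) entry
        value, age = build(), time.time()
    _cache[key] = (value, age)          # re-insert so this key becomes most recent
    return value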
def execute_retrieve(self):
    '''
    execute_retrieve Executes a retrieval and returns the result as a value in
    lieu of publishing it on a stream
    '''
    coverage = None
    try:
        coverage = DatasetManagementService._get_coverage(self.dataset_id, mode='r')
        if coverage.num_timesteps == 0:
            log.info('Reading from an empty coverage')
            rdt = RecordDictionaryTool(param_dictionary=coverage.parameter_dictionary)
        else:
            rdt = self._coverage_to_granule(coverage=coverage, start_time=self.start_time,
                                            end_time=self.end_time, stride_time=self.stride_time,
                                            parameters=self.parameters, tdoa=self.tdoa)
    except Exception:
        import traceback
        traceback.print_exc()  # print_exc() takes no exception argument
        raise BadRequest('Problems reading from the coverage')
    finally:
        if coverage is not None:  # guard: _get_coverage itself may have raised
            coverage.close(timeout=5)
    return rdt.to_granule()
def get_last_values(cls, dataset_id, number_of_points=100, delivery_format=''):
    stream_def_id = delivery_format
    cov = None
    try:
        cov = DatasetManagementService._get_coverage(dataset_id, mode='r')
        if cov.is_empty():
            rdt = RecordDictionaryTool(param_dictionary=cov.parameter_dictionary)
        else:
            time_array = cov.get_parameter_values([cov.temporal_parameter_name],
                                                  sort_parameter=cov.temporal_parameter_name).get_data()
            time_array = time_array[cov.temporal_parameter_name][-number_of_points:]
            t0 = np.asscalar(time_array[0])
            t1 = np.asscalar(time_array[-1])
            data_dict = cov.get_parameter_values(time_segment=(t0, t1), fill_empty_params=True).get_data()
            rdt = cls._data_dict_to_rdt(data_dict, stream_def_id, cov)
    except:
        log.exception('Problems reading from the coverage')
        raise BadRequest('Problems reading from the coverage')
    finally:
        if cov is not None:
            cov.close(timeout=5)
    return rdt
def execute_retrieve(self):
    '''
    execute_retrieve Executes a retrieval and returns the result as a value in
    lieu of publishing it on a stream
    '''
    coverage = None
    try:
        coverage = DatasetManagementService._get_coverage(self.dataset_id, mode='r')
        if coverage.is_empty():
            log.info('Reading from an empty coverage')
            rdt = RecordDictionaryTool(param_dictionary=coverage.parameter_dictionary)
        else:
            rdt = ReplayProcess._cov2granule(coverage=coverage, start_time=self.start_time,
                                             end_time=self.end_time, stride_time=self.stride_time,
                                             parameters=self.parameters, stream_def_id=self.delivery_format,
                                             tdoa=self.tdoa)
    except:
        log.exception('Problems reading from the coverage')
        raise BadRequest('Problems reading from the coverage')
    finally:
        if coverage is not None:  # guard: _get_coverage itself may have raised
            coverage.close(timeout=5)
    return rdt.to_granule()
def get_coverage(self, dataset_id):
    cov = DatasetManagementService._get_coverage(dataset_id, mode='r+')
    return cov
class DatasetManagementTest(PyonTestCase):
    def setUp(self):
        mock_clients = self._create_service_mock('dataset_management')
        self.dataset_management = DatasetManagementService()
        self.dataset_management.clients = mock_clients

        self.mock_rr_create = self.dataset_management.clients.resource_registry.create
        self.mock_rr_read = self.dataset_management.clients.resource_registry.read
        self.mock_rr_update = self.dataset_management.clients.resource_registry.update
        self.mock_rr_delete = self.dataset_management.clients.resource_registry.delete
        self.mock_rr_create_assoc = self.dataset_management.clients.resource_registry.create_association
        self.mock_rr_find_assocs = self.dataset_management.clients.resource_registry.find_associations
        self.mock_rr_delete_assoc = self.dataset_management.clients.resource_registry.delete_association

    def test_create_dataset(self):
        # mocks
        self.mock_rr_create.return_value = ('dataset_id', 'rev')

        # execution
        self.dataset_management._create_coverage = Mock()
        self.dataset_management._persist_coverage = Mock()
        dataset_id = self.dataset_management.create_dataset(name='123', stream_id='123',
                                                            datastore_name='fake_datastore',
                                                            parameter_dict=[0], spatial_domain=[0],
                                                            temporal_domain=[0])

        # assertions
        self.assertEquals(dataset_id, 'dataset_id')
        self.assertTrue(self.mock_rr_create.called)
        self.assertTrue(self.mock_rr_create_assoc.call_count)

    def test_create_coverage(self):
        craft = CoverageCraft
        sdom, tdom = craft.create_domains()
        sdom = sdom.dump()
        tdom = tdom.dump()
        pdict = craft.create_parameters()
        pdict = pdict.dump()
        coverage = self.dataset_management._create_coverage("doesn't matter", pdict, sdom, tdom)
        self.assertIsInstance(coverage, SimplexCoverage)

    @patch('ion.services.dm.inventory.dataset_management_service.SimplexCoverage')
    @patch('ion.services.dm.inventory.dataset_management_service.validate_is_instance')
    def test_persist_coverage(self, validation, cov_mock):
        validation = Mock()
        cov_mock.save = Mock()
        mock_bb = CoverageCraft()
        self.dataset_management._persist_coverage('dataset_id', mock_bb.coverage)

    @patch('ion.services.dm.inventory.dataset_management_service.SimplexCoverage')
    def test_get_coverage(self, cov_mock):
        cov_mock.load = Mock()
        cov_mock.load.return_value = 'test'
        retval = self.dataset_management._get_coverage('dataset_id')
        self.assertEquals(retval, 'test')

    def test_update_dataset(self):
        # mocks
        mock_dataset = DotDict({'_id': 'dataset_id'})

        # execution
        self.dataset_management.update_dataset(mock_dataset)

        # assertions
        self.mock_rr_update.assert_called_with(mock_dataset)

    def test_delete_dataset(self):
        # mocks
        self.mock_rr_find_assocs.return_value = ['assoc']

        # execution
        self.dataset_management.delete_dataset('123')

        # assertions
        self.mock_rr_delete.assert_called_with('123')
        self.assertTrue(self.mock_rr_delete_assoc.call_count == 1)

    def test_add_stream(self):
        self.dataset_management.add_stream('dataset_id', 'stream_id')
        self.assertTrue(self.mock_rr_create_assoc.call_count)

    def test_remove_stream(self):
        self.mock_rr_find_assocs.return_value = [0]
        self.dataset_management.remove_stream('dataset_id', 'stream_id')
        self.assertTrue(self.mock_rr_delete_assoc.call_count)

    def test_get_dataset_info(self):
        coverage = DotDict()
        coverage.info = 1
        self.dataset_management._get_coverage = Mock()
        self.dataset_management._get_coverage.return_value = coverage
        retval = self.dataset_management.get_dataset_info('dataset_id')
        self.assertEquals(retval, 1)

    def test_get_dataset_parameters(self):
        coverage = DotDict()
        coverage.parameter_dictionary.dump = Mock()
        coverage.parameter_dictionary.dump.return_value = 1
        self.dataset_management._get_coverage = Mock()
        self.dataset_management._get_coverage.return_value = coverage
        retval = self.dataset_management.get_dataset_parameters('dataset_id')
        self.assertEquals(retval, 1)
def get_coverage(self):
    return DatasetManagementService._get_coverage(self.dataset_id, mode='r')
def test_replay_pause(self):
    # Get a precompiled parameter dictionary with basic ctd fields
    pdict_id = self.dataset_management.read_parameter_dictionary_by_name('ctd_parsed_param_dict', id_only=True)
    context_ids = self.dataset_management.read_parameter_contexts(pdict_id, id_only=True)

    # Add a field that supports binary data input.
    bin_context = ParameterContext('binary', param_type=ArrayType())
    context_ids.append(self.dataset_management.create_parameter_context('binary', bin_context.dump()))
    # Add another field that supports dictionary elements.
    rec_context = ParameterContext('records', param_type=RecordType())
    context_ids.append(self.dataset_management.create_parameter_context('records', rec_context.dump()))

    pdict_id = self.dataset_management.create_parameter_dictionary('replay_pdict',
                                                                   parameter_context_ids=context_ids,
                                                                   temporal_context='time')

    stream_def_id = self.pubsub_management.create_stream_definition('replay_stream', parameter_dictionary_id=pdict_id)
    replay_stream, replay_route = self.pubsub_management.create_stream('replay', 'xp1', stream_definition_id=stream_def_id)
    dataset_id = self.create_dataset(pdict_id)
    scov = DatasetManagementService._get_coverage(dataset_id)

    bb = CoverageCraft(scov)
    bb.rdt['time'] = np.arange(100)
    bb.rdt['temp'] = np.random.random(100) + 30
    bb.sync_with_granule()
    DatasetManagementService._persist_coverage(dataset_id, bb.coverage)  # This invalidates it for multi-host configurations

    # Set up the subscriber to verify the data
    subscriber = StandaloneStreamSubscriber(self.exchange_space_name, self.validate_granule_subscription)
    xp = self.container.ex_manager.create_xp('xp1')
    self.queue_buffer.append(self.exchange_space_name)
    subscriber.start()
    subscriber.xn.bind(replay_route.routing_key, xp)

    # Set up the replay agent and the client wrapper

    # 1) Define the Replay (dataset and stream to publish on)
    self.replay_id, process_id = self.data_retriever.define_replay(dataset_id=dataset_id, stream_id=replay_stream)
    # 2) Make a client to interact with the process (optionally provide it a process to bind with)
    replay_client = ReplayClient(process_id)
    # 3) Start the agent (launch the process)
    self.data_retriever.start_replay_agent(self.replay_id)
    # 4) Start replaying...
    replay_client.start_replay()

    # Wait till we get some granules
    self.assertTrue(self.event.wait(5))

    # We got granules, pause the replay, clear the queue and allow the process to finish consuming
    replay_client.pause_replay()
    gevent.sleep(1)
    subscriber.xn.purge()
    self.event.clear()

    # Make sure there's no remaining messages being consumed
    self.assertFalse(self.event.wait(1))

    # Resume the replay and wait until we start getting granules again
    replay_client.resume_replay()
    self.assertTrue(self.event.wait(5))

    # Stop the replay, clear the queues
    replay_client.stop_replay()
    gevent.sleep(1)
    subscriber.xn.purge()
    self.event.clear()

    # Make sure that it did indeed stop
    self.assertFalse(self.event.wait(1))

    subscriber.stop()