def test_coverage_types(self):
        # Make a simple dataset and start ingestion, pretty standard stuff.
        ctd_stream_id, route, stream_def_id, dataset_id = self.make_simple_dataset()
        cov = DatasetManagementService._get_coverage(dataset_id=dataset_id)
        self.assertIsInstance(cov, ViewCoverage)

        cov = DatasetManagementService._get_simplex_coverage(dataset_id=dataset_id)
        self.assertIsInstance(cov, SimplexCoverage)
    def fill_temporal_gap(self, dataset_id, gap_coverage_path=None, gap_coverage_id=None):
        if gap_coverage_path is None and gap_coverage_id is None:
            raise ValueError('Must specify either \'gap_coverage_path\' or \'gap_coverage_id\'')

        if gap_coverage_path is None:
            gap_coverage_path = self.get_coverage_path(gap_coverage_id)

        from coverage_model import AbstractCoverage
        gap_cov = AbstractCoverage.load(gap_coverage_path)

        self.pause_ingestion(self.get_stream_id(dataset_id))
        DatasetManagementService._splice_coverage(dataset_id, gap_cov)
Example 4
    def add_granule(self, stream_id, rdt):
        ''' Appends the granule's data to the coverage and persists it. '''
        if stream_id in self._bad_coverages:
            log.info(
                'Message attempting to be inserted into bad coverage: %s',
                DatasetManagementService._get_coverage_path(
                    self.get_dataset(stream_id)))

        #--------------------------------------------------------------------------------
        # Coverage determination and appending
        #--------------------------------------------------------------------------------
        dataset_id = self.get_dataset(stream_id)
        if not dataset_id:
            log.error('No dataset could be determined on this stream: %s',
                      stream_id)
            return

        try:
            coverage = self.get_coverage(stream_id)
        except IOError as e:
            log.error(
                "Couldn't open coverage: %s",
                DatasetManagementService._get_coverage_path(
                    self.get_dataset(stream_id)))
            raise CorruptionError(e.message)

        if not coverage:
            log.error(
                'Could not persist coverage from granule, coverage is None')
            return
        #--------------------------------------------------------------------------------
        # Actual persistence
        #--------------------------------------------------------------------------------

        if rdt[rdt.temporal_parameter] is None:
            log.warning("Empty granule received")
            return

        # Parse the RDT and set the values in the coverage
        self.insert_values(coverage, rdt, stream_id)

        # Force the data to be flushed
        DatasetManagementService._save_coverage(coverage)

        self.update_metadata(dataset_id, rdt)

        try:
            window = rdt[rdt.temporal_parameter][[0, -1]]
            window = window.tolist()
        except (ValueError, IndexError):
            window = None
        self.dataset_changed(dataset_id, window)
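
The temporal window computed above is just the first and last timestamps, extracted with numpy fancy indexing. A standalone sketch of the same trick (the values are illustrative):

import numpy as np

times = np.arange(100.0, 110.0)   # stand-in for rdt[rdt.temporal_parameter]
window = times[[0, -1]].tolist()  # fancy indexing picks the first and last values
assert window == [100.0, 109.0]
# On an empty array this indexing raises IndexError, which is why add_granule
# falls back to window = None.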
    def test_coverage_ingest(self):
        stream_id = self.pubsub_management.create_stream()
        dataset_id = self.create_dataset()
        # I freaking hate this bug
        self.get_datastore(dataset_id)
        ingestion_config_id = self.get_ingestion_config()
        self.ingestion_management.persist_data_stream(
            stream_id=stream_id, ingestion_configuration_id=ingestion_config_id, dataset_id=dataset_id
        )

        black_box = CoverageCraft()
        black_box.rdt["time"] = np.arange(20)
        black_box.rdt["temp"] = np.random.random(20) * 10
        black_box.sync_with_granule()
        granule = black_box.to_granule()

        publisher = SimpleStreamPublisher.new_publisher(self.container, self.exchange_point_name, stream_id)
        publisher.publish(granule)

        self.wait_until_we_have_enough_granules(dataset_id, 1)

        coverage = DatasetManagementService._get_coverage(dataset_id)

        black_box = CoverageCraft(coverage)
        black_box.sync_rdt_with_coverage()
        comp = black_box.rdt["time"] == np.arange(20)
        self.assertTrue(comp.all())

        black_box = CoverageCraft()
        black_box.rdt["time"] = np.arange(20) + 20
        black_box.rdt["temp"] = np.random.random(20) * 10
        black_box.sync_with_granule()
        granule = black_box.to_granule()

        publisher.publish(granule)

        self.wait_until_we_have_enough_granules(dataset_id, 2)

        coverage = DatasetManagementService._get_coverage(dataset_id)

        black_box = CoverageCraft(coverage)
        black_box.sync_rdt_with_coverage()
        comp = black_box.rdt["time"] == np.arange(40)
        self.assertTrue(comp.all())

        granule = self.data_retriever.retrieve(dataset_id)

        black_box = CoverageCraft()
        black_box.sync_rdt_with_granule(granule)
        comp = black_box.rdt["time"] == np.arange(40)
        self.assertTrue(comp.all())
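
The comp = ... / assertTrue(comp.all()) pattern above is an element-wise equality check; numpy's array_equal collapses it into a single call, as in this small sketch:

import numpy as np

a = np.arange(40)
b = np.concatenate([np.arange(20), np.arange(20) + 20])
assert (a == b).all()          # the style used in the test
assert np.array_equal(a, b)    # equivalent single call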
    def _get_param_dict_by_name(self, name):
        dict_obj = self.RR2.find_resources_by_name(RT.ParameterDictionary,
                                                   name)[0]
        parameter_contexts = \
            self.RR2.find_parameter_contexts_of_parameter_dictionary_using_has_parameter_context(dict_obj._id)
        return DatasetManagementService.build_parameter_dictionary(
            dict_obj, parameter_contexts)
Example 7
    def execute_retrieve(self):
        """
        execute_retrieve Executes a retrieval and returns the result
        as a value in lieu of publishing it on a stream
        """
        coverage = None
        try:
            coverage = DatasetManagementService._get_coverage(self.dataset_id, mode="r")
            if coverage.num_timesteps == 0:
                log.info("Reading from an empty coverage")
                rdt = RecordDictionaryTool(param_dictionary=coverage.parameter_dictionary)
            else:
                rdt = self._coverage_to_granule(
                    coverage=coverage,
                    start_time=self.start_time,
                    end_time=self.end_time,
                    stride_time=self.stride_time,
                    parameters=self.parameters,
                    tdoa=self.tdoa,
                )
        except Exception:
            log.exception("Problems reading from the coverage")
            raise BadRequest("Problems reading from the coverage")
        finally:
            # Guard against NameError if _get_coverage itself raised
            if coverage is not None:
                coverage.close(timeout=5)
        return rdt.to_granule()
    def test_retrieve_cache(self):
        DataRetrieverService._refresh_interval = 1
        datasets = [self.make_simple_dataset() for i in xrange(10)]
        for stream_id, route, stream_def_id, dataset_id in datasets:
            coverage = DatasetManagementService._get_simplex_coverage(dataset_id, mode='a')
            coverage.insert_timesteps(10)
            coverage.set_parameter_values('time', np.arange(10))
            coverage.set_parameter_values('temp', np.arange(10))

        # Verify cache hit and refresh
        dataset_ids = [i[3] for i in datasets]
        self.assertTrue(dataset_ids[0] not in DataRetrieverService._retrieve_cache)
        DataRetrieverService._get_coverage(dataset_ids[0]) # Hit the cache
        cov, age = DataRetrieverService._retrieve_cache[dataset_ids[0]]
        # Verify that it was hit and it's now in there
        self.assertTrue(dataset_ids[0] in DataRetrieverService._retrieve_cache)

        gevent.sleep(DataRetrieverService._refresh_interval + 0.2)

        DataRetrieverService._get_coverage(dataset_ids[0]) # Hit the cache
        cov, age2 = DataRetrieverService._retrieve_cache[dataset_ids[0]]
        self.assertTrue(age2 != age)

        for dataset_id in dataset_ids:
            DataRetrieverService._get_coverage(dataset_id)
        
        self.assertTrue(dataset_ids[0] not in DataRetrieverService._retrieve_cache)

        stream_id, route, stream_def, dataset_id = datasets[0]
        self.start_ingestion(stream_id, dataset_id)
        DataRetrieverService._get_coverage(dataset_id)
        
        self.assertTrue(dataset_id in DataRetrieverService._retrieve_cache)
Example 9
    def find_function(self,name):
        res_obj, _ = Container.instance.resource_registry.find_resources(name=name, restype=RT.ParameterFunction, id_only=False)

        if res_obj:
            return res_obj[0]._id, DatasetManagementService.get_coverage_function(res_obj[0])
        else:
            raise KeyError('%s was never loaded' % name)
Example 10
    def get_editable_coverage(self, dataset_id):
        sid = self.get_stream_id(dataset_id)

        # Check if we already have the coverage
        if sid in self._paused_streams:
            cov = self._w_covs[sid]
            # If it's not closed, return it
            if not cov.closed:
                return cov
            # Otherwise, remove it from self._w_covs and carry on
            del self._w_covs[sid]

        self.pause_ingestion(sid)
        if not self._context_managed:
            warn_user(
                'Warning: Coverages will remain open until they are closed or go out of scope - '
                'be sure to close coverage instances when you are finished working with them or call self.clean_up(w_covs=True)'
            )
        try:
            self._w_covs[sid] = DatasetManagementService._get_simplex_coverage(
                dataset_id, mode='w')
            return self._w_covs[sid]
        except:
            self.resume_ingestion(sid)
            raise
    def persist_or_timeout(self, stream_id, rdt):
        """ retry writing coverage multiple times and eventually time out """
        done = False
        timeout = 2
        start = time.time()
        while not done:
            try:
                self.add_granule(stream_id, rdt)
                done = True
            except:
                log.exception('An issue with coverage, retrying after a bit')
                if (time.time() - start) > MAX_RETRY_TIME:  # Give up after MAX_RETRY_TIME
                    dataset_id = self.get_dataset(stream_id)
                    log.error(
                        "We're giving up, the coverage needs to be inspected %s",
                        DatasetManagementService._get_coverage_path(
                            dataset_id))
                    raise

                if stream_id in self._coverages:
                    log.info('Popping coverage for stream %s', stream_id)
                    self._coverages.pop(stream_id)

                gevent.sleep(timeout)
                if timeout > (60 * 5):
                    timeout = 60 * 5
                else:
                    timeout *= 2
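
persist_or_timeout is a retry loop with exponential backoff capped at five minutes. The same pattern in isolation, as a minimal sketch (the service code uses gevent.sleep and the MAX_RETRY_TIME constant; the defaults below are stand-ins):

import time

def retry_with_backoff(fn, max_retry_time=3600, cap=300):
    """Call fn() until it succeeds, sleeping 2, 4, 8, ... seconds between tries."""
    timeout = 2
    start = time.time()
    while True:
        try:
            return fn()
        except Exception:
            if (time.time() - start) > max_retry_time:
                raise                        # out of time: let the error propagate
            time.sleep(timeout)
            timeout = min(cap, timeout * 2)  # cap the backoff interval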
Example 12
    def delete_dataset(self, agent_instance_id, resource_id):

        res_obj = self.rr.read(resource_id)
        dpms = DataProductManagementServiceProcessClient(process=self)

        # Find data products from device id
        count_ds = 0
        dp_objs, _ = self.rr.find_objects(resource_id, PRED.hasOutputProduct, RT.DataProduct, id_only=False)
        for dp_obj in dp_objs:
            if dpms.is_persisted(dp_obj._id):
                raise BadRequest("DataProduct %s '%s' is currently persisted", dp_obj._id, dp_obj.name)

            ds_objs, _ = self.rr.find_objects(dp_obj._id, PRED.hasDataset, RT.Dataset, id_only=False)
            for ds_obj in ds_objs:
                # Delete coverage
                cov_path = DatasetManagementService._get_coverage_path(ds_obj._id)
                if os.path.exists(cov_path):
                    log.info("Removing coverage tree at %s", cov_path)
                    shutil.rmtree(cov_path)
                else:
                    raise OSError("Coverage path does not exist %s" % cov_path)

                # Delete Dataset and associations
                self.rr.delete(ds_obj._id)
                count_ds += 1

        log.info("Datasets and coverages deleted for device %s '%s': %s", resource_id, res_obj.name, count_ds)
    def test_coverage_recovery(self):
        # Create the coverage
        dp_id, stream_id, route, stream_def_id, dataset_id = self.load_data_product()
        self.populate_dataset(dataset_id, 36)
        dset = self.dataset_management.read_dataset(dataset_id)
        dprod = self.dpsc_cli.read_data_product(dp_id)
        cov = DatasetManagementService._get_simplex_coverage(dataset_id)
        cov_pth = cov.persistence_dir
        cov.close()

        # Analyze the valid coverage
        dr = CoverageDoctor(cov_pth, dprod, dset)
        dr_result = dr.analyze()

        # Get original values (mock)
        orig_cov = AbstractCoverage.load(cov_pth)
        time_vals_orig = orig_cov.get_time_values()

        # TODO: Destroy the metadata files

        # TODO: RE-analyze coverage

        # TODO: Should be corrupt, take action to repair if so

        # Repair the metadata files
        dr.repair_metadata()

        # TODO: Re-analyze fixed coverage

        fixed_cov = AbstractCoverage.load(cov_pth)
        self.assertIsInstance(fixed_cov, AbstractCoverage)

        time_vals_fixed = fixed_cov.get_time_values()
        self.assertTrue(np.array_equiv(time_vals_orig, time_vals_fixed))
    def test_coverage_recovery(self):
        # Create the coverage
        dp_id, stream_id, route, stream_def_id, dataset_id = self.load_data_product()
        self.populate_dataset(dataset_id, 36)
        dset = self.dataset_management.read_dataset(dataset_id)
        dprod = self.dpsc_cli.read_data_product(dp_id)
        cov = DatasetManagementService._get_simplex_coverage(dataset_id)
        cov_pth = cov.persistence_dir
        cov.close()

        # Analyze the valid coverage
        dr = CoverageDoctor(cov_pth, dprod, dset)

        dr_result = dr.analyze()

        # TODO: Turn these into meaningful Asserts
        self.assertEqual(len(dr_result.get_brick_corruptions()), 0)
        self.assertEqual(len(dr_result.get_brick_size_ratios()), 8)
        self.assertEqual(len(dr_result.get_corruptions()), 0)
        self.assertEqual(len(dr_result.get_master_corruption()), 0)
        self.assertEqual(len(dr_result.get_param_corruptions()), 0)
        self.assertEqual(len(dr_result.get_param_size_ratios()), 64)
        self.assertEqual(len(dr_result.get_master_size_ratio()), 1)
        self.assertEqual(len(dr_result.get_size_ratios()), 73)
        self.assertEqual(dr_result.master_status[1], 'NORMAL')

        self.assertFalse(dr_result.is_corrupt)
        self.assertEqual(dr_result.param_file_count, 64)
        self.assertEqual(dr_result.brick_file_count, 8)
        self.assertEqual(dr_result.total_file_count, 73)

        # Get original values (mock)
        orig_cov = AbstractCoverage.load(cov_pth)
        time_vals_orig = orig_cov.get_time_values()
        orig_cov.close()

        # Corrupt the Master File
        fo = open(cov._persistence_layer.master_manager.file_path, "wb")
        fo.write('Junk')
        fo.close()
        # Corrupt the lon Parameter file
        fo = open(cov._persistence_layer.parameter_metadata['lon'].file_path, "wb")
        fo.write('Junk')
        fo.close()

        corrupt_res = dr.analyze(reanalyze=True)
        self.assertTrue(corrupt_res.is_corrupt)

        # Repair the metadata files
        dr.repair(reanalyze=True)

        fixed_res = dr.analyze(reanalyze=True)
        self.assertFalse(fixed_res.is_corrupt)

        fixed_cov = AbstractCoverage.load(cov_pth)
        self.assertIsInstance(fixed_cov, AbstractCoverage)

        time_vals_fixed = fixed_cov.get_time_values()
        fixed_cov.close()
        self.assertTrue(np.array_equiv(time_vals_orig, time_vals_fixed))
    def test_get_dataset_to_xml(self):
        dataset_id = self._make_dataset()
        coverage_path = DatasetManagementService()._get_coverage_path(
            dataset_id)
        cov = SimplexCoverage.load(coverage_path)

        xml_str = self.rp.get_dataset_xml(coverage_path)
        dom = parseString(xml_str)
        node = dom.getElementsByTagName('addAttributes')

        metadata = node[0]
        for n in metadata.childNodes:
            if n.nodeType != 3:
                if n.attributes["name"].value == "title":
                    self.assertEquals(cov.name, n.childNodes[0].nodeValue)
                if n.attributes["name"].value == "institution":
                    self.assertEquals('OOI', n.childNodes[0].nodeValue)
                if n.attributes["name"].value == "infoUrl":
                    self.assertEquals(self.rp.pydap_url + cov.name,
                                      n.childNodes[0].nodeValue)
        parameters = []
        node = dom.getElementsByTagName('sourceName')
        for n in node:
            if n.nodeType != 3:
                parameters.append(str(n.childNodes[0].nodeValue))
        cov_params = [key for key in cov.list_parameters()]
        self.assertEquals(parameters, cov_params)
        cov.close()
Example 16
    def get_pfunc(self, pfid):
        # Preload Case
        if not pfid:
            raise TypeError('No parameter function id specified')
        if pfid.startswith('PFID'):
            if pfid not in self.resource_objs:
                raise KeyError('Function %s was not loaded' % pfid)

            pf = self.resource_objs[pfid]
            func = DatasetManagementService.get_coverage_function(pf)
            return func
        # System Case
        else:
            pf = Container.instance.resource_registry.read(pfid)
            func = DatasetManagementService.get_coverage_function(pf)
            return func
Example 17
    def get_last_granule(cls, container, dataset_id):
        dsm_cli = DatasetManagementServiceClient()
        dataset = dsm_cli.read_dataset(dataset_id)
        cc = container
        datastore_name = dataset.datastore_name
        view_name = dataset.view_name
        
        datastore = cc.datastore_manager.get_datastore(datastore_name, DataStore.DS_PROFILE.SCIDATA)

        opts = dict(
            start_key = [dataset_id, {}],
            end_key   = [dataset_id, 0], 
            descending = True,
            limit = 1,
            include_docs = True
        )

        results = datastore.query_view(view_name,opts=opts)
        if not results:
            raise NotFound('A granule could not be located.')
        if results[0] is None:
            raise NotFound('A granule could not be located.')
        doc = results[0].get('doc')
        if doc is None:
            return None

        ts = float(doc.get('ts_create',0))

        coverage = DatasetManagementService._get_coverage(dataset_id)

        rdt = cls._coverage_to_granule(coverage,tdoa=slice(cls.get_relative_time(coverage,ts),None))
        coverage.close(timeout=5)
        return rdt.to_granule()
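
The view options work because CouchDB collation sorts an empty object {} after every number, so a descending scan from [dataset_id, {}] down to [dataset_id, 0] with limit=1 yields the newest row for that dataset. A pure-Python sketch of the key ordering, with float('inf') standing in for {}:

# Rows keyed by (dataset_id, ts_create); descending sort puts newest first.
rows = sorted([('ds1', 3.0), ('ds1', 7.0), ('ds2', 1.0)], reverse=True)
start_key, end_key = ('ds1', float('inf')), ('ds1', 0)  # {} behaves like +inf
newest = [r for r in rows if end_key <= r <= start_key][:1]
assert newest == [('ds1', 7.0)]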
Example 18
    def test_stream_def_crud(self):

        # Test Creation
        pdict = DatasetManagementService.get_parameter_dictionary_by_name('ctd_parsed_param_dict')
        stream_definition_id = self.pubsub_management.create_stream_definition('ctd parsed', parameter_dictionary_id=pdict.identifier)
        self.addCleanup(self.pubsub_management.delete_stream_definition, stream_definition_id)

        # Make sure there is an assoc
        self.assertTrue(self.resource_registry.find_associations(subject=stream_definition_id, predicate=PRED.hasParameterDictionary, object=pdict.identifier, id_only=True))

        # Test Reading
        stream_definition = self.pubsub_management.read_stream_definition(stream_definition_id)
        self.assertTrue(PubsubManagementService._compare_pdicts(pdict.dump(), stream_definition.parameter_dictionary))


        # Test comparisons
        in_stream_definition_id = self.pubsub_management.create_stream_definition('L0 products', parameter_dictionary_id=pdict.identifier, available_fields=['time','temp','conductivity','pressure'])
        self.addCleanup(self.pubsub_management.delete_stream_definition, in_stream_definition_id)

        out_stream_definition_id = in_stream_definition_id
        self.assertTrue(self.pubsub_management.compare_stream_definition(in_stream_definition_id, out_stream_definition_id))
        self.assertTrue(self.pubsub_management.compatible_stream_definitions(in_stream_definition_id, out_stream_definition_id))

        out_stream_definition_id = self.pubsub_management.create_stream_definition('L2 Products', parameter_dictionary_id=pdict.identifier, available_fields=['time','salinity','density'])
        self.addCleanup(self.pubsub_management.delete_stream_definition, out_stream_definition_id)
        self.assertFalse(self.pubsub_management.compare_stream_definition(in_stream_definition_id, out_stream_definition_id))

        self.assertTrue(self.pubsub_management.compatible_stream_definitions(in_stream_definition_id, out_stream_definition_id))
    def test_ingestion_failover(self):
        stream_id, route, stream_def_id, dataset_id = self.make_simple_dataset()
        self.start_ingestion(stream_id, dataset_id)
        
        event = Event()

        def cb(*args, **kwargs):
            event.set()

        sub = EventSubscriber(event_type="ExceptionEvent", callback=cb, origin="stream_exception")
        sub.start()

        self.publish_fake_data(stream_id, route)
        self.wait_until_we_have_enough_granules(dataset_id, 40)
        
        file_path = DatasetManagementService._get_coverage_path(dataset_id)
        master_file = os.path.join(file_path, '%s_master.hdf5' % dataset_id)

        with open(master_file, 'w') as f:
            f.write('this will crash HDF')

        self.publish_hifi(stream_id, route, 5)


        self.assertTrue(event.wait(10))

        sub.stop()
Example 20
    def get_coverage_path(self, dataset_id):
        pth = DatasetManagementService._get_coverage_path(dataset_id)
        if not os.path.exists(pth):
            raise ValueError(
                'Coverage with id \'{0}\' does not exist!'.format(dataset_id))

        return pth
    def get_coverage(cls, data_product_id):
        '''
        Memoization (LRU) of _get_coverage
        '''
        if not data_product_id:
            return None
        try:
            result, ts = cls._coverages.pop(data_product_id)
            if (time.time() - ts) > cls.CACHE_EXPIRATION:
                result.close()
                raise KeyError(data_product_id)
        except KeyError:
            resource_registry = Container.instance.resource_registry
            dataset_ids, _ = resource_registry.find_objects(data_product_id, PRED.hasDataset, id_only=True)
            if not dataset_ids:
                return None
            dataset_id = dataset_ids[0]
            result = DatasetManagementService._get_coverage(dataset_id, mode='r')
            if result is None:
                return None
            result.value_caching = False
            ts = time.time()
            if len(cls._coverages) >= cls.CACHE_LIMIT:
                key, value = cls._coverages.popitem(0)
                coverage, cov_ts = value
                coverage.close(timeout=5)
        # Cache under the data product id so later lookups actually hit
        cls._coverages[data_product_id] = result, ts
        return result
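
cls._coverages.popitem(0) assumes an OrderedDict-style container where popitem(last=False) evicts the oldest entry. The cache shape in isolation, with a generic loader standing in for _get_coverage (limits are illustrative):

import time
from collections import OrderedDict

class TinyCache(object):
    LIMIT = 5            # plays the role of CACHE_LIMIT
    EXPIRATION = 3600.0  # plays the role of CACHE_EXPIRATION, in seconds

    def __init__(self):
        self._items = OrderedDict()

    def get(self, key, loader):
        try:
            value, ts = self._items.pop(key)
            if (time.time() - ts) > self.EXPIRATION:
                raise KeyError(key)              # expired: reload below
        except KeyError:
            value, ts = loader(key), time.time()
            if len(self._items) >= self.LIMIT:
                self._items.popitem(last=False)  # evict the oldest entry
        self._items[key] = (value, ts)           # (re)insert as the newest
        return value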
Example 23
    def get_last_granule(cls, container, dataset_id):
        dsm_cli = DatasetManagementServiceClient()
        dataset = dsm_cli.read_dataset(dataset_id)
        cc = container
        datastore_name = dataset.datastore_name
        view_name = dataset.view_name
        
        datastore = cc.datastore_manager.get_datastore(datastore_name, DataStore.DS_PROFILE.SCIDATA)

        opts = dict(
            start_key = [dataset_id, {}],
            end_key   = [dataset_id, 0], 
            descending = True,
            limit = 1,
            include_docs = True
        )

        results = datastore.query_view(view_name,opts=opts)
        if not results:
            raise NotFound('A granule could not be located.')
        if results[0] is None:
            raise NotFound('A granule could not be located.')
        doc = results[0].get('doc')
        if doc is None:
            return None

        ts = float(doc.get('ts_create',0))

        coverage = DatasetManagementService._get_coverage(dataset_id)
        
        black_box = CoverageCraft(coverage)
        black_box.sync_rdt_with_coverage(start_time=ts,end_time=None)
        granule = black_box.to_granule()

        return granule
Example 24
    def get_last_values(cls, dataset_id):
        coverage = DatasetManagementService._get_coverage(dataset_id)

        black_box = CoverageCraft(coverage)
        black_box.sync_rdt_with_coverage(tdoa=slice(-1, None))
        granule = black_box.to_granule()
        return granule
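
tdoa=slice(-1, None) selects only the last timestep; on a plain array the same slice is arr[-1:], as this small sketch shows:

import numpy as np

values = np.arange(10)
last = values[slice(-1, None)]   # identical to values[-1:]
assert last.tolist() == [9]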
Example 25
    def delete_dataset(self, agent_instance_id, resource_id):
        """Deletes dataset and coverage files for all of a device's data products"""
        res_obj = self.rr.read(resource_id)
        dpms = DataProductManagementServiceProcessClient(process=self)

        # Find data products from device id
        count_ds = 0
        dp_objs, _ = self.rr.find_objects(resource_id, PRED.hasOutputProduct, RT.DataProduct, id_only=False)
        for dp_obj in dp_objs:
            if dpms.is_persisted(dp_obj._id, headers=self._get_system_actor_headers()):
                if self.force:
                    log.warn("DataProduct %s '%s' is currently persisted - continuing", dp_obj._id, dp_obj.name)
                else:
                    raise BadRequest("DataProduct %s '%s' is currently persisted. Use force=True to ignore", dp_obj._id, dp_obj.name)

            ds_objs, _ = self.rr.find_objects(dp_obj._id, PRED.hasDataset, RT.Dataset, id_only=False)
            for ds_obj in ds_objs:
                # Delete coverage
                cov_path = DatasetManagementService._get_coverage_path(ds_obj._id)
                if os.path.exists(cov_path):
                    log.info("Removing coverage tree at %s", cov_path)
                    shutil.rmtree(cov_path)
                else:
                    log.warn("Coverage path does not exist %s" % cov_path)

                # Delete Dataset and associations
                self.rr.delete(ds_obj._id)
                count_ds += 1

        log.info("Datasets and coverages deleted for device %s '%s': %s", resource_id, res_obj.name, count_ds)
Example 26
    def persist_or_timeout(self, stream_id, rdt):
        '''
        A loop that tries to parse and store a granule for up to five minutes,
        and waits an increasing amount of time each iteration.
        '''
        done = False
        timeout = 2
        start = time.time()
        while not done:
            if self.parse_granule(stream_id, rdt, start, done):
                return  # We're all done, everything worked

            if (time.time() -
                    start) > MAX_RETRY_TIME:  # After a while, give up
                dataset_id = self.get_dataset(stream_id)
                log.error(
                    "We're giving up, the coverage needs to be inspected %s",
                    DatasetManagementService._get_coverage_path(dataset_id))
                raise

            if stream_id in self._coverages:
                log.info('Popping coverage for stream %s', stream_id)
                self._coverages.pop(stream_id)

            gevent.sleep(timeout)

            timeout = min(60 * 5, timeout * 2)
    def dead_man_timeout(self, stream_id, callback, *args, **kwargs):
        done = False
        timeout = 2
        start = time.time()
        while not done:
            try:
                callback(*args, **kwargs)
                done = True
            except:
                log.exception("An issue with coverage, retrying after a bit")
                if (time.time() - start) > 3600:  # After an hour just give up
                    dataset_id = self.get_dataset(stream_id)
                    log.error(
                        "We're giving up, the coverage needs to be inspected %s",
                        DatasetManagementService._get_coverage_path(dataset_id),
                    )
                    raise

                if stream_id in self._coverages:
                    log.info("Popping coverage for stream %s", stream_id)
                    self._coverages.pop(stream_id)

                gevent.sleep(timeout)
                if timeout > (60 * 5):
                    timeout = 60 * 5
                else:
                    timeout *= 2
    def apply_to_dataset(self, dataset, calibration_update):
        cov = DatasetManagementService._get_coverage(dataset, mode='r+')
        try:
            self.set_sparse_values(cov, calibration_update)
            self.publish_calibration_event(dataset, calibration_update.keys())

        finally:
            cov.close()
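
The try/finally guards around cov.close() in these examples are the standard resource-cleanup shape; contextlib.closing expresses the same thing for any object with a close() method (note it cannot pass arguments such as close(timeout=5)). A self-contained sketch:

from contextlib import closing

class FakeCoverage(object):
    """Stand-in for a coverage: anything with a close() method works."""
    def close(self):
        print('coverage closed')

with closing(FakeCoverage()) as cov:
    pass  # mutate the coverage here; close() runs even if the block raises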
    def get_editable_coverage(self, dataset_id):
        sid = self.get_stream_id(dataset_id)
        if sid in self._paused_streams:
            return self._w_covs[sid]

        self.pause_ingestion(sid)
        self._w_covs[sid] = DatasetManagementService._get_simplex_coverage(dataset_id, mode='w')
        return self._w_covs[sid]
    def register_dap_dataset(self, dataset_id, data_product_name=''):
        coverage_path = DatasetManagementService._get_coverage_path(dataset_id)
        try:
            self.add_dataset_to_xml(coverage_path=coverage_path, product_name=data_product_name)
            self.create_symlink(coverage_path, self.pydap_data_path)
        except Exception:  # We don't re-raise to prevent clients from bombing out...
            log.exception('Problem registering dataset')
            log.error('Failed to register dataset for coverage path %s', coverage_path)
Example 36
    def test_granule(self):
        
        pdict_id = self.dataset_management.read_parameter_dictionary_by_name('ctd_parsed_param_dict', id_only=True)
        stream_def_id = self.pubsub_management.create_stream_definition('ctd', parameter_dictionary_id=pdict_id, stream_configuration={'reference_designator':"GA03FLMA-RI001-13-CTDMOG999"})
        pdict = DatasetManagementService.get_parameter_dictionary_by_name('ctd_parsed_param_dict')
        self.addCleanup(self.pubsub_management.delete_stream_definition,stream_def_id)

        stream_id, route = self.pubsub_management.create_stream('ctd_stream', 'xp1', stream_definition_id=stream_def_id)
        self.addCleanup(self.pubsub_management.delete_stream,stream_id)
        publisher = StandaloneStreamPublisher(stream_id, route)

        subscriber = StandaloneStreamSubscriber('sub', self.verify_incoming)
        subscriber.start()
        self.addCleanup(subscriber.stop)

        subscription_id = self.pubsub_management.create_subscription('sub', stream_ids=[stream_id])
        self.pubsub_management.activate_subscription(subscription_id)


        rdt = RecordDictionaryTool(stream_definition_id=stream_def_id)
        rdt['time'] = np.arange(10)
        rdt['temp'] = np.random.randn(10) * 10 + 30
        rdt['pressure'] = [20] * 10

        self.assertEquals(set(pdict.keys()), set(rdt.fields))
        self.assertEquals(pdict.temporal_parameter_name, rdt.temporal_parameter)

        self.assertEquals(rdt._stream_config['reference_designator'],"GA03FLMA-RI001-13-CTDMOG999")

        self.rdt = rdt
        self.data_producer_id = 'data_producer'
        self.provider_metadata_update = {1:1}

        publisher.publish(rdt.to_granule(data_producer_id='data_producer', provider_metadata_update={1:1}))

        self.assertTrue(self.event.wait(10))
        
        self.pubsub_management.deactivate_subscription(subscription_id)
        self.pubsub_management.delete_subscription(subscription_id)
        
        rdt = RecordDictionaryTool(stream_definition_id=stream_def_id)
        rdt['time'] = np.array([None,None,None])
        self.assertTrue(rdt['time'] is None)
        
        rdt['time'] = np.array([None, 1, 2])
        self.assertEquals(rdt['time'][0], rdt.fill_value('time'))


        stream_def_obj = self.pubsub_management.read_stream_definition(stream_def_id)
        rdt = RecordDictionaryTool(stream_definition=stream_def_obj)
        rdt['time'] = np.arange(20)
        rdt['temp'] = np.arange(20)


        granule = rdt.to_granule()
        rdt = RecordDictionaryTool.load_from_granule(granule)
        np.testing.assert_array_equal(rdt['time'], np.arange(20))
        np.testing.assert_array_equal(rdt['temp'], np.arange(20))
Example 37
    def _generate_stream_config(self):
        dsm = self.clients.dataset_management
        psm = self.clients.pubsub_management

        agent_obj  = self._get_agent()
        device_obj = self._get_device()

        streams_dict = {}
        for stream_cfg in agent_obj.stream_configurations:
            #create a stream def for each param dict to match against the existing data products
            param_dict_id = dsm.read_parameter_dictionary_by_name(stream_cfg.parameter_dictionary_name,
                                                                  id_only=True)
            stream_def_id = psm.create_stream_definition(parameter_dictionary_id=param_dict_id)
            streams_dict[stream_cfg.stream_name] = {'param_dict_name':stream_cfg.parameter_dictionary_name,
                                                    'stream_def_id':stream_def_id,
                                                    'records_per_granule': stream_cfg.records_per_granule,
                                                    'granule_publish_rate':stream_cfg.granule_publish_rate,
                                                    'alarms'              :stream_cfg.alarms  }

        #retrieve the output products
        device_id = device_obj._id
        data_product_ids = self.RR2.find_data_product_ids_of_instrument_device_using_has_output_product(device_id)

        out_streams = []
        for product_id in data_product_ids:
            stream_id = self.RR2.find_stream_id_of_data_product(product_id)
            out_streams.append(stream_id)


        stream_config = {}

        log.debug("Creating a stream config got each stream (dataproduct) assoc with this agent/device")
        for product_stream_id in out_streams:

            #get the streamroute object from pubsub by passing the stream_id
            stream_def_id = self.RR2.find_stream_definition_id_of_stream(product_stream_id)

            #match the stream defs/param dict for this model with the data products attached to this device to know which tag to use
            for model_stream_name, stream_info_dict  in streams_dict.items():

                if self.clients.pubsub_management.compare_stream_definition(stream_info_dict.get('stream_def_id'),
                                                                            stream_def_id):
                    model_param_dict = DatasetManagementService.get_parameter_dictionary_by_name(stream_info_dict.get('param_dict_name'))
                    stream_route = self.clients.pubsub_management.read_stream_route(stream_id=product_stream_id)

                    stream_config[model_stream_name] = {'routing_key'           : stream_route.routing_key,
                                                            'stream_id'             : product_stream_id,
                                                            'stream_definition_ref' : stream_def_id,
                                                            'exchange_point'        : stream_route.exchange_point,
                                                            'parameter_dictionary'  : model_param_dict.dump(),
                                                            'records_per_granule'  : stream_info_dict.get('records_per_granule'),
                                                            'granule_publish_rate'  : stream_info_dict.get('granule_publish_rate'),
                                                            'alarms'                : stream_info_dict.get('alarms')
                    }

        log.debug("Stream config generated")
        log.trace("generate_stream_config: %s", str(stream_config) )
        return stream_config
Example 39
    def test_empty_coverage_time(self):

        stream_id, route, stream_def_id, dataset_id = self.make_simple_dataset()
        coverage = DatasetManagementService._get_coverage(dataset_id)
        temporal_bounds = self.dataset_management.dataset_temporal_bounds(
            dataset_id)
        self.assertEquals([coverage.get_parameter_context('time').fill_value] *
                          2, temporal_bounds)
    def test_thorough_gap_analysis(self):
        dataset_id = self.test_ingestion_gap_analysis()
        vcov = DatasetManagementService._get_coverage(dataset_id)

        self.assertIsInstance(vcov,ViewCoverage)
        ccov = vcov.reference_coverage

        self.assertIsInstance(ccov, ComplexCoverage)
        self.assertEquals(len(ccov._reference_covs), 3)
    def check_rsn_instrument_data_product(self):
        passing = True
        # for RS03AXBS-MJ03A-06-PRESTA301 (PREST-A) there are a few listed data products
        # Parsed, Engineering
        # SFLPRES-0 SFLPRES-1
        # Check for the two data products and make sure they have the proper parameters
        # SFLPRES-0 should 
        data_products, _ = self.RR.find_resources_ext(alt_id_ns='PRE', alt_id='RS03AXBS-MJ03A-06-PRESTA301_SFLPRES_L0_DPID', id_only=True)
        passing &=self.assertTrue(len(data_products)==1)
        if not data_products:
            return passing

        data_product_id = data_products[0]
        
        stream_defs, _ = self.RR.find_objects(data_product_id,PRED.hasStreamDefinition,id_only=False)
        passing &= self.assertTrue(len(stream_defs)==1)
        if not stream_defs:
            return passing

        # Assert that the stream definition has the correct reference designator
        stream_def = stream_defs[0]
        passing &= self.assertEquals(stream_def.stream_configuration['reference_designator'], 'RS03AXBS-MJ03A-06-PRESTA301')

        # Get the pdict and make sure that the parameters corresponding to the available fields 
        # begin with the appropriate data product identifier

        pdict_ids, _ = self.RR.find_objects(stream_def, PRED.hasParameterDictionary, id_only=True)
        passing &= self.assertEquals(len(pdict_ids), 1)
        if not pdict_ids:
            return passing

        pdict_id = pdict_ids[0]
        
        pdict = DatasetManagementService.get_parameter_dictionary(pdict_id)
        available_params = [pdict.get_context(i) for i in pdict.keys() if i in stream_def.available_fields]
        for p in available_params:
            if p.name=='time': # Ignore the domain parameter
                continue
            passing &= self.assertTrue(p.ooi_short_name.startswith('SFLPRES'))
        passing &= self.check_presta_instrument_data_products('RS01SLBS-MJ01A-06-PRESTA101')
        passing &= self.check_vel3d_instrument_data_products( 'RS01SLBS-MJ01A-12-VEL3DB101')
        passing &= self.check_presta_instrument_data_products('RS03AXBS-MJ03A-06-PRESTA301')
        passing &= self.check_vel3d_instrument_data_products( 'RS03AXBS-MJ03A-12-VEL3DB301')
        passing &= self.check_tempsf_instrument_data_product( 'RS03ASHS-MJ03B-07-TMPSFA301')
        passing &= self.check_vel3d_instrument_data_products( 'RS03INT2-MJ03D-12-VEL3DB304')
        passing &= self.check_trhph_instrument_data_products( 'RS03INT1-MJ03C-10-TRHPHA301')

        self.data_product_management.activate_data_product_persistence(data_product_id)
        dataset_id = self.RR2.find_dataset_id_of_data_product_using_has_dataset(data_product_id)
        granule = self.data_retriever.retrieve(dataset_id)
        rdt = RecordDictionaryTool.load_from_granule(granule)
        self.assert_array_almost_equal(rdt['seafloor_pressure'], [10.2504], 4)
        self.assert_array_almost_equal(rdt['absolute_pressure'], [14.8670], 4)
        self.data_product_management.suspend_data_product_persistence(data_product_id) # Should do nothing and not raise anything

        
        return passing
    def create_simple_qc_pdict(self):
        types_manager = TypesManager(self.dataset_management,None,None)
        contexts = self.create_simple_qc()
        context_ids = [i[1] for i in contexts.itervalues()]
        context_ids.extend(contexts['temp'][0].qc_contexts)
        for qc_context in contexts['temp'][0].qc_contexts:
            context_ids.extend(types_manager.get_lookup_value_ids(DatasetManagementService.get_parameter_context(qc_context)))
        context_ids.extend(contexts['pressure'][0].qc_contexts)
        for qc_context in contexts['pressure'][0].qc_contexts:
            context_ids.extend(types_manager.get_lookup_value_ids(DatasetManagementService.get_parameter_context(qc_context)))
        context_names = [self.dataset_management.read_parameter_context(i).name for i in context_ids]
        qc_names = [i for i in context_names if i.endswith('_qc')]
        ctxt_id, pc = types_manager.make_propagate_qc(qc_names)
        context_ids.append(ctxt_id)
        pdict_id = self.dataset_management.create_parameter_dictionary('simple_qc', parameter_context_ids=context_ids, temporal_context='time')
        self.addCleanup(self.dataset_management.delete_parameter_dictionary, pdict_id)

        return pdict_id
Example 44
class DatasetManagementTest(PyonTestCase):
    def setUp(self):
        mock_clients = self._create_service_mock('dataset_management')
        self.dataset_management = DatasetManagementService()
        self.dataset_management.clients = mock_clients

        self.mock_rr_create = self.dataset_management.clients.resource_registry.create
        self.mock_rr_read = self.dataset_management.clients.resource_registry.read
        self.mock_rr_update = self.dataset_management.clients.resource_registry.update
        self.mock_rr_delete = self.dataset_management.clients.resource_registry.delete

    def test_create_dataset(self):
        # mocks
        self.mock_rr_create.return_value = ('dataset_id','rev')

        # execution
        dataset_id = self.dataset_management.create_dataset(name='123',stream_id='123',datastore_name='fake_datastore')


        # assertions
        self.assertEquals(dataset_id,'dataset_id')
        self.assertTrue(self.mock_rr_create.called)

    def test_update_dataset(self):
        # mocks
        mock_dataset = DotDict({'_id':'dataset_id'})


        # execution
        self.dataset_management.update_dataset(mock_dataset)


        # assertions
        self.mock_rr_update.assert_called_with(mock_dataset)

    def test_delete_dataset(self):
        # mocks

        # execution
        self.dataset_management.delete_dataset('123')

        # assertions
        self.mock_rr_delete.assert_called_with('123')
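
_create_service_mock is a pyon test helper that hangs Mock clients off the service; the assertions then only inspect those mocks. The underlying pattern with the plain mock library, as a sketch:

from mock import Mock  # 'from unittest.mock import Mock' on Python 3

clients = Mock()  # attribute access auto-creates child mocks
clients.resource_registry.create.return_value = ('dataset_id', 'rev')

doc_id, rev = clients.resource_registry.create({'name': '123'})
assert doc_id == 'dataset_id'
clients.resource_registry.create.assert_called_with({'name': '123'})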
Example 45
    def test_pfunc_crud(self):
        contexts, funcs = self.create_pfuncs()
        context_ids = [context_id for ctxt,context_id in contexts.itervalues()]

        pdict_id = self.dataset_management.create_parameter_dictionary(name='functional_pdict', parameter_context_ids=context_ids, temporal_context='time')
        self.addCleanup(self.dataset_management.delete_parameter_dictionary, pdict_id)

        expr, expr_id = funcs['CONDWAT_L1']
        func_class = DatasetManagementService.get_parameter_function(expr_id)
        self.assertIsInstance(func_class, NumexprFunction)
Example 48
    def test_get_data_from_FDW(self):
        # generate a data product and check that the FDW can get data
        ph = ParameterHelper(self.dataset_management, self.addCleanup)
        pdict_id = ph.create_extended_parsed()

        stream_def_id = self.pubsub_management.create_stream_definition(
            'example', parameter_dictionary_id=pdict_id)
        self.addCleanup(self.pubsub_management.delete_stream_definition,
                        stream_def_id)

        dp = DataProduct(name='example')

        data_product_id = self.data_product_management.create_data_product(
            dp, stream_def_id)
        self.addCleanup(self.data_product_management.delete_data_product,
                        data_product_id)

        self.data_product_management.activate_data_product_persistence(
            data_product_id)
        self.addCleanup(
            self.data_product_management.suspend_data_product_persistence,
            data_product_id)

        dataset_id = self.resource_registry.find_objects(data_product_id,
                                                         PRED.hasDataset,
                                                         id_only=True)[0][0]
        monitor = DatasetMonitor(dataset_id)
        self.addCleanup(monitor.stop)

        rdt = ph.get_rdt(stream_def_id)
        ph.fill_rdt(rdt, 100)
        ph.publish_rdt_to_data_product(data_product_id, rdt)
        self.assertTrue(monitor.event.wait(10))

        gevent.sleep(
            1)  # Yield to other greenlets, had an issue with connectivity

        print "--------------------------------"
        print dataset_id
        coverage_path = DatasetManagementService()._get_coverage_path(
            dataset_id)
        print coverage_path
        print "--------------------------------"

        # verify the table exists in the DB (similar to above)
        # ....code...

        # check that the geoserver layer exists as above
        # ... code ....

        # make a WMS/WFS request...something like this (or both)
        url = self.gs_host + '/geoserver/geonode/ows?service=WFS&version=1.0.0&request=GetFeature&typeName=geonode:ooi_' + dataset_id + '_ooi&maxFeatures=1&outputFormat=csv'
        r = requests.get(url)
        self.assertTrue(r.status_code == 200)
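The GetFeature URL above is assembled by string concatenation; the same request is easier to audit with requests' params argument. A sketch, reusing the layer-name pattern ooi_<dataset_id>_ooi from the test:

    # Equivalent WFS GetFeature call with explicit query parameters
    params = {
        'service': 'WFS',
        'version': '1.0.0',
        'request': 'GetFeature',
        'typeName': 'geonode:ooi_%s_ooi' % dataset_id,
        'maxFeatures': '1',
        'outputFormat': 'csv',
    }
    r = requests.get(self.gs_host + '/geoserver/geonode/ows', params=params)
    self.assertEquals(r.status_code, 200)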
Example n. 49
    def test_create_dataset_verify_geoserver_layer(self):
        #generate layer and check that the service created it in geoserver
        ph = ParameterHelper(self.dataset_management, self.addCleanup)
        pdict_id = ph.create_extended_parsed()

        stream_def_id = self.pubsub_management.create_stream_definition(
            'example', parameter_dictionary_id=pdict_id)
        self.addCleanup(self.pubsub_management.delete_stream_definition,
                        stream_def_id)

        dp = DataProduct(name='example')

        data_product_id = self.data_product_management.create_data_product(
            dp, stream_def_id)
        self.addCleanup(self.data_product_management.delete_data_product,
                        data_product_id)

        self.data_product_management.activate_data_product_persistence(
            data_product_id)
        self.addCleanup(
            self.data_product_management.suspend_data_product_persistence,
            data_product_id)

        dataset_id = self.resource_registry.find_objects(data_product_id,
                                                         PRED.hasDataset,
                                                         id_only=True)[0][0]
        monitor = DatasetMonitor(dataset_id)
        self.addCleanup(monitor.stop)

        rdt = ph.get_rdt(stream_def_id)
        ph.fill_rdt(rdt, 100)
        ph.publish_rdt_to_data_product(data_product_id, rdt)
        self.assertTrue(monitor.event.wait(10))

        gevent.sleep(1)  # Yield to other greenlets; we had an issue with connectivity

        log.debug("--------------------------------")
        log.debug(dataset_id)
        coverage_path = DatasetManagementService._get_coverage_path(dataset_id)
        log.debug(coverage_path)
        log.debug("--------------------------------")

        # verify that the layer exists in geoserver
        try:
            r = requests.get(self.gs_rest_url + '/layers/ooi_' + dataset_id +
                             '_ooi.xml',
                             auth=(self.username, self.PASSWORD))
            self.assertEquals(r.status_code, 200)
        except Exception as e:
            self.fail('check service and layer exist...%s' % e)
Example n. 50
    def test_context_crud(self):
        context_ids = self.create_contexts()
        context_id = context_ids.pop()

        ctxt = self.dataset_management.read_parameter_context(context_id)
        context = DatasetManagementService.get_coverage_parameter(ctxt)
        self.assertIsInstance(context, CoverageParameterContext)

        self.dataset_management.delete_parameter_context(context_id)

        with self.assertRaises(NotFound):
            self.dataset_management.read_parameter_context(context_id)
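The create_contexts helper used by this test is not shown. A minimal single-context sketch, assuming the ParameterContext and QuantityType types from coverage_model; a real helper would build several contexts and return all of their ids.

    import numpy as np
    from coverage_model import ParameterContext, QuantityType

    def create_contexts(self):
        # Register one simple float32 quantity context and return its id
        ctxt = ParameterContext(
            'temp', param_type=QuantityType(value_encoding=np.dtype('float32')))
        ctxt.uom = 'deg_C'
        context_id = self.dataset_management.create_parameter_context(
            name='temp', parameter_context=ctxt.dump())
        return [context_id]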
Example n. 51
    def test_context_crud(self):
        context_ids = self.create_contexts()
        context_id = context_ids.pop()

        context = DatasetManagementService.get_parameter_context(context_id)
        self.assertIsInstance(context, ParameterContext)
        self.assertEquals(context.identifier, context_id)

        self.dataset_management.delete_parameter_context(context_id)

        with self.assertRaises(NotFound):
            self.dataset_management.read_parameter_context(context_id)
Example n. 52
    def get_last_values(cls, dataset_id, number_of_points):
        coverage = DatasetManagementService._get_coverage(dataset_id, mode='r')
        if coverage.num_timesteps < number_of_points:
            if coverage.num_timesteps == 0:
                rdt = RecordDictionaryTool(
                    param_dictionary=coverage.parameter_dictionary)
                return rdt.to_granule()
            number_of_points = coverage.num_timesteps
        rdt = cls._coverage_to_granule(coverage,
                                       tdoa=slice(-number_of_points, None))
        coverage.close(timeout=5)

        return rdt.to_granule()
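A usage sketch for get_last_values; the enclosing class is assumed to be the replay process (called ReplayProcess here for illustration), and the returned granule is unpacked with RecordDictionaryTool.

    # Fetch the ten most recent values and read the time axis back out
    granule = ReplayProcess.get_last_values(dataset_id, 10)
    rdt = RecordDictionaryTool.load_from_granule(granule)
    log.debug('last timestamps: %s', rdt[rdt.temporal_parameter])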
Example n. 53
    def gap_coverage(self, stream_id):
        try:
            old_cov = self._coverages.pop(stream_id)
            dataset_id = self.get_dataset(stream_id)
            sdom, tdom = time_series_domain()
            new_cov = DatasetManagementService._create_simplex_coverage(
                dataset_id, old_cov.parameter_dictionary, sdom, tdom,
                old_cov._persistence_layer.inline_data_writes)
            old_cov.close()
            result = new_cov
        except KeyError:
            result = self.get_coverage(stream_id)
        self._coverages[stream_id] = result
        return result
Example n. 54
    def _replay(self):
        coverage = DatasetManagementService._get_coverage(self.dataset_id,
                                                          mode='r')
        rdt = self._cov2granule(coverage=coverage,
                                start_time=self.start_time,
                                end_time=self.end_time,
                                stride_time=self.stride_time,
                                parameters=self.parameters,
                                stream_def_id=self.stream_def_id)
        elements = len(rdt)

        # Integer division: only full publish_limit-sized chunks are yielded
        for i in xrange(elements / self.publish_limit):
            outgoing = RecordDictionaryTool(stream_definition_id=self.stream_def_id)
            fields = self.parameters or outgoing.fields
            for field in fields:
                v = rdt[field]
                if v is not None:
                    outgoing[field] = v[(i * self.publish_limit):((i + 1) * self.publish_limit)]
            yield outgoing
        coverage.close(timeout=5)
        return
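_replay yields RecordDictionaryTool chunks of publish_limit records each; a caller would typically turn each chunk into a granule and publish it. A sketch, assuming a self.output publisher with a publish method (names are illustrative):

    def execute_replay(self):
        # Drain the generator and push each chunk onto the output stream;
        # self.output is assumed to be a stream publisher
        for chunk in self._replay():
            self.output.publish(chunk.to_granule())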
Example n. 55
    def test_pfunc_crud(self):
        contexts, funcs = self.create_pfuncs()
        context_ids = contexts.values()

        pdict_id = self.dataset_management.create_parameter_dictionary(
            name='functional_pdict',
            parameter_context_ids=context_ids,
            temporal_context='time')
        self.addCleanup(self.dataset_management.delete_parameter_dictionary,
                        pdict_id)

        expr_id = funcs['CONDWAT_L1']
        expr = self.dataset_management.read_parameter_function(expr_id)
        func_class = DatasetManagementService.get_coverage_function(expr)
        self.assertIsInstance(func_class, NumexprFunction)
Example n. 56
    def test_retrieve_cache(self):
        DataRetrieverService._refresh_interval = 1
        datasets = [self.make_simple_dataset() for i in xrange(10)]
        for stream_id, route, stream_def_id, dataset_id in datasets:
            coverage = DatasetManagementService._get_simplex_coverage(
                dataset_id)
            coverage.insert_timesteps(10)
            coverage.set_parameter_values('time', np.arange(10))
            coverage.set_parameter_values('temp', np.arange(10))

        # Verify cache hit and refresh
        dataset_ids = [i[3] for i in datasets]
        self.assertTrue(
            dataset_ids[0] not in DataRetrieverService._retrieve_cache)
        DataRetrieverService._get_coverage(dataset_ids[0])  # Hit the cache
        cov, age = DataRetrieverService._retrieve_cache[dataset_ids[0]]
        # Verify that it was hit and it's now in there
        self.assertTrue(dataset_ids[0] in DataRetrieverService._retrieve_cache)

        gevent.sleep(DataRetrieverService._refresh_interval + 0.2)

        DataRetrieverService._get_coverage(dataset_ids[0])  # Hit the cache
        cov, age2 = DataRetrieverService._retrieve_cache[dataset_ids[0]]
        self.assertNotEquals(age, age2)

        # Touching every dataset should push the first entry out of the
        # bounded cache
        for dataset_id in dataset_ids:
            DataRetrieverService._get_coverage(dataset_id)

        self.assertTrue(
            dataset_ids[0] not in DataRetrieverService._retrieve_cache)

        stream_id, route, stream_def, dataset_id = datasets[0]
        self.start_ingestion(stream_id, dataset_id)
        DataRetrieverService._get_coverage(dataset_id)

        self.assertTrue(dataset_id in DataRetrieverService._retrieve_cache)

        DataRetrieverService._refresh_interval = 100
        self.publish_hifi(stream_id, route, 1)
        self.wait_until_we_have_enough_granules(dataset_id, data_size=20)

        # Newly ingested data should invalidate the cached coverage entry
        event = gevent.event.Event()
        with gevent.Timeout(20):
            while not event.wait(0.1):
                if dataset_id not in DataRetrieverService._retrieve_cache:
                    event.set()

        self.assertTrue(event.is_set())
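The test above exercises DataRetrieverService's age-based coverage cache. A minimal sketch of the pattern under test, assuming a dict of (coverage, timestamp) entries keyed by dataset id; the names mirror the test, but the body is illustrative rather than the service's actual implementation.

    import time

    _retrieve_cache = {}
    _refresh_interval = 1  # seconds

    def _get_coverage_sketch(dataset_id):
        # Reuse a cached coverage while it is fresh; otherwise reopen it
        now = time.time()
        if dataset_id in _retrieve_cache:
            cov, age = _retrieve_cache[dataset_id]
            if now - age < _refresh_interval:
                return cov
            cov.close(timeout=5)
        cov = DatasetManagementService._get_coverage(dataset_id, mode='r')
        _retrieve_cache[dataset_id] = (cov, now)
        return cov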