Beispiel #1
0
    def insert_values(self, coverage, rdt, stream_id):
        ''' Writes the contents of a record dictionary into a coverage.

        The RDT is converted with ``build_data_dict`` and handed to
        ``coverage.set_parameter_values`` in one call.  If the coverage has an
        ``ingestion_timestamp`` parameter it is filled with a single "now"
        value, one entry per record in the temporal parameter.

        :param coverage: coverage object that receives the values
        :param rdt: record dictionary holding the granule's data
        :param stream_id: key recorded in ``self._bad_coverages`` when the
            write fails with an IOError
        :raises CorruptionError: when the coverage raises IOError on write
        :raises KeyError: re-raised as-is (after refreshing the coverage when
            the message says a parameter has not been initialized)
        '''
        np_dict = self.build_data_dict(rdt)

        if 'ingestion_timestamp' in coverage.list_parameters():
            times = rdt[rdt.temporal_parameter]
            # 2208988800 is the number of seconds between the NTP epoch
            # (1900-01-01) and the Unix epoch (1970-01-01).  The clock is read
            # once so every record of the granule carries the same stamp.
            ingest_time = time.time() + 2208988800
            timestamps = np.array([ingest_time] * len(times))
            np_dict['ingestion_timestamp'] = NumpyParameterData(
                'ingestion_timestamp', timestamps, times)

        # If it's sparse only, the temporal parameter is not written
        if self.sparse_only(rdt):
            del np_dict[rdt.temporal_parameter]

        try:
            coverage.set_parameter_values(np_dict)
        except IOError as e:
            log.error("Couldn't insert values for coverage: %s",
                      coverage.persistence_dir,
                      exc_info=True)
            try:
                coverage.close()
            finally:
                # Runs even if close() fails: flag the stream and surface the
                # original I/O failure as a corruption error.  str(e) is used
                # because e.message is empty for IOError(errno, strerror).
                self._bad_coverages[stream_id] = 1
                raise CorruptionError(str(e))
        except KeyError as e:
            # str(e) is safe for non-string keys, unlike e.message
            if 'has not been initialized' in str(e):
                coverage.refresh()
            raise
        except Exception:
            # Record the offending RDT for diagnosis, then propagate
            log.error('Unexpected error inserting values from rdt: %r', rdt)
            raise
    def insert_values(self, coverage, rdt, stream_id):
        ''' Writes each RDT field into the trailing timesteps of a coverage.

        Values are written to the slice starting at
        ``coverage.num_timesteps - len(rdt)``; presumably the caller has
        already extended the coverage by ``len(rdt)`` timesteps (confirm
        against the caller).  Fields holding a ``SparseConstantValue`` are
        skipped.  If the coverage has an ``ingestion_timestamp`` parameter it
        is set to the current time over the same slice.

        :param coverage: coverage object that receives the values
        :param rdt: record dictionary holding the granule's data
        :param stream_id: key recorded in ``self._bad_coverages`` when a write
            fails with an IOError
        :raises CorruptionError: when the coverage raises IOError on write
        '''
        elements = len(rdt)

        start_index = coverage.num_timesteps - elements
        # Computed once, before the loop, so it is also bound for the
        # ingestion_timestamp write when the RDT is empty or every value is
        # sparse (the loop would otherwise never assign it).
        slice_ = slice(start_index, None)

        for k, v in rdt.iteritems():
            if isinstance(v, SparseConstantValue):
                continue
            try:
                coverage.set_parameter_values(param_name=k,
                                              tdoa=slice_,
                                              value=v)
            except IOError as e:
                log.error("Couldn't insert values for coverage: %s",
                          coverage.persistence_dir,
                          exc_info=True)
                try:
                    coverage.close()
                finally:
                    # Runs even if close() fails.  str(e) is used because
                    # e.message is empty for IOError(errno, strerror).
                    self._bad_coverages[stream_id] = 1
                    raise CorruptionError(str(e))

        if 'ingestion_timestamp' in coverage.list_parameters():
            t_now = time.time()
            # Convert "now" into the units declared by the parameter context
            ntp_time = TimeUtils.ts_to_units(
                coverage.get_parameter_context('ingestion_timestamp').uom,
                t_now)
            coverage.set_parameter_values(param_name='ingestion_timestamp',
                                          tdoa=slice_,
                                          value=ntp_time)
Beispiel #3
0
 def map_cov_rdt(cls, coverage, rdt, field, slice_):
     ''' Copies one field of a coverage into a record dictionary.

     The values for ``field`` over ``slice_`` are read from the coverage and
     stored in ``rdt[field]``.  Non-array results (including None) are wrapped
     in a one-element list.  Array results are padded with the parameter's
     fill value when the field's data extent is shorter than the coverage's
     timestep count.

     :param coverage: coverage to read from
     :param rdt: record dictionary that receives the values
     :param field: name of the parameter to copy
     :param slice_: temporal selection passed to the coverage as ``tdoa``
     :raises CorruptionError: when the field's data extent exceeds the
         number of timesteps in the coverage
     '''
     log.trace('Slice is %s', slice_)
     try:
         values = coverage.get_parameter_values(field, tdoa=slice_)
     except ParameterFunctionException:
         # Field is left unset in the rdt
         return

     if not isinstance(values, np.ndarray):
         # None and any other non-array result are stored as a single item
         rdt[field] = [values]
         return

     if coverage.get_data_extents(field)[0] < coverage.num_timesteps:
         log.error(
             "Misformed coverage detected, padding with fill_value")
         expected_len = utils.slice_shape(slice_,
                                          (coverage.num_timesteps, ))[0]
         missing = expected_len - values.shape[0]
         padding = np.empty(missing, dtype=values.dtype)
         padding.fill(coverage.get_parameter_context(field).fill_value)
         values = np.append(values, padding)
     elif coverage.get_data_extents(field)[0] > coverage.num_timesteps:
         raise CorruptionError(
             'The coverage is corrupted:\n\tfield: %s\n\textents: %s\n\ttimesteps: %s'
             % (field, coverage.get_data_extents(field),
                coverage.num_timesteps))

     rdt[field] = np.atleast_1d(values)
    def insert_sparse_values(self, coverage, rdt, stream_id):
        ''' Writes the sparse-constant fields of an RDT into a coverage.

        ``fill_lookup_values`` is applied to the RDT first.  Only fields that
        have a value and whose parameter type is ``SparseConstantType`` are
        written; every other field is skipped.

        :param coverage: coverage object that receives the values
        :param rdt: record dictionary holding the granule's data
        :param stream_id: key recorded in ``self._bad_coverages`` when a write
            fails with an IOError
        :raises CorruptionError: when the coverage raises IOError on write
        :raises ValueError: any ValueError from the coverage other than the
            lower/upper bound complaint, which is ignored
        '''
        self.fill_lookup_values(rdt)
        for field in rdt.fields:
            if rdt[field] is None:
                continue
            if not isinstance(
                    rdt.context(field).param_type, SparseConstantType):
                # We only set sparse values before insert
                continue
            value = rdt[field]
            try:
                coverage.set_parameter_values(param_name=field, value=value)
            except ValueError as e:
                # This particular complaint is tolerated; move to next field
                if "'lower_bound' cannot be >= 'upper_bound'" in str(e):
                    continue
                raise
            except IOError as e:
                log.error("Couldn't insert values for coverage: %s",
                          coverage.persistence_dir,
                          exc_info=True)
                try:
                    coverage.close()
                finally:
                    # Runs even if close() fails.  str(e) is used because
                    # e.message is empty for IOError(errno, strerror).
                    self._bad_coverages[stream_id] = 1
                    raise CorruptionError(str(e))
 def expand_coverage(self, coverage, elements, stream_id):
     ''' Appends ``elements`` timesteps to the coverage.

     :param coverage: coverage object to extend
     :param elements: number of timesteps to insert
     :param stream_id: key recorded in ``self._bad_coverages`` when the
         insert fails with an IOError
     :raises CorruptionError: when the coverage raises IOError
     '''
     try:
         coverage.insert_timesteps(elements, oob=False)
     except IOError as e:
         log.error("Couldn't insert time steps for coverage: %s",
                   coverage.persistence_dir,
                   exc_info=True)
         try:
             coverage.close()
         finally:
             # Runs even if close() fails.  str(e) is used because
             # e.message is empty for IOError(errno, strerror).
             self._bad_coverages[stream_id] = 1
             raise CorruptionError(str(e))
Beispiel #6
0
    def add_granule(self, stream_id, rdt):
        ''' Appends the granule's data to the coverage and persists it.

        Returns early (after logging) when no dataset or coverage can be
        found for the stream, or when the RDT has no temporal values.
        Otherwise the values are inserted, the coverage is saved, the dataset
        metadata is updated and ``dataset_changed`` is called with the first
        and last temporal values of the granule.

        :param stream_id: stream the granule arrived on
        :param rdt: record dictionary holding the granule's data
        :raises CorruptionError: when opening the coverage raises IOError
        '''
        if stream_id in self._bad_coverages:
            # Only logged; the insertion is still attempted below
            log.info(
                'Message attempting to be inserted into bad coverage: %s',
                DatasetManagementService._get_coverage_path(
                    self.get_dataset(stream_id)))

        #--------------------------------------------------------------------------------
        # Coverage determination and appending
        #--------------------------------------------------------------------------------
        dataset_id = self.get_dataset(stream_id)
        if not dataset_id:
            log.error('No dataset could be determined on this stream: %s',
                      stream_id)
            return

        try:
            coverage = self.get_coverage(stream_id)
        except IOError as e:
            log.error(
                "Couldn't open coverage: %s",
                DatasetManagementService._get_coverage_path(dataset_id))
            # str(e) is used because e.message is empty for
            # IOError(errno, strerror)
            raise CorruptionError(str(e))

        if not coverage:
            log.error(
                'Could not persist coverage from granule, coverage is None')
            return
        #--------------------------------------------------------------------------------
        # Actual persistence
        #--------------------------------------------------------------------------------

        if rdt[rdt.temporal_parameter] is None:
            log.warning("Empty granule received")
            return

        # Parse the RDT and set the values in the coverage
        self.insert_values(coverage, rdt, stream_id)

        # Force the data to be flushed
        DatasetManagementService._save_coverage(coverage)

        self.update_metadata(dataset_id, rdt)

        try:
            # First and last temporal values of the granule
            window = rdt[rdt.temporal_parameter][[0, -1]]
            window = window.tolist()
        except (ValueError, IndexError):
            window = None
        self.dataset_changed(dataset_id, window)
Beispiel #7
0
 def _test_origin():
     ''' Unconditionally raises a CorruptionError with no arguments. '''
     # NOTE(review): the name suggests this is a test helper - confirm at the call site
     raise CorruptionError()
Beispiel #8
0
 def _raise_corruption_error():
     ''' Unconditionally raises a CorruptionError with no arguments. '''
     raise CorruptionError()
    def add_granule(self, stream_id, rdt):
        ''' Appends the granule's data to the coverage and persists it.

        Steps, in order: optional gap analysis on the RDT's connection id and
        index, dataset/coverage lookup, sparse value insertion, coverage
        expansion by the number of records, value insertion, save, change
        notification, optional splice after a gap, QC evaluation and finally
        the connection index update.  Returns early (after logging) when no
        dataset or coverage can be found for the stream.  When debug logging
        is enabled each phase is timed.

        :param stream_id: stream the granule arrived on
        :param rdt: record dictionary holding the granule's data
        :raises CorruptionError: when opening the coverage raises IOError
        '''
        debugging = log.isEnabledFor(DEBUG)
        timer = Timer() if debugging else None
        if stream_id in self._bad_coverages:
            # Only logged; the insertion is still attempted below
            log.info(
                'Message attempting to be inserted into bad coverage: %s',
                DatasetManagementService._get_coverage_path(
                    self.get_dataset(stream_id)))

        #--------------------------------------------------------------------------------
        # Gap Analysis
        #--------------------------------------------------------------------------------
        # Always bound, so the splice check further down cannot hit an
        # unbound local if ignore_gaps changes while this call is running.
        gap_found = False
        if not self.ignore_gaps:
            gap_found = self.has_gap(rdt.connection_id, rdt.connection_index)
            if gap_found:
                log.error(
                    'Gap Found!   New connection: (%s,%s)\tOld Connection: (%s,%s)',
                    rdt.connection_id, rdt.connection_index,
                    self.connection_id, self.connection_index)
                self.gap_coverage(stream_id)

        #--------------------------------------------------------------------------------
        # Coverage determination and appending
        #--------------------------------------------------------------------------------
        dataset_id = self.get_dataset(stream_id)
        if not dataset_id:
            log.error('No dataset could be determined on this stream: %s',
                      stream_id)
            return

        try:
            coverage = self.get_coverage(stream_id)
        except IOError as e:
            log.error(
                "Couldn't open coverage: %s",
                DatasetManagementService._get_coverage_path(dataset_id))
            # str(e) is used because e.message is empty for
            # IOError(errno, strerror)
            raise CorruptionError(str(e))

        if debugging:
            path = DatasetManagementService._get_coverage_path(dataset_id)
            log.debug(
                '%s: add_granule stream %s dataset %s coverage %r file %s',
                self._id, stream_id, dataset_id, coverage, path)

        if not coverage:
            log.error(
                'Could not persist coverage from granule, coverage is None')
            return
        #--------------------------------------------------------------------------------
        # Actual persistence
        #--------------------------------------------------------------------------------

        elements = len(rdt)
        # No temporal values: the coverage is not extended
        if rdt[rdt.temporal_parameter] is None:
            elements = 0

        self.insert_sparse_values(coverage, rdt, stream_id)

        if debugging:
            timer.complete_step('checks')  # lightweight ops, should be zero

        self.expand_coverage(coverage, elements, stream_id)

        if debugging:
            timer.complete_step('insert')

        self.insert_values(coverage, rdt, stream_id)

        if debugging:
            timer.complete_step('keys')

        DatasetManagementService._save_coverage(coverage)

        if debugging:
            timer.complete_step('save')

        # Index range of the timesteps added by this granule
        start_index = coverage.num_timesteps - elements
        self.dataset_changed(dataset_id, coverage.num_timesteps,
                             (start_index, start_index + elements))

        if not self.ignore_gaps and gap_found:
            self.splice_coverage(dataset_id, coverage)

        self.evaluate_qc(rdt, dataset_id)

        if debugging:
            timer.complete_step('notify')
            self._add_timing_stats(timer)

        self.update_connection_index(rdt.connection_id, rdt.connection_index)
Beispiel #10
0
    def add_granule(self, stream_id, rdt):
        ''' Appends the granule's data to the coverage and persists it.

        The coverage is extended by ``len(rdt)`` timesteps and every RDT
        field is written into the newly added trailing slice.  If the
        coverage has an ``ingestion_timestamp`` parameter it is set to the
        current time over the same slice.  The coverage is then saved and
        ``dataset_changed`` is called.  Returns early (after logging) when no
        dataset or coverage can be found for the stream.  When debug logging
        is enabled each phase is timed.

        :param stream_id: stream the granule arrived on
        :param rdt: record dictionary holding the granule's data
        :raises CorruptionError: when opening, extending or writing to the
            coverage raises IOError
        '''
        debugging = log.isEnabledFor(DEBUG)
        timer = Timer() if debugging else None
        if stream_id in self._bad_coverages:
            # Only logged; the insertion is still attempted below
            log.info(
                'Message attempting to be inserted into bad coverage: %s',
                DatasetManagementService._get_coverage_path(
                    self.get_dataset(stream_id)))

        #--------------------------------------------------------------------------------
        # Coverage determination and appending
        #--------------------------------------------------------------------------------
        dataset_id = self.get_dataset(stream_id)
        if not dataset_id:
            log.error('No dataset could be determined on this stream: %s',
                      stream_id)
            return
        try:
            coverage = self.get_coverage(stream_id)
        except IOError as e:
            log.error(
                "Couldn't open coverage: %s",
                DatasetManagementService._get_coverage_path(dataset_id))
            # str(e) is used because e.message is empty for
            # IOError(errno, strerror)
            raise CorruptionError(str(e))

        if debugging:
            path = DatasetManagementService._get_coverage_path(dataset_id)
            log.debug(
                '%s: add_granule stream %s dataset %s coverage %r file %s',
                self._id, stream_id, dataset_id, coverage, path)

        if not coverage:
            log.error(
                'Could not persist coverage from granule, coverage is None')
            return
        #--------------------------------------------------------------------------------
        # Actual persistence
        #--------------------------------------------------------------------------------
        elements = len(rdt)
        if debugging:
            timer.complete_step('checks')  # lightweight ops, should be zero
        try:
            coverage.insert_timesteps(elements, oob=False)
        except IOError as e:
            log.error("Couldn't insert time steps for coverage: %s",
                      DatasetManagementService._get_coverage_path(dataset_id),
                      exc_info=True)
            try:
                coverage.close()
            finally:
                # Runs even if close() fails
                self._bad_coverages[stream_id] = 1
                raise CorruptionError(str(e))
        if debugging:
            timer.complete_step('insert')

        start_index = coverage.num_timesteps - elements
        # Computed once, before the loop, so it is also bound for the
        # ingestion_timestamp write when the RDT yields no items.
        slice_ = slice(start_index, None)

        for k, v in rdt.iteritems():
            try:
                coverage.set_parameter_values(param_name=k,
                                              tdoa=slice_,
                                              value=v)
            except IOError as e:
                log.error("Couldn't insert values for coverage: %s",
                          DatasetManagementService._get_coverage_path(
                              dataset_id),
                          exc_info=True)
                try:
                    coverage.close()
                finally:
                    # Runs even if close() fails
                    self._bad_coverages[stream_id] = 1
                    raise CorruptionError(str(e))
        if 'ingestion_timestamp' in coverage.list_parameters():
            t_now = time.time()
            # Convert "now" into the units declared by the parameter context
            ntp_time = TimeUtils.ts_to_units(
                coverage.get_parameter_context('ingestion_timestamp').uom,
                t_now)
            coverage.set_parameter_values(param_name='ingestion_timestamp',
                                          tdoa=slice_,
                                          value=ntp_time)
        if debugging:
            timer.complete_step('keys')
        DatasetManagementService._save_coverage(coverage)
        if debugging:
            timer.complete_step('save')
        self.dataset_changed(dataset_id, coverage.num_timesteps,
                             (start_index, start_index + elements))
        if debugging:
            timer.complete_step('notify')
            self._add_timing_stats(timer)
    def _coverage_to_granule(cls,
                             coverage,
                             start_time=None,
                             end_time=None,
                             stride_time=None,
                             fuzzy_stride=True,
                             parameters=None,
                             stream_def_id=None,
                             tdoa=None):
        ''' Builds a RecordDictionaryTool from the contents of a coverage.

        The temporal selection is chosen in this order of precedence:

        1. ``tdoa`` when it is a slice object;
        2. an explicit index list when ``stride_time`` is given and
           ``fuzzy_stride`` is false (one index lookup per time produced by
           ``np.arange(start_time, end_time, stride_time)``);
        3. a slice between the indexes looked up for ``start_time`` and
           ``end_time`` (with ``stride_time`` as the step) when either bound
           is given;
        4. otherwise everything.

        :param coverage: coverage to read from
        :param start_time: optional numeric lower time bound
        :param end_time: optional numeric upper time bound
        :param stride_time: optional numeric stride
        :param fuzzy_stride: when false, resolve every strided time to an
            index individually
        :param parameters: optional iterable of field names; only those also
            present in the RDT are read
        :param stream_def_id: when truthy the RDT is built from this stream
            definition, otherwise from the coverage's parameter dictionary
        :param tdoa: optional slice used as-is for the temporal selection
        :returns: the populated RecordDictionaryTool
        :raises CorruptionError: when a field's data extent exceeds the
            number of timesteps in the coverage
        '''
        # Validations
        numeric_checks = (
            (start_time, 'start_time must be a number for striding.'),
            (end_time, 'end_time must be a number for striding.'),
            (stride_time, 'stride_time must be a number for striding.'),
        )
        for candidate, complaint in numeric_checks:
            if candidate is not None:
                validate_is_instance(candidate, Number, complaint)

        if isinstance(tdoa, slice):
            slice_ = tdoa
        elif stride_time is not None and not fuzzy_stride:  # SLOW
            sample_times = np.arange(start_time, end_time, stride_time)
            # The set drops duplicate indexes; sorting restores their order
            unique_idx = set(
                cls.get_time_idx(coverage, t) for t in sample_times)
            slice_ = [sorted(unique_idx)]
        elif start_time is not None or end_time is not None:
            if start_time is not None:
                start_time = cls.get_time_idx(coverage, start_time)
            if end_time is not None:
                end_time = cls.get_time_idx(coverage, end_time)

            slice_ = slice(start_time, end_time, stride_time)
            log.info('Slice: %s', slice_)
        else:
            slice_ = slice(None)  # Defaults to all values

        if stream_def_id:
            rdt = RecordDictionaryTool(stream_definition_id=stream_def_id)
        else:
            rdt = RecordDictionaryTool(
                param_dictionary=coverage.parameter_dictionary)

        # TODO: Improve efficiency here
        fields = (rdt.fields if parameters is None else
                  list(set(parameters).intersection(rdt.fields)))

        for field in fields:
            log.info('Slice is %s', slice_)
            data = coverage.get_parameter_values(field, tdoa=slice_)

            if not isinstance(data, np.ndarray):
                # None and any other non-array result are stored as one item
                rdt[field] = [data]
                continue

            if coverage.get_data_extents(field)[0] < coverage.num_timesteps:
                log.error(
                    "Misformed coverage detected, padding with fill_value")
                expected_len = utils.slice_shape(
                    slice_, (coverage.num_timesteps, ))[0]
                padding = np.empty(expected_len - data.shape[0],
                                   dtype=data.dtype)
                padding.fill(
                    coverage.get_parameter_context(field).fill_value)
                data = np.append(data, padding)
            elif coverage.get_data_extents(field)[0] > coverage.num_timesteps:
                raise CorruptionError(
                    'The coverage is corrupted:\n\tfield: %s\n\textents: %s\n\ttimesteps: %s'
                    % (field, coverage.get_data_extents(field),
                       coverage.num_timesteps))
            rdt[field] = data
        return rdt