Code Example #1
File: ccdc.py  Project: sumesh1/yatsm
 def time_ccdcesque1(self, setup):
     """ Bench with 'defaults' defined in setup with most tests turned off
     """
     kwargs = version_kwargs(setup['kwargs'])
     for i in range(n):
         model = CCDCesque(**kwargs)
         model.fit(setup['X'], setup['Y'], setup['dates'])
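These benchmark methods rely on a `setup` fixture and a module-level repeat count `n`, neither of which appears in the excerpt. A minimal synthetic sketch of what such a fixture might look like (shapes, names and values here are assumptions, not the project's actual benchmark setup):

import numpy as np

# Illustrative fixture only: X is (n_obs, n_features), Y is (n_bands, n_obs),
# dates are ordinal days; `n` is the repeat count used by the loops above.
n = 10
n_obs, n_bands, n_features = 400, 8, 4
rng = np.random.RandomState(42)
setup = {
    'X': rng.normal(size=(n_obs, n_features)),
    'Y': rng.randint(0, 10000, size=(n_bands, n_obs)).astype(np.int16),
    'dates': np.sort(rng.choice(np.arange(730000, 735000), n_obs, replace=False)),
    'kwargs': {},  # passed through version_kwargs() before CCDCesque(**kwargs)
}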
Code Example #2
File: change.py  Project: xjtang/yatsm
def pixel_CCDCesque(pipe, require, output, config=None):
    """ Run :class:`yatsm.algorithms.CCDCesque` on a pixel

    Users should pass to ``require`` both ``X`` and ``Y`` arguments, which
    are interpreted as:

    .. code-block:: python

        X, Y = require[0], require[1:]

    Args:
        pipe (yatsm.pipeline.Pipe): Piped data to operate on
        require (dict[str, list[str]]): Labels for the requirements of this
            calculation
        output (dict[str, list[str]]): Label for the result of this
            calculation
        config (dict): Configuration to pass to :class:`CCDCesque`. Should
            contain `init` section

    Returns:
        yatsm.pipeline.Pipe: Piped output

    """
    XY = pipe.data[require['data']].dropna('time', how='any')
    X = XY[require['data'][0]]
    Y = XY[require['data'][1:]].to_array()

    model = CCDCesque(**config.get('init', {}))
    model.py, model.px = Y.y, Y.x

    model = model.fit(X, Y.values, XY['ordinal'])
    pipe.record[output[RECORD][0]] = model.record

    return pipe
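Based on the docstring and the indexing above (`require['data'][0]` is used as X, the remaining labels as Y bands), the label dicts might be shaped like this; all names below are hypothetical:

# Hypothetical labels -- actual names depend on the dataset and pipeline config.
# require['data'][0] is used as X (e.g. a design matrix produced by an earlier
# pipeline step); the remaining entries are the Y bands passed to CCDCesque.
require = {'data': ['X', 'blue', 'green', 'red', 'nir', 'swir1', 'swir2']}
output = {'record': ['ccdc']}   # RECORD is presumably the string 'record'
config = {'init': {'consecutive': 5, 'threshold': 4.0}}
pipe = pixel_CCDCesque(pipe, require, output, config=config)  # `pipe` assumed already populated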
Code Example #3
File: ccdc.py  Project: ceholden/yatsm
 def time_ccdcesque1(self, setup):
     """ Bench with 'defaults' defined in setup with most tests turned off
     """
     kwargs = version_kwargs(setup['kwargs'])
     for i in range(n):
         model = CCDCesque(**kwargs)
         model.fit(setup['X'], setup['Y'], setup['dates'])
Code Example #4
File: ccdc.py  Project: sumesh1/yatsm
 def time_ccdcesque3(self, setup):
     """ Bench with remove_noise, dynamic_rmse turned on
     """
     kwargs = version_kwargs(setup['kwargs'])
     kwargs.update({'remove_noise': True, 'dynamic_rmse': True})
     for i in range(n):
         model = CCDCesque(**kwargs)
         model.fit(setup['X'], setup['Y'], setup['dates'])
Code Example #5
File: ccdc.py  Project: ceholden/yatsm
 def time_ccdcesque2(self, setup):
     """ Bench with remove_noise turned on
     """
     kwargs = version_kwargs(setup['kwargs'])
     kwargs.update({'remove_noise': True})
     for i in range(n):
         model = CCDCesque(**kwargs)
         model.fit(setup['X'], setup['Y'], setup['dates'])
Code Example #6
File: ccdc.py  Project: sumesh1/yatsm
 def time_ccdcesque1(self, setup):
     """ Bench with 'defaults' defined in setup with most tests turned off
     """
     kwargs = version_kwargs(setup['kwargs'])
     model = CCDCesque(**kwargs)
     for col in range(setup['Y'].shape[-1]):
         _Y, _X, _dates = setup['Y'][..., col], setup['X'], setup['dates']
         mask = np.in1d(_Y[-1, :], [0, 1])
         model.fit(_X[mask, :], _Y[:, mask], _dates[mask])
Code Example #7
File: ccdc.py  Project: ceholden/yatsm
 def time_ccdcesque1(self, setup):
     """ Bench with 'defaults' defined in setup with most tests turned off
     """
     kwargs = version_kwargs(setup['kwargs'])
     model = CCDCesque(**kwargs)
     for col in range(setup['Y'].shape[-1]):
         _Y, _X, _dates = setup['Y'][..., col], setup['X'], setup['dates']
         mask = np.in1d(_Y[-1, :], [0, 1])
         model.fit(_X[mask, :], _Y[:, mask], _dates[mask])
Code Example #8
File: ccdc.py  Project: sumesh1/yatsm
 def time_ccdcesque3(self, setup):
     """ Bench with remove_noise, dynamic_rmse turned on
     """
     kwargs = version_kwargs(setup['kwargs'])
     kwargs.update({'remove_noise': True, 'dynamic_rmse': True})
     model = CCDCesque(**kwargs)
     for col in range(setup['Y'].shape[-1]):
         _Y, _X, _dates = setup['Y'][..., col], setup['X'], setup['dates']
         mask = np.in1d(_Y[-1, :], [0, 1])
         model.fit(_X[mask, :], _Y[:, mask], _dates[mask])
Code Example #9
File: ccdc.py  Project: ceholden/yatsm
 def time_ccdcesque2(self, setup):
     """ Bench with remove_noise turned on
     """
     kwargs = version_kwargs(setup['kwargs'])
     kwargs.update({'remove_noise': True})
     model = CCDCesque(**kwargs)
     for col in range(setup['Y'].shape[-1]):
         _Y, _X, _dates = setup['Y'][..., col], setup['X'], setup['dates']
         mask = np.in1d(_Y[-1, :], [0, 1])
         model.fit(_X[mask, :], _Y[:, mask], _dates[mask])
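The per-column variants above treat the last band of `Y` as a mask band and keep only observations whose mask value is 0 or 1 (presumably clear land/water in an Fmask-style coding). The filter in isolation:

import numpy as np

# Toy mask band: 0/1 = clear land/water; 2/3/4 = shadow/snow/cloud; 255 = fill
mask_band = np.array([0, 1, 4, 0, 255, 2, 1])
keep = np.in1d(mask_band, [0, 1])
print(keep)  # [ True  True False  True False False  True]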
Code Example #10
    def _fetch_results_saved(self):
        """ Read YATSM results and return """
        self.yatsm_model = MockResult()
        row, col = self.series[0].py, self.series[0].px

        data_cfg = {
            'output': os.path.join(self.location,
                                   self.config['results_folder'].value),
            'output_prefix': (self.config['results_pattern'].value
                              .replace('*', ''))
        }
        result_filename = get_output_name(data_cfg, row)
        logger.info('Attempting to open: {f}'.format(f=result_filename))

        if not os.path.isfile(result_filename):
            qgis_log('Could not find result for row {r} ({fn})'.format(
                r=row, fn=result_filename))
            return

        z = np.load(result_filename)
        if 'record' not in z.files:
            raise KeyError('Cannot find "record" within saved result ({})'
                           .format(result_filename))
        if 'metadata' not in z.files:
            raise KeyError('Cannot find "metadata" within saved result ({})'
                           .format(result_filename))
        metadata = z['metadata'].item()
        if 'design' not in metadata['YATSM']:
            raise KeyError('Cannot find "design" within saved result metadata '
                           '({})'.format(result_filename))
        self._design_info = metadata['YATSM']['design']

        rec = z['record']
        idx = np.where((rec['px'] == col) & (rec['py'] == row))[0]
        self.yatsm_model.record = rec[idx]
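This reader expects an `.npz` file holding a structured `record` array (with at least `px` and `py` fields) and a pickled `metadata` dict carrying a `'design'` entry under `'YATSM'`. A rough sketch of a writer that would satisfy it (field names beyond those read here are illustrative):

import numpy as np

# Real YATSM records carry many more fields ('start', 'end', 'break', 'coef',
# 'rmse', ...); only 'px' and 'py' are needed by the lookup above.
record = np.zeros(1, dtype=[('px', 'i4'), ('py', 'i4')])
metadata = {'YATSM': {'design': {'Intercept': 0, 'x': 1}}}

# np.savez stores the dict as a 0-d object array, matching z['metadata'].item();
# newer NumPy needs allow_pickle=True when loading such arrays back.
np.savez('yatsm_r0.npz', record=record, metadata=np.array(metadata))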
Code Example #11
def pixel_CCDCesque(work, require, output, **config):
    """ Run CCDCesque on a pixel
    """
    arr = work['data'][require['data']].dropna('time', how='any').to_array()

    model = CCDCesque(**config.get('init', {}))
    model.py, model.px = arr.y, arr.x

    ordinal = arr.indexes['time'].map(lambda x: x.toordinal())
    design = config.get('fit', {}).get('design', '1 + ordinal')
    X = patsy.dmatrix(design,
                      data=arr,
                      eval_env=patsy.EvalEnvironment.capture())

    work['record'][output['record'][0]] = model.fit(X, arr.values, ordinal)
    return work
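The design string is handed to `patsy.dmatrix`, which builds the regression design matrix from the named variables. The same call pattern in isolation (the formula and values are only an illustration):

import numpy as np
import patsy

ordinal = np.array([733042.0, 733058.0, 733074.0])
X = patsy.dmatrix('1 + ordinal', data={'ordinal': ordinal})
print(X.design_info.column_names)  # ['Intercept', 'ordinal']
print(np.asarray(X).shape)         # (3, 2)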
Code Example #12
    def _fetch_results_saved(self):
        """ Read YATSM results and return """
        self.yatsm_model = MockResult()
        row, col = self.series[0].py, self.series[0].px

        data_cfg = {
            'output':
            os.path.join(self.location, self.config['results_folder'].value),
            'output_prefix':
            (self.config['results_pattern'].value.replace('*', ''))
        }
        result_filename = get_output_name(data_cfg, row)
        logger.info('Attempting to open: {f}'.format(f=result_filename))

        if not os.path.isfile(result_filename):
            qgis_log('Could not find result for row {r} ({fn})'.format(
                r=row, fn=result_filename))
            return

        z = np.load(result_filename)
        if 'record' not in z.files:
            raise KeyError(
                'Cannot find "record" within saved result ({})'.format(
                    result_filename))
        if 'metadata' not in z.files:
            raise KeyError(
                'Cannot find "metadata" within saved result ({})'.format(
                    result_filename))
        metadata = z['metadata'].item()
        if 'design' not in metadata['YATSM']:
            raise KeyError('Cannot find "design" within saved result metadata '
                           '({})'.format(result_filename))
        self._design = metadata['YATSM']['design_matrix']
        self._design_info = metadata['YATSM']['design']

        rec = z['record']
        idx = np.where((rec['px'] == col) & (rec['py'] == row))[0]
        self.yatsm_model.record = rec[idx]
Code Example #13
    def _fetch_results_live(self):
        """ Run YATSM and get results """
        logger.debug('Calculating YATSM results on the fly')
        # Setup design matrix, Y, and dates
        self.X = patsy.dmatrix(
            self.controls['design'].value, {
                'x': self.series[0].images['ordinal'],
                'sensor': self.series[0].sensor,
                'pr': self.series[0].pathrow
            })
        self._design_info = self.X.design_info.column_name_indexes
        self.Y = self.series[0].data.astype(np.int16)
        self.dates = np.asarray(self.series[0].images['ordinal'])

        mask = self.Y[self.config['mask_band'].value[0] - 1, :]
        Y_data = np.delete(self.Y,
                           self.config['mask_band'].value[0] - 1,
                           axis=0)

        # Mask out masked values
        clear = np.in1d(mask, self.mask_values, invert=True)
        valid = get_valid_mask(Y_data, self.config['min_values'].value,
                               self.config['max_values'].value).astype(np.bool)
        clear *= valid

        # Setup parameters
        estimator = sklearn.linear_model.Lasso(alpha=20)
        reg = self.controls['regression_type'].value
        if hasattr(yatsm.regression, 'packaged'):
            if reg in yatsm.regression.packaged.packaged_regressions:
                reg_fn = yatsm.regression.packaged.find_packaged_regressor(reg)
                try:
                    estimator = jl.load(reg_fn)
                except:
                    logger.error('Cannot load regressor: %s' % reg)
                else:
                    logger.debug('Loaded regressor %s from %s' % (reg, reg_fn))
            else:
                logger.error('Cannot use unknown regression %s' % reg)
        else:
            logger.warning(
                'Using failsafe Lasso(lambda=20) from scikit-learn. '
                'Upgrade to yatsm>=0.5.1 to access more regressors.')

        kwargs = dict(
            estimator=estimator,
            test_indices=self.controls['test_indices'].value,
            consecutive=self.controls['consecutive'].value,
            threshold=self.controls['threshold'].value,
            min_obs=self.controls['min_obs'].value,
            min_rmse=(self.controls['min_rmse'].value if
                      self.controls['enable_min_rmse'].value else None),
            screening_crit=self.controls['screen_crit'].value,
            remove_noise=self.controls['remove_noise'].value,
            dynamic_rmse=self.controls['dynamic_rmse'].value,
        )

        self.yatsm_model = CCDCesque(**version_kwargs(kwargs))
        # Don't want to have DEBUG logging when we run YATSM
        log_level = logger.level
        logger.setLevel(logging.INFO)

        if self.controls['reverse'].value:
            self.yatsm_model.fit(np.flipud(self.X[clear, :]),
                                 np.fliplr(Y_data[:, clear]),
                                 self.dates[clear][::-1])
        else:
            self.yatsm_model.fit(self.X[clear, :], Y_data[:, clear],
                                 self.dates[clear])

        if self.controls['commit_test'].value:
            self.yatsm_model.record = postprocess.commission_test(
                self.yatsm_model, self.controls['commit_alpha'].value)

        # if self.controls['robust_results'].value:
        #     self.coef_name = 'robust_coef'
        #     self.yatsm_model.record = postprocess.refit_record(
        #         self.yatsm_model, 'robust'
        # else:
        #     self.coef_name = 'coef'

        if self.config['calc_pheno'].value:
            # TODO: parameterize band indices & scale factor
            ltm = pheno.LongTermMeanPhenology()
            self.yatsm_model.record = ltm.fit(self.yatsm_model)

        # Restore log level
        logger.setLevel(log_level)
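The clear-observation screen above inverts a membership test against the driver's `mask_values` (2, 3, 4 and 255, which look like Fmask shadow/snow/cloud/fill codes) and then intersects it with a min/max validity mask. The same two steps on toy data:

import numpy as np

mask_values = np.array([2, 3, 4, 255])
mask_band = np.array([0, 2, 1, 4, 0, 255])
reflectance = np.array([300, 500, 12000, 800, 900, 16000])

clear = np.in1d(mask_band, mask_values, invert=True)  # drop cloud/shadow/snow/fill
valid = (reflectance >= 0) & (reflectance <= 10000)   # within min/max data values
clear *= valid                                        # combine, as `clear *= valid` above
print(clear)  # [ True False False False  True False]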
Code Example #14
File: timeseries_yatsm.py  Project: ceholden/TSTools
class YATSMTimeSeries(timeseries_stacked.StackedTimeSeries):
    """ Timeseries driver for CCDCesque algorithm implemented in YATSM

    Requires a working installation of YATSM. For more information, visit
    the [YATSM Github website](https://github.com/ceholden/yatsm).

    This driver requires the following Python packages in addition to basic
    TSTools package dependencies:

    * [`scikit-learn`](http://scikit-learn.org/stable/)
    * [`patsy`](https://patsy.readthedocs.org/en/latest/)
    * [`yatsm`](https://github.com/ceholden/yatsm)
    """
    description = 'YATSM CCDCesque Timeseries'
    location = None
    mask_values = np.array([2, 3, 4, 255])
    has_results = True

    # Driver configuration
    config = OrderedDict((
        ('stack_pattern', ConfigItem('Stack pattern', 'L*stack')),
        ('date_index', ConfigItem('Date index', [9, 16])),
        ('date_format', ConfigItem('Date format', '%Y%j')),
        ('cache_folder', ConfigItem('Cache folder', 'cache')),
        ('results_folder', ConfigItem('Results folder', 'YATSM')),
        ('results_pattern', ConfigItem('Results pattern', 'yatsm_r*')),
        ('mask_band', ConfigItem('Mask band', [8])),
        ('min_values', ConfigItem('Min data values', [0])),
        ('max_values', ConfigItem('Max data values', [10000])),
        ('metadata_file_pattern', ConfigItem('Metadata file pattern',
                                             'L*MTL.txt')),
        ('calc_pheno', ConfigItem('LTM phenology', False)),
    ))

    # Driver controls
    controls_title = 'YATSM Algorithm Options'
    controls = OrderedDict((
        ('calculate_live', ConfigItem('Calculate live', True)),
        ('consecutive', ConfigItem('Consecutive', 5)),
        ('min_obs', ConfigItem('Min obs.', 16)),
        ('threshold', ConfigItem('Threshold', 4.0)),
        ('enable_min_rmse', ConfigItem('Use min RMSE?', True)),
        ('min_rmse', ConfigItem('Min RMSE', 100.0)),
        ('design', ConfigItem('Design', '1 + x + harm(x, 1)')),
        ('test_indices', ConfigItem('Test indices', np.array([2, 3, 4, 5]))),
        ('dynamic_rmse', ConfigItem('Dynamic RMSE', True)),
        ('screen_crit', ConfigItem('Screening crit value', 400.0)),
        ('remove_noise', ConfigItem('Remove noise', True)),
        ('reverse', ConfigItem('Reverse', False)),
        ('regression_type', ConfigItem('Regression type', 'sklearn_Lasso20')),
        ('robust_results', ConfigItem('Robust results', False)),
        ('commit_test', ConfigItem('Commission test', False)),
        ('commit_alpha', ConfigItem('Commission test alpha', 0.10)),
    ))

    def __init__(self, location, config=None):
        super(YATSMTimeSeries, self).__init__(location, config=config)
        # Check for YATSM imports
        if not has_yatsm:
            raise ImportError(has_yatsm_msg)
        if self.config['calc_pheno'].value and not has_yatsm_pheno:
            raise ImportError(has_yatsm_pheno_msg)

        # Find extra metadata
        self._init_metadata()

        # Setup YATSM
        self.yatsm_model = None
        self.X = None
        self.Y = None
        self.coef_name = 'coef'

        # Setup min/max values
        desc, _min_values = self.config['min_values']
        if len(_min_values) == 1:
            _min_values = np.repeat(_min_values, self.series[0].count - 1)
        self.config['min_values'] = ConfigItem(desc, _min_values)

        desc, _max_values = self.config['max_values']
        if len(_max_values) == 1:
            _max_values = np.repeat(_max_values, self.series[0].count - 1)
        self.config['max_values'] = ConfigItem(desc, _max_values)

    def set_custom_controls(self, values):
        logger.debug('Setting custom values')
        for val, attr in zip(values, self.controls):
            desc, current_val = self.controls[attr]
            if isinstance(val, type(current_val)):
                self.controls[attr] = ConfigItem(desc, val)
            else:
                # Make an exception for minimum RMSE since we can pass None
                if attr == 'min_rmse' and isinstance(val, float):
                    self.controls[attr] = ConfigItem(desc, val)
                else:
                    msg = 'Could not set {k} to {v} (current: {c})'.format(
                        k=attr, v=val, c=current_val)
                    raise ValueError(msg)

    def fetch_results(self):
        """ Read or calculate results for current pixel """
        if self.controls['calculate_live'].value:
            self._fetch_results_live()
        else:
            self._fetch_results_saved()

        # Update multitemporal screening metadata
        if self.yatsm_model:
            if (self.controls['calculate_live'].value and
                    hasattr(self.yatsm_model, 'X')):
                self.series[0].multitemp_screened = \
                    np.in1d(self.X[:, 1], self.yatsm_model.X[:, 1],
                            invert=True).astype(np.uint8)
            if self.config['calc_pheno'].value:
                for rec in self.yatsm_model.record:
                    # Find dates in record
                    idx = np.where(
                        (self.series[0].images['ordinal'] >= rec['start']) &
                        (self.series[0].images['ordinal'] <= rec['end']))[0]
                    # Put observations into SPR/SUM/AUT
                    _spr = np.where(self.series[0].images['doy'][idx] <=
                                    rec['spring_doy'])[0]
                    _sum = np.where((self.series[0].images['doy'][idx] >
                                     rec['spring_doy']) &
                                    (self.series[0].images['doy'][idx] <
                                     rec['autumn_doy']))[0]
                    _aut = np.where(self.series[0].images['doy'][idx] >=
                                    rec['autumn_doy'])[0]
                    self.series[0].pheno[idx[_spr]] = 'SPR'
                    self.series[0].pheno[idx[_sum]] = 'SUM'
                    self.series[0].pheno[idx[_aut]] = 'AUT'

    def get_prediction(self, series, band, dates=None):
        """ Return prediction for a given band

        Args:
          series (int): index of Series used for prediction
          band (int): index of band to return
          dates (iterable): list or np.ndarray of ordinal dates to predict; if
            None, predicts for every date within timeseries (default: None)

        Returns:
          iterable: sequence of tuples (1D NumPy arrays, x and y) containing
            predictions

        """
        if series > 0:
            return
        if self.yatsm_model is None or len(self.yatsm_model.record) == 0:
            return
        if band >= self.yatsm_model.record[self.coef_name].shape[2]:
            logger.debug('No results for band %i' % band)
            return

        # Setup output
        mx = []
        my = []

        # Don't predict with any categorical information
        eqn = (self.controls['design'].value
               if self.controls['calculate_live'].value
               else self._design)
        design = re.sub(r'[\+\-][\ ]+C\(.*\)', '', eqn)
        coef_columns = []
        for k, v in self._design_info.iteritems():
            if not re.match('C\(.*\)', k):
                coef_columns.append(v)
        coef_columns = np.sort(np.asarray(coef_columns))

        for rec in self.yatsm_model.record:
            # Check for reverse
            if rec['end'] < rec['start']:
                i_step = -1
            else:
                i_step = 1
            # Date range to predict
            if dates is not None:
                end = max(rec['break'], rec['end'])
                _mx = dates[np.where((dates >= rec['start']) &
                                     (dates <= end))[0]]
            else:
                _mx = np.arange(rec['start'], rec['end'], i_step)

            if _mx.size == 0:
                continue
            # Coefficients to use for prediction
            _coef = rec[self.coef_name][coef_columns, band]
            # Setup design matrix
            _mX = patsy.dmatrix(design, {'x': _mx}).T
            # Predict
            _my = np.dot(_coef, _mX)
            # Transform ordinal back to datetime for plotting
            _mx = np.array([dt.fromordinal(int(_x)) for _x in _mx])

            mx.append(_mx)
            my.append(_my)

        return mx, my

    def get_breaks(self, series, band):
        """ Return break points for a given band

        Args:
          series (int): index of Series for prediction
          band (int): index of band to return

        Returns:
          iterable: sequence of tuples (1D NumPy arrays, x and y) containing
            break points

        """
        if self.yatsm_model is None:
            return
        # Setup output
        bx = []
        by = []

        if len(self.yatsm_model.record) > 0:
            for rec in self.yatsm_model.record:
                if rec['break'] != 0:
                    _bx = dt.fromordinal(int(rec['break']))
                    index = np.where(self.series[series].images['date'] ==
                                     _bx)[0]
                    if (index.size > 0 and
                            index[0] < self.series[series].data.shape[1]):
                        bx.append(_bx)
                        by.append(self.series[series].data[band, index[0]])
                    else:
                        logger.warning('Could not determine breakpoint')

        return bx, by

    def get_residuals(self, series, band):
        """ Return model residuals (y - predicted yhat) for a given band

        Args:
          series (int): index of Series for residuals
          band (int): index of band to return

        Returns:
          iterable: sequence of tuples (1D NumPy arrays, x and y) containing
            residual dates and values

        """
        if self.yatsm_model is None:
            return
        rx, ry = [], []

        X, y = self.get_data(series, band, mask=settings.plot['mask'])
        predict = self.get_prediction(series, band, dates=X['ordinal'])
        if predict is None:
            return
        date, yhat = predict

        for _date, _yhat in zip(date, yhat):
            idx = np.in1d(X['date'], _date)
            resid = y[idx] - _yhat

            rx.append(_date)
            ry.append(resid)

        return rx, ry

    def get_plot(self, series, band, axis, desc):
        """ Plot some information on an axis for a plot of some description

        Args:
          series (int): index of Series for residuals
          band (int): index of band to return
          axis (matplotlib.axes._subplots.Axes): a matplotlib axis to plot on
          desc (str): description of plot, usually a plot class from
            `tstools.plots`

        Returns:
          iterable: list of artists to include in legend

        """
        artists = []
        if desc == 'TSPlot':
            for rec in self.yatsm_model.record:
                _x = (rec['start'] + rec['end']) / 2.0
                _x, _y = self.get_prediction(series, band,
                                             dates=np.array([_x]))
                _x = _x[0][0]
                _y = _y[0][0] + 250
                axis.text(_x, _y, 'RMSE: %.3f' % rec['rmse'][band],
                          fontsize=18,
                          horizontalalignment='center')
        elif desc == 'DOYPlot':
            has_dates = all([r in self.yatsm_model.record.dtype.names
                             for r in ('spring_doy', 'autumn_doy')])
            if self.config['calc_pheno'].value and has_dates:
                colors = mpl.cm.Set1(np.linspace(0, 1, 9))[:, :-1]

                color_cycle = itertools.cycle(colors)
                for i, rec in enumerate(self.yatsm_model.record):
                    col = [c for c in color_cycle.next()]
                    artists.append(
                        axis.axvline(rec['spring_doy'], label='Model %i' % i,
                                     c=col, lw=2)
                    )
                    axis.axvline(rec['autumn_doy'], label='Model %i' % i,
                                 c=col, lw=2)

        return artists

# RESULTS HELPER METHODS
    def _fetch_results_saved(self):
        """ Read YATSM results and return """
        self.yatsm_model = MockResult()
        row, col = self.series[0].py, self.series[0].px

        data_cfg = {
            'output': os.path.join(self.location,
                                   self.config['results_folder'].value),
            'output_prefix': (self.config['results_pattern'].value
                              .replace('*', ''))
        }
        result_filename = get_output_name(data_cfg, row)
        logger.info('Attempting to open: {f}'.format(f=result_filename))

        if not os.path.isfile(result_filename):
            qgis_log('Could not find result for row {r} ({fn})'.format(
                r=row, fn=result_filename))
            return

        z = np.load(result_filename)
        if 'record' not in z.files:
            raise KeyError('Cannot find "record" within saved result ({})'
                           .format(result_filename))
        if 'metadata' not in z.files:
            raise KeyError('Cannot find "metadata" within saved result ({})'
                           .format(result_filename))
        metadata = z['metadata'].item()
        if 'design' not in metadata['YATSM']:
            raise KeyError('Cannot find "design" within saved result metadata '
                           '({})'.format(result_filename))
        self._design = metadata['YATSM']['design_matrix']
        self._design_info = metadata['YATSM']['design']

        rec = z['record']
        idx = np.where((rec['px'] == col) & (rec['py'] == row))[0]
        self.yatsm_model.record = rec[idx]

    def _fetch_results_live(self):
        """ Run YATSM and get results """
        logger.debug('Calculating YATSM results on the fly')
        # Setup design matrix, Y, and dates
        self.X = patsy.dmatrix(self.controls['design'].value,
                               {'x': self.series[0].images['ordinal'],
                                'sensor': self.series[0].sensor,
                                'pr': self.series[0].pathrow})
        self._design_info = self.X.design_info.column_name_indexes
        self.Y = self.series[0].data.astype(np.int16)
        self.dates = np.asarray(self.series[0].images['ordinal'])

        mask = self.Y[self.config['mask_band'].value[0] - 1, :]
        Y_data = np.delete(self.Y, self.config['mask_band'].value[0] - 1,
                           axis=0)

        # Mask out masked values
        clear = np.in1d(mask, self.mask_values, invert=True)
        valid = get_valid_mask(Y_data,
                               self.config['min_values'].value,
                               self.config['max_values'].value).astype(np.bool)
        clear *= valid

        # Setup parameters
        estimator = sklearn.linear_model.Lasso(alpha=20)
        reg = self.controls['regression_type'].value
        if hasattr(yatsm.regression, 'packaged'):
            if reg in yatsm.regression.packaged.packaged_regressions:
                reg_fn = yatsm.regression.packaged.find_packaged_regressor(reg)
                try:
                    estimator = jl.load(reg_fn)
                except:
                    logger.error('Cannot load regressor: %s' % reg)
                else:
                    logger.debug('Loaded regressor %s from %s' % (reg, reg_fn))
            else:
                logger.error('Cannot use unknown regression %s' % reg)
        else:
            logger.warning(
                'Using failsafe Lasso(lambda=20) from scikit-learn. '
                'Upgrade to yatsm>=0.5.1 to access more regressors.')

        kwargs = dict(
            estimator=estimator,
            test_indices=self.controls['test_indices'].value,
            consecutive=self.controls['consecutive'].value,
            threshold=self.controls['threshold'].value,
            min_obs=self.controls['min_obs'].value,
            min_rmse=(self.controls['min_rmse'].value if
                      self.controls['enable_min_rmse'].value else None),
            screening_crit=self.controls['screen_crit'].value,
            remove_noise=self.controls['remove_noise'].value,
            dynamic_rmse=self.controls['dynamic_rmse'].value,
        )

        self.yatsm_model = CCDCesque(**version_kwargs(kwargs))
        # Don't want to have DEBUG logging when we run YATSM
        log_level = logger.level
        logger.setLevel(logging.INFO)

        if self.controls['reverse'].value:
            self.yatsm_model.fit(
                np.flipud(self.X[clear, :]),
                np.fliplr(Y_data[:, clear]),
                self.dates[clear][::-1])
        else:
            self.yatsm_model.fit(
                self.X[clear, :],
                Y_data[:, clear],
                self.dates[clear])

        if self.controls['commit_test'].value:
            self.yatsm_model.record = postprocess.commission_test(
                self.yatsm_model, self.controls['commit_alpha'].value)

        # if self.controls['robust_results'].value:
        #     self.coef_name = 'robust_coef'
        #     self.yatsm_model.record = postprocess.refit_record(
        #         self.yatsm_model, 'robust'
        # else:
        #     self.coef_name = 'coef'

        if self.config['calc_pheno'].value:
            # TODO: parameterize band indices & scale factor
            ltm = pheno.LongTermMeanPhenology()
            self.yatsm_model.record = ltm.fit(self.yatsm_model)

        # Restore log level
        logger.setLevel(log_level)

# SETUP
    def _init_metadata(self):
        """ Setup metadata for series """
        # Find MTL file
        self.mtl_files = None
        if self.config['metadata_file_pattern'].value:
            search = find_files(
                self.location, self.config['metadata_file_pattern'].value,
                ignore_dirs=[self.config['results_folder'].value])
            if len(search) == 0:
                logger.error(
                    'Could not find image metadata with pattern {p}'.format(
                        p=self.config['metadata_file_pattern'].value))
            if len(search) != len(self.series[0].images['date']):
                logger.error(
                    'Inconsistent number of metadata files found: '
                    '{0} images vs {1} metadata files'.format(
                        len(self.series[0].images['date']),
                        len(search)))
            else:
                self.mtl_files = search

        # Setup metadata for series
        self.series[0].metadata = ['sensor', 'pathrow', 'multitemp_screened']
        self.series[0].metadata_names = ['Sensor', 'Path/Row',
                                         'Multitemp Screened']
        self.series[0].metadata_table = [False, False, False]

        # Sensor ID
        self.series[0].sensor = np.array([n[0:3] for n in
                                          self.series[0].images['filename']])
        # Path/row
        self.series[0].pathrow = np.array([
            'p{p}r{r}'.format(p=n[3:6], r=n[6:9]) for
            n in self.series[0].images['filename']])
        # Multitemporal noise screening - init to 0 (not screened)
        #   Will update this during model fitting
        self.series[0].multitemp_screened = np.ones(self.series[0].n)
        # Make an entry 0 so we get this in the unique values
        self.series[0].multitemp_screened[0] = 0

        # If we found MTL files, find cloud cover
        if self.mtl_files is not None:
            self.series[0].metadata.append('cloud_cover')
            self.series[0].metadata_names.append('Cloud cover')
            self.series[0].metadata_table.append(True)
            self.series[0].cloud_cover = np.ones(self.series[0].n) * -9999
            cloud_cover = {}
            for mtl_file in self.mtl_files:
                attrs = parse_landsat_MTL(mtl_file, ['LANDSAT_SCENE_ID',
                                                     'CLOUD_COVER'])
                scene_ID = attrs.get('LANDSAT_SCENE_ID')
                if scene_ID:
                    cloud_cover[scene_ID] = attrs.get('CLOUD_COVER', -9999.0)

            for idx, _id in enumerate(self.series[0].images['id']):
                self.series[0].cloud_cover[idx] = cloud_cover.get(_id, -9999.0)

        if self.config['calc_pheno'].value:
            self.series[0].metadata.append('pheno')
            self.series[0].metadata_names.append('Phenology')
            self.series[0].metadata_table.append(False)
            # Initialize almost all as summer (SUM); first two as SPR/AUT
            self.series[0].pheno = np.repeat('SUM', self.series[0].n)
            self.series[0].pheno[0] = 'SPR'
            self.series[0].pheno[1] = 'AUT'
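The driver both unpacks config entries as `(description, value)` pairs and reads a `.value` attribute, so `ConfigItem` is presumably a two-field namedtuple along these lines (a guess based on usage, not the actual TSTools definition):

from collections import namedtuple

ConfigItem = namedtuple('ConfigItem', ['description', 'value'])

item = ConfigItem('Mask band', [8])
desc, val = item            # tuple unpacking, as in __init__
print(item.value == val)    # attribute access, as in fetch_results -> True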
Code Example #15
class YATSMTimeSeries(timeseries_stacked.StackedTimeSeries):
    """ Timeseries driver for CCDCesque algorithm implemented in YATSM

    Requires a working installation of YATSM. For more information, visit
    the [YATSM Github website](https://github.com/ceholden/yatsm).

    This driver requires the following Python packages in addition to basic
    TSTools package dependencies:

    * [`scikit-learn`](http://scikit-learn.org/stable/)
    * [`patsy`](https://patsy.readthedocs.org/en/latest/)
    * [`yatsm`](https://github.com/ceholden/yatsm)
    """
    description = 'YATSM CCDCesque Timeseries'
    location = None
    mask_values = np.array([2, 3, 4, 255])
    has_results = True

    # Driver configuration
    config = OrderedDict((
        ('stack_pattern', ConfigItem('Stack pattern', 'L*stack')),
        ('date_index', ConfigItem('Date index', [9, 16])),
        ('date_format', ConfigItem('Date format', '%Y%j')),
        ('cache_folder', ConfigItem('Cache folder', 'cache')),
        ('results_folder', ConfigItem('Results folder', 'YATSM')),
        ('results_pattern', ConfigItem('Results pattern', 'yatsm_r*')),
        ('mask_band', ConfigItem('Mask band', [8])),
        ('min_values', ConfigItem('Min data values', [0])),
        ('max_values', ConfigItem('Max data values', [10000])),
        ('metadata_file_pattern',
         ConfigItem('Metadata file pattern', 'L*MTL.txt')),
        ('calc_pheno', ConfigItem('LTM phenology', False)),
    ))

    # Driver controls
    controls_title = 'YATSM Algorithm Options'
    controls = OrderedDict((
        ('calculate_live', ConfigItem('Calculate live', True)),
        ('consecutive', ConfigItem('Consecutive', 5)),
        ('min_obs', ConfigItem('Min obs.', 16)),
        ('threshold', ConfigItem('Threshold', 4.0)),
        ('enable_min_rmse', ConfigItem('Use min RMSE?', True)),
        ('min_rmse', ConfigItem('Min RMSE', 100.0)),
        ('design', ConfigItem('Design', '1 + x + harm(x, 1)')),
        ('test_indices', ConfigItem('Test indices', np.array([2, 3, 4, 5]))),
        ('dynamic_rmse', ConfigItem('Dynamic RMSE', True)),
        ('screen_crit', ConfigItem('Screening crit value', 400.0)),
        ('remove_noise', ConfigItem('Remove noise', True)),
        ('reverse', ConfigItem('Reverse', False)),
        ('regression_type', ConfigItem('Regression type', 'sklearn_Lasso20')),
        ('robust_results', ConfigItem('Robust results', False)),
        ('commit_test', ConfigItem('Commission test', False)),
        ('commit_alpha', ConfigItem('Commission test alpha', 0.10)),
    ))

    def __init__(self, location, config=None):
        super(YATSMTimeSeries, self).__init__(location, config=config)
        # Check for YATSM imports
        if not has_yatsm:
            raise ImportError(has_yatsm_msg)
        if self.config['calc_pheno'].value and not has_yatsm_pheno:
            raise ImportError(has_yatsm_pheno_msg)

        # Find extra metadata
        self._init_metadata()

        # Setup YATSM
        self.yatsm_model = None
        self.X = None
        self.Y = None
        self.coef_name = 'coef'

        # Setup min/max values
        desc, _min_values = self.config['min_values']
        if len(_min_values) == 1:
            _min_values = np.repeat(_min_values, self.series[0].count - 1)
        self.config['min_values'] = ConfigItem(desc, _min_values)

        desc, _max_values = self.config['max_values']
        if len(_max_values) == 1:
            _max_values = np.repeat(_max_values, self.series[0].count - 1)
        self.config['max_values'] = ConfigItem(desc, _max_values)

    def set_custom_controls(self, values):
        logger.debug('Setting custom values')
        for val, attr in zip(values, self.controls):
            desc, current_val = self.controls[attr]
            if isinstance(val, type(current_val)):
                self.controls[attr] = ConfigItem(desc, val)
            else:
                # Make an exception for minimum RMSE since we can pass None
                if attr == 'min_rmse' and isinstance(val, float):
                    self.controls[attr] = ConfigItem(desc, val)
                else:
                    msg = 'Could not set {k} to {v} (current: {c})'.format(
                        k=attr, v=val, c=current_val)
                    raise ValueError(msg)

    def fetch_results(self):
        """ Read or calculate results for current pixel """
        if self.controls['calculate_live'].value:
            self._fetch_results_live()
        else:
            self._fetch_results_saved()

        # Update multitemporal screening metadata
        if self.yatsm_model:
            if (self.controls['calculate_live'].value
                    and hasattr(self.yatsm_model, 'X')):
                self.series[0].multitemp_screened = \
                    np.in1d(self.X[:, 1], self.yatsm_model.X[:, 1],
                            invert=True).astype(np.uint8)
            if self.config['calc_pheno'].value:
                for rec in self.yatsm_model.record:
                    # Find dates in record
                    idx = np.where(
                        (self.series[0].images['ordinal'] >= rec['start'])
                        & (self.series[0].images['ordinal'] <= rec['end']))[0]
                    # Put observations into SPR/SUM/AUT
                    _spr = np.where(
                        self.series[0].images['doy'][idx] <= rec['spring_doy']
                    )[0]
                    _sum = np.where(
                        (self.series[0].images['doy'][idx] > rec['spring_doy'])
                        & (self.series[0].images['doy'][idx] <
                           rec['autumn_doy']))[0]
                    _aut = np.where(
                        self.series[0].images['doy'][idx] >= rec['autumn_doy']
                    )[0]
                    self.series[0].pheno[idx[_spr]] = 'SPR'
                    self.series[0].pheno[idx[_sum]] = 'SUM'
                    self.series[0].pheno[idx[_aut]] = 'AUT'

    def get_prediction(self, series, band, dates=None):
        """ Return prediction for a given band

        Args:
          series (int): index of Series used for prediction
          band (int): index of band to return
          dates (iterable): list or np.ndarray of ordinal dates to predict; if
            None, predicts for every date within timeseries (default: None)

        Returns:
          iterable: sequence of tuples (1D NumPy arrays, x and y) containing
            predictions

        """
        if series > 0:
            return
        if self.yatsm_model is None or len(self.yatsm_model.record) == 0:
            return
        if band >= self.yatsm_model.record[self.coef_name].shape[2]:
            logger.debug('No results for band %i' % band)
            return

        # Setup output
        mx = []
        my = []

        # Don't predict with any categorical information
        eqn = (self.controls['design'].value
               if self.controls['calculate_live'].value else self._design)
        design = re.sub(r'[\+\-][\ ]+C\(.*\)', '', eqn)
        coef_columns = []
        for k, v in self._design_info.iteritems():
            if not re.match('C\(.*\)', k):
                coef_columns.append(v)
        coef_columns = np.sort(np.asarray(coef_columns))

        for rec in self.yatsm_model.record:
            # Check for reverse
            if rec['end'] < rec['start']:
                i_step = -1
            else:
                i_step = 1
            # Date range to predict
            if dates is not None:
                end = max(rec['break'], rec['end'])
                _mx = dates[np.where((dates >= rec['start'])
                                     & (dates <= end))[0]]
            else:
                _mx = np.arange(rec['start'], rec['end'], i_step)

            if _mx.size == 0:
                continue
            # Coefficients to use for prediction
            _coef = rec[self.coef_name][coef_columns, band]
            # Setup design matrix
            _mX = patsy.dmatrix(design, {'x': _mx}).T
            # Predict
            _my = np.dot(_coef, _mX)
            # Transform ordinal back to datetime for plotting
            _mx = np.array([dt.fromordinal(int(_x)) for _x in _mx])

            mx.append(_mx)
            my.append(_my)

        return mx, my

    def get_breaks(self, series, band):
        """ Return break points for a given band

        Args:
          series (int): index of Series for prediction
          band (int): index of band to return

        Returns:
          iterable: sequence of tuples (1D NumPy arrays, x and y) containing
            break points

        """
        if self.yatsm_model is None:
            return
        # Setup output
        bx = []
        by = []

        if len(self.yatsm_model.record) > 0:
            for rec in self.yatsm_model.record:
                if rec['break'] != 0:
                    _bx = dt.fromordinal(int(rec['break']))
                    index = np.where(
                        self.series[series].images['date'] == _bx)[0]
                    if (index.size > 0
                            and index[0] < self.series[series].data.shape[1]):
                        bx.append(_bx)
                        by.append(self.series[series].data[band, index[0]])
                    else:
                        logger.warning('Could not determine breakpoint')

        return bx, by

    def get_residuals(self, series, band):
        """ Return model residuals (y - predicted yhat) for a given band

        Args:
          series (int): index of Series for residuals
          band (int): index of band to return

        Returns:
          iterable: sequence of tuples (1D NumPy arrays, x and y) containing
            residual dates and values

        """
        if self.yatsm_model is None:
            return
        rx, ry = [], []

        X, y = self.get_data(series, band, mask=settings.plot['mask'])
        predict = self.get_prediction(series, band, dates=X['ordinal'])
        if predict is None:
            return
        date, yhat = predict

        for _date, _yhat in zip(date, yhat):
            idx = np.in1d(X['date'], _date)
            resid = y[idx] - _yhat

            rx.append(_date)
            ry.append(resid)

        return rx, ry

    def get_plot(self, series, band, axis, desc):
        """ Plot some information on an axis for a plot of some description

        Args:
          series (int): index of Series for residuals
          band (int): index of band to return
          axis (matplotlib.axes._subplots.Axes): a matplotlib axis to plot on
          desc (str): description of plot, usually a plot class from
            `tstools.plots`

        Returns:
          iterable: list of artists to include in legend

        """
        artists = []
        if desc == 'TSPlot':
            for rec in self.yatsm_model.record:
                _x = (rec['start'] + rec['end']) / 2.0
                _x, _y = self.get_prediction(series,
                                             band,
                                             dates=np.array([_x]))
                _x = _x[0][0]
                _y = _y[0][0] + 250
                axis.text(_x,
                          _y,
                          'RMSE: %.3f' % rec['rmse'][band],
                          fontsize=18,
                          horizontalalignment='center')
        elif desc == 'DOYPlot':
            has_dates = all([
                r in self.yatsm_model.record.dtype.names
                for r in ('spring_doy', 'autumn_doy')
            ])
            if self.config['calc_pheno'].value and has_dates:
                colors = mpl.cm.Set1(np.linspace(0, 1, 9))[:, :-1]

                color_cycle = itertools.cycle(colors)
                for i, rec in enumerate(self.yatsm_model.record):
                    col = [c for c in color_cycle.next()]
                    artists.append(
                        axis.axvline(rec['spring_doy'],
                                     label='Model %i' % i,
                                     c=col,
                                     lw=2))
                    axis.axvline(rec['autumn_doy'],
                                 label='Model %i' % i,
                                 c=col,
                                 lw=2)

        return artists

# RESULTS HELPER METHODS

    def _fetch_results_saved(self):
        """ Read YATSM results and return """
        self.yatsm_model = MockResult()
        row, col = self.series[0].py, self.series[0].px

        data_cfg = {
            'output':
            os.path.join(self.location, self.config['results_folder'].value),
            'output_prefix':
            (self.config['results_pattern'].value.replace('*', ''))
        }
        result_filename = get_output_name(data_cfg, row)
        logger.info('Attempting to open: {f}'.format(f=result_filename))

        if not os.path.isfile(result_filename):
            qgis_log('Could not find result for row {r} ({fn})'.format(
                r=row, fn=result_filename))
            return

        z = np.load(result_filename)
        if 'record' not in z.files:
            raise KeyError(
                'Cannot find "record" within saved result ({})'.format(
                    result_filename))
        if 'metadata' not in z.files:
            raise KeyError(
                'Cannot find "metadata" within saved result ({})'.format(
                    result_filename))
        metadata = z['metadata'].item()
        if 'design' not in metadata['YATSM']:
            raise KeyError('Cannot find "design" within saved result metadata '
                           '({})'.format(result_filename))
        self._design = metadata['YATSM']['design_matrix']
        self._design_info = metadata['YATSM']['design']

        rec = z['record']
        idx = np.where((rec['px'] == col) & (rec['py'] == row))[0]
        self.yatsm_model.record = rec[idx]

    def _fetch_results_live(self):
        """ Run YATSM and get results """
        logger.debug('Calculating YATSM results on the fly')
        # Setup design matrix, Y, and dates
        self.X = patsy.dmatrix(
            self.controls['design'].value, {
                'x': self.series[0].images['ordinal'],
                'sensor': self.series[0].sensor,
                'pr': self.series[0].pathrow
            })
        self._design_info = self.X.design_info.column_name_indexes
        self.Y = self.series[0].data.astype(np.int16)
        self.dates = np.asarray(self.series[0].images['ordinal'])

        mask = self.Y[self.config['mask_band'].value[0] - 1, :]
        Y_data = np.delete(self.Y,
                           self.config['mask_band'].value[0] - 1,
                           axis=0)

        # Mask out masked values
        clear = np.in1d(mask, self.mask_values, invert=True)
        valid = get_valid_mask(Y_data, self.config['min_values'].value,
                               self.config['max_values'].value).astype(np.bool)
        clear *= valid

        # Setup parameters
        estimator = sklearn.linear_model.Lasso(alpha=20)
        reg = self.controls['regression_type'].value
        if hasattr(yatsm.regression, 'packaged'):
            if reg in yatsm.regression.packaged.packaged_regressions:
                reg_fn = yatsm.regression.packaged.find_packaged_regressor(reg)
                try:
                    estimator = jl.load(reg_fn)
                except:
                    logger.error('Cannot load regressor: %s' % reg)
                else:
                    logger.debug('Loaded regressor %s from %s' % (reg, reg_fn))
            else:
                logger.error('Cannot use unknown regression %s' % reg)
        else:
            logger.warning(
                'Using failsafe Lasso(lambda=20) from scikit-learn. '
                'Upgrade to yatsm>=0.5.1 to access more regressors.')

        kwargs = dict(
            estimator=estimator,
            test_indices=self.controls['test_indices'].value,
            consecutive=self.controls['consecutive'].value,
            threshold=self.controls['threshold'].value,
            min_obs=self.controls['min_obs'].value,
            min_rmse=(self.controls['min_rmse'].value if
                      self.controls['enable_min_rmse'].value else None),
            screening_crit=self.controls['screen_crit'].value,
            remove_noise=self.controls['remove_noise'].value,
            dynamic_rmse=self.controls['dynamic_rmse'].value,
        )

        self.yatsm_model = CCDCesque(**version_kwargs(kwargs))
        # Don't want to have DEBUG logging when we run YATSM
        log_level = logger.level
        logger.setLevel(logging.INFO)

        if self.controls['reverse'].value:
            self.yatsm_model.fit(np.flipud(self.X[clear, :]),
                                 np.fliplr(Y_data[:, clear]),
                                 self.dates[clear][::-1])
        else:
            self.yatsm_model.fit(self.X[clear, :], Y_data[:, clear],
                                 self.dates[clear])

        if self.controls['commit_test'].value:
            self.yatsm_model.record = postprocess.commission_test(
                self.yatsm_model, self.controls['commit_alpha'].value)

        # if self.controls['robust_results'].value:
        #     self.coef_name = 'robust_coef'
        #     self.yatsm_model.record = postprocess.refit_record(
        #         self.yatsm_model, 'robust'
        # else:
        #     self.coef_name = 'coef'

        if self.config['calc_pheno'].value:
            # TODO: parameterize band indices & scale factor
            ltm = pheno.LongTermMeanPhenology()
            self.yatsm_model.record = ltm.fit(self.yatsm_model)

        # Restore log level
        logger.setLevel(log_level)


# SETUP

    def _init_metadata(self):
        """ Setup metadata for series """
        # Find MTL file
        self.mtl_files = None
        if self.config['metadata_file_pattern'].value:
            search = find_files(
                self.location,
                self.config['metadata_file_pattern'].value,
                ignore_dirs=[self.config['results_folder'].value])
            if len(search) == 0:
                logger.error(
                    'Could not find image metadata with pattern {p}'.format(
                        p=self.config['metadata_file_pattern'].value))
            if len(search) != len(self.series[0].images['date']):
                logger.error('Inconsistent number of metadata files found: '
                             '{0} images vs {1} metadata files'.format(
                                 len(self.series[0].images['date']),
                                 len(search)))
            else:
                self.mtl_files = search

        # Setup metadata for series
        self.series[0].metadata = ['sensor', 'pathrow', 'multitemp_screened']
        self.series[0].metadata_names = [
            'Sensor', 'Path/Row', 'Multitemp Screened'
        ]
        self.series[0].metadata_table = [False, False, False]

        # Sensor ID
        self.series[0].sensor = np.array(
            [n[0:3] for n in self.series[0].images['filename']])
        # Path/row
        self.series[0].pathrow = np.array([
            'p{p}r{r}'.format(p=n[3:6], r=n[6:9])
            for n in self.series[0].images['filename']
        ])
        # Multitemporal noise screening - init to 0 (not screened)
        #   Will update this during model fitting
        self.series[0].multitemp_screened = np.ones(self.series[0].n)
        # Make an entry 0 so we get this in the unique values
        self.series[0].multitemp_screened[0] = 0

        # If we found MTL files, find cloud cover
        if self.mtl_files is not None:
            self.series[0].metadata.append('cloud_cover')
            self.series[0].metadata_names.append('Cloud cover')
            self.series[0].metadata_table.append(True)
            self.series[0].cloud_cover = np.ones(self.series[0].n) * -9999
            cloud_cover = {}
            for mtl_file in self.mtl_files:
                attrs = parse_landsat_MTL(mtl_file,
                                          ['LANDSAT_SCENE_ID', 'CLOUD_COVER'])
                scene_ID = attrs.get('LANDSAT_SCENE_ID')
                if scene_ID:
                    cloud_cover[scene_ID] = attrs.get('CLOUD_COVER', -9999.0)

            for idx, _id in enumerate(self.series[0].images['id']):
                self.series[0].cloud_cover[idx] = cloud_cover.get(_id, -9999.0)

        if self.config['calc_pheno'].value:
            self.series[0].metadata.append('pheno')
            self.series[0].metadata_names.append('Phenology')
            self.series[0].metadata_table.append(False)
            # Initialize almost all as summer (SUM); first two as SPR/AUT
            self.series[0].pheno = np.repeat('SUM', self.series[0].n)
            self.series[0].pheno[0] = 'SPR'
            self.series[0].pheno[1] = 'AUT'
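The prediction loop in `get_prediction` builds a design matrix over the segment's date range and takes a dot product with the stored coefficients. The same arithmetic with a toy design and made-up coefficients (not actual YATSM output):

import numpy as np
import patsy
from datetime import datetime as dt

design = '1 + x'                          # categorical terms already stripped
ordinals = np.arange(733000, 733010)
coef = np.array([5000.0, 0.1])            # hypothetical [intercept, slope]

mX = patsy.dmatrix(design, {'x': ordinals}).T  # same pattern as _mX above
my = np.dot(coef, mX)                          # one predicted value per date
mx = np.array([dt.fromordinal(int(x)) for x in ordinals])  # back to datetimes for plotting
print(my.shape, mx[0].year)  # (10,) 2007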
Code Example #16
class YATSMTimeSeries(timeseries_stacked.StackedTimeSeries):
    """ Timeseries driver for YATSM algorithm
    """
    description = 'YATSM CCDCesque Timeseries'
    location = None
    mask_values = np.array([2, 3, 4, 255])

    # Driver configuration
    _stack_pattern = 'L*stack'
    _date_index = [9, 16]
    _date_format = '%Y%j'
    _cache_folder = 'cache'
    _results_folder = 'YATSM'
    _results_pattern = 'yatsm_r*'
    _mask_band = [8]
    _min_values = [0]
    _max_values = [10000]
    _metadata_file_pattern = 'L*MTL.txt'
    _calc_pheno = False

    config = ['_stack_pattern',
              '_date_index',
              '_date_format',
              '_cache_folder',
              '_results_folder',
              '_results_pattern',
              '_mask_band',
              '_min_values', '_max_values',
              '_metadata_file_pattern',
              '_calc_pheno']
    config_names = [
        'Stack pattern',
        'Date index',
        'Date format',
        'Cache folder',
        'Results folder',
        'Results pattern',
        'Mask band',
        'Min data values', 'Max data values',
        'Metadata file pattern',
        'LTM phenology']

    # Driver controls
    _calculate_live = True
    _consecutive = 5
    _min_obs = 16
    _threshold = 4.0
    _enable_min_rmse = True
    _min_rmse = 100
    _design = '1 + x + harm(x, 1)'
    _test_indices = np.array([2, 3, 4, 5])
    _dynamic_rmse = True
    _screen_crit = 400.0
    _remove_noise = True
    _reverse = False
    _robust_results = False
    _commit_test = False
    _commit_alpha = 0.01

    # Requires YATSM>=v0.5.0
    _regression_type = 'sklearn_Lasso20'

    controls_title = 'YATSM Algorithm Options'
    controls = [
        '_calculate_live',
        '_consecutive',
        '_min_obs',
        '_threshold',
        '_enable_min_rmse',
        '_min_rmse',
        '_design',
        '_test_indices',
        '_dynamic_rmse',
        '_screen_crit',
        '_remove_noise',
        '_reverse',
        '_regression_type',
        '_robust_results',
        '_commit_test',
        '_commit_alpha']
    controls_names = [
        'Calculate live',
        'Consecutive',
        'Min Observations',
        'Threshold',
        'Use min RMSE?',
        'Min RMSE',
        'Design',
        'Test indices',
        'Dynamic RMSE',
        'Screening critical value',
        'Remove noise',
        'Run in reverse',
        'Regression type',
        'Robust results',
        'Commission test',
        'Commission test alpha']

    def __init__(self, location, config=None):
        super(YATSMTimeSeries, self).__init__(location, config=config)

        # Check for YATSM imports
        self._check_yatsm()
        # Find extra metadata
        self._init_metadata()

        # Setup YATSM
        self.yatsm_model = None
        self.X = None
        self.Y = None
        self.coef_name = 'coef'

        # Setup min/max values
        if len(self._min_values) == 1:
            self._min_values = self._min_values * (self.series[0].count - 1)
        if len(self._max_values) == 1:
            self._max_values = self._max_values * (self.series[0].count - 1)
        self._min_values = np.asarray(self._min_values)
        self._max_values = np.asarray(self._max_values)
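        # e.g., with an 8-band stack (7 data bands + 1 mask band), a single
        # entry like _min_values = [0] is broadcast to seven zeros; the band
        # count here is illustrative, not taken from the source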

    def set_custom_controls(self, values):
        logger.debug('Setting custom values')
        for v, k in zip(values, self.controls):
            current_value = getattr(self, k)
            if isinstance(v, type(current_value)):
                setattr(self, k, v)
            else:
                # Make an exception for minimum RMSE since we can pass None
                if k == '_min_rmse' and (v is None or isinstance(v, float)):
                    setattr(self, k, v)
                else:
                    msg = 'Could not set {k} to {v} (current: {c})'.format(
                        k=k, v=v, c=current_value)
                    raise Exception(msg)

    def fetch_results(self):
        """ Read or calculate results for current pixel """
        if self._calculate_live:
            self._fetch_results_live()
        else:
            self._fetch_results_saved()

        # Update multitemporal screening metadata
        if self.yatsm_model:
            self.series[0].multitemp_screened = \
                np.in1d(self.X[:, 1], self.yatsm_model.X[:, 1],
                        invert=True).astype(np.uint8)
            if self._calc_pheno:
                for rec in self.yatsm_model.record:
                    # Find dates in record
                    idx = np.where(
                        (self.series[0].images['ordinal'] >= rec['start']) &
                        (self.series[0].images['ordinal'] <= rec['end']))[0]
                    # Put observations into SPR/SUM/AUT
                    _spr = np.where(self.series[0].images['doy'][idx] <=
                                    rec['spring_doy'])[0]
                    _sum = np.where((self.series[0].images['doy'][idx] >
                                     rec['spring_doy']) &
                                    (self.series[0].images['doy'][idx] <
                                     rec['autumn_doy']))[0]
                    _aut = np.where(self.series[0].images['doy'][idx] >=
                                    rec['autumn_doy'])[0]
                    self.series[0].pheno[idx[_spr]] = 'SPR'
                    self.series[0].pheno[idx[_sum]] = 'SUM'
                    self.series[0].pheno[idx[_aut]] = 'AUT'

    def get_prediction(self, series, band, dates=None):
        """ Return prediction for a given band

        Args:
          series (int): index of Series used for prediction
          band (int): index of band to return
          dates (iterable): list or np.ndarray of ordinal dates to predict; if
            None, predicts for every date within timeseries (default: None)

        Returns:
          iterable: sequence of tuples (1D NumPy arrays, x and y) containing
            predictions

        """
        if series > 0:
            return
        if self.yatsm_model is None or len(self.yatsm_model.record) == 0:
            return
        if band >= self.yatsm_model.record[self.coef_name].shape[2]:
            logger.debug('No results for band %i' % band)
            return

        # Setup output
        mx = []
        my = []

        # Don't predict with any categorical information
        design = re.sub(r'[\+\-][\ ]+C\(.*\)', '', self._design)
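        # e.g., a design such as '1 + x + harm(x, 1) + C(sensor)' would be
        # reduced to '1 + x + harm(x, 1)'; the categorical term here is
        # illustrative, since the default _design carries no C() factors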
        coef_columns = []
        for k, v in self._design_info.column_name_indexes.items():
            if not re.match(r'C\(.*\)', k):
                coef_columns.append(v)
        coef_columns = np.asarray(coef_columns)

        for rec in self.yatsm_model.record:
            # Check for reverse
            if rec['end'] < rec['start']:
                i_step = -1
            else:
                i_step = 1
            # Date range to predict
            if dates is not None:
                end = max(rec['break'], rec['end'])
                _mx = dates[np.where((dates >= rec['start']) &
                                     (dates <= end))[0]]
            else:
                _mx = np.arange(rec['start'], rec['end'], i_step)

            if _mx.size == 0:
                continue
            # Coefficients to use for prediction
            _coef = rec[self.coef_name][coef_columns, band]
            # Setup design matrix
            _mX = patsy.dmatrix(design, {'x': _mx}).T
            # Predict
            _my = np.dot(_coef, _mX)
            # Transform ordinal back to datetime for plotting
            _mx = np.array([dt.fromordinal(int(_x)) for _x in _mx])

            mx.append(_mx)
            my.append(_my)

        return mx, my
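        # Hypothetical usage sketch (not from the source): once fetch_results()
        # has populated self.yatsm_model, predictions can be plotted per band:
        #
        #     mx, my = driver.get_prediction(series=0, band=3)
        #     for _x, _y in zip(mx, my):
        #         axis.plot(_x, _y)
        #
        # the band index and the plotting call are illustrative only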

    def get_breaks(self, series, band):
        """ Return break points for a given band

        Args:
          series (int): index of Series for prediction
          band (int): index of band to return

        Returns:
          iterable: sequence of tuples (1D NumPy arrays, x and y) containing
            break points

        """
        if self.yatsm_model is None:
            return
        # Setup output
        bx = []
        by = []

        if len(self.yatsm_model.record) > 0:
            for rec in self.yatsm_model.record:
                if rec['break'] != 0:
                    _bx = dt.fromordinal(int(rec['break']))
                    index = np.where(self.series[series].images['date']
                                     == _bx)[0]
                    if (index.size > 0 and
                            index[0] < self.series[series].data.shape[1]):
                        bx.append(_bx)
                        by.append(self.series[series].data[band, index[0]])
                    else:
                        logger.warning('Could not determine breakpoint')

        return bx, by

    def get_residuals(self, series, band):
        """ Return model residuals (y - predicted yhat) for a given band

        Args:
          series (int): index of Series for residuals
          band (int): index of band to return

        Returns:
          iterable: sequence of tuples (1D NumPy arrays, x and y) containing
            residual dates and values

        """
        if self.yatsm_model is None:
            return
        rx, ry = [], []

        X, y = self.get_data(series, band, mask=settings.plot['mask'])
        predict = self.get_prediction(series, band, dates=X['ordinal'])
        if predict is None:
            return
        date, yhat = predict

        for _date, _yhat in zip(date, yhat):
            idx = np.in1d(X['date'], _date)
            resid = y[idx] - _yhat

            rx.append(_date)
            ry.append(resid)

        return rx, ry

    def get_plot(self, series, band, axis, desc):
        """ Plot some information on an axis for a plot of some description

        Args:
          series (int): index of Series for residuals
          band (int): index of band to return
          axis (matplotlib.axes._subplots.Axes): a matplotlib axis to plot on
          desc (str): description of plot, usually a plot class from
            `tstools.plots`

        Returns:
          iterable: list of artists to include in legend

        """
        artists = []
        if desc == 'TSPlot':
            for rec in self.yatsm_model.record:
                _x = (rec['start'] + rec['end']) / 2.0
                _x, _y = self.get_prediction(series, band,
                                             dates=np.array([_x]))
                _x = _x[0][0]
                _y = _y[0][0] + 250
                axis.text(_x, _y, 'RMSE: %.3f' % rec['rmse'][band],
                          fontsize=18,
                          horizontalalignment='center')
        elif desc == 'DOYPlot':
            names = self.yatsm_model.record.dtype.names
            if self._calc_pheno and all([r in names for r in
                                         ('spring_doy', 'autumn_doy')]):
                colors = mpl.cm.Set1(np.linspace(0, 1, 9))[:, :-1]

                color_cycle = itertools.cycle(colors)
                for i, rec in enumerate(self.yatsm_model.record):
                    col = list(next(color_cycle))
                    artists.append(
                        axis.axvline(rec['spring_doy'], label='Model %i' % i,
                                     c=col, lw=2)
                    )
                    axis.axvline(rec['autumn_doy'], label='Model %i' % i,
                                 c=col, lw=2)

        return artists

# RESULTS HELPER METHODS
    def _fetch_results_saved(self):
        """ Read YATSM results and return """
        raise NotImplementedError('No saved results reading just yet...')

    def _fetch_results_live(self):
        """ Run YATSM and get results """
        logger.debug('Calculating YATSM results on the fly')
        # Setup design matrix, Y, and dates
        self.X = patsy.dmatrix(self._design,
                               {
                                   'x': self.series[0].images['ordinal'],
                                   'sensor': self.series[0].sensor,
                                   'pr': self.series[0].pathrow
                               })
        self._design_info = self.X.design_info
        self.Y = self.series[0].data.astype(np.int16)
        self.dates = np.asarray(self.series[0].images['ordinal'])

        mask = self.Y[self._mask_band[0] - 1, :]
        Y_data = np.delete(self.Y, self._mask_band[0] - 1, axis=0)
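        # Shapes at this point: self.X is (n_obs, n_terms) from patsy, Y_data
        # is (n_bands - 1, n_obs) with the mask band removed, and mask and
        # self.dates are 1D vectors of length n_obs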

        # Mask out masked values
        clear = np.in1d(mask, self.mask_values, invert=True)
        valid = get_valid_mask(Y_data,
                               self._min_values,
                               self._max_values).astype(bool)
        clear &= valid

        # Setup parameters
        lm = sklearn.linear_model.Lasso(alpha=20)
        reg = self._regression_type
        logger.debug('Regression type: %s' % reg)
        if hasattr(yatsm.regression, 'packaged'):
            if reg in yatsm.regression.packaged.packaged_regressions:
                reg_fn = yatsm.regression.packaged.find_packaged_regressor(reg)
                try:
                    lm = jl.load(reg_fn)
                except Exception:
                    logger.error('Cannot load regressor: %s' % reg)
                else:
                    logger.debug('Loaded regressor %s from %s' % (reg, reg_fn))
            else:
                logger.error('Cannot use unknown regression %s' % reg)
        else:
            logger.warning('Using failsafe Lasso(alpha=20) from scikit-learn. '
                           'Upgrade to yatsm>=0.5.1 to access more regressors.')

        kwargs = dict(
            test_indices=self._test_indices,
            consecutive=self._consecutive,
            threshold=self._threshold,
            min_obs=self._min_obs,
            min_rmse=self._min_rmse if self._enable_min_rmse else None,
            screening_crit=self._screen_crit,
            remove_noise=self._remove_noise,
            dynamic_rmse=self._dynamic_rmse,
        )

        self.yatsm_model = CCDCesque(lm=lm, **kwargs)
        # Don't want to have DEBUG logging when we run YATSM
        log_level = logger.level
        logger.setLevel(logging.INFO)

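        # For a reverse run, the observation axis of every input is flipped so
        # that rows of X, columns of Y, and the date vector stay aligned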
        if self._reverse:
            self.yatsm_model.fit(
                np.flipud(self.X[clear, :]),
                np.fliplr(Y_data[:, clear]),
                self.dates[clear][::-1])
        else:
            self.yatsm_model.fit(
                self.X[clear, :],
                Y_data[:, clear],
                self.dates[clear])

        if self._commit_test:
            self.yatsm_model.record = postprocess.commission_test(
                self.yatsm_model, self._commit_alpha)

        # if self._robust_results:
        #     self.coef_name = 'robust_coef'
        #     self.yatsm_model.record = postprocess.refit_record(
        #         self.yatsm_model, 'robust')
        # else:
        #     self.coef_name = 'coef'

        if self._calc_pheno:
            # TODO: parameterize band indices & scale factor
            ltm = pheno.LongTermMeanPhenology(self.yatsm_model)
            self.yatsm_model.record = ltm.fit()

        # Restore log level
        logger.setLevel(log_level)

# SETUP
    def _init_metadata(self):
        """ Setup metadata for series """
        # Find MTL file
        self.mtl_files = None
        if self._metadata_file_pattern:
            search = find_files(self.location, self._metadata_file_pattern,
                                ignore_dirs=[self._results_folder])
            if len(search) == 0:
                logger.error(
                    'Could not find image metadata with pattern {p}'.format(
                        p=self._metadata_file_pattern))
            if len(search) != len(self.series[0].images['date']):
                logger.error('Inconsistent number of metadata files found: '
                             '{0} images vs {1} metadata files'.format(
                                len(self.series[0].images['date']),
                                len(search)))
            else:
                self.mtl_files = search

        # Setup metadata for series
        self.series[0].metadata = ['sensor', 'pathrow', 'multitemp_screened']
        self.series[0].metadata_names = ['Sensor', 'Path/Row',
                                         'Multitemp Screened']
        self.series[0].metadata_table = [False, False, False]

        # Sensor ID
        self.series[0].sensor = np.array([n[0:3] for n in
                                          self.series[0].images['filename']])
        # Path/row
        self.series[0].pathrow = np.array([
            'p{p}r{r}'.format(p=n[3:6], r=n[6:9]) for
            n in self.series[0].images['filename']])
        # Multitemporal noise screening - initialize all to 1;
        #   actual screening flags are filled in during model fitting
        self.series[0].multitemp_screened = np.ones(self.series[0].n)
        # Make an entry 0 so we get this in the unique values
        self.series[0].multitemp_screened[0] = 0

        # If we found MTL files, find cloud cover
        if self.mtl_files is not None:
            self.series[0].metadata.append('cloud_cover')
            self.series[0].metadata_names.append('Cloud cover')
            self.series[0].metadata_table.append(True)
            self.series[0].cloud_cover = np.zeros(self.series[0].n)
            for i, mtl_file in enumerate(self.mtl_files):
                self.series[0].cloud_cover[i] = parse_landsat_MTL(
                    mtl_file, 'CLOUD_COVER')

        if self._calc_pheno:
            self.series[0].metadata.append('pheno')
            self.series[0].metadata_names.append('Phenology')
            self.series[0].metadata_table.append(False)
            # Initialize almost all as summer (SUM); first two as SPR/AUT
            self.series[0].pheno = np.repeat('SUM', self.series[0].n)
            self.series[0].pheno[0] = 'SPR'
            self.series[0].pheno[1] = 'AUT'

    def _check_yatsm(self):
        """ Check if YATSM is available
        """
        try:
            global yatsm
            global CCDCesque, postprocess
            global harm
            global get_valid_mask
            import yatsm
            from yatsm.algorithms import CCDCesque, postprocess
            from yatsm._cyprep import get_valid_mask
            from yatsm.regression.transforms import harm
        except ImportError as e:
            raise ImportError('Could not import YATSM because it could not '
                              'import a dependency (%s)' % str(e))
        except Exception as e:
            raise ImportError('Could not import YATSM for an unknown reason '
                              '(%s)' % str(e))
        else:
            self.has_results = True

        if self._calc_pheno:
            try:
                global pheno
                import yatsm.phenology as pheno
            except Exception:
                msg = ('Could not import YATSM phenology module. '
                       'Make sure you have R and rpy2 installed.')
                raise ImportError(msg)
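
A minimal usage sketch of the driver above (not part of the original example). The stack location, band index, and call order are assumptions; reading the pixel's observations into the series is left to the parent StackedTimeSeries driver and is not shown.

    # Hypothetical usage; path and band index are illustrative placeholders
    driver = YATSMTimeSeries('/path/to/landsat/stack')
    # ... parent driver reads a pixel's observations into driver.series[0] ...
    driver.fetch_results()                          # runs CCDCesque on the fly
    pred = driver.get_prediction(series=0, band=3)  # band index is illustrative
    if pred is not None:
        for dates, values in zip(*pred):
            print('%s - %s: mean %.1f' % (dates[0], dates[-1], values.mean()))
    bx, by = driver.get_breaks(series=0, band=3)    # detected break dates/values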