def time_ccdcesque1(self, setup):
    """ Bench with 'defaults' defined in setup with most tests turned off """
    kwargs = version_kwargs(setup['kwargs'])
    for i in range(n):
        model = CCDCesque(**kwargs)
        model.fit(setup['X'], setup['Y'], setup['dates'])

def pixel_CCDCesque(pipe, require, output, config=None):
    """ Run :class:`yatsm.algorithms.CCDCesque` on a pixel

    Users should pass to ``require`` both ``X`` and ``Y`` arguments, which
    are interpreted as:

    .. code-block:: python

        X, Y = require[0], require[1:]

    Args:
        pipe (yatsm.pipeline.Pipe): Piped data to operate on
        require (dict[str, list[str]]): Labels for the requirements of this
            calculation
        output (dict[str, list[str]]): Label for the result of this
            calculation
        config (dict): Configuration to pass to :class:`CCDCesque`. Should
            contain an ``init`` section

    Returns:
        yatsm.pipeline.Pipe: Piped output

    """
    config = config or {}

    XY = pipe.data[require['data']].dropna('time', how='any')
    X = XY[require['data'][0]]
    Y = XY[require['data'][1:]].to_array()

    model = CCDCesque(**config.get('init', {}))
    model.py, model.px = Y.y, Y.x
    model = model.fit(X, Y.values, XY['ordinal'])

    pipe.record[output[RECORD][0]] = model.record

    return pipe

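# A minimal usage sketch for the pipeline-style `pixel_CCDCesque` above. The
# `Pipe` namedtuple stand-in, the band labels, and the assumption that the
# module-level ``RECORD`` constant equals 'record' are illustrative; only the
# require/output/config layout mirrors the function signature shown.
import collections

Pipe = collections.namedtuple('Pipe', ['data', 'record'])  # hypothetical stand-in

require = {'data': ['X', 'blue', 'nir']}   # first label is X, the rest are Y bands
output = {'record': ['ccdc']}              # label under which the record is stored
config = {'init': {'consecutive': 5, 'threshold': 4.0}}

# `pipe.data` is assumed to be an xarray.Dataset with a 'time' dimension and
# an 'ordinal' variable, matching what the function body indexes:
# pipe = Pipe(data=some_xr_dataset, record={})
# pipe = pixel_CCDCesque(pipe, require, output, config=config)
# pipe.record['ccdc']   # structured array of fitted CCDCesque segments
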
def time_ccdcesque3(self, setup):
    """ Bench with remove_noise, dynamic_rmse turned on """
    kwargs = version_kwargs(setup['kwargs'])
    kwargs.update({'remove_noise': True, 'dynamic_rmse': True})
    for i in range(n):
        model = CCDCesque(**kwargs)
        model.fit(setup['X'], setup['Y'], setup['dates'])

def time_ccdcesque2(self, setup):
    """ Bench with remove_noise turned on """
    kwargs = version_kwargs(setup['kwargs'])
    kwargs.update({'remove_noise': True})
    for i in range(n):
        model = CCDCesque(**kwargs)
        model.fit(setup['X'], setup['Y'], setup['dates'])

def time_ccdcesque1(self, setup):
    """ Bench with 'defaults' defined in setup with most tests turned off """
    kwargs = version_kwargs(setup['kwargs'])
    model = CCDCesque(**kwargs)
    for col in range(setup['Y'].shape[-1]):
        _Y, _X, _dates = setup['Y'][..., col], setup['X'], setup['dates']
        mask = np.in1d(_Y[-1, :], [0, 1])
        model.fit(_X[mask, :], _Y[:, mask], _dates[mask])

def time_ccdcesque3(self, setup):
    """ Bench with remove_noise, dynamic_rmse turned on """
    kwargs = version_kwargs(setup['kwargs'])
    kwargs.update({'remove_noise': True, 'dynamic_rmse': True})
    model = CCDCesque(**kwargs)
    for col in range(setup['Y'].shape[-1]):
        _Y, _X, _dates = setup['Y'][..., col], setup['X'], setup['dates']
        mask = np.in1d(_Y[-1, :], [0, 1])
        model.fit(_X[mask, :], _Y[:, mask], _dates[mask])

def time_ccdcesque2(self, setup):
    """ Bench with remove_noise turned on """
    kwargs = version_kwargs(setup['kwargs'])
    kwargs.update({'remove_noise': True})
    model = CCDCesque(**kwargs)
    for col in range(setup['Y'].shape[-1]):
        _Y, _X, _dates = setup['Y'][..., col], setup['X'], setup['dates']
        mask = np.in1d(_Y[-1, :], [0, 1])
        model.fit(_X[mask, :], _Y[:, mask], _dates[mask])

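# A rough sketch of the `setup` dict the column-wise benchmarks above expect:
# 'Y' is assumed to be an (n_bands x n_obs x n_pixels) array whose last band
# holds a 0/1 clear-observation mask, 'X' an (n_obs x n_features) design
# matrix, and 'dates' ordinal dates. The shapes, the repeat count `n`, and
# the Lasso estimator are assumptions for illustration; `version_kwargs`
# comes from the benchmark module itself.
import numpy as np
import sklearn.linear_model

n = 10  # number of benchmark repetitions (assumed)

def make_setup(n_obs=500, n_bands=8, n_pixels=50):
    rng = np.random.RandomState(123456789)
    dates = np.sort(rng.choice(np.arange(730000, 735000), n_obs,
                               replace=False))
    X = np.column_stack((np.ones(n_obs), dates))        # toy '1 + x' design
    Y = rng.randint(0, 10000, (n_bands, n_obs, n_pixels)).astype(np.int16)
    Y[-1] = rng.randint(0, 2, (n_obs, n_pixels))        # 0/1 "clear" mask band
    kwargs = {
        'estimator': sklearn.linear_model.Lasso(alpha=20),
        'test_indices': np.array([2, 3, 4, 5]),
        'consecutive': 5,
        'threshold': 4.0,
        'min_obs': 16,
    }
    return {'kwargs': kwargs, 'X': X, 'Y': Y, 'dates': dates}
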
def _fetch_results_saved(self):
    """ Read YATSM results and return """
    self.yatsm_model = MockResult()
    row, col = self.series[0].py, self.series[0].px

    data_cfg = {
        'output': os.path.join(self.location,
                               self.config['results_folder'].value),
        'output_prefix': (self.config['results_pattern'].value
                          .replace('*', ''))
    }
    result_filename = get_output_name(data_cfg, row)
    logger.info('Attempting to open: {f}'.format(f=result_filename))

    if not os.path.isfile(result_filename):
        qgis_log('Could not find result for row {r} ({fn})'.format(
            r=row, fn=result_filename))
        return

    z = np.load(result_filename)
    if 'record' not in z.files:
        raise KeyError('Cannot find "record" within saved result ({})'
                       .format(result_filename))
    if 'metadata' not in z.files:
        raise KeyError('Cannot find "metadata" within saved result ({})'
                       .format(result_filename))
    metadata = z['metadata'].item()
    if 'design' not in metadata['YATSM']:
        raise KeyError('Cannot find "design" within saved result metadata '
                       '({})'.format(result_filename))
    self._design_info = metadata['YATSM']['design']

    rec = z['record']
    idx = np.where((rec['px'] == col) & (rec['py'] == row))[0]
    self.yatsm_model.record = rec[idx]

def pixel_CCDCesque(work, require, output, **config):
    """ Run CCDCesque on a pixel """
    arr = work['data'][require['data']].dropna('time', how='any').to_array()

    model = CCDCesque(**config.get('init', {}))
    model.py, model.px = arr.y, arr.x

    ordinal = arr.indexes['time'].map(lambda x: x.toordinal())
    design = config.get('fit', {}).get('design', '1 + ordinal')
    X = patsy.dmatrix(design,
                      data=arr,
                      eval_env=patsy.EvalEnvironment.capture())

    work['record'][output['record'][0]] = model.fit(X, arr.values, ordinal)

    return work

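# Example of the keyword configuration the `pixel_CCDCesque` variant above
# reads via ``**config``: an 'init' section forwarded to CCDCesque() and a
# 'fit' section whose 'design' string is handed to patsy. The specific
# values and the 'red'/'nir' labels are illustrative assumptions.
config = {
    'init': {
        'consecutive': 5,
        'threshold': 4.0,
        'min_obs': 16,
        'remove_noise': True,
        'dynamic_rmse': True,
    },
    'fit': {
        'design': '1 + ordinal',   # default used above when 'fit' is absent
    },
}

# work = pixel_CCDCesque(work, {'data': ['red', 'nir']},
#                        {'record': ['ccdc']}, **config)
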
class YATSMTimeSeries(timeseries_stacked.StackedTimeSeries):
    """ Timeseries driver for CCDCesque algorithm implemented in YATSM

    Requires a working installation of YATSM. For more information, visit
    the [YATSM Github website](https://github.com/ceholden/yatsm).

    This driver requires the following Python packages in addition to basic
    TSTools package dependencies:

    * [`scikit-learn`](http://scikit-learn.org/stable/)
    * [`patsy`](https://patsy.readthedocs.org/en/latest/)
    * [`yatsm`](https://github.com/ceholden/yatsm)

    """
    description = 'YATSM CCDCesque Timeseries'
    location = None
    mask_values = np.array([2, 3, 4, 255])
    has_results = True

    # Driver configuration
    config = OrderedDict((
        ('stack_pattern', ConfigItem('Stack pattern', 'L*stack')),
        ('date_index', ConfigItem('Date index', [9, 16])),
        ('date_format', ConfigItem('Date format', '%Y%j')),
        ('cache_folder', ConfigItem('Cache folder', 'cache')),
        ('results_folder', ConfigItem('Results folder', 'YATSM')),
        ('results_pattern', ConfigItem('Results pattern', 'yatsm_r*')),
        ('mask_band', ConfigItem('Mask band', [8])),
        ('min_values', ConfigItem('Min data values', [0])),
        ('max_values', ConfigItem('Max data values', [10000])),
        ('metadata_file_pattern',
            ConfigItem('Metadata file pattern', 'L*MTL.txt')),
        ('calc_pheno', ConfigItem('LTM phenology', False)),
    ))

    # Driver controls
    controls_title = 'YATSM Algorithm Options'
    controls = OrderedDict((
        ('calculate_live', ConfigItem('Calculate live', True)),
        ('consecutive', ConfigItem('Consecutive', 5)),
        ('min_obs', ConfigItem('Min obs.', 16)),
        ('threshold', ConfigItem('Threshold', 4.0)),
        ('enable_min_rmse', ConfigItem('Use min RMSE?', True)),
        ('min_rmse', ConfigItem('Min RMSE', 100.0)),
        ('design', ConfigItem('Design', '1 + x + harm(x, 1)')),
        ('test_indices', ConfigItem('Test indices', np.array([2, 3, 4, 5]))),
        ('dynamic_rmse', ConfigItem('Dynamic RMSE', True)),
        ('screen_crit', ConfigItem('Screening crit value', 400.0)),
        ('remove_noise', ConfigItem('Remove noise', True)),
        ('reverse', ConfigItem('Reverse', False)),
        ('regression_type', ConfigItem('Regression type', 'sklearn_Lasso20')),
        ('robust_results', ConfigItem('Robust results', False)),
        ('commit_test', ConfigItem('Commission test', False)),
        ('commit_alpha', ConfigItem('Commission test alpha', 0.10)),
    ))

    def __init__(self, location, config=None):
        super(YATSMTimeSeries, self).__init__(location, config=config)
        # Check for YATSM imports
        if not has_yatsm:
            raise ImportError(has_yatsm_msg)
        if self.config['calc_pheno'].value and not has_yatsm_pheno:
            raise ImportError(has_yatsm_pheno_msg)
        # Find extra metadata
        self._init_metadata()

        # Setup YATSM
        self.yatsm_model = None
        self.X = None
        self.Y = None
        self.coef_name = 'coef'

        # Setup min/max values
        desc, _min_values = self.config['min_values']
        if len(_min_values) == 1:
            _min_values = np.repeat(_min_values, self.series[0].count - 1)
        self.config['min_values'] = ConfigItem(desc, _min_values)
        desc, _max_values = self.config['max_values']
        if len(_max_values) == 1:
            _max_values = np.repeat(_max_values, self.series[0].count - 1)
        self.config['max_values'] = ConfigItem(desc, _max_values)

    def set_custom_controls(self, values):
        logger.debug('Setting custom values')
        for val, attr in zip(values, self.controls):
            desc, current_val = self.controls[attr]
            if isinstance(val, type(current_val)):
                self.controls[attr] = ConfigItem(desc, val)
            else:
                # Make an exception for minimum RMSE since we can pass None
                if attr == 'min_rmse' and isinstance(val, float):
                    self.controls[attr] = ConfigItem(desc, val)
                else:
                    msg = 'Could not set {k} to {v} (current: {c})'.format(
                        k=attr, v=val, c=current_val)
                    raise ValueError(msg)

    def fetch_results(self):
        """ Read or calculate results for current pixel """
        if self.controls['calculate_live'].value:
            self._fetch_results_live()
        else:
            self._fetch_results_saved()

        # Update multitemporal screening metadata
        if self.yatsm_model:
            if (self.controls['calculate_live'].value and
                    hasattr(self.yatsm_model, 'X')):
                self.series[0].multitemp_screened = \
                    np.in1d(self.X[:, 1], self.yatsm_model.X[:, 1],
                            invert=True).astype(np.uint8)
            if self.config['calc_pheno'].value:
                for rec in self.yatsm_model.record:
                    # Find dates in record
                    idx = np.where(
                        (self.series[0].images['ordinal'] >= rec['start']) &
                        (self.series[0].images['ordinal'] <= rec['end']))[0]
                    # Put observations into SPR/SUM/AUT
                    _spr = np.where(self.series[0].images['doy'][idx] <=
                                    rec['spring_doy'])[0]
                    _sum = np.where((self.series[0].images['doy'][idx] >
                                     rec['spring_doy']) &
                                    (self.series[0].images['doy'][idx] <
                                     rec['autumn_doy']))[0]
                    _aut = np.where(self.series[0].images['doy'][idx] >=
                                    rec['autumn_doy'])[0]

                    self.series[0].pheno[idx[_spr]] = 'SPR'
                    self.series[0].pheno[idx[_sum]] = 'SUM'
                    self.series[0].pheno[idx[_aut]] = 'AUT'

    def get_prediction(self, series, band, dates=None):
        """ Return prediction for a given band

        Args:
            series (int): index of Series used for prediction
            band (int): index of band to return
            dates (iterable): list or np.ndarray of ordinal dates to predict;
                if None, predicts for every date within timeseries
                (default: None)

        Returns:
            iterable: sequence of tuples (1D NumPy arrays, x and y)
                containing predictions

        """
        if series > 0:
            return
        if self.yatsm_model is None or len(self.yatsm_model.record) == 0:
            return
        if band >= self.yatsm_model.record[self.coef_name].shape[2]:
            logger.debug('No results for band %i' % band)
            return

        # Setup output
        mx = []
        my = []

        # Don't predict with any categorical information
        eqn = (self.controls['design'].value
               if self.controls['calculate_live'].value else self._design)
        design = re.sub(r'[\+\-][\ ]+C\(.*\)', '', eqn)
        coef_columns = []
        for k, v in self._design_info.iteritems():
            if not re.match('C\(.*\)', k):
                coef_columns.append(v)
        coef_columns = np.sort(np.asarray(coef_columns))

        for rec in self.yatsm_model.record:
            # Check for reverse
            if rec['end'] < rec['start']:
                i_step = -1
            else:
                i_step = 1
            # Date range to predict
            if dates is not None:
                end = max(rec['break'], rec['end'])
                _mx = dates[np.where((dates >= rec['start']) &
                                     (dates <= end))[0]]
            else:
                _mx = np.arange(rec['start'], rec['end'], i_step)
            if _mx.size == 0:
                continue
            # Coefficients to use for prediction
            _coef = rec[self.coef_name][coef_columns, band]
            # Setup design matrix
            _mX = patsy.dmatrix(design, {'x': _mx}).T
            # Predict
            _my = np.dot(_coef, _mX)
            # Transform ordinal back to datetime for plotting
            _mx = np.array([dt.fromordinal(int(_x)) for _x in _mx])

            mx.append(_mx)
            my.append(_my)

        return mx, my

    def get_breaks(self, series, band):
        """ Return break points for a given band

        Args:
            series (int): index of Series for prediction
            band (int): index of band to return

        Returns:
            iterable: sequence of tuples (1D NumPy arrays, x and y)
                containing break points

        """
        if self.yatsm_model is None:
            return
        # Setup output
        bx = []
        by = []

        if len(self.yatsm_model.record) > 0:
            for rec in self.yatsm_model.record:
                if rec['break'] != 0:
                    _bx = dt.fromordinal(int(rec['break']))
                    index = np.where(
                        self.series[series].images['date'] == _bx)[0]
                    if (index.size > 0 and
                            index[0] < self.series[series].data.shape[1]):
                        bx.append(_bx)
                        by.append(self.series[series].data[band, index[0]])
                    else:
                        logger.warning('Could not determine breakpoint')

        return bx, by

    def get_residuals(self, series, band):
        """ Return model residuals (y - predicted yhat) for a given band

        Args:
            series (int): index of Series for residuals
            band (int): index of band to return

        Returns:
            iterable: sequence of tuples (1D NumPy arrays, x and y)
                containing residual dates and values

        """
        if self.yatsm_model is None:
            return
        rx, ry = [], []

        X, y = self.get_data(series, band, mask=settings.plot['mask'])
        predict = self.get_prediction(series, band, dates=X['ordinal'])
        if predict is None:
            return
        date, yhat = predict

        for _date, _yhat in zip(date, yhat):
            idx = np.in1d(X['date'], _date)
            resid = y[idx] - _yhat

            rx.append(_date)
            ry.append(resid)

        return rx, ry

    def get_plot(self, series, band, axis, desc):
        """ Plot some information on an axis for a plot of some description

        Args:
            series (int): index of Series for residuals
            band (int): index of band to return
            axis (matplotlib.axes._subplots.Axes): a matplotlib axis to
                plot on
            desc (str): description of plot, usually a plot class from
                `tstools.plots`

        Returns:
            iterable: list of artists to include in legend

        """
        artists = []
        if desc == 'TSPlot':
            for rec in self.yatsm_model.record:
                _x = (rec['start'] + rec['end']) / 2.0
                _x, _y = self.get_prediction(series, band,
                                             dates=np.array([_x]))
                _x = _x[0][0]
                _y = _y[0][0] + 250
                axis.text(_x, _y, 'RMSE: %.3f' % rec['rmse'][band],
                          fontsize=18, horizontalalignment='center')
        elif desc == 'DOYPlot':
            has_dates = all([r in self.yatsm_model.record.dtype.names
                             for r in ('spring_doy', 'autumn_doy')])
            if self.config['calc_pheno'].value and has_dates:
                colors = mpl.cm.Set1(np.linspace(0, 1, 9))[:, :-1]

                color_cycle = itertools.cycle(colors)
                for i, rec in enumerate(self.yatsm_model.record):
                    col = [c for c in color_cycle.next()]
                    artists.append(
                        axis.axvline(rec['spring_doy'], label='Model %i' % i,
                                     c=col, lw=2)
                    )
                    axis.axvline(rec['autumn_doy'], label='Model %i' % i,
                                 c=col, lw=2)

        return artists

    # RESULTS HELPER METHODS
    def _fetch_results_saved(self):
        """ Read YATSM results and return """
        self.yatsm_model = MockResult()
        row, col = self.series[0].py, self.series[0].px

        data_cfg = {
            'output': os.path.join(self.location,
                                   self.config['results_folder'].value),
            'output_prefix': (self.config['results_pattern'].value
                              .replace('*', ''))
        }
        result_filename = get_output_name(data_cfg, row)
        logger.info('Attempting to open: {f}'.format(f=result_filename))

        if not os.path.isfile(result_filename):
            qgis_log('Could not find result for row {r} ({fn})'.format(
                r=row, fn=result_filename))
            return

        z = np.load(result_filename)
        if 'record' not in z.files:
            raise KeyError('Cannot find "record" within saved result ({})'
                           .format(result_filename))
        if 'metadata' not in z.files:
            raise KeyError('Cannot find "metadata" within saved result ({})'
                           .format(result_filename))
        metadata = z['metadata'].item()
        if 'design' not in metadata['YATSM']:
            raise KeyError('Cannot find "design" within saved result '
                           'metadata ({})'.format(result_filename))
        self._design = metadata['YATSM']['design_matrix']
        self._design_info = metadata['YATSM']['design']

        rec = z['record']
        idx = np.where((rec['px'] == col) & (rec['py'] == row))[0]
        self.yatsm_model.record = rec[idx]

    def _fetch_results_live(self):
        """ Run YATSM and get results """
        logger.debug('Calculating YATSM results on the fly')

        # Setup design matrix, Y, and dates
        self.X = patsy.dmatrix(self.controls['design'].value,
                               {'x': self.series[0].images['ordinal'],
                                'sensor': self.series[0].sensor,
                                'pr': self.series[0].pathrow})
        self._design_info = self.X.design_info.column_name_indexes
        self.Y = self.series[0].data.astype(np.int16)
        self.dates = np.asarray(self.series[0].images['ordinal'])

        mask = self.Y[self.config['mask_band'].value[0] - 1, :]
        Y_data = np.delete(self.Y, self.config['mask_band'].value[0] - 1,
                           axis=0)

        # Mask out masked values
        clear = np.in1d(mask, self.mask_values, invert=True)
        valid = get_valid_mask(
            Y_data,
            self.config['min_values'].value,
            self.config['max_values'].value).astype(np.bool)
        clear *= valid

        # Setup parameters
        estimator = sklearn.linear_model.Lasso(alpha=20)
        reg = self.controls['regression_type'].value
        if hasattr(yatsm.regression, 'packaged'):
            if reg in yatsm.regression.packaged.packaged_regressions:
                reg_fn = yatsm.regression.packaged.find_packaged_regressor(reg)
                try:
                    estimator = jl.load(reg_fn)
                except:
                    logger.error('Cannot load regressor: %s' % reg)
                else:
                    logger.debug('Loaded regressor %s from %s' % (reg, reg_fn))
            else:
                logger.error('Cannot use unknown regression %s' % reg)
        else:
            logger.warning(
                'Using failsafe Lasso(lambda=20) from scikit-learn. '
                'Upgrade to yatsm>=0.5.1 to access more regressors.')

        kwargs = dict(
            estimator=estimator,
            test_indices=self.controls['test_indices'].value,
            consecutive=self.controls['consecutive'].value,
            threshold=self.controls['threshold'].value,
            min_obs=self.controls['min_obs'].value,
            min_rmse=(None if self.controls['enable_min_rmse'].value else
                      self.controls['min_rmse'].value),
            screening_crit=self.controls['screen_crit'].value,
            remove_noise=self.controls['remove_noise'].value,
            dynamic_rmse=self.controls['dynamic_rmse'].value,
        )
        self.yatsm_model = CCDCesque(**version_kwargs(kwargs))

        # Don't want to have DEBUG logging when we run YATSM
        log_level = logger.level
        logger.setLevel(logging.INFO)

        if self.controls['reverse'].value:
            self.yatsm_model.fit(
                np.flipud(self.X[clear, :]),
                np.fliplr(Y_data[:, clear]),
                self.dates[clear][::-1])
        else:
            self.yatsm_model.fit(
                self.X[clear, :], Y_data[:, clear], self.dates[clear])

        if self.controls['commit_test'].value:
            self.yatsm_model.record = postprocess.commission_test(
                self.yatsm_model, self.controls['commit_alpha'].value)

        # if self.controls['robust_results'].value:
        #     self.coef_name = 'robust_coef'
        #     self.yatsm_model.record = postprocess.refit_record(
        #         self.yatsm_model, 'robust')
        # else:
        #     self.coef_name = 'coef'

        if self.config['calc_pheno'].value:
            # TODO: parameterize band indices & scale factor
            ltm = pheno.LongTermMeanPhenology()
            self.yatsm_model.record = ltm.fit(self.yatsm_model)

        # Restore log level
        logger.setLevel(log_level)

    # SETUP
    def _init_metadata(self):
        """ Setup metadata for series """
        # Find MTL file
        self.mtl_files = None
        if self.config['metadata_file_pattern'].value:
            search = find_files(
                self.location,
                self.config['metadata_file_pattern'].value,
                ignore_dirs=[self.config['results_folder'].value])
            if len(search) == 0:
                logger.error(
                    'Could not find image metadata with pattern {p}'.format(
                        p=self.config['metadata_file_pattern'].value))
            if len(search) != len(self.series[0].images['date']):
                logger.error(
                    'Inconsistent number of metadata files found: '
                    '{0} images vs {1} metadata files'.format(
                        len(self.series[0].images['date']), len(search)))
            else:
                self.mtl_files = search

        # Setup metadata for series
        self.series[0].metadata = ['sensor', 'pathrow', 'multitemp_screened']
        self.series[0].metadata_names = ['Sensor', 'Path/Row',
                                         'Multitemp Screened']
        self.series[0].metadata_table = [False, False, False]

        # Sensor ID
        self.series[0].sensor = np.array(
            [n[0:3] for n in self.series[0].images['filename']])
        # Path/row
        self.series[0].pathrow = np.array([
            'p{p}r{r}'.format(p=n[3:6], r=n[6:9])
            for n in self.series[0].images['filename']])

        # Multitemporal noise screening - init to 0 (not screened)
        #   Will update this during model fitting
        self.series[0].multitemp_screened = np.ones(self.series[0].n)
        # Make an entry 0 so we get this in the unique values
        self.series[0].multitemp_screened[0] = 0

        # If we found MTL files, find cloud cover
        if self.mtl_files is not None:
            self.series[0].metadata.append('cloud_cover')
            self.series[0].metadata_names.append('Cloud cover')
            self.series[0].metadata_table.append(True)
            self.series[0].cloud_cover = np.ones(self.series[0].n) * -9999
            cloud_cover = {}
            for mtl_file in self.mtl_files:
                attrs = parse_landsat_MTL(
                    mtl_file, ['LANDSAT_SCENE_ID', 'CLOUD_COVER'])
                scene_ID = attrs.get('LANDSAT_SCENE_ID')
                if scene_ID:
                    cloud_cover[scene_ID] = attrs.get('CLOUD_COVER', -9999.0)
            for idx, _id in enumerate(self.series[0].images['id']):
                self.series[0].cloud_cover[idx] = cloud_cover.get(_id,
                                                                  -9999.0)

        if self.config['calc_pheno'].value:
            self.series[0].metadata.append('pheno')
            self.series[0].metadata_names.append('Phenology')
            self.series[0].metadata_table.append(False)
            # Initialize almost all as summer (SUM); first two as SPR/AUT
            self.series[0].pheno = np.repeat('SUM', self.series[0].n)
            self.series[0].pheno[0] = 'SPR'
            self.series[0].pheno[1] = 'AUT'

class YATSMTimeSeries(timeseries_stacked.StackedTimeSeries):
    """ Timeseries driver for YATSM algorithm """
    description = 'YATSM CCDCesque Timeseries'
    location = None
    mask_values = np.array([2, 3, 4, 255])

    # Driver configuration
    _stack_pattern = 'L*stack'
    _date_index = [9, 16]
    _date_format = '%Y%j'
    _cache_folder = 'cache'
    _results_folder = 'YATSM'
    _results_pattern = 'yatsm_r*'
    _mask_band = [8]
    _min_values = [0]
    _max_values = [10000]
    _metadata_file_pattern = 'L*MTL.txt'
    _calc_pheno = False

    config = ['_stack_pattern', '_date_index', '_date_format',
              '_cache_folder', '_results_folder', '_results_pattern',
              '_mask_band', '_min_values', '_max_values',
              '_metadata_file_pattern', '_calc_pheno']
    config_names = ['Stack pattern', 'Date index', 'Date format',
                    'Cache folder', 'Results folder', 'Results pattern',
                    'Mask band', 'Min data values', 'Max data values',
                    'Metadata file pattern', 'LTM phenology']

    # Driver controls
    _calculate_live = True
    _consecutive = 5
    _min_obs = 16
    _threshold = 4.0
    _enable_min_rmse = True
    _min_rmse = 100
    _design = '1 + x + harm(x, 1)'
    _test_indices = np.array([2, 3, 4, 5])
    _dynamic_rmse = True
    _screen_crit = 400.0
    _remove_noise = True
    _reverse = False
    _robust_results = False
    _commit_test = False
    _commit_alpha = 0.01
    # Requires YATSM>=v0.5.0
    _regression_type = 'sklearn_Lasso20'

    controls_title = 'YATSM Algorithm Options'
    controls = ['_calculate_live', '_consecutive', '_min_obs', '_threshold',
                '_enable_min_rmse', '_min_rmse', '_design', '_test_indices',
                '_dynamic_rmse', '_screen_crit', '_remove_noise', '_reverse',
                '_regression_type', '_robust_results', '_commit_test',
                '_commit_alpha']
    controls_names = ['Calculate live', 'Consecutive', 'Min Observations',
                      'Threshold', 'Use min RMSE?', 'Min RMSE', 'Design',
                      'Test indices', 'Dynamic RMSE',
                      'Screening critical value', 'Remove noise',
                      'Run in reverse', 'Regression type', 'Robust results',
                      'Commission test', 'Commission test alpha']

    def __init__(self, location, config=None):
        super(YATSMTimeSeries, self).__init__(location, config=config)
        # Check for YATSM imports
        self._check_yatsm()
        # Find extra metadata
        self._init_metadata()

        # Setup YATSM
        self.yatsm_model = None
        self.X = None
        self.Y = None
        self.coef_name = 'coef'

        # Setup min/max values
        if len(self._min_values) == 1:
            self._min_values = self._min_values * (self.series[0].count - 1)
        if len(self._max_values) == 1:
            self._max_values = self._max_values * (self.series[0].count - 1)
        self._min_values = np.asarray(self._min_values)
        self._max_values = np.asarray(self._max_values)

    def set_custom_controls(self, values):
        logger.debug('Setting custom values')
        for v, k in zip(values, self.controls):
            current_value = getattr(self, k)
            if isinstance(v, type(current_value)):
                setattr(self, k, v)
            else:
                # Make an exception for minimum RMSE since we can pass None
                if k == 'min_rmse' and isinstance(v, float):
                    setattr(self, k, v)
                else:
                    msg = 'Could not set {k} to {v} (current: {c})'.format(
                        k=k, v=v, c=current_value)
                    raise Exception(msg)

    def fetch_results(self):
        """ Read or calculate results for current pixel """
        if self._calculate_live:
            self._fetch_results_live()
        else:
            self._fetch_results_saved()

        # Update multitemporal screening metadata
        if self.yatsm_model:
            self.series[0].multitemp_screened = \
                np.in1d(self.X[:, 1], self.yatsm_model.X[:, 1],
                        invert=True).astype(np.uint8)
            if self._calc_pheno:
                for rec in self.yatsm_model.record:
                    # Find dates in record
                    idx = np.where(
                        (self.series[0].images['ordinal'] >= rec['start']) &
                        (self.series[0].images['ordinal'] <= rec['end']))[0]
                    # Put observations into SPR/SUM/AUT
                    _spr = np.where(self.series[0].images['doy'][idx] <=
                                    rec['spring_doy'])[0]
                    _sum = np.where((self.series[0].images['doy'][idx] >
                                     rec['spring_doy']) &
                                    (self.series[0].images['doy'][idx] <
                                     rec['autumn_doy']))[0]
                    _aut = np.where(self.series[0].images['doy'][idx] >=
                                    rec['autumn_doy'])[0]

                    self.series[0].pheno[idx[_spr]] = 'SPR'
                    self.series[0].pheno[idx[_sum]] = 'SUM'
                    self.series[0].pheno[idx[_aut]] = 'AUT'

    def get_prediction(self, series, band, dates=None):
        """ Return prediction for a given band

        Args:
            series (int): index of Series used for prediction
            band (int): index of band to return
            dates (iterable): list or np.ndarray of ordinal dates to predict;
                if None, predicts for every date within timeseries
                (default: None)

        Returns:
            iterable: sequence of tuples (1D NumPy arrays, x and y)
                containing predictions

        """
        if series > 0:
            return
        if self.yatsm_model is None or len(self.yatsm_model.record) == 0:
            return
        if band >= self.yatsm_model.record[self.coef_name].shape[2]:
            logger.debug('No results for band %i' % band)
            return

        # Setup output
        mx = []
        my = []

        # Don't predict with any categorical information
        design = re.sub(r'[\+\-][\ ]+C\(.*\)', '', self._design)
        coef_columns = []
        for k, v in self._design_info.column_name_indexes.iteritems():
            if not re.match('C\(.*\)', k):
                coef_columns.append(v)
        coef_columns = np.asarray(coef_columns)

        for rec in self.yatsm_model.record:
            # Check for reverse
            if rec['end'] < rec['start']:
                i_step = -1
            else:
                i_step = 1
            # Date range to predict
            if dates is not None:
                end = max(rec['break'], rec['end'])
                _mx = dates[np.where((dates >= rec['start']) &
                                     (dates <= end))[0]]
            else:
                _mx = np.arange(rec['start'], rec['end'], i_step)
            if _mx.size == 0:
                continue
            # Coefficients to use for prediction
            _coef = rec[self.coef_name][coef_columns, band]
            # Setup design matrix
            _mX = patsy.dmatrix(design, {'x': _mx}).T
            # Predict
            _my = np.dot(_coef, _mX)
            # Transform ordinal back to datetime for plotting
            _mx = np.array([dt.fromordinal(int(_x)) for _x in _mx])

            mx.append(_mx)
            my.append(_my)

        return mx, my

    def get_breaks(self, series, band):
        """ Return break points for a given band

        Args:
            series (int): index of Series for prediction
            band (int): index of band to return

        Returns:
            iterable: sequence of tuples (1D NumPy arrays, x and y)
                containing break points

        """
        if self.yatsm_model is None:
            return
        # Setup output
        bx = []
        by = []

        if len(self.yatsm_model.record) > 0:
            for rec in self.yatsm_model.record:
                if rec['break'] != 0:
                    _bx = dt.fromordinal(int(rec['break']))
                    index = np.where(
                        self.series[series].images['date'] == _bx)[0]
                    if (index.size > 0 and
                            index[0] < self.series[series].data.shape[1]):
                        bx.append(_bx)
                        by.append(self.series[series].data[band, index[0]])
                    else:
                        logger.warning('Could not determine breakpoint')

        return bx, by

    def get_residuals(self, series, band):
        """ Return model residuals (y - predicted yhat) for a given band

        Args:
            series (int): index of Series for residuals
            band (int): index of band to return

        Returns:
            iterable: sequence of tuples (1D NumPy arrays, x and y)
                containing residual dates and values

        """
        if self.yatsm_model is None:
            return
        rx, ry = [], []

        X, y = self.get_data(series, band, mask=settings.plot['mask'])
        predict = self.get_prediction(series, band, dates=X['ordinal'])
        if predict is None:
            return
        date, yhat = predict

        for _date, _yhat in zip(date, yhat):
            idx = np.in1d(X['date'], _date)
            resid = y[idx] - _yhat

            rx.append(_date)
            ry.append(resid)

        return rx, ry

    def get_plot(self, series, band, axis, desc):
        """ Plot some information on an axis for a plot of some description

        Args:
            series (int): index of Series for residuals
            band (int): index of band to return
            axis (matplotlib.axes._subplots.Axes): a matplotlib axis to
                plot on
            desc (str): description of plot, usually a plot class from
                `tstools.plots`

        Returns:
            iterable: list of artists to include in legend

        """
        artists = []
        if desc == 'TSPlot':
            for rec in self.yatsm_model.record:
                _x = (rec['start'] + rec['end']) / 2.0
                _x, _y = self.get_prediction(series, band,
                                             dates=np.array([_x]))
                _x = _x[0][0]
                _y = _y[0][0] + 250
                axis.text(_x, _y, 'RMSE: %.3f' % rec['rmse'][band],
                          fontsize=18, horizontalalignment='center')
        elif desc == 'DOYPlot':
            names = self.yatsm_model.record.dtype.names
            if self._calc_pheno and all([r in names for r in
                                         ('spring_doy', 'autumn_doy')]):
                colors = mpl.cm.Set1(np.linspace(0, 1, 9))[:, :-1]

                color_cycle = itertools.cycle(colors)
                for i, rec in enumerate(self.yatsm_model.record):
                    col = [c for c in color_cycle.next()]
                    artists.append(
                        axis.axvline(rec['spring_doy'], label='Model %i' % i,
                                     c=col, lw=2)
                    )
                    axis.axvline(rec['autumn_doy'], label='Model %i' % i,
                                 c=col, lw=2)

        return artists

    # RESULTS HELPER METHODS
    def _fetch_results_saved(self):
        """ Read YATSM results and return """
        raise NotImplementedError('No saved results reading just yet...')

    def _fetch_results_live(self):
        """ Run YATSM and get results """
        logger.debug('Calculating YATSM results on the fly')

        # Setup design matrix, Y, and dates
        self.X = patsy.dmatrix(self._design,
                               {'x': self.series[0].images['ordinal'],
                                'sensor': self.series[0].sensor,
                                'pr': self.series[0].pathrow})
        self._design_info = self.X.design_info
        self.Y = self.series[0].data.astype(np.int16)
        self.dates = np.asarray(self.series[0].images['ordinal'])

        mask = self.Y[self._mask_band[0] - 1, :]
        Y_data = np.delete(self.Y, self._mask_band[0] - 1, axis=0)

        # Mask out masked values
        clear = np.in1d(mask, self.mask_values, invert=True)
        valid = get_valid_mask(Y_data,
                               self._min_values,
                               self._max_values).astype(np.bool)
        clear *= valid

        # Setup parameters
        lm = sklearn.linear_model.Lasso(alpha=20)
        reg = self._regression_type
        print(self._regression_type)
        if hasattr(yatsm.regression, 'packaged'):
            if reg in yatsm.regression.packaged.packaged_regressions:
                reg_fn = yatsm.regression.packaged.find_packaged_regressor(reg)
                try:
                    lm = jl.load(reg_fn)
                except:
                    logger.error('Cannot load regressor: %s' % reg)
                else:
                    logger.debug('Loaded regressor %s from %s' % (reg, reg_fn))
            else:
                logger.error('Cannot use unknown regression %s' % reg)
        else:
            logger.warning('Using failsafe Lasso(lambda=20) from '
                           'scikit-learn. Upgrade to yatsm>=0.5.1 to access '
                           'more regressors.')

        kwargs = dict(
            test_indices=self._test_indices,
            consecutive=self._consecutive,
            threshold=self._threshold,
            min_obs=self._min_obs,
            min_rmse=None if self._enable_min_rmse else self._min_rmse,
            screening_crit=self._screen_crit,
            remove_noise=self._remove_noise,
            dynamic_rmse=self._dynamic_rmse,
        )
        self.yatsm_model = CCDCesque(lm=lm, **kwargs)

        # Don't want to have DEBUG logging when we run YATSM
        log_level = logger.level
        logger.setLevel(logging.INFO)

        if self._reverse:
            self.yatsm_model.fit(
                np.flipud(self.X[clear, :]),
                np.fliplr(Y_data[:, clear]),
                self.dates[clear][::-1])
        else:
            self.yatsm_model.fit(
                self.X[clear, :], Y_data[:, clear], self.dates[clear])

        if self._commit_test:
            self.yatsm_model.record = postprocess.commission_test(
                self.yatsm_model, self._commit_alpha)

        # if self._robust_results:
        #     self.coef_name = 'robust_coef'
        #     self.yatsm_model.record = postprocess.refit_record(
        #         self.yatsm_model, 'robust')
        # else:
        #     self.coef_name = 'coef'

        if self._calc_pheno:
            # TODO: parameterize band indices & scale factor
            ltm = pheno.LongTermMeanPhenology(self.yatsm_model)
            self.yatsm_model.record = ltm.fit()

        # Restore log level
        logger.setLevel(log_level)

    # SETUP
    def _init_metadata(self):
        """ Setup metadata for series """
        # Find MTL file
        self.mtl_files = None
        if self._metadata_file_pattern:
            search = find_files(self.location, self._metadata_file_pattern,
                                ignore_dirs=[self._results_folder])
            if len(search) == 0:
                logger.error(
                    'Could not find image metadata with pattern {p}'.format(
                        p=self._metadata_file_pattern))
            if len(search) != len(self.series[0].images['date']):
                logger.error(
                    'Inconsistent number of metadata files found: '
                    '{0} images vs {1} metadata files'.format(
                        len(self.series[0].images['date']), len(search)))
            else:
                self.mtl_files = search

        # Setup metadata for series
        self.series[0].metadata = ['sensor', 'pathrow', 'multitemp_screened']
        self.series[0].metadata_names = ['Sensor', 'Path/Row',
                                         'Multitemp Screened']
        self.series[0].metadata_table = [False, False, False]

        # Sensor ID
        self.series[0].sensor = np.array(
            [n[0:3] for n in self.series[0].images['filename']])
        # Path/row
        self.series[0].pathrow = np.array([
            'p{p}r{r}'.format(p=n[3:6], r=n[6:9])
            for n in self.series[0].images['filename']])

        # Multitemporal noise screening - init to 0 (not screened)
        #   Will update this during model fitting
        self.series[0].multitemp_screened = np.ones(self.series[0].n)
        # Make an entry 0 so we get this in the unique values
        self.series[0].multitemp_screened[0] = 0

        # If we found MTL files, find cloud cover
        if self.mtl_files is not None:
            self.series[0].metadata.append('cloud_cover')
            self.series[0].metadata_names.append('Cloud cover')
            self.series[0].metadata_table.append(True)
            self.series[0].cloud_cover = np.zeros(self.series[0].n)
            for i, mtl_file in enumerate(self.mtl_files):
                self.series[0].cloud_cover[i] = parse_landsat_MTL(
                    mtl_file, 'CLOUD_COVER')

        if self._calc_pheno:
            self.series[0].metadata.append('pheno')
            self.series[0].metadata_names.append('Phenology')
            self.series[0].metadata_table.append(False)
            # Initialize almost all as summer (SUM); first two as SPR/AUT
            self.series[0].pheno = np.repeat('SUM', self.series[0].n)
            self.series[0].pheno[0] = 'SPR'
            self.series[0].pheno[1] = 'AUT'

    def _check_yatsm(self):
        """ Check if YATSM is available """
        try:
            global yatsm
            global CCDCesque, postprocess
            global harm
            global get_valid_mask

            import yatsm
            from yatsm.algorithms import CCDCesque, postprocess
            from yatsm._cyprep import get_valid_mask
            from yatsm.regression.transforms import harm
        except ImportError as e:
            raise ImportError('Could not import YATSM because it could not '
                              'import a dependency (%s)' % e.message)
        except Exception as e:
            raise ImportError('Could not import YATSM for an unknown reason '
                              '(%s)' % e.message)
        else:
            self.has_results = True

        if self._calc_pheno:
            try:
                global pheno
                import yatsm.phenology as pheno
            except:
                msg = ('Could not import YATSM phenology module. '
                       'Make sure you have R and rpy2 installed.')
                raise ImportError(msg)
