def test_commission_real_change(sim_real_change):
    """ Commission test should not merge away real changes

    A relatively large p value is used here, so H0 is very likely to be
    rejected and every change should be retained.
    """
    alpha = 0.10
    result = commission_test(sim_real_change, alpha)

    # The number of segments must be unchanged by the commission test
    n_result = len(result)
    n_input = len(sim_real_change.record)
    assert n_result == n_input
def test_commission_nochange(sim_nochange):
    """ With no change at all, the output must equal the input record """
    alpha = 0.10
    result = commission_test(sim_nochange, alpha)

    # A single segment in, a single (identical) segment out
    n_segments = len(result)
    assert n_segments == 1
    np.testing.assert_array_equal(result, sim_nochange.record)
def test_commission_no_real_change_2(sim_no_real_change_2):
    """ Commission test should resolve two spurious changes into one segment
    """
    alpha = 0.01
    result = commission_test(sim_no_real_change_2, alpha)

    n_segments = len(result)
    assert n_segments == 1

    # The single remaining segment must not report a break
    remaining = result[0]
    assert remaining['break'] == 0
def line(ctx, config, job_number, total_jobs,
         resume, check_cache, do_not_run, verbose_yatsm):
    """ Run the configured YATSM algorithm on the image lines of one job

    For every line assigned to this job the timeseries is read with
    `read_line`, each column is masked and fit with the algorithm class
    named in the config, optional postprocessing is applied (commission
    test, refits, phenology), and the records of the whole line are saved
    with `np.savez` to the name given by `get_output_name`.

    Args:
        ctx: not used in this function (presumably the click context --
            TODO confirm against the command decorators)
        config: config file location, handed to `parse_config_file`
        job_number: ID of this job, handed to `distribute_jobs`
        total_jobs: total number of jobs, handed to `distribute_jobs`
        resume: if truthy, skip lines whose output file can be loaded by
            `np.load`
        check_cache: not used in this function
        do_not_run: if truthy, read each line but skip model fitting and
            do not write any output
        verbose_yatsm: if truthy, set the algorithm logger to DEBUG

    Raises:
        click.Abort: if phenology is requested but `yatsm.phenology` could
            not be imported, or if the output directory cannot be created
            or written to
    """
    import errno

    if verbose_yatsm:
        logger_algo.setLevel(logging.DEBUG)

    # Parse config
    cfg = parse_config_file(config)

    # The phenology section is optional: resolve the switch once, safely,
    # instead of indexing cfg['phenology'] unguarded later on
    do_pheno = bool('phenology' in cfg and cfg['phenology'].get('enable'))

    if do_pheno and not pheno:
        click.secho('Could not import yatsm.phenology but phenology metrics '
                    'are requested', fg='red')
        click.secho('Error: %s' % pheno_exception, fg='red')
        raise click.Abort()

    # Make sure output directory exists and is writable
    output_dir = cfg['dataset']['output']
    try:
        os.makedirs(output_dir)
    except OSError as e:
        if e.errno == errno.EACCES:
            click.secho('Cannot create output directory %s' % output_dir,
                        fg='red')
            raise click.Abort()
        # errno.EEXIST (directory already exists) is expected; any other
        # failure is caught by the writability check just below

    if not os.access(output_dir, os.W_OK):
        click.secho('Cannot write to output directory %s' % output_dir,
                    fg='red')
        raise click.Abort()

    # Test existence of cache directory
    read_cache, write_cache = test_cache(cfg['dataset'])

    logger.info('Job {i} of {n} - using config file {f}'.format(
        i=job_number, n=total_jobs, f=config))
    df = csvfile_to_dataframe(cfg['dataset']['input_file'],
                              cfg['dataset']['date_format'])
    df['image_ID'] = get_image_IDs(df['filename'])

    # Get attributes of one of the images
    nrow, ncol, nband, dtype = get_image_attribute(df['filename'][0])

    # Calculate the lines this job ID works on
    job_lines = distribute_jobs(job_number, total_jobs, nrow)
    logger.debug('Responsible for lines: {l}'.format(l=job_lines))

    # Calculate X feature input
    dates = np.asarray(df['date'])
    kws = {'x': dates}
    kws.update(df.to_dict())
    X = patsy.dmatrix(cfg['YATSM']['design_matrix'], kws)
    cfg['YATSM']['design'] = X.design_info.column_name_indexes

    if cfg['YATSM']['reverse']:
        X = np.flipud(X)

    # Create output metadata to save
    md = {
        'YATSM': cfg['YATSM'],
        cfg['YATSM']['algorithm']: cfg[cfg['YATSM']['algorithm']]
    }
    if do_pheno:
        md['phenology'] = cfg['phenology']

    # Begin process
    start_time_all = time.time()
    for line in job_lines:
        out = get_output_name(cfg['dataset'], line)

        if resume:
            try:
                np.load(out)
            except Exception:
                # Output missing or unreadable: (re)process this line.
                # KeyboardInterrupt/SystemExit are deliberately not caught.
                pass
            else:
                logger.debug('Already processed line %s' % line)
                continue

        logger.debug('Running line %s' % line)
        start_time = time.time()

        Y = read_line(line, df['filename'], df['image_ID'], cfg['dataset'],
                      ncol, nband, dtype,
                      read_cache=read_cache, write_cache=write_cache,
                      validate_cache=False)
        if do_not_run:
            continue
        if cfg['YATSM']['reverse']:
            Y = np.fliplr(Y)

        # Per-line invariants, looked up once rather than for every column
        idx_mask = cfg['dataset']['mask_band'] - 1
        cls = cfg['YATSM']['algorithm_cls']
        algo_cfg = cfg[cfg['YATSM']['algorithm']]

        output = []
        for col in np.arange(Y.shape[-1]):
            _Y = Y.take(col, axis=2)

            # Mask: keep observations within min/max values whose mask
            # band value is not one of the configured mask values
            valid = cyprep.get_valid_mask(
                _Y,
                cfg['dataset']['min_values'],
                cfg['dataset']['max_values']).astype(bool)
            valid &= np.in1d(_Y.take(idx_mask, axis=0),
                             cfg['dataset']['mask_values'],
                             invert=True)

            _Y = np.delete(_Y, idx_mask, axis=0)[:, valid]
            _X = X[valid, :]
            _dates = dates[valid]

            # Run model
            yatsm = cls(lm=cfg['YATSM']['prediction_object'],
                        **algo_cfg.get('init', {}))
            yatsm.px = col
            yatsm.py = line

            try:
                yatsm.fit(_X, _Y, _dates, **algo_cfg.get('fit', {}))
            except TSLengthException:
                continue

            if yatsm.record is None or len(yatsm.record) == 0:
                continue

            # Postprocess
            if cfg['YATSM'].get('commission_alpha'):
                yatsm.record = postprocess.commission_test(
                    yatsm, cfg['YATSM']['commission_alpha'])

            for prefix, lm in zip(cfg['YATSM']['refit']['prefix'],
                                  cfg['YATSM']['refit']['prediction_object']):
                yatsm.record = postprocess.refit_record(
                    yatsm, prefix, lm, keep_regularized=True)

            if do_pheno:
                pcfg = cfg['phenology']
                ltm = pheno.LongTermMeanPhenology(**pcfg.get('init', {}))
                yatsm.record = ltm.fit(yatsm, **pcfg.get('fit', {}))

            output.extend(yatsm.record)

        logger.debug('    Saving YATSM output to %s' % out)
        np.savez(out,
                 record=np.array(output),
                 version=__version__,
                 metadata=md)

        run_time = time.time() - start_time
        logger.debug('Line %s took %ss to run' % (line, run_time))

    logger.info('Completed {n} lines in {m} minutes'.format(
        n=len(job_lines),
        m=round((time.time() - start_time_all) / 60.0, 2)))
def _fetch_results_live(self):
    """ Run YATSM and get results

    Builds the design matrix, response data and dates from the first
    series, masks out unusable observations, fits a `CCDCesque` model and
    optionally applies the commission test and long term mean phenology.
    The fitted model is stored in `self.yatsm_model`; `self.X`, `self.Y`,
    `self.dates` and `self._design_info` are also (re)assigned.
    """
    logger.debug('Calculating YATSM results on the fly')

    # Setup design matrix, Y, and dates
    series = self.series[0]
    self.X = patsy.dmatrix(
        self.controls['design'].value,
        {
            'x': series.images['ordinal'],
            'sensor': series.sensor,
            'pr': series.pathrow
        })
    self._design_info = self.X.design_info.column_name_indexes
    self.Y = series.data.astype(np.int16)
    self.dates = np.asarray(series.images['ordinal'])

    # Mask band is configured 1-indexed; split it off from the data bands
    idx_mask = self.config['mask_band'].value[0] - 1
    mask = self.Y[idx_mask, :]
    Y_data = np.delete(self.Y, idx_mask, axis=0)

    # Mask out masked values
    clear = np.in1d(mask, self.mask_values, invert=True)
    valid = get_valid_mask(Y_data,
                           self.config['min_values'].value,
                           self.config['max_values'].value).astype(bool)
    clear &= valid

    # Setup parameters
    estimator = sklearn.linear_model.Lasso(alpha=20)
    reg = self.controls['regression_type'].value
    if hasattr(yatsm.regression, 'packaged'):
        if reg in yatsm.regression.packaged.packaged_regressions:
            reg_fn = yatsm.regression.packaged.find_packaged_regressor(reg)
            try:
                estimator = jl.load(reg_fn)
            except Exception:
                # Keep the failsafe Lasso estimator created above
                logger.error('Cannot load regressor: %s' % reg)
            else:
                logger.debug('Loaded regressor %s from %s' % (reg, reg_fn))
        else:
            logger.error('Cannot use unknown regression %s' % reg)
    else:
        logger.warning(
            'Using failsafe Lasso(lambda=20) from scikit-learn. '
            'Upgrade to yatsm>=0.5.1 to access more regressors.')

    # NOTE(review): the previous code passed None when 'enable_min_rmse'
    # was set and the value otherwise, which looks inverted given the
    # control's name -- confirm against the control definition
    if self.controls['enable_min_rmse'].value:
        min_rmse = self.controls['min_rmse'].value
    else:
        min_rmse = None

    kwargs = dict(
        estimator=estimator,
        test_indices=self.controls['test_indices'].value,
        consecutive=self.controls['consecutive'].value,
        threshold=self.controls['threshold'].value,
        min_obs=self.controls['min_obs'].value,
        min_rmse=min_rmse,
        screening_crit=self.controls['screen_crit'].value,
        remove_noise=self.controls['remove_noise'].value,
        dynamic_rmse=self.controls['dynamic_rmse'].value,
    )

    self.yatsm_model = CCDCesque(**version_kwargs(kwargs))

    # Don't want to have DEBUG logging when we run YATSM
    log_level = logger.level
    logger.setLevel(logging.INFO)
    try:
        if self.controls['reverse'].value:
            self.yatsm_model.fit(
                np.flipud(self.X[clear, :]),
                np.fliplr(Y_data[:, clear]),
                self.dates[clear][::-1])
        else:
            self.yatsm_model.fit(
                self.X[clear, :],
                Y_data[:, clear],
                self.dates[clear])

        if self.controls['commit_test'].value:
            self.yatsm_model.record = postprocess.commission_test(
                self.yatsm_model, self.controls['commit_alpha'].value)

        # if self.controls['robust_results'].value:
        #     self.coef_name = 'robust_coef'
        #     self.yatsm_model.record = postprocess.refit_record(
        #         self.yatsm_model, 'robust'
        # else:
        #     self.coef_name = 'coef'

        if self.config['calc_pheno'].value:
            # TODO: parameterize band indices & scale factor
            ltm = pheno.LongTermMeanPhenology()
            self.yatsm_model.record = ltm.fit(self.yatsm_model)
    finally:
        # Restore log level, even when fitting or postprocessing fails
        logger.setLevel(log_level)
def _fetch_results_live(self):
    """ Run YATSM and get results

    Builds the design matrix, response data and dates from the first
    series, masks out unusable observations, fits a `CCDCesque` model and
    optionally applies the commission test and long term mean phenology.
    The fitted model is stored in `self.yatsm_model`; `self.X`, `self.Y`,
    `self.dates` and `self._design_info` are also (re)assigned.
    """
    logger.debug('Calculating YATSM results on the fly')

    # Setup design matrix, Y, and dates
    series = self.series[0]
    self.X = patsy.dmatrix(
        self.controls['design'].value,
        {
            'x': series.images['ordinal'],
            'sensor': series.sensor,
            'pr': series.pathrow
        })
    self._design_info = self.X.design_info.column_name_indexes
    self.Y = series.data.astype(np.int16)
    self.dates = np.asarray(series.images['ordinal'])

    # Mask band is configured 1-indexed; split it off from the data bands
    idx_mask = self.config['mask_band'].value[0] - 1
    mask = self.Y[idx_mask, :]
    Y_data = np.delete(self.Y, idx_mask, axis=0)

    # Mask out masked values
    clear = np.in1d(mask, self.mask_values, invert=True)
    valid = get_valid_mask(Y_data,
                           self.config['min_values'].value,
                           self.config['max_values'].value).astype(bool)
    clear &= valid

    # Setup parameters
    estimator = sklearn.linear_model.Lasso(alpha=20)
    reg = self.controls['regression_type'].value
    if hasattr(yatsm.regression, 'packaged'):
        if reg in yatsm.regression.packaged.packaged_regressions:
            reg_fn = yatsm.regression.packaged.find_packaged_regressor(reg)
            try:
                estimator = jl.load(reg_fn)
            except Exception:
                # Keep the failsafe Lasso estimator created above
                logger.error('Cannot load regressor: %s' % reg)
            else:
                logger.debug('Loaded regressor %s from %s' % (reg, reg_fn))
        else:
            logger.error('Cannot use unknown regression %s' % reg)
    else:
        logger.warning(
            'Using failsafe Lasso(lambda=20) from scikit-learn. '
            'Upgrade to yatsm>=0.5.1 to access more regressors.')

    # NOTE(review): the previous code passed None when 'enable_min_rmse'
    # was set and the value otherwise, which looks inverted given the
    # control's name -- confirm against the control definition
    if self.controls['enable_min_rmse'].value:
        min_rmse = self.controls['min_rmse'].value
    else:
        min_rmse = None

    kwargs = dict(
        estimator=estimator,
        test_indices=self.controls['test_indices'].value,
        consecutive=self.controls['consecutive'].value,
        threshold=self.controls['threshold'].value,
        min_obs=self.controls['min_obs'].value,
        min_rmse=min_rmse,
        screening_crit=self.controls['screen_crit'].value,
        remove_noise=self.controls['remove_noise'].value,
        dynamic_rmse=self.controls['dynamic_rmse'].value,
    )

    self.yatsm_model = CCDCesque(**version_kwargs(kwargs))

    # Don't want to have DEBUG logging when we run YATSM
    log_level = logger.level
    logger.setLevel(logging.INFO)
    try:
        if self.controls['reverse'].value:
            self.yatsm_model.fit(
                np.flipud(self.X[clear, :]),
                np.fliplr(Y_data[:, clear]),
                self.dates[clear][::-1])
        else:
            self.yatsm_model.fit(
                self.X[clear, :],
                Y_data[:, clear],
                self.dates[clear])

        if self.controls['commit_test'].value:
            self.yatsm_model.record = postprocess.commission_test(
                self.yatsm_model, self.controls['commit_alpha'].value)

        # if self.controls['robust_results'].value:
        #     self.coef_name = 'robust_coef'
        #     self.yatsm_model.record = postprocess.refit_record(
        #         self.yatsm_model, 'robust'
        # else:
        #     self.coef_name = 'coef'

        if self.config['calc_pheno'].value:
            # TODO: parameterize band indices & scale factor
            ltm = pheno.LongTermMeanPhenology()
            self.yatsm_model.record = ltm.fit(self.yatsm_model)
    finally:
        # Restore log level, even when fitting or postprocessing fails
        logger.setLevel(log_level)