def permanent_snow_procedure(dates, observations, fitter_fn, quality,
                             proc_params):
    """
    Snow procedure for when there is a significant amount of snow represented
    in the quality information.

    This method essentially fits a 4 coefficient model across all the
    observations.

    Args:
        dates: list of ordinal day numbers relative to some epoch, the
            particular epoch does not matter.
        observations: values for one or more spectra corresponding to each
            time.
        fitter_fn: a function used to fit observation values and acquisition
            dates for each spectrum.
        quality: QA information for each observation.
        proc_params: dictionary of processing parameters.

    Returns:
        list: Change models for the observations of each spectrum.
        1-d ndarray: processing mask indicating which values were used for
            model fitting.
    """
    # TODO do this better
    meow_size = proc_params.MEOW_SIZE
    curve_qa = proc_params.CURVE_QA['PERSIST_SNOW']
    avg_days_yr = proc_params.AVG_DAYS_YR
    fit_max_iter = proc_params.LASSO_MAX_ITER
    num_coef = proc_params.COEFFICIENT_MIN

    processing_mask = qa.snow_procedure_filter(observations, quality,
                                               dates, proc_params)

    period = dates[processing_mask]
    spectral_obs = observations[:, processing_mask]

    if np.sum(processing_mask) < meow_size:
        return [], processing_mask

    models = [fitter_fn(period, spectrum, fit_max_iter, avg_days_yr, num_coef)
              for spectrum in spectral_obs]

    magnitudes = np.zeros(shape=(observations.shape[0],))

    # White space is cheap, so let's use it
    result = results_to_changemodel(fitted_models=models,
                                    start_day=dates[0],
                                    end_day=dates[-1],
                                    break_day=0,
                                    magnitudes=magnitudes,
                                    observation_count=np.sum(processing_mask),
                                    change_probability=0,
                                    curve_qa=curve_qa)

    return (result,), processing_mask
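
# Illustrative sketch only, not part of the original module: one way a
# fitter_fn compatible with the call above could look. The Lasso choice is
# suggested by proc_params.LASSO_MAX_ITER, and the .rmse / .residual
# attributes mirror how lookforward() uses the fitted models below; the
# harmonic design itself is an assumption, and the _example / ExampleModel
# names are hypothetical.
def _example_fitter_fn(dates, spectrum, max_iter, avg_days_yr,
                       num_coefficients):
    from collections import namedtuple
    from sklearn.linear_model import Lasso

    ExampleModel = namedtuple('ExampleModel',
                              ['fitted_model', 'rmse', 'residual'])

    dates = np.asarray(dates, dtype=float)
    spectrum = np.asarray(spectrum, dtype=float)

    # num_coefficients -> harmonic pairs: 4 -> 1 pair, 6 -> 2, 8 -> 3,
    # on top of an intercept (handled by the regressor) and a slope term.
    n_pairs = max((num_coefficients - 2) // 2, 1)
    w = 2.0 * np.pi / avg_days_yr
    harmonics = [f(w * k * dates)
                 for k in range(1, n_pairs + 1)
                 for f in (np.cos, np.sin)]
    design = np.column_stack([dates] + harmonics)

    model = Lasso(max_iter=max_iter).fit(design, spectrum)
    residual = spectrum - model.predict(design)
    rmse = np.sqrt(np.mean(residual ** 2))

    return ExampleModel(fitted_model=model, rmse=rmse, residual=residual)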
def catch(dates, observations, fitter_fn, processing_mask, model_window,
          curve_qa, proc_params):
    """
    Handle special cases where general models just need to be fitted and
    their results returned.

    Args:
        dates: list of ordinal day numbers relative to some epoch, the
            particular epoch does not matter.
        observations: spectral values, list of spectra -> values.
        fitter_fn: function used to model observations.
        processing_mask: 1-d boolean array identifying which values to
            consider for processing.
        model_window: span of indices that is represented in the current
            process.
        curve_qa: QA value describing the type of curve that was fitted.
        proc_params: dictionary of processing parameters.

    Returns:
        namedtuple representing the time segment
    """
    # TODO do this better
    avg_days_yr = proc_params.AVG_DAYS_YR
    fit_max_iter = proc_params.LASSO_MAX_ITER
    num_coef = proc_params.COEFFICIENT_MIN

    log.debug('Catching observations: %s', model_window)

    period = dates[processing_mask]
    spectral_obs = observations[:, processing_mask]

    # Subset the data based on the model window
    model_period = period[model_window]
    model_spectral = spectral_obs[:, model_window]

    models = [fitter_fn(model_period, spectrum, fit_max_iter, avg_days_yr,
                        num_coef)
              for spectrum in model_spectral]

    # The break day is the first observation after the model window; if the
    # window reaches the end of the time series, fall back to the last
    # observation.
    try:
        break_day = period[model_window.stop]
    except IndexError:
        break_day = period[-1]

    result = results_to_changemodel(fitted_models=models,
                                    start_day=period[model_window.start],
                                    end_day=period[model_window.stop - 1],
                                    break_day=break_day,
                                    magnitudes=np.zeros(shape=(7,)),
                                    observation_count=(
                                        model_window.stop - model_window.start),
                                    change_probability=0,
                                    curve_qa=curve_qa)

    return result
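
# Illustrative sketch only: results_to_changemodel is defined elsewhere in
# the module. Based on how it is called above, and on catch() documenting a
# namedtuple return, it appears to package its keyword arguments into a
# single record describing one time segment. The ExampleChangeModel name is
# hypothetical and the real record may carry additional fields; the older
# snow procedure variant below passes num_coefficients instead of curve_qa.
def _example_results_to_changemodel(fitted_models, start_day, end_day,
                                    break_day, magnitudes, observation_count,
                                    change_probability, curve_qa):
    from collections import namedtuple

    ExampleChangeModel = namedtuple('ExampleChangeModel',
                                    ['fitted_models', 'start_day', 'end_day',
                                     'break_day', 'magnitudes',
                                     'observation_count',
                                     'change_probability', 'curve_qa'])

    return ExampleChangeModel(fitted_models=fitted_models,
                              start_day=start_day,
                              end_day=end_day,
                              break_day=break_day,
                              magnitudes=magnitudes,
                              observation_count=observation_count,
                              change_probability=change_probability,
                              curve_qa=curve_qa)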
def permanent_snow_procedure(dates, observations, fitter_fn, quality,
                             meow_size=defaults.MEOW_SIZE):
    """
    Snow procedure for when there is a significant amount of snow represented
    in the quality information.

    This method essentially fits a 4 coefficient model across all the
    observations.

    Args:
        dates: list of ordinal day numbers relative to some epoch, the
            particular epoch does not matter.
        observations: values for one or more spectra corresponding to each
            time.
        fitter_fn: a function used to fit observation values and acquisition
            dates for each spectrum.
        quality: QA information for each observation.
        meow_size: minimum expected observation window needed to produce a
            fit.

    Returns:
        list: Change models for the observations of each spectrum.
        1-d ndarray: processing mask indicating which values were used for
            model fitting.
    """
    processing_mask = qa.snow_procedure_filter(observations, quality)

    period = dates[processing_mask]
    spectral_obs = observations[:, processing_mask]

    if np.sum(processing_mask) < meow_size:
        return [], processing_mask

    models = [fitter_fn(period, spectrum, 4)
              for spectrum in spectral_obs]

    magnitudes = np.zeros(shape=(observations.shape[0],))

    # White space is cheap, so let's use it
    result = results_to_changemodel(fitted_models=models,
                                    start_day=dates[0],
                                    end_day=dates[-1],
                                    break_day=0,
                                    magnitudes=magnitudes,
                                    observation_count=np.sum(processing_mask),
                                    change_probability=0,
                                    num_coefficients=4)

    return (result,), processing_mask
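
# Illustrative sketch only: qa.snow_procedure_filter lives in the qa module
# and is not shown here. A plausible reading, given that the snow procedure
# fits a single model through a snow-dominated series, is that the filter
# keeps observations that are usable for that fit (clear land, clear water,
# or snow). The QA code values below are hypothetical placeholders, not the
# module's actual constants, and the real filter may apply further checks.
def _example_snow_procedure_filter(observations, quality,
                                   clear=0, water=1, snow=3):
    quality = np.asarray(quality)

    # Keep anything assumed usable for a persistent-snow fit.
    return np.isin(quality, (clear, water, snow))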
def lookforward(dates, observations, model_window, fitter_fn, processing_mask,
                variogram, proc_params):
    """Increase the observation window until change is detected or we are out
    of observations.

    Args:
        dates: list of ordinal day numbers relative to some epoch, the
            particular epoch does not matter.
        observations: spectral values, list of spectra -> values.
        model_window: span of indices that is represented in the current
            process.
        fitter_fn: function used to model observations.
        processing_mask: 1-d boolean array identifying which values to
            consider for processing.
        variogram: 1-d array of variogram values to compare against for the
            normalization factor.
        proc_params: dictionary of processing parameters.

    Returns:
        namedtuple: representation of the time segment
        1-d bool ndarray: processing mask that may have been modified
        slice: model window
    """
    # TODO do this better
    peek_size = proc_params.PEEK_SIZE
    coef_min = proc_params.COEFFICIENT_MIN
    coef_mid = proc_params.COEFFICIENT_MID
    coef_max = proc_params.COEFFICIENT_MAX
    num_obs_fact = proc_params.NUM_OBS_FACTOR
    detection_bands = proc_params.DETECTION_BANDS
    change_thresh = proc_params.CHANGE_THRESHOLD
    outlier_thresh = proc_params.OUTLIER_THRESHOLD
    avg_days_yr = proc_params.AVG_DAYS_YR
    fit_max_iter = proc_params.LASSO_MAX_ITER

    # Step 4: lookforward.
    # Update the models until observations that do not fit them are found,
    # or we run out of observations.
    log.debug('lookforward initial model window: %s', model_window)

    # The fit_window pertains to which locations are used in the model
    # regression, while the model_window identifies the locations to which
    # the fitted models apply. They are not always the same.
    fit_window = model_window

    # Initialized for a check at the first iteration.
    models = None

    # Simple value to determine if change has occurred or not. Change may not
    # have occurred if we reach the end of the time series.
    change = 0

    # Initial subset of the data
    period = dates[processing_mask]
    spectral_obs = observations[:, processing_mask]

    # Used for comparison purposes
    fit_span = period[model_window.stop - 1] - period[model_window.start]

    # stop is always exclusive
    while model_window.stop + peek_size < period.shape[0] or models is None:
        num_coefs = determine_num_coefs(period[model_window], coef_min,
                                        coef_mid, coef_max, num_obs_fact)

        peek_window = slice(model_window.stop, model_window.stop + peek_size)

        # Used for comparison against fit_span
        model_span = period[model_window.stop - 1] - period[model_window.start]

        log.debug('Detecting change for %s', peek_window)

        # If we have less than 24 observations covered by the model_window,
        # or it is the first iteration, then we always fit a new window.
        if not models or model_window.stop - model_window.start < 24:
            fit_span = period[model_window.stop - 1] - period[model_window.start]
            fit_window = model_window

            log.debug('Retrain models, less than 24 samples')
            models = [fitter_fn(period[fit_window], spectrum, fit_max_iter,
                                avg_days_yr, num_coefs)
                      for spectrum in spectral_obs[:, fit_window]]

            residuals = np.array([calc_residuals(period[peek_window],
                                                 spectral_obs[idx, peek_window],
                                                 models[idx],
                                                 avg_days_yr)
                                  for idx in range(observations.shape[0])])

            comp_rmse = [models[idx].rmse for idx in detection_bands]

        # More than 24 points
        else:
            # If the span covered by the model_window has grown past a
            # threshold relative to the span the current models were fitted
            # on, then we need to fit new ones.
            # The 1.33 factor should be parametrized at some point.
            if model_span >= 1.33 * fit_span:
                log.debug('Retrain models, model_span: %s fit_span: %s',
                          model_span, fit_span)

                fit_span = period[model_window.stop - 1] - period[model_window.start]
                fit_window = model_window

                models = [fitter_fn(period[fit_window], spectrum, fit_max_iter,
                                    avg_days_yr, num_coefs)
                          for spectrum in spectral_obs[:, fit_window]]

            residuals = np.array([calc_residuals(period[peek_window],
                                                 spectral_obs[idx, peek_window],
                                                 models[idx],
                                                 avg_days_yr)
                                  for idx in range(observations.shape[0])])

            # We want to use the residual values closest to the peek_window
            # values based on seasonality.
            closest_indexes = find_closest_doy(period, peek_window.stop - 1,
                                               fit_window, 24)

            # Calculate an RMSE for the 24 seasonal residual values, using 8
            # as the degrees of freedom (sqrt(24 - 8) == 4).
            comp_rmse = [euclidean_norm(models[idx].residual[closest_indexes]) / 4
                         for idx in detection_bands]

        # Calculate the change magnitude values for each observation in the
        # peek_window.
        magnitude = change_magnitude(residuals[detection_bands, :],
                                     variogram[detection_bands],
                                     comp_rmse)

        if detect_change(magnitude, change_thresh):
            log.debug('Change detected at: %s', peek_window.start)
            # Change was detected, return to parent method
            change = 1
            break

        elif detect_outlier(magnitude[0], outlier_thresh):
            log.debug('Outlier detected at: %s', peek_window.start)
            # Keep track of any outliers so they will be excluded from future
            # processing steps
            processing_mask = update_processing_mask(processing_mask,
                                                     peek_window.start)

            # Because only one value was excluded, we shouldn't need to adjust
            # the model_window. The location hasn't been used in processing
            # yet, so the next iteration can use the same windows without
            # issue.
            period = dates[processing_mask]
            spectral_obs = observations[:, processing_mask]
            continue

        model_window = slice(model_window.start, model_window.stop + 1)

    result = results_to_changemodel(fitted_models=models,
                                    start_day=period[model_window.start],
                                    end_day=period[model_window.stop - 1],
                                    break_day=period[peek_window.start],
                                    magnitudes=np.median(residuals, axis=1),
                                    observation_count=(
                                        model_window.stop - model_window.start),
                                    change_probability=change,
                                    curve_qa=num_coefs)

    return result, processing_mask, model_window
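
# Illustrative sketch only: change_magnitude, detect_change and
# detect_outlier are defined elsewhere in the module. Based on how they are
# used above -- per-band residuals normalized against the larger of the
# variogram value and the comparison RMSE, reduced to one magnitude per
# peek-window observation, then compared to thresholds -- a plausible shape
# for them is sketched below. The real normalization and reduction may
# differ, and the _example names are hypothetical.
def _example_change_magnitude(residuals, variogram, comp_rmse):
    # residuals: (num_detection_bands, peek_size); the result is one
    # magnitude per observation in the peek window.
    norm = np.maximum(np.asarray(variogram, dtype=float),
                      np.asarray(comp_rmse, dtype=float))
    scaled = residuals / norm[:, None]
    return np.sum(scaled ** 2, axis=0)


def _example_detect_change(magnitude, change_threshold):
    # Flag change only when every observation in the peek window exceeds
    # the threshold.
    return np.min(magnitude) > change_threshold


def _example_detect_outlier(magnitude, outlier_threshold):
    # A single anomalous observation (the first in the peek window, as used
    # above) is treated as an outlier rather than a change.
    return magnitude > outlier_threshold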