def lookforward(dates, observations, model_window, fitter_fn, processing_mask,
                variogram, proc_params):
    """
    Grow the model window forward, one observation per iteration, until a
    change is detected or there are no more observations to peek at.

    Each iteration compares the observations in a peek window (the
    ``PEEK_SIZE`` positions immediately after the model window) against the
    currently fitted models. Models are refit while the window is short and
    whenever the time spanned by the window has grown enough relative to the
    span used for the last fit.

    Args:
        dates: 1-d array of ordinal day numbers relative to some epoch, the
            particular epoch does not matter
        observations: 2-d array of spectral values, one row per band
        model_window: slice of indices (into the masked data) covered by the
            current model
        fitter_fn: function used to model observations
        processing_mask: 1-d boolean array identifying which values to
            consider for processing
        variogram: 1-d array of variogram values to compare against for the
            normalization factor
        proc_params: processing parameters, read by attribute
            (e.g. ``proc_params.PEEK_SIZE``)

    Returns:
        namedtuple: representation of the time segment, as built by
            results_to_changemodel
        1-d bool ndarray: processing mask that may have been modified
        slice: model window
    """
    # TODO do this better
    peek_size = proc_params.PEEK_SIZE
    coef_min = proc_params.COEFFICIENT_MIN
    coef_mid = proc_params.COEFFICIENT_MID
    coef_max = proc_params.COEFFICIENT_MAX
    num_obs_fact = proc_params.NUM_OBS_FACTOR
    detection_bands = proc_params.DETECTION_BANDS
    change_thresh = proc_params.CHANGE_THRESHOLD
    outlier_thresh = proc_params.OUTLIER_THRESHOLD
    avg_days_yr = proc_params.AVG_DAYS_YR
    fit_max_iter = proc_params.LASSO_MAX_ITER

    # Tuning values that are not (yet) exposed through proc_params.
    min_window_obs = 24      # below this the models are refit every pass
    refit_span_ratio = 1.33  # refit once the window span grows this much
    seasonal_obs = 24        # residuals requested from find_closest_doy
    # Presumably sqrt(24 - 8): the seasonal RMSE is described as using 8
    # degrees of freedom. TODO confirm.
    seasonal_rmse_divisor = 4

    # Step 4: lookforward.
    # Keep updating the model until observations that do not fit it are
    # found.
    log.debug('lookforward initial model window: %s', model_window)

    # fit_window: the locations used in the model regression.
    # model_window: the locations the fitted models apply to.
    # The two are not always the same.
    fit_window = model_window

    # None marks "nothing fitted yet" and forces the first iteration.
    models = None

    # Stays 0 if the end of the time series is reached without a change.
    change = 0

    # Initial subset of the data.
    period = dates[processing_mask]
    spectral_obs = observations[:, processing_mask]

    # Time spanned by the window the current models were fitted on.
    fit_span = period[model_window.stop - 1] - period[model_window.start]

    # slice.stop is always exclusive.
    while model_window.stop + peek_size < period.shape[0] or models is None:
        num_coefs = determine_num_coefs(period[model_window], coef_min,
                                        coef_mid, coef_max, num_obs_fact)

        peek_window = slice(model_window.stop, model_window.stop + peek_size)

        # Time spanned by the current window; compared against fit_span.
        model_span = period[model_window.stop - 1] - period[model_window.start]

        log.debug('Detecting change for %s', peek_window)

        # A short window (or the very first pass) always gets a fresh fit
        # and is scored against the models' own RMSE. Decided before any
        # refit so that it reflects the state at the top of the iteration.
        window_obs = model_window.stop - model_window.start
        short_window = not models or window_obs < min_window_obs

        if short_window:
            log.debug('Retrain models, less than 24 samples')
            refit = True
        elif model_span >= refit_span_ratio * fit_span:
            # Logged before fit_span is replaced so the old value is shown.
            log.debug('Retrain models, model_span: %s fit_span: %s',
                      model_span, fit_span)
            refit = True
        else:
            refit = False

        if refit:
            fit_span = model_span
            fit_window = model_window
            models = [fitter_fn(period[fit_window], spectrum, fit_max_iter,
                                avg_days_yr, num_coefs)
                      for spectrum in spectral_obs[:, fit_window]]

        # Residuals of every band over the peek window.
        residuals = np.array([calc_residuals(period[peek_window],
                                             band_obs[peek_window],
                                             models[band], avg_days_yr)
                              for band, band_obs in enumerate(spectral_obs)])

        if short_window:
            comp_rmse = [models[band].rmse for band in detection_bands]
        else:
            # Use the residuals that are closest, seasonally, to the end of
            # the peek window.
            closest_indexes = find_closest_doy(period, peek_window.stop - 1,
                                               fit_window, seasonal_obs)

            comp_rmse = [euclidean_norm(models[band].residual[closest_indexes])
                         / seasonal_rmse_divisor
                         for band in detection_bands]

        # Change magnitude for each observation in the peek window.
        magnitude = change_magnitude(residuals[detection_bands, :],
                                     variogram[detection_bands],
                                     comp_rmse)

        if detect_change(magnitude, change_thresh):
            log.debug('Change detected at: %s', peek_window.start)

            # Change was detected, hand control back to the caller.
            change = 1
            break

        if detect_outlier(magnitude[0], outlier_thresh):
            log.debug('Outlier detected at: %s', peek_window.start)

            # Exclude the outlier from all future processing steps.
            processing_mask = update_processing_mask(processing_mask,
                                                     peek_window.start)

            # Only one value was dropped and it had not been used in the
            # model yet, so the windows stay valid; just refresh the
            # masked views and try again.
            period = dates[processing_mask]
            spectral_obs = observations[:, processing_mask]
            continue

        model_window = slice(model_window.start, model_window.stop + 1)

    result = results_to_changemodel(
        fitted_models=models,
        start_day=period[model_window.start],
        end_day=period[model_window.stop - 1],
        break_day=period[peek_window.start],
        magnitudes=np.median(residuals, axis=1),
        observation_count=model_window.stop - model_window.start,
        change_probability=change,
        curve_qa=num_coefs)

    return result, processing_mask, model_window
def lookback(dates, observations, model_window, models, previous_break,
             processing_mask, variogram, proc_params):
    """
    Extend the start of the model window backwards towards the previous
    break point.

    Special case for when there is a gap between the start of a time series
    model and the previous model break point; the gap can include values
    that were excluded during the initialization step.

    Args:
        dates: 1-d array of ordinal days
        observations: 2-d array of spectral values, one row per band
        model_window: current window of values that is being considered
        models: currently fitted models for the model_window, one per band
        previous_break: index value of the previous break point, or the
            start of the time series if there wasn't one
        processing_mask: 1-d boolean array identifying which values to
            consider for processing
        variogram: 1-d array of variogram values to compare against for the
            normalization factor
        proc_params: processing parameters, read by attribute
            (e.g. ``proc_params.PEEK_SIZE``)

    Returns:
        slice: window of indices to be used
        1-d bool ndarray: processing mask that may have been modified
    """
    # TODO do this better
    peek_size = proc_params.PEEK_SIZE
    detection_bands = proc_params.DETECTION_BANDS
    change_thresh = proc_params.CHANGE_THRESHOLD
    outlier_thresh = proc_params.OUTLIER_THRESHOLD
    avg_days_yr = proc_params.AVG_DAYS_YR

    log.debug('Previous break: %s model window: %s', previous_break,
              model_window)

    period = dates[processing_mask]
    spectral_obs = observations[:, processing_mask]

    while model_window.start > previous_break:
        # The peek window always starts just before the model window and
        # runs backwards (step -1). Python slices are start-inclusive and
        # stop-exclusive regardless of direction, so only the stop differs:
        # 1. More than peek_size observations back to the previous break
        # 2. Catch so we don't go past the start of the observations
        # 3. Fewer than peek_size observations left to look at
        if model_window.start - previous_break > peek_size:
            # NOTE(review): with an exclusive stop this covers
            # peek_size - 1 indices -- confirm that is intended.
            peek_stop = model_window.start - peek_size
        elif model_window.start - peek_size <= 0:
            # NOTE(review): runs down to index 0, which is before
            # previous_break whenever previous_break > 0 -- confirm.
            peek_stop = None
        else:
            peek_stop = previous_break - 1

        peek_window = slice(model_window.start - 1, peek_stop, -1)

        log.debug('Considering index: %s using peek window: %s',
                  peek_window.start, peek_window)

        # Residuals of every band over the peek window.
        residuals = np.array([calc_residuals(period[peek_window],
                                             band_obs[peek_window],
                                             models[band], avg_days_yr)
                              for band, band_obs in enumerate(spectral_obs)])

        comp_rmse = [models[band].rmse for band in detection_bands]

        log.debug('RMSE values for comparison: %s', comp_rmse)

        magnitude = change_magnitude(residuals[detection_bands, :],
                                     variogram[detection_bands],
                                     comp_rmse)

        if detect_change(magnitude, change_thresh):
            log.debug('Change detected for index: %s', peek_window.start)
            # Change was detected, hand control back to the caller.
            break

        if detect_outlier(magnitude[0], outlier_thresh):
            log.debug('Outlier detected for index: %s', peek_window.start)
            processing_mask = update_processing_mask(processing_mask,
                                                     peek_window.start)

            period = dates[processing_mask]
            spectral_obs = observations[:, processing_mask]

            # The removed location sits before the window, so every index
            # in the window shifts down by one.
            model_window = slice(model_window.start - 1,
                                 model_window.stop - 1)
            continue

        log.debug('Including index: %s', peek_window.start)
        model_window = slice(peek_window.start, model_window.stop)

    return model_window, processing_mask
def initialize(dates, observations, fitter_fn, model_window, processing_mask,
               variogram, proc_params):
    """
    Search for a stable window of observations to start change detection
    from, both forward and backward.

    The window is extended until it covers enough time and enough samples
    once Tmask outliers are removed; models are then fitted and checked for
    stability. An unstable window is shifted forward by one observation and
    the search starts over.

    Args:
        dates: 1-d ndarray of ordinal day values
        observations: 2-d ndarray representing the spectral values
        fitter_fn: function used for the regression portion of the algorithm
        model_window: slice giving the initial time/observation window
        processing_mask: 1-d boolean array identifying which values to
            consider for processing
        variogram: 1-d array of variogram values to compare against for the
            normalization factor
        proc_params: processing parameters, read by attribute
            (e.g. ``proc_params.MEOW_SIZE``)

    Returns:
        slice: model window that was deemed to be a stable start
        list: fitted regression models, one per band, or None if the
            observations ran out before a stable window was found
        1-d bool ndarray: processing mask that may have been modified
    """
    # TODO do this better
    meow_size = proc_params.MEOW_SIZE
    day_delta = proc_params.DAY_DELTA
    detection_bands = proc_params.DETECTION_BANDS
    tmask_bands = proc_params.TMASK_BANDS
    change_thresh = proc_params.CHANGE_THRESHOLD
    tmask_scale = proc_params.T_CONST
    avg_days_yr = proc_params.AVG_DAYS_YR
    fit_max_iter = proc_params.LASSO_MAX_ITER

    def grow(window):
        """Return the window with its (exclusive) stop pushed out by one."""
        return slice(window.start, window.stop + 1)

    period = dates[processing_mask]
    spectral_obs = observations[:, processing_mask]

    log.debug('Initial %s', model_window)
    models = None
    while model_window.stop + meow_size < period.shape[0]:
        # The time-range check has to run every iteration: the start moves
        # forward whenever a model is unstable, and moving the stop in
        # lock-step does not guarantee the window still spans enough time.
        if not enough_time(period[model_window], day_delta):
            model_window = grow(model_window)
            continue

        log.debug('Checking window: %s', model_window)

        # Count the outliers in the window; too many means try again with
        # a larger window.
        tmask_outliers = tmask.tmask(period[model_window],
                                     spectral_obs[:, model_window],
                                     variogram, tmask_bands, tmask_scale,
                                     avg_days_yr)

        tmask_count = np.sum(tmask_outliers)

        log.debug('Number of Tmask outliers found: %s', tmask_count)

        # Dates in the window that survive the Tmask screening.
        tmask_period = period[model_window][~tmask_outliers]

        # TODO should probably look at a different fit procedure to handle
        # the following case.
        if tmask_count == model_window.stop - model_window.start:
            log.debug('Tmask identified all values as outliers')
            model_window = grow(model_window)
            continue

        # There must still be enough observations and enough time left
        # after the Tmask removal.
        sufficient = (enough_time(tmask_period, day_delta)
                      and enough_samples(tmask_period, meow_size))
        if not sufficient:
            log.debug('Insufficient time or observations after Tmask, '
                      'extending model window')
            model_window = grow(model_window)
            continue

        # Fold the Tmask outliers into the persistent mask.
        if tmask_count > 0:
            processing_mask = update_processing_mask(processing_mask,
                                                     tmask_outliers,
                                                     model_window)

            # With the outliers masked out, the same observations now
            # occupy a shorter slice.
            model_window = slice(model_window.start,
                                 model_window.stop - tmask_count)

            # Refresh the masked views.
            period = dates[processing_mask]
            spectral_obs = observations[:, processing_mask]

        log.debug('Generating models to check for stability')
        models = [fitter_fn(period[model_window], spectrum,
                            fit_max_iter, avg_days_yr, 4)
                  for spectrum in spectral_obs[:, model_window]]

        if stable(models, period[model_window], variogram, change_thresh,
                  detection_bands):
            log.debug('Stable start found: %s', model_window)
            break

        # An unstable model suggests a disturbance somewhere in the window;
        # slide the whole window forward by one and start over.
        model_window = slice(model_window.start + 1, model_window.stop + 1)
        log.debug('Unstable model, shift window to: %s', model_window)
        models = None

    return model_window, models, processing_mask