def alerts_info(data, L_plus, delta, wdw_length, clf, reg, L_minus=None,
                k=None, cut=None, verbose=True, wdw_shift=0):
    """
    Applies the two-sided CUSUM chart on a series.

    This function returns the size and the form ('jumps', 'drifts',
    'oscillating shifts') of the shifts after each alert.

    Parameters
    ----------
    data : 1D-array
        A single series of standardized observations to be monitored.
    L_plus : float
        Value for the positive control limit.
    delta : float >= 0
        The target shift size.
    wdw_length : int > 0
        The length of the input vector.
    clf : support vector classification model
        The trained classifier.
    reg : support vector regression model
        The trained regressor.
    L_minus : float, optional
        Value for the negative control limit. Default is None.
        When None, L_minus = - L_plus.
    k : float, optional
        The allowance parameter. The default is None.
        When None, k = delta/2 (optimal formula for iid normal data).
    cut : float, optional
        Upper value for the chart statistics. Values of the positive
        (resp. negative) chart statistics are constrained to be equal to
        or lower than 'cut' (resp. equal to or superior than -'cut').
        When None, cut is equal to '2L_plus'. The default is None.
    verbose : bool, optional
        Flag to print the percentage of alerts in the series.
        Default is True.
    wdw_shift : int, optional
        Shift that is applied to the predictions of the SVMs, to be better
        aligned with the actual deviations. It is cast to int and must be
        strictly lower than wdw_length. The default is 0.

    Returns
    -------
    form_plus : 1D-array
        Predicted shift forms after positive alerts.
        When no alert is detected, the shift forms are set to NaNs.
    form_minus : 1D-array
        Predicted shift forms after negative alerts.
        When no alert is detected, the shift forms are set to NaNs.
    size_plus : 1D-array
        Predicted shift sizes after positive alerts.
        When no alert is detected, the shift sizes are set to NaNs.
    size_minus : 1D-array
        Predicted shift sizes after negative alerts.
        When no alert is detected, the shift sizes are set to NaNs.
    C_plus : 1D-array
        Values of the positive chart statistic.
    C_minus : 1D-array
        Values of the negative chart statistic.

    Raises
    ------
    AssertionError
        If data is not one-dimensional or if wdw_shift >= wdw_length.

    Notes
    -----
    Monitoring starts at index wdw_length; the chart statistics at earlier
    indexes are left at zero. The percentage of alerts is computed with
    respect to the number of non-NaN observations and is reported as 0
    when the series contains no valid observation.
    """
    assert np.ndim(data) == 1, "Input data must be a 1D array (one series)"
    # accept any 1D array-like (e.g. a list); a no-op for ndarray inputs
    data = np.asarray(data)
    n_obs = len(data)

    if L_minus is None:
        L_minus = -L_plus
    if k is None:
        k = abs(delta) / 2
    if cut is None:
        cut = L_plus * 2

    wdw_shift = int(wdw_shift)
    assert wdw_shift < wdw_length, "wdw_shift should be inferior to wdw_length"

    # input vectors of the SVMs: one row per time index, NaN where no alert
    input_minus = np.full((n_obs, wdw_length), np.nan)
    input_plus = np.full((n_obs, wdw_length), np.nan)
    flag_plus = np.zeros(n_obs, dtype=bool)
    flag_minus = np.zeros(n_obs, dtype=bool)
    C_plus = np.zeros(n_obs)
    C_minus = np.zeros(n_obs)

    for i in range(wdw_length, n_obs):

        ## CUSUM monitoring
        # The built-in max/min keep their first argument when the second
        # one is NaN, so a missing observation resets the statistic to 0.
        # The outer min/max with 'cut' avoids cusum "explosion".
        C_plus[i] = min(cut, max(0, C_plus[i - 1] + data[i] - k))
        if C_plus[i] > L_plus:  # alert
            flag_plus[i] = True
            # the last wdw_length observations, ending at (and including) i
            input_plus[i, :] = data[i + 1 - wdw_length:i + 1]

        C_minus[i] = max(-cut, min(0, C_minus[i - 1] + data[i] + k))
        if C_minus[i] < L_minus:  # alert
            flag_minus[i] = True
            input_minus[i, :] = data[i + 1 - wdw_length:i + 1]

    ## compute percentage of alerts (only needed for the report)
    if verbose:
        # if alert both for pos and neg limits, count for only one alert
        n_alerts = np.count_nonzero(flag_plus | flag_minus)
        # observing period = number of non-NaN observations
        n_valid = np.count_nonzero(~np.isnan(data))
        # guard against a series without any valid observation
        OC_perc = n_alerts * 100 / n_valid if n_valid > 0 else 0.0
        print("Percentage of alerts: %0.2f" % OC_perc)

    ## interpolate NaNs in input vectors
    # NOTE(review): svm.fill_nan is defined outside this block; it is used
    # here as returning the usable input vectors and their row indexes.
    input_minus_valid, ind_minus = svm.fill_nan(input_minus)
    input_plus_valid, ind_plus = svm.fill_nan(input_plus)

    ## shift the indexes of the SVM predictions
    ## otherwise the deviation predicted at time t is based on
    ## the entire wdw_length
    ind_minus = ind_minus - wdw_shift
    ind_plus = ind_plus - wdw_shift

    ## apply classifier and regressor on (filled-up) input vectors
    size_minus = np.full(n_obs, np.nan)
    size_plus = np.full(n_obs, np.nan)
    form_minus = np.full(n_obs, np.nan)
    form_plus = np.full(n_obs, np.nan)
    if len(ind_minus) > 0:  # at least one value
        size_minus[ind_minus] = reg.predict(input_minus_valid)
        form_minus[ind_minus] = clf.predict(input_minus_valid)
    if len(ind_plus) > 0:
        size_plus[ind_plus] = reg.predict(input_plus_valid)
        form_plus[ind_plus] = clf.predict(input_plus_valid)

    return (form_plus, form_minus, size_plus, size_minus, C_plus, C_minus)
### Compute the predictions (sizes and shapes) of the networks
#=======================================================================

### position of the station of interest ('UC') in station_names
stat = [idx for idx, name in enumerate(station_names) if name == 'UC'][0]

### arrange the series of the selected station into blocks;
### the rows before index block_length - 1 are left as NaN
blocks = np.full((n_obs, block_length), np.nan)
blocks[block_length - 1:, :] = bb.MBB(data[:, stat].reshape(-1, 1),
                                      block_length,
                                      NaN=True, all_NaN=False)

### fill in the NaNs of the input vectors
### (ind is used below as the row indexes of the returned vectors)
input_valid, ind = svm.fill_nan(blocks)

### add a middle axis of length one to match the input dimensions
input_valid = input_valid.reshape(input_valid.shape[0], 1,
                                  input_valid.shape[1])

### run the regressor and the classifier on the (filled-up) input
### vectors; entries without a prediction remain NaN
size_pred = np.full((n_obs, 1), np.nan)
shape_pred = np.full(n_obs, np.nan)
if len(ind):  # at least one value
    size_pred[ind] = regressor.predict(input_valid)
    # NOTE(review): predict_classes presumably comes from an older Keras
    # Sequential API -- confirm it exists in the installed version
    shape_pred[ind] = classifier.predict_classes(input_valid)

#========================================================================
### Compute the cut-off values of the network