Example #1
    def _run(self, score_window, smoothing_window, smooth, rec_error_type,
             expected):
        sequences, _ = reconstruction_errors(self.y, self.y_hat,
                                             self.STEP_SIZE, score_window,
                                             smoothing_window, smooth,
                                             rec_error_type)

        assert_allclose(sequences, expected, rtol=1e-2)
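
A minimal sketch of how a parameterized test could drive this helper. The import path (orion.primitives.tadgan), the synthetic data and the argument values are assumptions; the positional argument order follows the call above.

import numpy as np

# Assumed import path; adjust to wherever reconstruction_errors is defined.
from orion.primitives.tadgan import reconstruction_errors

rng = np.random.default_rng(0)
y = rng.normal(size=(10, 5))                      # 10 windows of 5 steps each
y_hat = y + rng.normal(scale=0.05, size=y.shape)  # near-perfect reconstructions

# step_size=1, score_window=10, smoothing_window=3, smooth=True, "point" errors
errors, _ = reconstruction_errors(y, y_hat, 1, 10, 3, True, "point")

# Example #2 below implies one error per covered timestamp when step_size=1.
print(len(errors))  # expected: y.shape[0] + y.shape[1] - 1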
Example #2
def score_anomalies(y,
                    y_hat,
                    critic,
                    index,
                    score_window=10,
                    critic_smooth_window=None,
                    error_smooth_window=None,
                    smooth=True,
                    rec_error_type="point",
                    comb="mult",
                    lambda_rec=0.5):
    """Compute an array of anomaly scores.

    Anomaly scores are calculated using a combination of reconstruction error and critic score.

    Args:
        y (ndarray):
            Ground truth.
        y_hat (ndarray):
            Predicted values. Each timestamp has multiple predictions.
        critic (ndarray):
            Critic score. Each timestamp has multiple critic scores.
        index (ndarray):
            Time index for each y (start position of the window).
        score_window (int):
            Optional. Size of the window over which the scores are calculated.
            If not given, 10 is used.
        critic_smooth_window (int):
            Optional. Size of window over which smoothing is applied to critic.
            If not given, 1% of the length of y is used.
        error_smooth_window (int):
            Optional. Size of window over which smoothing is applied to error.
            If not given, 1% of the length of y is used.
        smooth (bool):
            Optional. Indicates whether errors should be smoothed.
            If not given, `True` is used.
        rec_error_type (str):
            Optional. The method to compute reconstruction error. Can be one of
            `["point", "area", "dtw"]`. If not given, 'point' is used.
        comb (str):
            Optional. How to combine critic and reconstruction error. Can be one
            of `["mult", "sum", "rec"]`. If not given, 'mult' is used.
        lambda_rec (float):
            Optional. Used if `comb="sum"` as a lambda weighted sum to combine
            scores. If not given, 0.5 is used.

    Returns:
        tuple:
            Array of anomaly scores, the corresponding time index, the
            reassembled ground truth values and the per-timestamp predictions.
    """

    critic_smooth_window = critic_smooth_window or math.trunc(
        y.shape[0] * 0.01)
    error_smooth_window = error_smooth_window or math.trunc(y.shape[0] * 0.01)

    step_size = 1  # expected to be 1

    true_index = index  # no offset

    # Rebuild the flat ground-truth signal: the first value of every window,
    # followed by the remaining values of the last window.
    true = [item[0] for item in y.reshape((y.shape[0], -1))]

    for item in y[-1][1:]:
        true.extend(item)

    critic_extended = list()
    for c in critic:
        critic_extended.extend(np.repeat(c, y_hat.shape[1]).tolist())

    critic_extended = np.asarray(critic_extended).reshape((-1, y_hat.shape[1]))

    # For every timestamp, gather the critic scores of all windows covering it
    # and keep the most representative one: the KDE maximum when several
    # scores are available, the median otherwise.
    critic_kde_max = []
    pred_length = y_hat.shape[1]
    num_errors = y_hat.shape[1] + step_size * (y_hat.shape[0] - 1)

    for i in range(num_errors):
        critic_intermediate = []

        for j in range(max(0, i - num_errors + pred_length),
                       min(i + 1, pred_length)):
            critic_intermediate.append(critic_extended[i - j, j])

        if len(critic_intermediate) > 1:
            discr_intermediate = np.asarray(critic_intermediate)
            try:
                critic_kde_max.append(discr_intermediate[np.argmax(
                    stats.gaussian_kde(discr_intermediate)(
                        critic_intermediate))])
            except np.linalg.LinAlgError:
                critic_kde_max.append(np.median(discr_intermediate))
        else:
            critic_kde_max.append(np.median(np.asarray(critic_intermediate)))

    # Compute critic scores
    critic_scores = _compute_critic_score(critic_kde_max, critic_smooth_window)

    # Compute reconstruction scores
    rec_scores, predictions = reconstruction_errors(y, y_hat, step_size,
                                                    score_window,
                                                    error_smooth_window,
                                                    smooth, rec_error_type)

    rec_scores = stats.zscore(rec_scores)
    rec_scores = np.clip(rec_scores, a_min=0, a_max=None) + 1

    # Combine the two scores
    if comb == "mult":
        final_scores = np.multiply(critic_scores, rec_scores)

    elif comb == "sum":
        final_scores = (1 - lambda_rec) * (critic_scores -
                                           1) + lambda_rec * (rec_scores - 1)

    elif comb == "rec":
        final_scores = rec_scores

    else:
        raise ValueError(
            'Unknown combination specified {}, use "mult", "sum", or "rec" instead.'
            .format(comb))

    true = [[t] for t in true]
    return final_scores, true_index, true, predictions
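
A hedged usage sketch for this score_anomalies. The import path and the synthetic shapes are assumptions; the shapes follow what the code above expects: one row per window in y, y_hat and critic, and one start position per window in index.

import numpy as np

# Assumed import path; in Orion this function is part of the TadGAN primitive.
from orion.primitives.tadgan import score_anomalies

rng = np.random.default_rng(0)
n_windows, window_size = 500, 10

y = rng.normal(size=(n_windows, window_size, 1))  # ground-truth windows
y_hat = y + rng.normal(scale=0.1, size=y.shape)   # reconstructed windows
critic = rng.normal(size=(n_windows, 1))          # one critic score per window
index = np.arange(n_windows)                      # window start positions

scores, true_index, true, predictions = score_anomalies(
    y, y_hat, critic, index, comb="mult")

print(len(scores))  # one combined score per reconstructed timestamp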
Example #3
def score_anomalies(y: ndarray,
                    ry_hat: ndarray,
                    y_hat: ndarray,
                    fy_hat: ndarray,
                    smoothing_window: float = 0.01,
                    smooth: bool = True,
                    mask: bool = True,
                    comb: str = 'mult',
                    lambda_rec: float = 0.5,
                    rec_error_type: str = "dtw"):
    """Compute an array of absolute errors comparing predictions and expected output.

    If smooth is True, apply EWMA to the resulting array of errors.

    Args:
        y (ndarray):
            Ground truth.
        ry_hat (ndarray):
            Predicted values (reverse).
        y_hat (ndarray):
            Predicted values. Each timestamp has multiple predictions.
        fy_hat (ndarray):
            Predicted values (forward).
        smoothing_window (float):
            Optional. Size of the smoothing window, expressed as a proportion of the total
            length of y. If not given, 0.01 is used.
        smooth (bool):
            Optional. Indicates whether the returned errors should be smoothed with EWMA.
            If not given, `True` is used.
        mask (bool):
            Optional. Mask the anomaly scores at the beginning of the sequence.
            If not given, `True` is used.
        comb (str):
            Optional. How to combine regression and reconstruction errors. Can
            be one of `["mult", "sum", "rec", "reg"]`. If not given, 'mult' is
            used.
        lambda_rec (float):
            Optional. Used if `comb="sum"` as a lambda weighted sum to combine
            scores. If not given, 0.5 is used.
        rec_error_type (str):
            Optional. The method to compute reconstruction error. Can be one of
            `["point", "area", "dtw"]`. If not given, 'dtw' is used.

    Returns:
        ndarray:
            Array of anomaly scores.
    """

    reg_scores = bi_regression_errors(y,
                                      ry_hat,
                                      fy_hat,
                                      smoothing_window=smoothing_window,
                                      smooth=smooth,
                                      mask=mask)
    rec_scores, _ = reconstruction_errors(y[:, 1:-1],
                                          y_hat,
                                          smoothing_window=smoothing_window,
                                          smooth=smooth,
                                          rec_error_type=rec_error_type)
    mask_steps = int(smoothing_window * len(y)) if mask else 0
    rec_scores[:mask_steps] = min(rec_scores)
    # y_hat reconstructs y[:, 1:-1], so pad one zero at each end to realign
    # the reconstruction scores with the regression scores.
    rec_scores = np.concatenate([np.zeros(1), rec_scores, np.zeros(1)])

    scores = None
    if comb == "mult":
        reg_scores = MinMaxScaler([1, 2]).fit_transform(
            reg_scores.reshape(-1, 1)).flatten()
        rec_scores = MinMaxScaler([1, 2]).fit_transform(
            rec_scores.reshape(-1, 1)).flatten()
        scores = np.multiply(reg_scores, rec_scores)

    elif comb == "sum":
        reg_scores = MinMaxScaler([0, 1]).fit_transform(
            reg_scores.reshape(-1, 1)).flatten()
        rec_scores = MinMaxScaler([0, 1]).fit_transform(
            rec_scores.reshape(-1, 1)).flatten()
        scores = (1 - lambda_rec) * reg_scores + lambda_rec * rec_scores

    elif comb == "rec":
        scores = rec_scores

    elif comb == "reg":
        scores = reg_scores

    return scores
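
A usage sketch under assumed shape conventions: y_hat reconstructs the interior of each window (y[:, 1:-1]) while ry_hat and fy_hat are the reverse and forward one-step predictions. The import path and the synthetic data are assumptions.

import numpy as np

# Assumed import path; adjust to wherever this variant is defined.
from orion.primitives.aer import score_anomalies

rng = np.random.default_rng(0)
n_windows, window_size = 500, 10

y = rng.normal(size=(n_windows, window_size, 1))
y_hat = y[:, 1:-1] + rng.normal(scale=0.1, size=(n_windows, window_size - 2, 1))
ry_hat = y[:, :1] + rng.normal(scale=0.1, size=(n_windows, 1, 1))
fy_hat = y[:, -1:] + rng.normal(scale=0.1, size=(n_windows, 1, 1))

# "sum" blends the two error types with a lambda-weighted average.
scores = score_anomalies(y, ry_hat, y_hat, fy_hat, comb="sum", lambda_rec=0.5)

print(len(scores))  # one combined anomaly score per timestamp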