Exemple #1
0
 def predict(data, threshold):
     """Label ``data`` from an equally weighted blend of detector scores.

     ``GetScore`` supplies the per-detector scores; they are combined with
     five equal weights of 0.2 via ``np.dot`` and then converted to labels by
     ``util.score2label_threshold``.

     NOTE(review): ``seasonal`` is not defined inside this function; it is
     presumably a module-level name -- confirm against the full file.

     :param data: input forwarded to ``GetScore``
     :param threshold: forwarded to ``GetScore`` and to the labelling step
     :return: labels produced by ``util.score2label_threshold``
     """
     from BaseDetectors.getScore import GetScore

     equal_weights = [0.2] * 5
     detector_scores = GetScore(data, threshold, seasonal)
     combined = np.dot(equal_weights, detector_scores)
     return util.score2label_threshold(combined, threshold)
Exemple #2
0
 def predict(self, data):
     """Train on ``data``, then score and label the same ``data``.

     The absolute ``Z_Score`` output is normalized with ``util.normalize``
     and labelled using ``self.threshold_percentage``.

     :param data: series passed to ``self.train`` and ``self.Z_Score``
     :return: ``(normalized_scores, labels)``
     """
     self.train(data)
     raw_scores = self.Z_Score(data)
     normalized = util.normalize(abs(raw_scores))
     labels = util.score2label_threshold(
         score=normalized, percentage=self.threshold_percentage)
     return normalized, labels
Exemple #3
0
def evaluate(name, isscore, value, y_true, threshold=0.99, txt=''):
    '''
    Compare predictions with ground truth and build one comma-separated report line.

    :param name: identifier written as the first field of the report line
    :param isscore: True:value=score, False:value=label
    :param value: score or label
    :param y_true: ground-truth labels
    :param threshold: passed to score2label_threshold when value is a score
    :param txt: free text appended to the report line
    :return: (precision, recall, f1, roc, pr, precision_eval, recall_eval,
             f1_eval, labels, text); the eight metrics are all 0 when y_true
             sums to 0
    '''
    if not isscore:
        # Labels were supplied directly, so no ranking metrics are computed.
        y_pre = np.copy(value)
        roc = pr = 0
    else:
        score = np.copy(value)
        roc, pr = get_preformance(score, y_true)
        y_pre = score2label_threshold(score, threshold)

    if sum(y_true) == 0:
        # No positive ground truth in this section: skip the metric computation.
        text = txt + 'In this section, y_true is all 0!!\n'
        return (0,) * 8 + (y_pre, text)

    from sklearn.metrics import precision_recall_fscore_support

    precision, recall, f1, _ = precision_recall_fscore_support(
        y_true=y_true, y_pred=y_pre, average="binary")

    # Second set of metrics, computed on the labels returned by label_evaluation.
    newresult = label_evaluation(y_true, y_pre)
    precision_eval, recall_eval, f1_eval, _ = precision_recall_fscore_support(
        y_true=y_true, y_pred=newresult, average="binary")

    fields = (name, precision, recall, f1, roc, pr,
              precision_eval, recall_eval, f1_eval)
    text = ','.join(str(field) for field in fields) + ',' + txt + '\n'
    return (precision, recall, f1, roc, pr,
            precision_eval, recall_eval, f1_eval, newresult, text)
Exemple #4
0
    def predict(self, data, seasonal=None):
        """Score ``data`` with ``self.seasonal_esd`` and label the result.

        Labels are derived from the normalized values as returned by
        ``util.normalize``; the score handed back to the caller is their
        absolute value.

        :param data: series passed to ``self.seasonal_esd``
        :param seasonal: forwarded as the ``seasonality`` argument
        :return: ``(abs(normalized_scores), labels, seasonality)`` where the
                 last item is the second value returned by ``seasonal_esd``
        """
        residual, used_seasonality = self.seasonal_esd(
            data, seasonality=seasonal, max_anomalies=self.max_feed_len)
        normalized = util.normalize(residual)
        labels = util.score2label_threshold(
            normalized, percentage=self.threshold_percentage)
        return abs(normalized), labels, used_seasonality
Exemple #5
0
    def withstress_query(self, ts, score_list):
        """Combine detector scores and report anomalies for the latest batch.

        :param ts: not used by this method
        :param score_list: per-detector scores, combined with ``self.weight``
        :return: ``(is_anomaly, need_labeled, 0)`` -- labels of the last
                 ``self.batch`` points, the result of ``self.needlabeled`` on
                 the combined score, and a constant 0
        """
        # Ensemble anomaly score: weighted combination of the detector scores.
        ensemble_score = np.dot(self.weight, np.copy(score_list))

        # Decide which points are anomalous and whether a query is needed.
        labels = util.score2label_threshold(ensemble_score, self.threshold)
        latest_labels = labels[-self.batch:]
        return latest_labels, self.needlabeled(ensemble_score), 0
Exemple #6
0
 def predict(self, score, y_true, dropstress=False):
     """Update the ensemble weights, then label the weighted score.

     :param score: per-detector scores, forwarded to ``self.updata_weight``
                   and ``util.getsum_score``
     :param y_true: labels forwarded to ``self.updata_weight``
     :param dropstress: when true, labelling is delegated to
                        ``self.DropSeasonal``
     :return: ``(y_pre, y_stress, combined_score, nostress_score)``; without
              ``dropstress`` the stress labels are all zero and
              ``nostress_score`` is a copy of the combined score
     """
     self.updata_weight(score, y_true)
     combined = util.getsum_score(score, self.weight)

     if not dropstress:
         y_pre = util.score2label_threshold(combined, self.prethreshold)
         return y_pre, np.zeros(len(y_pre)), combined, np.copy(combined)

     y_pre, y_stress, nostress_score = self.DropSeasonal(combined)
     return y_pre, y_stress, combined, nostress_score
Exemple #7
0
    def predict(self, data):
        """Fit smoothing recurrences to ``data`` and score the residuals.

        ``self.types`` selects the recurrence: ``'linear'`` keeps a level and
        a trend term; ``'additive'`` and ``'multiplicative'`` also keep a
        seasonal term initialised from the first ``self.m`` samples.

        :param data: series to score (it is subtracted from the fitted list,
                     so it presumably is a NumPy array -- confirm with callers)
        :return: ``(normalized_scores, labels)``
        :raises ValueError: if ``self.types`` is not one of the three
                            supported names
        """
        series = data
        mode = self.types

        if mode == 'linear':
            alpha, beta = self.alpha, self.beta
            level = [series[0]]
            trend = [series[1] - series[0]]
            fitted = [level[0] + trend[0]]
            for t, observed in enumerate(series):
                level.append(alpha * observed + (1 - alpha) * (level[t] + trend[t]))
                trend.append(beta * (level[t + 1] - level[t]) + (1 - beta) * trend[t])
                fitted.append(level[t + 1] + trend[t + 1])
        else:
            alpha, beta, gamma = self.alpha, self.beta, self.gama
            m = self.m
            # Initial level: mean of the first m samples; initial trend: the
            # difference between the sums of the first two m-sample windows,
            # divided by m squared.
            level = [sum(series[0:m]) / float(m)]
            trend = [(sum(series[m:2 * m]) - sum(series[0:m])) / m**2]

            if mode == 'additive':
                season = [series[k] - level[0] for k in range(m)]
                fitted = [level[0] + trend[0] + season[0]]
                for t, observed in enumerate(series):
                    level.append(alpha * (observed - season[t]) + (1 - alpha) *
                                 (level[t] + trend[t]))
                    trend.append(beta * (level[t + 1] - level[t]) + (1 - beta) * trend[t])
                    season.append(gamma * (observed - level[t] - trend[t]) +
                                  (1 - gamma) * season[t])
                    fitted.append(level[t + 1] + trend[t + 1] + season[t + 1])
            elif mode == 'multiplicative':
                season = [series[k] / level[0] for k in range(m)]
                fitted = [(level[0] + trend[0]) * season[0]]
                for t, observed in enumerate(series):
                    level.append(alpha * (observed / season[t]) + (1 - alpha) *
                                 (level[t] + trend[t]))
                    trend.append(beta * (level[t + 1] - level[t]) + (1 - beta) * trend[t])
                    season.append(gamma * (observed / (level[t] + trend[t])) +
                                  (1 - gamma) * season[t])
                    fitted.append((level[t + 1] + trend[t + 1]) * season[t + 1])
            else:
                raise ValueError(
                    "ERROR: unsupported type, expect linear, additive or multiplicative."
                )

        # The loops emit one value more than there are samples; drop the last
        # one so the fitted list lines up with ``data``.
        fitted.pop()

        residual = abs(data - fitted)
        normalized = util.normalize(residual)
        labels = util.score2label_threshold(
            score=normalized, percentage=self.threshold_percentage)
        return normalized, labels
Exemple #8
0
    def predict(self, data):
        """Score ``data`` by its distance from an exponentially smoothed copy.

        The smoothed series starts at ``data[0]``; every later value is
        ``beta * previous_smoothed + (1 - beta) * data[i]``.

        :param data: series to score
        :return: ``(normalized_scores, labels)``
        """
        beta = self.beta
        n_points = len(data)

        smoothed = np.zeros(n_points)
        smoothed[0] = data[0]
        for idx in range(1, n_points):
            smoothed[idx] = beta * smoothed[idx - 1] + (1 - beta) * data[idx]

        residual = abs(data - smoothed)
        normalized = util.normalize(abs(residual))
        labels = util.score2label_threshold(
            score=normalized, percentage=self.threshold_percentage)
        return normalized, labels
Exemple #9
0
    def update_weight(self):
        """Re-estimate ``self.weight`` from the points that have feedback.

        Each detector's accuracy on the labelled points biases a random
        search: in every round 200 candidate weight vectors are drawn by
        multiplying or dividing each current weight by a random factor, the
        candidate with the lowest ``self.loss`` (the current weight included)
        becomes ``self.weight``, and the search stops once the best loss
        changes by no more than 0.001 per labelled point between rounds.

        :return: None; ``self.weight`` is updated in place
        """
        # Locate the labelled points (-1 marks "no feedback"). The copy with
        # 2 folded into 1 is only used for this lookup.
        remapped = np.copy(self.feedback)
        remapped[remapped == 2] = 1
        labelled = np.where(remapped != -1)[0]

        score = [self.score[k][labelled] for k in range(len(self.score))]
        # NOTE(review): labels are read from self.feedback, not from the
        # remapped copy, so a feedback value of 2 is kept here -- confirm
        # that this is intended.
        label = np.array(self.feedback)[labelled]

        # Bias vector w: accuracy of each detector on the labelled points.
        from sklearn.metrics import accuracy_score
        w = np.zeros(len(score))
        for k, detector_score in enumerate(score):
            predicted = util.score2label_threshold(detector_score, self.threshold)
            w[k] = accuracy_score(label, predicted)

        # Random search for a better weight vector, 200 candidates per round.
        round_no = 1
        previous_best = 0
        current_best = 10000
        while abs(previous_best - current_best) / len(label) > 0.001:
            previous_best = current_best
            candidates = [self.weight]
            for _ in range(200):
                candidate = np.zeros(len(w))
                for j, accuracy in enumerate(w):
                    # The factor range shrinks towards 1 as rounds go by.
                    alpha = random.uniform(1, 1 + 1 / round_no)
                    # Weights are scaled up with probability equal to the
                    # detector's accuracy and scaled down otherwise.
                    if random.random() <= accuracy:
                        candidate[j] = self.weight[j] * alpha
                    else:
                        candidate[j] = self.weight[j] / alpha
                candidates.append(util.norm_to_1(candidate))

            losses = [self.loss(score, label, candidate) for candidate in candidates]
            current_best = min(losses)
            self.weight = candidates[losses.index(current_best)]
            round_no += 1
        return
Exemple #10
0
 def predict(self, data):
     """Score ``data`` by its distance from a trailing moving average.

     The prediction for position ``i + win_size`` is the mean of the
     ``win_size`` values starting at ``i``; the first ``win_size`` positions
     are predicted by their own values. When ``self.quired`` holds feedback,
     every entry whose second field is 1 and whose index is not 0 has its
     value replaced by the mean of the preceding window before averaging.

     :param data: series to score
     :return: ``(normalized_scores, labels)``
     """
     win_size = self.win_size
     evaldata = np.copy(data)

     # ``is None`` rather than ``== None``: comparing an ndarray with ``==``
     # is elementwise and cannot be used as an ``if`` condition.
     if self.quired is not None:
         for item in self.quired:
             if item[1] == 1 and item[0] != 0:  # anomaly
                 # Cast once so a float-valued index works for both the slice
                 # and the assignment.
                 pos = int(item[0])
                 start = max(pos - win_size, 0)
                 evaldata[pos] = np.average(evaldata[start:pos])

     # Without feedback ``evaldata`` is an untouched copy of ``data``, so one
     # code path serves both cases.
     res = np.array([
         np.average(evaldata[i:i + win_size])
         for i in range(len(data) - win_size)
     ])
     predict_value = np.concatenate((evaldata[:win_size], res))

     diff_value = abs(data - predict_value)
     diff_value_normalize = util.normalize(diff_value)
     result = util.score2label_threshold(
         score=diff_value_normalize, percentage=self.threshold_percentage)
     return diff_value_normalize, result
Exemple #11
0
    def getstresstest(self, score, repeat=20, d=0.1, p=0.5):
        '''
        Mark anomalies that recur at seasonal intervals as stress-test points.

        :param score: anomaly scores
        :param repeat: number of seasonal recurrences examined per anomaly
                       (capped at len(score) // self.seasonal)
        :param d: tolerated offset from the exact seasonal position; a value
                  >= 1 is taken as a number of points (truncated to int), a
                  smaller value as a fraction of self.seasonal
        :param p: fraction of ``repeat`` that the number of recurrences must
                  exceed for a point to become a candidate
        :return: array of len(score) with 1 at stress-test points, 0 elsewhere
        :raises ValueError: if the deviation is at least half of self.seasonal
        '''
        seasonal = self.seasonal
        # The deviation is used as a slice bound below, so it must be an int
        # even when ``d`` is passed as a float >= 1.
        deviation = int(d) if d >= 1 else int(d * seasonal)
        if deviation >= seasonal / 2:
            raise ValueError("deviation >= self.seasonal/2.", deviation,
                             seasonal / 2)

        threshold = min(0.999, self.threshold - 2 / seasonal)
        label = util.score2label_threshold(np.copy(score),
                                           percentage=threshold)
        anomaly_indices = np.where(label == 1)[0]

        n_points = len(score)
        repeat = min(repeat, n_points // seasonal)
        half_span = repeat * seasonal / 2

        def window_has_anomaly(center):
            # True when any label in [center - deviation, center + deviation)
            # (clipped to the series) is non-zero.
            start = max(0, center - deviation)
            end = min(n_points - 1, center + deviation)
            return sum(label[start:end]) != 0

        stresstest = []  # candidate stress-test points
        for i in anomaly_indices:
            # Split the ``repeat`` look-ups between the two directions
            # depending on how close the anomaly is to either end.
            if i <= half_span:
                forward = i // seasonal
                back = repeat - forward
            elif i >= n_points - half_span:
                back = (n_points - i) // seasonal
                forward = repeat - back
            else:
                back = repeat // 2
                forward = back

            # Count the seasonal offsets after (``back``) and before
            # (``forward``) the anomaly at which another anomaly shows up.
            flag = sum(1 for j in range(1, back)
                       if window_has_anomaly(i + seasonal * j))
            flag += sum(1 for j in range(1, forward)
                        if window_has_anomaly(i - seasonal * j))
            if flag > p * repeat:
                stresstest.append(i)

        if len(stresstest) == 0:
            return np.zeros(len(score))

        # Greedily move the best-scoring candidate into the final set until
        # the best score drops below the acceptance line.
        final_stress_index = []
        for _ in range(len(stresstest)):
            line = (len(final_stress_index) / 2 + len(stresstest)) * p
            stress_score = self.calculate_test_statistic(
                stresstest, final_stress_index, deviation)
            if max(stress_score) < line:
                break
            max_idx = int(np.argmax(stress_score))
            final_stress_index.append(stresstest.pop(max_idx))

        label_stress = np.zeros(len(score))
        label_stress[final_stress_index] = 1
        return label_stress
Exemple #12
0
    def DropSeasonal(self, score, repeat=20, d=0.1, p=0.5):
        '''
        Separate seasonally recurring anomalies (stress tests) from the rest.

        :param score: anomaly scores
        :param repeat: number of seasonal recurrences examined per anomaly
                       (capped at int(len(score) / self.seasonal))
        :param d: tolerated offset from the exact seasonal position; a value
                  >= 1 is taken as a number of points (truncated to int), a
                  smaller value as a fraction of self.seasonal
        :param p: fraction of ``repeat`` that the number of recurrences must
                  exceed for a point to become a candidate
        :return: (label, y_stress, score_without_stress) -- anomaly labels
                 with stress points cleared, stress-test labels, and a copy
                 of ``score`` with stress points set to 0. The same three
                 values are returned when no stress point is found, with
                 y_stress all zero.
        :raises ValueError: if the deviation is at least half of self.seasonal
        '''
        import math

        seasonal = self.seasonal
        # The deviation is used as a slice bound below, so it must be an int
        # even when ``d`` is passed as a float >= 1.
        deviation = int(d) if d >= 1 else int(d * seasonal)
        if deviation >= seasonal / 2:
            raise ValueError("deviation >= self.seasonal/2.", deviation,
                             seasonal / 2)

        diff_value_normalize = np.copy(score)
        label = util.score2label_threshold(diff_value_normalize,
                                           percentage=self.truethreshold)
        anomaly_indices = np.where(label == 1)[0]

        n_points = len(score)
        repeat = min(repeat, int(n_points / seasonal))
        half_span = repeat * seasonal / 2

        def window_has_anomaly(center):
            # True when any label in [center - deviation, center + deviation)
            # (clipped to the series) is non-zero.
            start = max(0, center - deviation)
            end = min(n_points - 1, center + deviation)
            return sum(label[start:end]) != 0

        stresstest = []  # candidate stress-test points
        for i in anomaly_indices:
            # Split the ``repeat`` look-ups between the two directions
            # depending on how close the anomaly is to either end.
            if i <= half_span:
                forward = math.ceil(i / seasonal)
                back = repeat - forward
            elif i >= n_points - half_span:
                back = math.ceil((n_points - i) / seasonal)
                forward = repeat - back
            else:
                back = math.ceil(repeat / 2)
                forward = back

            # Count the seasonal offsets after (``back``) and before
            # (``forward``) the anomaly at which another anomaly shows up.
            flag = sum(1 for j in range(1, back)
                       if window_has_anomaly(i + seasonal * j))
            flag += sum(1 for j in range(1, forward)
                        if window_has_anomaly(i - seasonal * j))
            if flag > p * repeat:
                stresstest.append(i)

        if len(stresstest) == 0:
            # No stress test found: label with the prediction threshold and
            # return the same 3-tuple shape as the main path, so callers can
            # always unpack three values.
            label = util.score2label_threshold(diff_value_normalize,
                                               percentage=self.prethreshold)
            return label, np.zeros(len(label)), diff_value_normalize

        # Greedily move the best-scoring candidate into the final set until
        # the best score drops below the acceptance line.
        final_stress_index = []
        for _ in range(len(stresstest)):
            line = (len(final_stress_index) / 2 + len(stresstest)) * p
            stress_score = self.calculate_test_statistic(
                stresstest, final_stress_index, label, deviation)
            if max(stress_score) < line:
                break
            max_idx = int(np.argmax(stress_score))
            final_stress_index.append(stresstest.pop(max_idx))

        if final_stress_index:
            diff_value_normalize[final_stress_index] = 0

        y_stress = np.zeros(len(label))
        y_stress[final_stress_index] = 1

        # Stress-test points are reported separately, not as anomalies.
        label[final_stress_index] = 0

        return label, y_stress, diff_value_normalize
Exemple #13
0
 def score2label(self, y_scores):
     """Convert ``y_scores`` to labels using ``self.threshold_percentage``.

     :param y_scores: scores forwarded to ``util.score2label_threshold``
     :return: the labels returned by ``util.score2label_threshold``
     """
     return util.score2label_threshold(
         y_scores, percentage=self.threshold_percentage)