def get_daykind_curve(filename, value_index=1):
    """Classify the curve stored in a CSV file, run anomaly detection, save results.

    The column at position ``value_index`` is read from ``filename``,
    pre-processed and passed to ``Bf_period``; the returned key selects a
    coarse class:

    * ``key <= 0.005``          -> class 0
    * ``0.005 < key <= 0.008``  -> class 1
    * ``key > 0.008``           -> class 2, which is then refined with the
      model loaded from ``curve_model.pkl`` (final class = prediction + 1).

    The detector ``anomaly_detect_func[c]`` is called on the raw values. The
    current matplotlib figure is then saved under the input path without its
    extension, and the input table plus an ``is_anomaly`` column is written to
    ``<stem>_anomaly.csv``.

    NOTE(review): a second function with this same name is defined further
    down in this file; the later definition replaces this one when the module
    is loaded -- confirm which one callers are meant to get.

    Args:
        filename: Path of the CSV file to analyse.
        value_index: Positional index of the column holding the series.

    Raises:
        ValueError: If the period key cannot be ordered against the class
            thresholds (for example when it is NaN).
    """
    import os

    data = pd.read_csv(filename)
    value = np.array(data[data.columns[value_index]])
    key = Bf_period(pre_process(value))

    if key <= 0.005:
        c = 0
    elif key <= 0.008:
        c = 1
    elif key > 0.008:
        c = 2
    else:
        # Only reachable when every comparison is false (e.g. a NaN key).
        raise ValueError(
            'cannot classify {!r}: period key is {!r}'.format(filename, key))

    if c == 2:
        # NOTE(review): joblib.load unpickles the file; only load model files
        # that come from a trusted location.
        model = joblib.load('curve_model.pkl')
        ts = pre_process(value)
        # One-row frame with the feature columns in sorted-name order.
        feature = pd.DataFrame(
            {name: [feature_dict[name](ts)] for name in sorted(feature_dict)})
        p = model.predict([np.array(feature.iloc[0])])
        # The prediction is an array holding a single element; pull the
        # scalar out explicitly instead of calling int() on the array.
        c = int(np.asarray(p).item() + 1)

    print(filename, c)
    res = anomaly_detect_func[c](value)

    # Strip the real extension instead of assuming it is three characters.
    stem = os.path.splitext(filename)[0]
    plt.savefig(stem, dpi=500)
    plt.close()

    data['is_anomaly'] = res
    data.to_csv(stem + '_anomaly.csv', index=False)
def visual(filename_list, pred, index=1, Length=3000):
    """Save one figure per cluster showing every series assigned to it.

    For each cluster label ``0 .. cluster_num - 1`` (``cluster_num`` is read
    from module scope) a figure is created with one small subplot per member
    file. Each subplot shows the raw values of column ``index`` (truncated to
    the first ``Length`` rows) and the output of
    ``pre_process(value, 200, _normalize=False)``. The figure is saved as
    ``Figure_for_Class<label>``.

    Args:
        filename_list: CSV paths; ``filename_list[i]`` corresponds to ``pred[i]``.
        pred: Predicted cluster label for each file.
        index: Positional index of the column to plot.
        Length: Maximum number of rows plotted per file.
    """
    # Positions of the files belonging to each cluster label.
    classindex = [
        [pos for pos, label in enumerate(pred) if label == c]
        for c in range(cluster_num)
    ]

    for ind, members in enumerate(classindex):
        plt.figure()
        plt.suptitle('Class: "{}"'.format(ind))
        # plt.subplot needs integer grid dimensions; floor(sqrt(n)) + 1 always
        # gives an N x N grid with room for all n members.
        N = int(math.sqrt(len(members))) + 1
        for slot, file_pos in enumerate(members, start=1):
            data = pd.read_csv(filename_list[file_pos])
            if len(data) > Length:
                data = data[:Length]
            value = np.array(data[data.columns[index]])
            plt.subplot(N, N, slot)
            plt.xticks([])
            plt.yticks([])
            plt.plot(value)
            plt.plot(pre_process(value, 200, _normalize=False))
        plt.savefig('Figure_for_Class{}'.format(ind), dpi=600)
def visual_withseason(filename_list, pred, cluster_num, index=1, Length=3000):
    """Save one figure per cluster with each series and its fitted season.

    For each cluster label ``0 .. cluster_num - 1`` a figure is created with
    one small subplot per member file. Each subplot shows the pre-processed
    values of column ``index`` (truncated to the first ``Length`` rows) and
    the ``season + trend`` curve returned by ``get_seasonal_diff``. The figure
    is saved as ``Figure_for_Class<label>``.

    Args:
        filename_list: CSV paths; ``filename_list[i]`` corresponds to ``pred[i]``.
        pred: Predicted cluster label for each file.
        cluster_num: Number of cluster labels to draw.
        index: Positional index of the column to plot.
        Length: Maximum number of rows plotted per file.
    """
    # Positions of the files belonging to each cluster label.
    classindex = [
        [pos for pos, label in enumerate(pred) if label == c]
        for c in range(cluster_num)
    ]

    for ind, members in enumerate(classindex):
        plt.figure()
        plt.suptitle('Class: "{}"'.format(ind))
        # plt.subplot needs integer grid dimensions; floor(sqrt(n)) + 1 always
        # gives an N x N grid with room for all n members.
        N = int(math.sqrt(len(members))) + 1
        for slot, file_pos in enumerate(members, start=1):
            data = pd.read_csv(filename_list[file_pos])
            if len(data) > Length:
                data = data[:Length]
            value = pre_process(np.array(data[data.columns[index]]))
            plt.subplot(N, N, slot)
            plt.xticks([])
            plt.yticks([])
            plt.plot(value, linewidth=0.5)
            diff, season, abs_value, trend = get_seasonal_diff(value)
            plt.plot(season + trend, linewidth=0.5)
        plt.savefig('Figure_for_Class{}'.format(ind), dpi=1000)
def load_data(data, seq_len, ratio, pred_len):
    """Scale a series and cut it into sliding-window train/test arrays.

    The scaling bounds ``down`` and ``up`` are the minimum and maximum of
    ``pre_process(data, _normalize=False)``; every raw sample is mapped to
    ``(x - down) / (up - down)``. Windows of ``seq_len + pred_len`` consecutive
    samples are then collected, the first ``round(ratio * n_windows)`` windows
    form the training set (shuffled in place with ``np.random.shuffle``) and
    the remaining ones the test set.

    Note that window start positions run over
    ``range(len(data) - (seq_len + pred_len))``, so the window ending on the
    very last sample is not produced.

    Args:
        data: Iterable of numeric samples.
        seq_len: Number of input samples per window.
        ratio: Fraction of windows assigned to the training set.
        pred_len: Number of target samples per window.

    Returns:
        ``(x_train, y_train, x_test, y_test, down, up)`` where the ``x_*``
        arrays have a trailing axis of size 1 and the ``y_*`` arrays hold the
        last ``pred_len`` samples of each window.
    """
    data = list(data)
    reference = pre_process(data, _normalize=False)
    down, up = float(min(reference)), float(max(reference))

    # Min-max scale the raw samples with the bounds of the processed series.
    scaled = [float((float(sample) - down) / (up - down)) for sample in data]

    window = seq_len + pred_len
    result = np.array(
        [scaled[begin:begin + window] for begin in range(len(scaled) - window)])

    split = int(round(ratio * result.shape[0]))
    train = result[:split, :]
    held_out = result[split:, :]
    # Shuffles the training rows only; `train` is a view into `result`.
    np.random.shuffle(train)

    x_train, y_train = train[:, :-pred_len], train[:, -pred_len:]
    x_test, y_test = held_out[:, :-pred_len], held_out[:, -pred_len:]

    # Add the trailing feature axis: (samples, steps) -> (samples, steps, 1).
    x_train = np.reshape(x_train, x_train.shape + (1,))
    x_test = np.reshape(x_test, x_test.shape + (1,))
    return (x_train, y_train, x_test, y_test, down, up)
def stl_decompose(ts):
    """Decompose *ts* with a 288-sample season and show diagnostic output.

    Prints the seasonal component of the first three 288-sample periods and
    its total length, shows the decomposition plot, and then shows a second
    figure overlaying ``pre_process(ts)`` with the normalised seasonal
    component.

    Args:
        ts: The series handed to ``seasonal_decompose``.
    """
    # NOTE(review): newer statsmodels releases name this keyword `period`;
    # confirm the installed version still accepts `freq`.
    res = seasonal_decompose(ts, freq=288)

    for i in range(3):
        print(res.seasonal[i * 288:(i + 1) * 288])
    print(len(res.seasonal))

    # Draw the decomposition first, then display it. plt.show() takes no
    # figure argument; a positional value would be read as its `block` flag.
    res.plot()
    plt.show()

    plt.figure()
    plt.plot(pre_process(ts))
    plt.plot(normalize(res.seasonal))
    plt.show()
def get_daykind_curve(filename, value_index):
    """Classify the curve stored in a CSV file and display its anomaly plot.

    The column at position ``value_index`` is read from ``filename``,
    pre-processed and passed to ``Bf_period``; the returned key selects the
    detector class:

    * ``key <= 0.005``          -> class 0
    * ``0.005 < key <= 0.008``  -> class 1
    * ``key > 0.008``           -> class 2

    ``anomaly_detect_func[c]`` is then called on the raw values and
    ``plt.show()`` displays whatever figures are open.

    NOTE(review): an earlier function with this same name (with a default for
    ``value_index`` and file output) is defined above in this file; this later
    definition replaces it when the module is loaded -- confirm that is
    intended.

    Args:
        filename: Path of the CSV file to analyse.
        value_index: Positional index of the column holding the series.

    Raises:
        ValueError: If the period key cannot be ordered against the class
            thresholds (for example when it is NaN).
    """
    data = pd.read_csv(filename)
    value = np.array(data[data.columns[value_index]])
    key = Bf_period(pre_process(value))

    if key <= 0.005:
        c = 0
    elif key <= 0.008:
        c = 1
    elif key > 0.008:
        c = 2
    else:
        # Only reachable when every comparison is false (e.g. a NaN key).
        raise ValueError(
            'cannot classify {!r}: period key is {!r}'.format(filename, key))

    # The detector's return value is not used here; it is called for the
    # figure it is expected to draw before plt.show().
    anomaly_detect_func[c](value)
    plt.show()
def f(ts):
    """Return the mean absolute gap between *ts* and a one-period template.

    The series is pre-processed with ``pre_process(ts, 200)``; the template is
    the normalised output of ``holtWinters_forclass`` on that series. Every
    complete 288-sample period of the series is compared point by point with
    the first 288 template values, and the absolute differences are averaged.
    The processed series, the template and the score are printed before the
    score is returned.

    Args:
        ts: Sequence of numeric samples.

    Returns:
        The average absolute difference over all complete periods.
    """
    period = 288
    series = pre_process(np.array(ts), 200)
    template = normalize(holtWinters_forclass(series))

    cycles = int(len(series) / period)
    total = 0
    # Walk period by period so the accumulation order stays sequential.
    for base in range(0, cycles * period, period):
        for offset in range(period):
            total += abs(series[base + offset] - template[offset])
    score = total / (cycles * period)

    print(series)
    print(template)
    print(score)
    return score
def get_seasonal_diff(ts, T=288, _normalize=True):
    """Find the constant trend level that best fits ``season + level`` to *ts*.

    The series is pre-processed and decomposed with ``seasonal_decompose``.
    One hundred candidate levels, evenly spaced from ``min(ts)`` towards
    ``max(ts)``, are each scored with ``get_abs_diff(ts, season, level)``; the
    candidate with the smallest absolute difference wins (the first one on
    ties).

    Args:
        ts: Sequence of numeric samples.
        T: Season length handed to ``seasonal_decompose``.
        _normalize: Forwarded to ``pre_process``.

    Returns:
        ``(diff, season, abs_diff, trend)``: the difference series and the
        absolute difference reported by ``get_abs_diff`` for the winning
        level, the seasonal component, and the winning level itself.
    """
    ts = pre_process(np.array(ts), _normalize=_normalize)
    # NOTE(review): newer statsmodels releases name this keyword `period`;
    # confirm the installed version still accepts `freq`.
    res = seasonal_decompose(ts, freq=T)
    season = res.seasonal

    start, end = min(ts), max(ts)
    levels = [start + j * (end - start) / 100 for j in range(100)]

    _abs, diff = [], []
    for level in levels:
        abs_total, residual = get_abs_diff(ts, season, level)
        _abs.append(abs_total)
        diff.append(residual)

    # Plain arg-min (first smallest score). This does not depend on a
    # magnitude-based starting bound, so it also works when max(ts) <= 0.
    ind = min(range(len(_abs)), key=_abs.__getitem__)

    # Report the level that actually produced diff[ind] / _abs[ind], so that
    # `season + trend` is consistent with the returned difference series.
    return (diff[ind], season, _abs[ind], levels[ind])
def anomaly_stable_diff(ts):
    """Flag anomalies in *ts* against a seasonal baseline and plot them.

    The baseline is ``season + trend`` from
    ``get_seasonal_diff(ts, _normalize=False)``. For each of the 288 positions
    within a period, the residuals of the pre-processed series over all
    complete periods are ranked by magnitude, the largest third (rounded up)
    is dropped, and the standard deviation and mean of the rest are kept.

    A raw sample is marked ``1`` when its residual, after subtracting that
    position's mean, exceeds three standard deviations *and* ``find_time``
    returns more than 12 in both directions (``1`` and ``-1``); otherwise it
    is marked ``0``. Only the first 288 entries of ``season`` are used when
    testing individual samples.

    A new figure titled ``'Stable'`` is drawn with the series, the baseline
    and the flagged points; it is neither shown nor saved here.

    Args:
        ts: Indexable sequence of numeric samples.

    Returns:
        A list of 0/1 flags, one per sample of *ts*.
    """
    period = 288
    mul = 3

    _, season, _, trend = get_seasonal_diff(ts, _normalize=False)
    value = pre_process(ts, _normalize=False)

    # Per-position statistics over the complete periods.
    cycles = len(ts) // period
    sigma, diff_mean = [], []
    for phase in range(period):
        residuals = [
            value[cycle * period + phase] - season[phase] - trend
            for cycle in range(cycles)
        ]
        ranked = sorted(residuals, key=abs)
        # (-n) // 3 == -ceil(n / 3): drop the largest third, rounded up.
        kept = ranked[:(-len(ranked)) // 3]
        sigma.append(np.std(kept))
        diff_mean.append(np.mean(kept))

    anomaly = []
    for position in range(len(ts)):
        phase = position % period
        sample = ts[position]
        deviation = abs(sample - season[phase] - trend - diff_mean[phase])
        # Short-circuit order matters: find_time is only consulted for
        # samples that already exceed the sigma band.
        flagged = (
            deviation > mul * sigma[phase]
            and find_time(sample, phase, season + trend, sigma, diff_mean, 1) > 12
            and find_time(sample, phase, season + trend, sigma, diff_mean, -1) > 12
        )
        anomaly.append(1 if flagged else 0)

    anomaly_x, anomaly_y = _getanomaly_graph(anomaly, ts)
    plt.figure()
    plt.scatter(anomaly_x, anomaly_y, marker='x', s=10, c='red')
    plt.plot(ts)
    plt.title('Stable')
    plt.plot(season + trend, linewidth=1)
    return anomaly