def get_daykind_curve(filename, value_index=1):
    """Classify one CSV series, run the matching detector and save the results.

    The series is read from column ``value_index`` of ``filename``. Its
    ``Bf_period`` score selects a class: ``<= 0.005`` -> 0,
    ``(0.005, 0.008]`` -> 1, ``> 0.008`` -> 2. For class 2 the stored model
    in ``curve_model.pkl`` refines the choice, and the final class is the
    model's prediction plus one.

    Side effects: prints ``filename`` and the class, saves the current
    matplotlib figure under the file name with its extension removed, and
    writes ``<name>_anomaly.csv`` containing the input data plus an
    ``is_anomaly`` column.

    Args:
        filename: Path of the CSV file to analyse.
        value_index: Position of the column holding the series values.

    Raises:
        ValueError: If the period score cannot be compared with the
            thresholds (for example when it is NaN).
    """
    import os

    data = pd.read_csv(filename)
    value = np.array(data[data.columns[value_index]])
    key = Bf_period(pre_process(value))

    # A single chain guarantees ``c`` is always bound; a NaN score used to
    # fall through every test and surface later as an UnboundLocalError.
    if key <= 0.005:
        c = 0
    elif key <= 0.008:
        c = 1
    elif key > 0.008:
        c = 2
    else:
        raise ValueError(
            'cannot classify {!r}: period score is {!r}'.format(filename, key))

    if c == 2:
        model = joblib.load('curve_model.pkl')
        ts = pre_process(value)
        feature = pd.DataFrame()
        # Sorted names keep the feature order deterministic.
        for featurename in sorted(feature_dict):
            feature[featurename] = [feature_dict[featurename](ts)]
        p = model.predict([np.array(feature.iloc[0])])
        # predict() returns an array; take its single element explicitly
        # instead of converting the whole array with int().
        c = int(np.ravel(p)[0] + 1)

    print(filename, c)
    res = anomaly_detect_func[c](value)

    # Strip the real extension rather than assuming it is 4 characters long.
    stem = os.path.splitext(filename)[0]
    plt.savefig(stem, dpi=500)
    plt.close()
    data['is_anomaly'] = res
    data.to_csv(stem + '_anomaly.csv', index=False)
Ejemplo n.º 2
0
def visual(filename_list, pred, index=1, Length=3000):
    """Save one figure per cluster with a thumbnail plot of every member series.

    For each class ``c`` in ``range(cluster_num)`` (``cluster_num`` is read
    from the enclosing module scope) a figure is created with a square grid
    of subplots. Each subplot shows the raw series and the result of
    ``pre_process(value, 200, _normalize=False)``. Figures are saved as
    ``Figure_for_Class<c>`` and are left open.

    Args:
        filename_list: CSV paths, aligned position by position with ``pred``.
        pred: Cluster label of each file.
        index: Position of the value column inside each CSV.
        Length: Maximum number of leading rows plotted per file.
    """
    members_by_class = [
        [pos for pos, label in enumerate(pred) if label == c]
        for c in range(cluster_num)
    ]

    for class_id, members in enumerate(members_by_class):
        plt.figure()
        plt.suptitle('Class: "{}"'.format(class_id))
        # subplot() needs integer grid sizes; math.sqrt() alone yields a
        # float, which newer Matplotlib releases reject.
        grid = int(math.sqrt(len(members))) + 1
        for slot, file_pos in enumerate(members, start=1):
            data = pd.read_csv(filename_list[file_pos])
            if len(data) > Length:
                data = data[:Length]
            value = np.array(data[data.columns[index]])
            plt.subplot(grid, grid, slot)
            plt.xticks([])
            plt.yticks([])
            plt.plot(value)
            plt.plot(pre_process(value, 200, _normalize=False))
        plt.savefig('Figure_for_Class{}'.format(class_id), dpi=600)
Ejemplo n.º 3
0
def visual_withseason(filename_list, pred, cluster_num, index=1, Length=3000):
    """Save one figure per cluster showing each series with its seasonal fit.

    Every member series is passed through ``pre_process`` and plotted in its
    own subplot, overlaid with ``season + trend`` as returned by
    ``get_seasonal_diff``. Figures are saved as ``Figure_for_Class<c>`` and
    are left open.

    Args:
        filename_list: CSV paths, aligned position by position with ``pred``.
        pred: Cluster label of each file.
        cluster_num: Number of clusters; labels ``0 .. cluster_num - 1`` are drawn.
        index: Position of the value column inside each CSV.
        Length: Maximum number of leading rows used per file.
    """
    members_by_class = [
        [pos for pos, label in enumerate(pred) if label == c]
        for c in range(cluster_num)
    ]

    for class_id, members in enumerate(members_by_class):
        plt.figure()
        plt.suptitle('Class: "{}"'.format(class_id))
        # subplot() needs integer grid sizes; math.sqrt() alone yields a
        # float, which newer Matplotlib releases reject.
        grid = int(math.sqrt(len(members))) + 1
        for slot, file_pos in enumerate(members, start=1):
            data = pd.read_csv(filename_list[file_pos])
            if len(data) > Length:
                data = data[:Length]
            value = pre_process(np.array(data[data.columns[index]]))
            plt.subplot(grid, grid, slot)
            plt.xticks([])
            plt.yticks([])
            plt.plot(value, linewidth=0.5)
            _, season, _, trend = get_seasonal_diff(value)
            plt.plot(season + trend, linewidth=0.5)
        plt.savefig('Figure_for_Class{}'.format(class_id), dpi=1000)
Ejemplo n.º 4
0
def load_data(data, seq_len, ratio, pred_len):
    """Scale a series and cut it into shuffled train / ordered test windows.

    The scaling bounds ``down`` and ``up`` are the min and max of
    ``pre_process(data, _normalize=False)``; the raw values are then mapped
    with ``(x - down) / (up - down)``. Sliding windows of length
    ``seq_len + pred_len`` are built, the first ``round(ratio * n_windows)``
    windows form the training set (rows shuffled in place with
    ``np.random.shuffle``) and the rest form the test set.

    Args:
        data: Iterable of values convertible to ``float``.
        seq_len: Number of input steps per window.
        ratio: Fraction of windows assigned to the training set.
        pred_len: Number of target steps per window.

    Returns:
        ``(x_train, y_train, x_test, y_test, down, up)`` where the ``x``
        arrays have a trailing axis of size 1.

    Raises:
        ZeroDivisionError: If ``up == down``.
    """
    data = list(data)
    smoothed = pre_process(data, _normalize=False)
    down = float(min(smoothed))
    up = float(max(smoothed))

    # Min-max scale the raw values using the bounds of the smoothed series.
    scaled = [float((float(raw) - down) / (up - down)) for raw in data]

    window = seq_len + pred_len
    result = np.array(
        [scaled[begin:begin + window] for begin in range(len(scaled) - window)]
    )

    split = int(round(ratio * result.shape[0]))

    # Only the training rows are shuffled; the test rows keep their order.
    train = result[:split, :]
    np.random.shuffle(train)
    x_train, y_train = train[:, :-pred_len], train[:, -pred_len:]
    x_test, y_test = result[split:, :-pred_len], result[split:, -pred_len:]

    x_train = x_train.reshape((x_train.shape[0], x_train.shape[1], 1))
    x_test = x_test.reshape((x_test.shape[0], x_test.shape[1], 1))

    return (x_train, y_train, x_test, y_test, down, up)
Ejemplo n.º 5
0
def stl_decompose(ts):
    """Decompose ``ts`` with a period of 288 and display diagnostic plots.

    Prints the first three 288-sample slices of the seasonal component and
    its total length, shows the decomposition figure, then shows a second
    figure overlaying ``pre_process(ts)`` and the normalized seasonal
    component.

    Args:
        ts: Series accepted by ``seasonal_decompose``.
    """
    period = 288
    res = seasonal_decompose(ts, freq=period)
    for k in range(3):
        print(res.seasonal[k * period:(k + 1) * period])
    print(len(res.seasonal))

    # Draw the decomposition first, then show it: pyplot.show() takes only
    # the keyword-only ``block`` flag, so a Figure must not be passed to it.
    res.plot()
    plt.show()

    plt.figure()
    plt.plot(pre_process(ts))
    plt.plot(normalize(res.seasonal))
    plt.show()
Ejemplo n.º 6
0
def get_daykind_curve(filename, value_index):
    """Classify one CSV series by its period score and run the matching detector.

    The ``Bf_period`` score of the pre-processed series selects the detector
    in ``anomaly_detect_func``: ``<= 0.005`` -> 0, ``(0.005, 0.008]`` -> 1,
    ``> 0.008`` -> 2. The detector's return value is discarded; the figures
    it draws are shown with ``plt.show()``.

    Args:
        filename: Path of the CSV file to analyse.
        value_index: Position of the column holding the series values.

    Raises:
        ValueError: If the period score cannot be compared with the
            thresholds (for example when it is NaN).
    """
    data = pd.read_csv(filename)
    value = np.array(data[data.columns[value_index]])
    key = Bf_period(pre_process(value))

    # A single chain guarantees ``c`` is always bound; a NaN score used to
    # fall through every test and surface later as an UnboundLocalError.
    if key <= 0.005:
        c = 0
    elif key <= 0.008:
        c = 1
    elif key > 0.008:
        c = 2
    else:
        raise ValueError(
            'cannot classify {!r}: period score is {!r}'.format(filename, key))

    anomaly_detect_func[c](value)
    plt.show()
Ejemplo n.º 7
0
def f(ts):
    """Return the mean absolute gap between ``ts`` and a one-period template.

    The series is pre-processed with ``pre_process(ts, 200)``. The template is
    ``normalize(holtWinters_forclass(series))`` and is indexed by position
    within a 288-sample period. Only whole periods contribute to the mean.
    The processed series, the template and the result are printed before
    returning.

    Args:
        ts: Sequence of numeric values.

    Returns:
        Sum of ``abs(series[k] - template[k % 288])`` over all whole periods,
        divided by the number of samples summed.
    """
    period = 288
    series = pre_process(np.array(ts), 200)
    template = normalize(holtWinters_forclass(series))
    whole_periods = int(len(series) / period)

    total = 0
    for block in range(whole_periods):
        offset = block * period
        for slot in range(period):
            total += abs(series[offset + slot] - template[slot])
    mean_gap = total / (whole_periods * period)

    print(series)
    print(template)
    print(mean_gap)
    return mean_gap
Ejemplo n.º 8
0
def get_seasonal_diff(ts, T=288, _normalize=True):
    """Find the constant trend level that best fits the seasonal component.

    The series is pre-processed and decomposed with period ``T``. One hundred
    candidate levels, evenly spaced from ``min(ts)`` towards ``max(ts)``, are
    scored with ``get_abs_diff(ts, season, level)``, and the candidate with
    the smallest first return value wins (the earliest one on ties).

    Args:
        ts: Sequence of numeric values.
        T: Seasonal period passed to ``seasonal_decompose``.
        _normalize: Forwarded to ``pre_process``.

    Returns:
        ``(diff, season, abs_value, trend)``: the second and first results of
        ``get_abs_diff`` for the winning candidate, the seasonal component,
        and the winning level itself.
    """
    ts = pre_process(np.array(ts), _normalize=_normalize)
    season = seasonal_decompose(ts, freq=T).seasonal
    start, end = min(ts), max(ts)

    def level(step):
        # Candidate number ``step`` out of 100 between the series bounds.
        return start + step * (end - start) / 100

    candidates = [get_abs_diff(ts, season, level(j)) for j in range(100)]

    # Start from +inf so that the first comparable score always wins; the
    # former bound ``len(ts) * max(ts)`` was unusable when max(ts) <= 0.
    ind, lowest = 0, float('inf')
    for pos, (abs_value, _) in enumerate(candidates):
        if abs_value < lowest:
            lowest, ind = abs_value, pos

    abs_value, diff = candidates[ind]
    # Report the level that was actually evaluated for the winner (the old
    # code returned the level one step below it).
    return (diff, season, abs_value, level(ind))
def anomaly_stable_diff(ts):
    """Flag points that leave a per-slot band around ``season + trend``.

    For each of the 288 slots in a period, residuals of the pre-processed
    series against ``season + trend`` are gathered over all whole periods.
    The largest-magnitude third of them (rounded up) is discarded, and the
    standard deviation and mean of the rest define the band for that slot.
    A raw point is marked 1 when it lies more than three sigmas from the
    slot's centre and ``find_time`` returns more than 12 in both directions
    (``1`` and ``-1``); otherwise it is marked 0.

    A new figure is drawn with the flagged points, the raw series and
    ``season + trend``; it is neither shown nor saved here.

    Args:
        ts: Indexable sequence of numeric values.

    Returns:
        List of 0/1 flags, one per element of ``ts``.
    """
    period = 288
    mul = 3
    _, season, _, trend = get_seasonal_diff(ts, _normalize=False)
    value = pre_process(ts, _normalize=False)
    whole_periods = len(ts) // period

    sigma, diff_mean = [], []
    for slot in range(period):
        residuals = [
            value[k * period + slot] - season[slot] - trend
            for k in range(whole_periods)
        ]
        # ``-len(...) // 3`` floors the negated length, so ceil(n / 3) of the
        # largest-magnitude residuals are dropped.
        kept = sorted(residuals, key=abs)[:-len(residuals) // 3]
        sigma.append(np.std(kept))
        diff_mean.append(np.mean(kept))

    anomaly = []
    for pos in range(len(ts)):
        slot = pos % period
        gap = abs(ts[pos] - season[slot] - trend - diff_mean[slot])
        # find_time is only consulted once the point is outside the band.
        flagged = (
            gap > mul * sigma[slot]
            and find_time(ts[pos], slot, season + trend, sigma, diff_mean,
                          1) > 12
            and find_time(ts[pos], slot, season + trend, sigma, diff_mean,
                          -1) > 12
        )
        anomaly.append(1 if flagged else 0)

    anomaly_x, anomaly_y = _getanomaly_graph(anomaly, ts)
    plt.figure()
    plt.scatter(anomaly_x, anomaly_y, marker='x', s=10, c='red')
    plt.plot(ts)
    plt.title('Stable')
    plt.plot(season + trend, linewidth=1)
    return anomaly