-
Notifications
You must be signed in to change notification settings - Fork 0
/
util_Runner_Model.py
82 lines (71 loc) · 2.16 KB
/
util_Runner_Model.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
import numpy as np
from sklearn.metrics import precision_recall_fscore_support as pr
from math import floor
# Z-score threshold predicates: True when |x| meets the sigma cutoff.
# Written as defs rather than lambda assignments (PEP 8 E731); the names,
# call signatures, and results are unchanged for existing callers.
def more_than_three(x):
    """Return True when abs(x) >= 3.0 (three-sigma outlier)."""
    return abs(x) >= 3.0

def more_than_two(x):
    """Return True when abs(x) >= 2.0 (two-sigma outlier)."""
    return abs(x) >= 2.0

def more_than_one(x):
    """Return True when abs(x) >= 1.0 (one-sigma outlier)."""
    return abs(x) >= 1.0

# Fallback settings consulted by split_time_series when the caller's kwargs
# dict omits a key.
kwargDefaults = {
    'splitLength': 36,  # samples per chunk
    'numSplits': 10     # NOTE(review): not read anywhere in this chunk — confirm use before removing
}
def split_time_series(data, kwargs=None, backwards=True):
    '''
    Split a sequence into consecutive chunks of 'splitLength' samples.

    PARAMS:
        data: sequence to split (list or array); may be empty.
        kwargs: optional dict of options; honors 'splitLength'
            (default taken from kwargDefaults).
        backwards: when True, chunks are aligned to the END of the series,
            so any short remainder chunk comes FIRST; when False they are
            aligned to the start and the remainder comes last. Either way
            the chunks are returned in chronological order.
    RETURNS:
        Array of split data (list of slices of `data`).
    '''
    # Fix: the original signature used `kwargs={}` — a shared mutable default
    # dict reused across every call. A None sentinel is the safe equivalent.
    if kwargs is None:
        kwargs = {}
    splitLength = kwargs.get('splitLength', kwargDefaults['splitLength'])
    out = []
    if backwards:
        # Walk from the end so the leftover partial chunk lands at the front,
        # then reverse to restore chronological order.
        # `range` (was Py2-only `xrange`) behaves identically here on 2 and 3.
        for i in range(len(data), 0, -splitLength):
            out.append(data[max(i - splitLength, 0):i])
        out.reverse()
        return out
    else:
        len_ = len(data)
        for i in range(0, len_, splitLength):
            out.append(data[i:min(i + splitLength, len_)])
        return out
# def split_time_series(data, kwargs):
# '''
# uses least squares to find optimal splits
# RETURNS:
# Array of split data
# '''
# raise NotImplementedError
# for i in range(1,self.dataLength-1):
# splitVar = np.std(regressorData[0:i])+np.std(regressorData[i:dataLength])
def level_outlier_proportion(raw_data, predicted_data, check_fn=more_than_three):
    '''
    Fraction of predicted values that are level outliers relative to the
    raw data's per-chunk distribution.

    Both series are split with split_time_series (default chunking); within
    each chunk of at least 12 samples, predicted values are z-scored against
    the raw chunk's mean/std and counted when check_fn flags them.

    PARAMS:
        raw_data: observed series (same length as predicted_data).
        predicted_data: model output series.
        check_fn: predicate on a z-score; defaults to the 3-sigma test.
    RETURNS:
        float in [0, 1] — outliers / samples considered. Returns 0.0 when
        no chunk reaches 12 samples (the original raised ZeroDivisionError).
    '''
    assert len(raw_data) == len(predicted_data)
    raw_arrays = split_time_series(raw_data)
    pred_arrays = split_time_series(predicted_data)
    count_ = 0
    len_ = 0
    for (raw_array, pred_array) in zip(raw_arrays, pred_arrays):
        sublen_ = len(raw_array)
        # Chunks shorter than 12 samples are skipped as statistically weak.
        if sublen_ >= 12:
            mean = np.mean(raw_array)
            std = np.std(raw_array)
            # NOTE(review): a constant raw chunk gives std == 0 and the
            # z-scores become inf/nan (numpy warns, does not raise) — confirm
            # whether constant chunks should also be skipped.
            normDist = lambda x: (x - mean) / std
            # sum of booleans == count of values flagged by check_fn
            count_ += sum(map(check_fn, map(normDist, pred_array)))
            len_ += sublen_
    # Fix: guard the empty case so short inputs don't divide by zero.
    if len_ == 0:
        return 0.0
    return count_ * 1.0 / len_
def change_outlier_proportion(raw_data, predicted_data, check_fn=more_than_three):
    '''
    Fraction of predicted first differences that are outliers relative to
    the raw series' per-chunk difference distribution.

    Same procedure as level_outlier_proportion, but applied to the
    first-differenced series (np.ediff1d), i.e. it flags anomalous CHANGES
    rather than anomalous levels.

    PARAMS:
        raw_data: observed series (same length as predicted_data).
        predicted_data: model output series.
        check_fn: predicate on a z-score; defaults to the 3-sigma test.
    RETURNS:
        float in [0, 1] — outliers / samples considered. Returns 0.0 when
        no chunk reaches 12 samples (the original raised ZeroDivisionError).
    '''
    assert len(raw_data) == len(predicted_data)
    raw_arrays = split_time_series(np.ediff1d(raw_data))
    pred_arrays = split_time_series(np.ediff1d(predicted_data))
    count_ = 0
    len_ = 0
    for (raw_array, pred_array) in zip(raw_arrays, pred_arrays):
        sublen_ = len(raw_array)
        # Chunks shorter than 12 samples are skipped as statistically weak.
        if sublen_ >= 12:
            mean = np.mean(raw_array)
            std = np.std(raw_array)
            # NOTE(review): constant-difference chunks give std == 0 and
            # inf/nan z-scores (numpy warns, does not raise) — confirm intent.
            normDist = lambda x: (x - mean) / std
            # sum of booleans == count of values flagged by check_fn
            count_ += sum(map(check_fn, map(normDist, pred_array)))
            len_ += sublen_
    # Fix: guard the empty case so short inputs don't divide by zero.
    if len_ == 0:
        return 0.0
    return count_ * 1.0 / len_
# def count_outliers(data, mean, std, df):
# if df>30:
# else:
# map(dist)