forked from QGDM2018/utfp
/
runex.py
92 lines (79 loc) · 3.27 KB
/
runex.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
from pre_process import PreProcessor, get_testroad_adjoin, pd, np
import matplotlib.pyplot as plt
from model.ARMA import predict
from feature_en import FeatureEn
import tqdm
from model.AP import ap_predict
plt.rcParams['font.sans-serif'] = ['Simhei']
plt.rcParams['axes.unicode_minus'] = False
def arma_ex(term='first'):
    """Forecast every test crossroad with the ARMA model and dump each result to CSV.

    ``get_testroad_adjoin`` supplies a mapping ``pre_id -> instand_id``. For each
    pair, the flow series of ``instand_id`` is passed to ``predict``; the forecast
    is cleaned of NaNs, truncated to whole numbers, restricted to timestamps whose
    minute is >= 30, and written to one ``pred_<pre_id>.csv`` file under the
    ``data/tmp/<term>`` directory (Windows-style relative path) with the columns
    ``date, crossroadID, timeBegin, value``.

    :param term: competition stage ('first' = preliminary round, 'final' = second
        round); passed to ``PreProcessor`` and used as the output sub-directory
    :return: None, the forecasts are only written to disk
    """
    prp = PreProcessor(term)  # data manager
    pre_mapping = get_testroad_adjoin(prp)
    columns = ['date', 'crossroadID', 'timeBegin', 'value']
    for pre_id, instand_id in tqdm.tqdm(pre_mapping.items()):
        try:
            pred_pre = predict(prp.get_roadflow_by_road(instand_id))
        except Exception as e:
            # Best effort: a crossroad whose model cannot be fitted is reported
            # and skipped so the remaining crossroads are still processed.
            print(instand_id, end='\t')
            print(e)
            continue
        # Truncate toward zero in one vectorised step (same rounding as int()).
        # For a float forecast the dtype stays float, so the CSV formatting is
        # unchanged. The former per-element int(Series) call is deprecated in
        # recent pandas releases.
        pred_pre = np.trunc(pred_pre.dropna(axis=0, how="any"))
        pred = pd.DataFrame(pred_pre.values, columns=['value'])
        pred['timestamp'] = pred_pre.index
        pred['date'] = pred['timestamp'].apply(lambda x: x.strftime('%d'))
        pred['timeBegin'] = pred['timestamp'].apply(lambda x: x.strftime('%H:%M'))
        # NOTE(review): the file is named after pre_id but the crossroadID column
        # holds instand_id - kept as in the original, confirm this is intended.
        pred['crossroadID'] = instand_id
        minutes = pred['timestamp'].apply(lambda x: int(x.strftime('%M')))
        # Keep only the second half of each hour and only the output columns,
        # selected in a single step to avoid mutating a filtered copy in place.
        pred = pred.loc[minutes >= 30, columns]
        pred.to_csv(r'data\tmp\{}\pred_{}.csv'.format(term, pre_id), index=False)
def ap(f):
    """
    Clustering step (presumably affinity propagation, via ``ap_predict``).

    :param f: FeatureEn; only its ``similarity_matrix()`` method is used here
    :return: ``(id_type, center_id)``:
        ``id_type`` is a dict mapping every entry of the index returned by
        ``similarity_matrix()`` to its cluster label;
        ``center_id`` is a dict mapping 1-based positions to the cluster-center
        indices returned by ``ap_predict``
    """
    similarity, ids = f.similarity_matrix()
    centers, labels = ap_predict(similarity)
    # Pair each id with its label, and number the cluster centers from 1.
    id_type = {key: label for key, label in zip(ids, labels)}
    center_id = dict(enumerate(centers, start=1))
    print(center_id, id_type)
    return id_type, center_id
def regression_ex(term='final'):
    """Fit a linear regression on the training data and predict the test set.

    The data comes from ``PreProcessor(term).load_traindata()``, which returns
    ``(train_x, train_y, test_x)``. Only the first column of the feature frames
    is used as the regressor.

    :param term: competition stage ('first' = preliminary round, 'final' = second
        round); passed to ``PreProcessor``
    :return: the predictions of the fitted ``LinearRegression`` for ``test_x``
    """
    # Imported locally, as in the original, so sklearn is only needed for this run.
    from sklearn.linear_model import LinearRegression

    prp = PreProcessor(term)  # data manager
    train_x, train_y, test_x = prp.load_traindata()

    # Train the model on the first feature column only.
    lr = LinearRegression()
    lr.fit(train_x.iloc[:, 0:1].values, train_y)

    # Predict with the same single column the model was fitted on. Passing the
    # whole frame fails with a feature-count mismatch whenever test_x has more
    # than one column. NOTE(review): assumes test_x shares train_x's column
    # order - confirm against load_traindata().
    test_y = lr.predict(test_x.iloc[:, 0:1].values)
    return test_y
if __name__ == '__main__':
    # Competition stage: 'first' = preliminary round, 'final' = second round.
    term = 'final'

    # Data manager and feature-engineering helper for the selected stage.
    data_manager = PreProcessor(term)
    feature_engine = FeatureEn(data_manager)

    # Other experiments that can be run from here instead (currently disabled):
    # the ARMA time-series model via arma_ex(term), and clustering via
    # ap(feature_engine).
    regression_ex(term)