def predict(filename, n, max_series=20):
    """Forecast the next values of each series with a statsmodels ARMA model.

    Every row returned by ``file_op.read_file`` is treated as one series; the
    first element of the row is skipped (presumably a label -- confirm against
    the data files). The model order comes from ``get_order``.

    :param filename: name of the file to read the series from
    :param n: number of values to forecast for each series
    :param max_series: maximum number of rows to process; defaults to 20, the
        limit that was previously hard-coded
    :return: list with one entry per processed row, either the forecast
        restored by ``re_diff_cal`` or a string explaining why no forecast
        was produced
    """
    rows = file_op.read_file(filename)
    forecasts = []
    for idx, row in enumerate(rows):
        if idx == max_series:
            break
        print('_________the %dth line_______________' % (idx + 1))
        series = detect_op.turn_to_np_float_64(row[1:], 1)
        model_order = get_order(series)

        # A string result is passed through as the reason for not predicting.
        # It has to be tested before the ``False`` check: a str "is not False",
        # so the old ordering indexed the string as if it were an order tuple.
        if isinstance(model_order, str):
            forecasts.append(model_order)
            continue
        # ``False`` used to crash on the debug print below. Record a
        # placeholder instead so entries stay aligned with the input rows
        # (get_result pairs forecasts with verification rows by position).
        if model_order is False:
            forecasts.append('Can not get good order.')
            continue

        # model_order[0] is the order triple (index 0 -> AR, index 2 -> MA; the
        # middle element is not used here), model_order[1] is the series that
        # is fitted, model_order[2] is what re_diff_cal needs to restore the
        # forecast.
        order = model_order[0]
        fit_values = model_order[1]
        last_elem = model_order[2]
        print('the order is ', order)

        data = pandas.Series(fit_values)
        data.index = pandas.date_range('2001-01-01', periods=len(data))
        try:
            model = sm.tsa.ARMA(data, (order[0], order[2])).fit(
                disp=-1, maxiter=20)
            raw_forecast = model.forecast(n)[0]
            forecasts.append(re_diff_cal(last_elem, raw_forecast))
        except (ValueError, numpy.linalg.LinAlgError):
            # Same failure modes brute_predict tolerates when fitting.
            forecasts.append('Can not get good order.')
    return forecasts
def brute_predict(file, n, ratio=1, max_lag=10, max_series=20):
    """Brute-force search for a workable ARMA order, then forecast.

    For each row read from ``file`` the series is made stationary with
    ``get_stationary``, split by ``slice_li`` into a fitting part and a
    validation part, and every ``(p, q)`` with ``p, q < max_lag`` is tried.
    The order whose validation forecast has the smallest mean square error is
    refitted and used to forecast ``n`` values.

    :param file: name of the file to read the series from
    :param n: number of values to forecast for each series
    :param ratio: forwarded to ``slice_li`` as ``10 - ratio`` (presumably the
        number of tenths held out for validation -- confirm against slice_li)
    :param max_lag: exclusive upper bound for both the AR and MA order tried;
        defaults to 10, the value that was previously hard-coded
    :param max_series: maximum number of rows to process; defaults to 20, the
        limit that was previously hard-coded
    :return: list with one entry per processed row: the forecast restored by
        ``re_diff_cal``, the second value returned by ``get_stationary`` when
        no stationary list was produced, or ``'Can not get good order.'``
    """
    rows = file_op.read_file(file)
    results = []
    for idx, row in enumerate(rows):
        if idx == max_series:
            break
        series = detect_op.turn_to_np_float_64(row[1:], 1)
        sta_data, dif_elem = get_stationary(series)
        if not isinstance(sta_data, list):
            # No stationary list came back; dif_elem is recorded as-is
            # (presumably the reason for the failure -- confirm).
            results.append(dif_elem)
            continue

        fit_part, val_data = slice_li(sta_data, 10 - ratio, False)
        data = pandas.Series(fit_part)
        data.index = pandas.date_range('2001-01-01', periods=len(data))

        orders, candidates = _collect_arma_candidates(
            data, len(val_data), max_lag)
        if not orders:
            results.append('Can not get good order.')
            continue

        # Pick the order whose validation forecast has the smallest MSE.
        errors = [model_evaluation.mean_square_error(candidate, val_data)
                  for candidate in candidates]
        best = get_min_li(errors)[1]
        try:
            # NOTE(review): the final model is refitted on the fitting part
            # only, exactly as before, not on the full stationary series.
            final_model = sm.tsa.ARMA(data, orders[best]).fit(
                disp=-1, maxiter=20)
        except (ValueError, numpy.linalg.LinAlgError):
            results.append('Can not get good order.')
        else:
            results.append(re_diff_cal(dif_elem, final_model.forecast(n)[0]))
    return results


def _collect_arma_candidates(data, horizon, max_lag):
    """Fit ARMA(p, q) for all p, q in ``range(max_lag)``; keep the ones that work.

    :param data: series to fit
    :param horizon: number of steps to forecast with each fitted model
    :param max_lag: exclusive upper bound for both p and q
    :return: ``(orders, forecasts)``, two parallel lists holding each order
        that could be fitted and its ``horizon``-step forecast
    """
    orders = []
    forecasts = []
    for p in range(max_lag):
        for q in range(max_lag):
            try:
                fitted = sm.tsa.ARMA(data, (p, q)).fit(disp=-1, maxiter=10)
                forecast = fitted.forecast(horizon)[0]
            except (numpy.linalg.LinAlgError, ValueError):
                # This order cannot be fitted to the data; try the next one.
                continue
            # Record each usable order exactly once. The previous code
            # appended both in the try body and again in its else clause.
            orders.append((p, q))
            forecasts.append(forecast)
    return orders, forecasts
def get_result(pre, ver_fil):
    """Score each forecast against the verification data.

    String entries of ``pre`` (reasons why no forecast was made) are passed
    through unchanged; every other entry is replaced by its mean square error
    against the verification row at the same position.

    :param pre: sequence of forecasts and/or reason strings
    :param ver_fil: name of the verification file
    :return: list of mean square errors and reason strings, in the order of
        ``pre``
    """
    expected_rows = file_op.read_file(ver_fil, is_num=1, del_first=1)
    return [
        entry if isinstance(entry, str)
        else model_evaluation.mean_square_error(entry, expected_rows[pos])
        for pos, entry in enumerate(pre)
    ]
def write_file_to_db(path, db_name):
    """Write every file found one level below ``path`` through ``db_name``.

    ``path`` is listed with ``file_op.each_file_or_dir_name``; each entry is
    listed again, and every name from that second listing is read, converted
    with ``source.Source.turn_col_to_list`` and written item by item.

    :param path: top-level location whose entries are listed
    :param db_name: object exposing ``write_points``; despite the name it is
        used as a client object, not a string (presumably an InfluxDB client
        -- confirm with the caller)
    """
    for entry in file_op.each_file_or_dir_name(path):
        for file_path in file_op.each_file_or_dir_name(entry):
            raw_rows = file_op.read_file(file_path)
            base_name = file_path.split('/')[-1]
            points = source.Source.turn_col_to_list(raw_rows, base_name)
            # Drop our reference to the raw rows straight away; the original
            # passed them as a temporary and never kept them alive.
            del raw_rows
            print('Now write ' + base_name + ' .')
            for point in points:
                db_name.write_points(point)
            print('Done.')
            # Release the converted data before the next file is loaded.
            del points
10930, 10318, 10595, 10972, 7706, 6756, 9092, 10551, 9722, 10913, 11151, 8186, 6422, 6337, 11649, 11652, 10310, 12043, 7937, 6476, 9662, 9570, 9981, 9331, 9449, 6773, 6304, 9355, 10477, 10148, 10395, 11261, 8713, 7299, 10424, 10795, 11069, 11602, 11427, 9095, 7707, 10767, 12136, 12812, 12006, 12528, 10329, 7818, 11719, 11683, 12603, 11495, 13670, 11337, 10232, 13261, 13230, 15535, 16837, 19598, 14823, 11622, 19391, 18177, 19994, 14723, 15694, 13248, 9543, 12872, 13101, 15053, 12619, 13749, 10228, 9725, 14729, 12518, 14564, 15085, 14722, 11999, 9390, 13481, 14795, 15845, 15271, 14686, 11054, 10395 ] # li3 = detect_op.turn_to_np_float_64(li2, 1) filename1 = 'F:/database_needed/UCR_TS_Archive_2015/50words/50words_TRAIN' # pre1 = predict(filename1, 2) # print(pre1) li_data1 = file_op.read_file(filename1) a_data0 = detect_op.turn_to_np_float_64(li_data1[0][1:], 1) # brute get order # test_data, val_data = slice_li(a_data0, 2) # data1 = pandas.Series(test_data) # data1.index = pandas.date_range('2001-01-01', periods=len(data1)) # max_lag = 5 # tmp_pre = [] # tmp_ord = [] # for k in range(max_lag): # for m in range(max_lag): # if k != 0 or m != 0: # # print(k, m) # try: # model = sm.tsa.ARMA(data1, (k, m)).fit(disp=-1, maxiter=5)