コード例 #1
0
def predict(filename, n):
    """Forecast every series in a file with a statsmodels ARMA model.

    Only the first 20 rows returned by ``file_op.read_file`` are processed.
    For each row the first element is dropped, the remainder is converted
    with ``detect_op.turn_to_np_float_64`` and handed to ``get_order``.

    The result list always holds exactly one entry per processed row, so the
    positions stay aligned with the rows of the input file.

    :param filename: name of the data file to read
    :param n: number of values to forecast for each series
    :return: list with one entry per processed row; either the value
        returned by ``re_diff_cal`` for the forecast, or a string explaining
        why no forecast was produced
    """
    li_data = file_op.read_file(filename)
    prd = []
    for j, i in enumerate(li_data):
        if j == 20:
            break
        print('_________the %dth line_______________' % (j + 1))
        a_data = detect_op.turn_to_np_float_64(i[1:], 1)
        model_order = get_order(a_data)

        # A string result is a reason message: pass it through unchanged.
        # This must be tested before anything indexes into model_order,
        # because a str is also "not False" and is subscriptable.
        if isinstance(model_order, str):
            prd.append(model_order)
            continue
        # False means no order was found; indexing it would raise TypeError.
        # A placeholder is stored so the row keeps its slot in the result.
        if model_order is False:
            prd.append('Can not get good order.')
            continue

        print('the order is ', model_order[0])
        data = pandas.Series(model_order[1])
        data.index = pandas.date_range('2001-01-01', periods=len(data))
        # model_order[0][1] is deliberately unused here
        # (presumably the differencing count -- confirm against get_order).
        order_ar = model_order[0][0]
        order_ma = model_order[0][2]
        last_elem = model_order[2]  # used to restore the forecast data
        try:
            model = sm.tsa.ARMA(data, (order_ar, order_ma)).fit(disp=-1,
                                                                maxiter=20)
            predict_ordinary = model.forecast(n)[0]
            prd.append(re_diff_cal(last_elem, predict_ordinary))
        except ValueError:
            prd.append('Can not get good order.')
    return prd
コード例 #2
0
def brute_predict(file, n, ratio=1):
    """Brute-force search for a workable ARMA order, then forecast.

    Only the first 20 rows returned by ``file_op.read_file`` are processed.
    For each row the first element is dropped and the rest is passed through
    ``get_stationary``.  When that yields a list, it is split by ``slice_li``
    into a fitting part and a validation part, every ``(k, m)`` order with
    ``0 <= k, m < 10`` is tried, and the order whose validation forecast has
    the index selected by ``get_min_li`` over the mean square errors is used
    for the final fit.

    :param file: name of the data file to read
    :param n: number of values to forecast for each series
    :param ratio: ``10 - ratio`` is passed as the second argument of
        ``slice_li`` (presumably the fit/validation split -- confirm against
        ``slice_li``)
    :return: list with one entry per processed row; either the value
        returned by ``re_diff_cal`` for the final forecast, the second value
        returned by ``get_stationary`` when its first value is not a list, or
        the string ``'Can not get good order.'``
    """
    li_data = file_op.read_file(file)
    max_lag = 10
    # numpy.linalg.LinAlgError is the public name of the exception; the
    # numpy.linalg.linalg submodule path is not part of the public API.
    fit_errors = (numpy.linalg.LinAlgError, ValueError)
    rlt = []
    for j, i in enumerate(li_data):
        if j == 20:
            break
        a_data = detect_op.turn_to_np_float_64(i[1:], 1)
        sta_data, dif_elem = get_stationary(a_data)
        if not isinstance(sta_data, list):
            # No usable series: the second return value is stored instead.
            rlt.append(dif_elem)
            continue

        test_data, val_data = slice_li(sta_data, 10 - ratio, False)
        data = pandas.Series(test_data)
        data.index = pandas.date_range('2001-01-01', periods=len(data))

        # Collect one validation forecast per order that can be fitted.
        tmp_pre = []
        tmp_ord = []
        for k in range(max_lag):
            for m in range(max_lag):
                try:
                    model = sm.tsa.ARMA(data, (k, m)).fit(disp=-1,
                                                          maxiter=10)
                    pre = model.forecast(len(val_data))[0]
                except fit_errors:
                    continue
                # Record each successful order exactly once.
                tmp_pre.append(pre)
                tmp_ord.append((k, m))

        if not tmp_pre:  # no order could be fitted at all
            rlt.append('Can not get good order.')
            continue

        # Score every candidate, then pick the best one a single time.
        tmp_mse = [model_evaluation.mean_square_error(cand, val_data)
                   for cand in tmp_pre]
        o = get_min_li(tmp_mse)[1]  # index of the group with the small MSE

        # NOTE(review): the final model is fitted on the fitting part only
        # (test_data), not on the whole stationary series -- confirm intended.
        try:
            fin_mod = sm.tsa.ARMA(data, tmp_ord[o]).fit(disp=-1, maxiter=20)
        except fit_errors:
            rlt.append('Can not get good order.')
        else:
            fin_pre = fin_mod.forecast(n)[0]
            rlt.append(re_diff_cal(dif_elem, fin_pre))
    return rlt
コード例 #3
0
def get_result(pre, ver_fil):
    """Build a list of mean-square-error scores and no-forecast reasons.

    Each entry of ``pre`` that is a string is copied through unchanged; every
    other entry is scored against the row at the same position of the
    verification file with ``model_evaluation.mean_square_error``.

    :param pre:     the forecast sequences (or reason strings)
    :param ver_fil: name of the verification file
    :return: list with one score or reason string per entry of ``pre``
    """
    expected_rows = file_op.read_file(ver_fil, is_num=1, del_first=1)
    return [
        entry if isinstance(entry, str)
        else model_evaluation.mean_square_error(entry, expected_rows[pos])
        for pos, entry in enumerate(pre)
    ]
コード例 #4
0
def write_file_to_db(path, db_name):
    """Write the contents of every file two levels below ``path`` to a database.

    ``file_op.each_file_or_dir_name`` is applied to ``path`` and then to each
    name it returns; every resulting file is read, converted with
    ``source.Source.turn_col_to_list`` and written item by item through
    ``db_name.write_points``.

    :param path:    top-level location to walk
    :param db_name: object exposing ``write_points`` (despite the parameter
        name it is used as a client object, not as a string)
    """
    for sub_dir in file_op.each_file_or_dir_name(path):
        for file_path in file_op.each_file_or_dir_name(sub_dir):
            raw_rows = file_op.read_file(file_path)
            base_name = file_path.split('/')[-1]
            batches = source.Source.turn_col_to_list(raw_rows, base_name)
            print('Now write ' + base_name + ' .')
            for batch in batches:
                db_name.write_points(batch)
            print('Done.')
            # Drop the references before the next file is loaded.
            del raw_rows, batches
コード例 #5
0
        10930, 10318, 10595, 10972, 7706, 6756, 9092, 10551, 9722, 10913,
        11151, 8186, 6422, 6337, 11649, 11652, 10310, 12043, 7937, 6476, 9662,
        9570, 9981, 9331, 9449, 6773, 6304, 9355, 10477, 10148, 10395, 11261,
        8713, 7299, 10424, 10795, 11069, 11602, 11427, 9095, 7707, 10767,
        12136, 12812, 12006, 12528, 10329, 7818, 11719, 11683, 12603, 11495,
        13670, 11337, 10232, 13261, 13230, 15535, 16837, 19598, 14823, 11622,
        19391, 18177, 19994, 14723, 15694, 13248, 9543, 12872, 13101, 15053,
        12619, 13749, 10228, 9725, 14729, 12518, 14564, 15085, 14722, 11999,
        9390, 13481, 14795, 15845, 15271, 14686, 11054, 10395
    ]
    # li3 = detect_op.turn_to_np_float_64(li2, 1)

    filename1 = 'F:/database_needed/UCR_TS_Archive_2015/50words/50words_TRAIN'
    # pre1 = predict(filename1, 2)
    # print(pre1)
    li_data1 = file_op.read_file(filename1)
    a_data0 = detect_op.turn_to_np_float_64(li_data1[0][1:], 1)

    # brute get order
    # test_data, val_data = slice_li(a_data0, 2)
    # data1 = pandas.Series(test_data)
    # data1.index = pandas.date_range('2001-01-01', periods=len(data1))
    # max_lag = 5
    # tmp_pre = []
    # tmp_ord = []
    # for k in range(max_lag):
    #     for m in range(max_lag):
    #         if k != 0 or m != 0:
    #             # print(k, m)
    #             try:
    #                 model = sm.tsa.ARMA(data1, (k, m)).fit(disp=-1, maxiter=5)