コード例 #1
0
ファイル: Factor-Analysize-v3.py プロジェクト: rlcjj/Obsidia
 def summary_arma_estimate(self, maximum_p, date, count):
     """Score AR(p) forecasts of factor return and bias for p in [1, maximum_p).

     For every order ``p`` an ``ARMA(p, 0)`` model is fitted to the
     factor-return series and to the bias series, a one-step forecast is
     taken from each, and the implied next-day returns are compared with
     the realised ones.

     Reads the module-level names ``test_regression``, ``factor_real`` and
     ``MktData`` rather than state held on ``self``.

     Args:
         maximum_p: exclusive upper bound of the AR orders that are tried.
         date: label of the "today" row in ``factor_real`` / ``MktData``.
         count: forwarded to ``get_factor_ret_vector``.

     Returns:
         DataFrame indexed by ``p`` with columns ``weights_predict``,
         ``bias_predict``, ``loss`` (sum of squared errors) and
         ``direction_precision`` (share of entries where predicted and
         realised returns have the same sign).
     """
     factor_ret_vector, bias_vector = test_regression.get_factor_ret_vector(
         factor_real, date=date, count=count)
     orders = range(1, maximum_p)
     result_df_arma = pd.DataFrame(index=orders,
                                   columns=[
                                       'weights_predict', 'bias_predict',
                                       'loss', 'direction_precision'
                                   ],
                                   dtype='float')
     descendant = list(factor_real.columns)
     today_factor_exposure = factor_real.loc[date, :]
     # The realised return is taken from the row right after `date`.
     tommorow_index = MktData.index.get_loc(date) + 1
     tommorow_ret = MktData.loc[MktData.index[tommorow_index],
                                (descendant, 'ret')]
     tommorow_ret.index = tommorow_ret.index.droplevel(1)
     for p in orders:
         arma_weights = ARMA(factor_ret_vector, order=(p, 0)).fit(disp=1)
         # np.asscalar was removed from NumPy; .item() is its exact equivalent.
         weights_predict_arma = np.asarray(
             arma_weights.forecast(1)[0]).item()
         arma_bias = ARMA(bias_vector, order=(p, 0)).fit(disp=-1)
         bias_predict_arma = np.asarray(arma_bias.forecast(1)[0]).item()
         predict_tommorow_ret = (weights_predict_arma * today_factor_exposure
                                 + bias_predict_arma)
         loss = np.sum((predict_tommorow_ret - tommorow_ret)**2)
         direction_precision = (predict_tommorow_ret * tommorow_ret >
                                0).sum() / len(tommorow_ret)
         result_df_arma.loc[p, 'weights_predict'] = weights_predict_arma
         result_df_arma.loc[p, 'bias_predict'] = bias_predict_arma
         result_df_arma.loc[p, 'loss'] = loss
         result_df_arma.loc[p, 'direction_precision'] = direction_precision
     return result_df_arma
コード例 #2
0
 def multi_regression_arma(self, factor_ts, Mkt_ts, date, param_df,
                           maximum_p, previous_load_count):
     """Forecast a factor's slope/intercept with AR(p) models and score them.

     For each of the ``previous_load_count + 1`` rows ending at ``date`` a
     cross-sectional linear regression of that row's returns on the previous
     row's factor exposure is fitted. The resulting slope and intercept
     series are then modelled with ``ARMA(p, 0)`` for every ``p`` in
     ``[1, maximum_p)`` and the one-step forecasts are scored against the
     return of the row following ``date``.

     Args:
         factor_ts: factor exposures; rows are dates, columns are stocks.
         Mkt_ts: market data with ``(stock, 'ret')`` columns on the same dates.
         date: label of the "today" row.
         param_df: DataFrame filled in place, per ``p``, with ``weights``,
             ``intercept``, ``direction_precision`` and ``loss``.
         maximum_p: exclusive upper bound of the AR orders that are tried.
         previous_load_count: number of rows before ``date`` to regress on.

     Returns:
         None; results are written into ``param_df``.
     """
     group_stock = list(factor_ts.columns)
     today_factor_index = factor_ts.index.get_loc(date)
     today_ret_index = Mkt_ts.index.get_loc(date)
     tommorrow_ret_index = today_ret_index + 1
     today_factor_exposure = factor_ts.loc[date, :]
     tommorrow_ret = Mkt_ts.loc[Mkt_ts.index[tommorrow_ret_index],
                                (group_stock, 'ret')]
     tommorrow_ret.index = tommorrow_ret.index.droplevel(level=1)
     weights_ts = []
     intercept_ts = []
     # NOTE(review): the loop walks backwards from `date`, so both series are
     # collected newest-first before being handed to ARMA - confirm that this
     # ordering is intended.
     for factor_index, mkt_index in zip(
             range(today_factor_index,
                   today_factor_index - previous_load_count - 1, -1),
             range(today_ret_index,
                   today_ret_index - previous_load_count - 1, -1)):
         # Both frames must be aligned on this row and on the following one.
         assert Mkt_ts.index[mkt_index] == factor_ts.index[factor_index]
         assert Mkt_ts.index[mkt_index +
                             1] == factor_ts.index[factor_index + 1]
         # Regress this row's returns on the previous row's exposure.
         temp_factor_exposure = factor_ts.loc[
             factor_ts.index[factor_index - 1], group_stock].fillna(0)
         temp_mkt_ret = Mkt_ts.loc[Mkt_ts.index[mkt_index],
                                   (group_stock, 'ret')].fillna(0)
         temp_mkt_ret.index = temp_mkt_ret.index.droplevel(level=1)
         try:
             clf = LinearRegression()
             clf.fit(
                 np.array(temp_factor_exposure).reshape(-1, 1),
                 temp_mkt_ret)
             weights_ts.append(clf.coef_[0])
             intercept_ts.append(clf.intercept_)
         except ValueError:
             # Best effort: show where the inputs are missing, skip the row.
             print(temp_factor_exposure.isna())
             print(temp_mkt_ret.isna())
     for p in range(1, maximum_p):
         arma_weights = ARMA(weights_ts, order=(p, 0)).fit(disp=-1)
         # np.asscalar was removed from NumPy; .item() is its exact equivalent.
         predict_factor_ret = np.asarray(arma_weights.forecast(1)[0]).item()
         arma_intercept = ARMA(intercept_ts, order=(p, 0)).fit(disp=-1)
         predict_factor_intercept = np.asarray(
             arma_intercept.forecast(1)[0]).item()
         param_df.loc[p, 'weights'] = predict_factor_ret
         param_df.loc[p, 'intercept'] = predict_factor_intercept
         predict_ret = (predict_factor_ret * today_factor_exposure
                        + predict_factor_intercept)
         square_loss = np.sum((predict_ret - tommorrow_ret)**2)
         mul = tommorrow_ret * predict_ret
         direction_precision = (mul > 0).sum() / len(mul)
         param_df.loc[p, 'direction_precision'] = direction_precision
         param_df.loc[p, 'loss'] = square_loss
コード例 #3
0
def run_arma(original_ts, maxar=7, maxma=7):
    """Fit an ARMA model to the log of the first column and forecast 3 steps.

    The series is log-transformed and, unless ``test_stationarity`` returns a
    value below 0.01 for it, differenced with the lag chosen by ``best_diff``.
    The order comes from ``choose_order``; in-sample predictions and the
    3-step forecast are mapped back through ``predict_recover``.

    Args:
        original_ts: DataFrame whose first column is the series to model.
        maxar: forwarded to ``choose_order`` as the AR bound.
        maxma: forwarded to ``choose_order`` as the MA bound.

    Returns:
        Tuple ``(predict, forecast)`` as returned by ``predict_recover``.
    """
    print(original_ts.columns[0], 'start arma')
    original_ts_log = np.log(original_ts)
    # DataFrame.ix no longer exists in pandas; the first column is selected
    # by position with .iloc.
    if test_stationarity(original_ts_log.iloc[:, 0]) < 0.01:
        diffn = 0
        diff_original_ts_log = original_ts_log
        print('平稳,不需要差分')
    else:
        diffn = best_diff(original_ts_log, maxdiff=8)
        diff_original_ts_log = produce_diffed_timeseries(original_ts_log, diffn)
        print('差分滞后阶数为'+str(diffn)+',已完成差分')

    order = choose_order(diff_original_ts_log, maxar, maxma)
    print('模型的阶数为: ' + str(order))
    # disp must be an integer; the original passed the string '-1'.
    model = ARMA(diff_original_ts_log.iloc[:, 0], order).fit(disp=-1,
                                                             method='css')

    f = model.forecast(steps=3, alpha=0.05)[0]
    p = model.predict()
    predict = predict_recover(p, original_ts_log, diffn, 'predict')
    forecast = predict_recover(f, original_ts_log, diffn, 'forecast')

    # To inspect the fit, plot `predict` against `original_ts`.
    return predict, forecast
コード例 #4
0
def _linear_trend(y):
    """Fit y on 0..len(y)-1; return (slope, prediction at index len(y))."""
    x = np.arange(len(y)).reshape(-1, 1)
    lr = LinearRegression()
    lr.fit(x, y)
    # predict() needs a 2-D array; a bare integer is rejected by scikit-learn.
    return lr.coef_[0], lr.predict(np.array([[len(y)]]))[0]


def _arma_feature(y, order, params_attr):
    """Fit ARMA(order) on y; return (first coefficient, one-step forecast)."""
    fitted = ARMA(y, order=order).fit()
    return getattr(fitted, params_attr)[0], fitted.forecast(steps=1)[0][0]


def _nan_on_failure(func, *args):
    """Call func(*args); return (nan, nan) if it raises."""
    try:
        return func(*args)
    except Exception:
        # Best effort by design: a failed fit yields missing features.
        return np.nan, np.nan


def installment_year_add_trend_feature(features, gr, feature_name, prefix):
    """Add linear-trend, MA(1) and AR(1) features of one column to ``features``.

    Missing values in ``gr[feature_name]`` are replaced by 0 (``gr`` is
    modified). Eight entries are written, all keyed by
    ``prefix + feature_name`` plus a suffix:

    * ``""`` / ``"_predict"``: slope and next-step prediction of a linear
      fit over all rows;
    * ``"_non"`` / ``"_non_predict"``: the same over the non-zero rows;
    * ``"_ma"`` / ``"_ma_predict"``: first MA coefficient and one-step
      forecast of ARMA(0, 1) over the non-zero rows;
    * ``"_ar"`` / ``"_ar_predict"``: first AR coefficient and one-step
      forecast of ARMA(1, 0) over the non-zero rows.

    Any fit that raises contributes ``nan`` for both of its entries.

    Args:
        features: mapping that receives the new entries (updated in place).
        gr: DataFrame holding the column ``feature_name``.
        feature_name: name of the column to summarise.
        prefix: string prepended to every feature key.

    Returns:
        The same ``features`` object.
    """
    # Assign back instead of a chained inplace fillna, which is not
    # guaranteed to write through to `gr`.
    gr[feature_name] = gr[feature_name].fillna(0)
    base = '{}{}'.format(prefix, feature_name)
    all_values = gr[feature_name].values
    nonzero_values = gr.loc[gr[feature_name] != 0, feature_name].values

    trend, predict = _nan_on_failure(_linear_trend, all_values)
    features[base] = trend
    features[base + "_predict"] = predict

    tr, pr = _nan_on_failure(_linear_trend, nonzero_values)
    features[base + "_non"] = tr
    features[base + "_non_predict"] = pr

    ma_tr, ma_pr = _nan_on_failure(_arma_feature, nonzero_values, (0, 1),
                                   'maparams')
    features[base + "_ma"] = ma_tr
    features[base + "_ma_predict"] = ma_pr

    ar_tr, ar_pr = _nan_on_failure(_arma_feature, nonzero_values, (1, 0),
                                   'arparams')
    features[base + "_ar"] = ar_tr
    features[base + "_ar_predict"] = ar_pr

    return features
コード例 #5
0
def func2(data, order, steps=35):
    """Forecast levels by fitting ARMA to the first difference of ``data``.

    The fitted results object is also stored in the module-level ``model``.

    Args:
        data: series of levels (positional access via ``.iloc`` is used).
        order: ``(p, q)`` order passed to ``ARMA``.
        steps: number of periods to forecast (default 35, as before).

    Returns:
        Array of ``steps`` forecast levels.
    """
    global model
    # Forecast differences continue from the END of the sample, so they have
    # to be accumulated on top of the last observed level (the first
    # observation is only the right anchor for in-sample fitted values).
    last = data.iloc[-1]
    diffed = data.diff(1).iloc[1:]
    model = ARMA(diffed, order=order).fit(disp=0)
    predict_sunspots = model.forecast(steps=steps)[0]
    result = np.append(np.array([last]), predict_sunspots).cumsum()
    return result[1:]
コード例 #6
0
def build_model(time_series, n_steps, p, q, start):
    """Fit an ARMA(0, 1) model and tabulate forecasts with two CI levels.

    NOTE(review): ``p`` and ``q`` are accepted but not used - the order is
    hard-coded to ``(0, 1)``, exactly as before. Confirm whether the model
    was meant to honour them before changing it.

    Args:
        time_series: series to fit.
        n_steps: number of periods to forecast.
        p: unused (kept for interface compatibility).
        q: unused (kept for interface compatibility).
        start: first timestamp of the daily forecast index.

    Returns:
        DataFrame indexed by ``n_steps`` daily dates starting at ``start``,
        combining the columns ``forecast``, ``lower_ci_95``, ``upper_ci_95``,
        ``lower_ci_99`` and ``upper_ci_99`` via ``combine_first``.
    """
    arma = ARMA(time_series, (0, 1)).fit(disp=-1, maxiter=100)

    f, _, ci95 = arma.forecast(steps=n_steps)  # 95% CI
    _, _, ci99 = arma.forecast(steps=n_steps, alpha=0.01)  # 99% CI

    idx = pd.date_range(start, periods=n_steps, freq='D')
    fc_95 = pd.DataFrame(np.column_stack([f, ci95]),
                         index=idx,
                         columns=['forecast', 'lower_ci_95', 'upper_ci_95'])
    fc_99 = pd.DataFrame(np.asarray(ci99),
                         index=idx,
                         columns=['lower_ci_99', 'upper_ci_99'])
    return fc_95.combine_first(fc_99)
コード例 #7
0
ファイル: Factor-Analysize-v3.py プロジェクト: rlcjj/Obsidia
 def get_factor_ret_predict_arma(self, factor_ts, date, p, count):
     """Predict next-day returns from AR(p) forecasts of factor return and bias.

     ``ARMA(p, 0)`` models are fitted to the factor-return and bias series
     from ``self.get_factor_ret_vector``; their one-step forecasts are
     combined with the exposure on ``date``. The squared-error loss and the
     sign-agreement rate against the realised return are printed.

     Reads the module-level ``MktData`` for the realised returns.

     Args:
         factor_ts: factor exposures; rows are dates, columns are stocks.
         date: label of the "today" row.
         p: AR order.
         count: forwarded to ``get_factor_ret_vector``.

     Returns:
         The predicted next-day returns (forecast weight * exposure + bias).
     """
     descendant = list(factor_ts.columns)
     today_factor_exposure = factor_ts.loc[date, :]
     # The realised return is taken from the row right after `date`.
     tommorow_index = MktData.index.get_loc(date) + 1
     tommorow_ret = MktData.loc[MktData.index[tommorow_index],
                                (descendant, 'ret')]
     tommorow_ret.index = tommorow_ret.index.droplevel(1)
     factor_ret_vector, bias_vector = self.get_factor_ret_vector(
         factor_ts, date, count=count)
     arma_weights = ARMA(factor_ret_vector, order=(p, 0)).fit(disp=1)
     # np.asscalar was removed from NumPy; .item() is its exact equivalent.
     weights_predict_arma = np.asarray(arma_weights.forecast(1)[0]).item()
     arma_bias = ARMA(bias_vector, order=(p, 0)).fit(disp=1)
     bias_predict_arma = np.asarray(arma_bias.forecast(1)[0]).item()
     predict_tommorow_ret = (weights_predict_arma * today_factor_exposure
                             + bias_predict_arma)
     loss = np.sum((predict_tommorow_ret - tommorow_ret)**2)
     direction_precision = (predict_tommorow_ret * tommorow_ret >
                            0).sum() / len(tommorow_ret)
     print(loss, direction_precision)
     return predict_tommorow_ret
コード例 #8
0
 def arma_forecast(self, ts, p, q):
     """Fit ARMA(p, q) on ``ts``; return the one-step forecast and a report.

     The in-sample prediction that used to be computed here was never used
     and has been dropped.

     Args:
         ts: series to fit.
         p: AR order.
         q: MA order.

     Returns:
         Tuple ``(next_ret, summary)``: the first element of
         ``forecast(1)`` and the object returned by ``summary2()``.
     """
     arma = ARMA(ts, order=(p, q)).fit(disp=-1)
     next_ret = arma.forecast(1)[0]
     return next_ret, arma.summary2()
コード例 #9
0
    def _get_volume_stats(self, daily_volume: pd.DataFrame):
        """Predict the next log-volume and report recent log-volume variance.

        Adds the column ``lv`` (log of ``volume``) to ``daily_volume`` and,
        when there are more than ``3 * ROLLING_WINDOW`` rows, also ``mu_lv``
        and ``excess_lv``.

        Args:
            daily_volume: DataFrame with a ``volume`` column; modified in
                place.

        Returns:
            Tuple ``(predicted_daily_lv, variance)``: the predicted log
            volume and the variance of ``lv`` over the last
            ``ROLLING_WINDOW`` rows.
        """
        daily_volume["lv"] = np.log(daily_volume.volume)
        if len(daily_volume) > ROLLING_WINDOW * 3:
            # Rolling mean of log volume, rounded up, is the baseline; an
            # ARMA(2, 1) model forecasts the excess over that baseline.
            daily_volume["mu_lv"] = daily_volume.lv.rolling(
                ROLLING_WINDOW, min_periods=1).mean().transform(np.ceil)
            daily_volume["excess_lv"] = daily_volume.lv - daily_volume.mu_lv
            model = ARMA(daily_volume.reset_index().excess_lv.dropna(),
                         (2, 1)).fit(disp=False)
            predicted_excess = model.forecast(1)[0]
            predicted_daily_lv = daily_volume.mu_lv.array[-1] + predicted_excess
        else:
            # Too few observations: fall back to the log of the geometric
            # mean of volume, i.e. the mean of the log volumes. The raw
            # volume mean used before was not on the log scale the other
            # branch returns.
            predicted_daily_lv = daily_volume.lv[-ROLLING_WINDOW:].mean()

        return predicted_daily_lv, daily_volume.lv[-ROLLING_WINDOW:].var()
コード例 #10
0
class ts_AR:
    """AR model whose lag order is the 'Lags Used' value of an ADF test.

    Parameters
    ------------
    ts_train: training time series
    error_fun: error function (stored on the instance; not called here)

    Attributes
    ------------
    fittedModel: the fitted model, ``None`` until ``fit`` has run
    """

    def __init__(self, ts_train, error_fun=None):
        self.fittedModel = None
        self.error_fun = error_fun
        self.ts_train = ts_train  # training time series

    def __adf_test(self):
        """Run ``adfuller`` on the training series and label its output."""
        labels = [
            'Test Statistic', 'p-value', 'Lags Used',
            'Number of Observations Used'
        ]
        # adfuller returns the four labelled statistics first, followed by a
        # dict of critical values at position 4.
        raw = adfuller(self.ts_train, autolag='AIC')
        adf_res = pd.Series(raw[0:4], index=labels)
        for level, critical_value in raw[4].items():
            adf_res['Critical Value (%s)' % level] = critical_value
        return adf_res

    def fit(self):
        """Fit ``ARMA(w, 0)`` with ``w`` taken from the ADF 'Lags Used'."""
        lag_order = int(self.__adf_test()['Lags Used'])
        model = ARMA(self.ts_train, order=(lag_order, 0))
        self.fittedModel = model.fit(disp=-1)

    def get_fittedvalues(self):
        """Return the in-sample predictions, rounded to whole numbers."""
        # Per the original author this equals self.fittedModel.fittedvalues;
        # the predicted sales figures are rounded.
        in_sample = self.fittedModel.predict()
        return np.round(in_sample)

    def predict(self, forcast_period=2):
        """Return the rounded forecast for the next ``forcast_period`` steps."""
        out_of_sample = self.fittedModel.forecast(forcast_period)[0]
        return np.round(out_of_sample)
コード例 #11
0
def predict_by_all(timestamps, data, test_data, num_sample, split,
                   output_steps):
    """Rolling ARMA(2, 2) forecasts for every station and time slot.

    For each slot ``t`` in ``range(test_data.shape[1] - output_steps)`` and
    each of the first ``num_sample`` stations, a model is fitted on the
    window ``data[i][t:split[0] + t]`` and ``output_steps`` values are
    forecast. Stations whose fit raises are recorded and left at zero.

    All warnings are silenced process-wide as a side effect.

    NOTE(review): ``index_all[t] = station_sample`` only works when
    ``num_sample == data.shape[0]``, and a forecast is stored into a single
    cell, which presumably requires ``output_steps == 1`` - confirm with
    callers.

    Args:
        timestamps: timestamps aligned with the columns of ``data``.
        data: 2-D array, one row per station.
        test_data: 2-D array of ground truth, one row per station.
        num_sample: number of stations evaluated per slot.
        split: sequence whose first element is the training window length.
        output_steps: forecast horizon.

    Returns:
        Tuple ``(real, predict, index_all, error_all, valid_num)``.
    """
    warnings.filterwarnings("ignore")
    n_slots = test_data.shape[1] - output_steps
    index_all = np.zeros([n_slots, num_sample])
    valid_num = np.zeros(n_slots)
    error_all = []
    real = np.zeros([n_slots, num_sample])
    predict = np.zeros([n_slots, num_sample])
    station_sample = np.arange(data.shape[0])
    # range() instead of the Python-2-only xrange().
    for t in range(n_slots):
        if t % 10 == 0:
            print(t)
        error_index = []
        for r in range(num_sample):
            # t: which time slot
            # i: which station
            i = station_sample[r]
            train_df = pd.DataFrame(data[i][t:split[0] + t])
            train_df.index = pd.DatetimeIndex(timestamps[t:split[0] + t])
            try:
                results = ARMA(train_df, order=(2, 2)).fit(trend='c', disp=-1)
            except Exception:
                # Best effort: remember the failed station and move on.
                error_index.append(r)
                continue
            pre, _, _ = results.forecast(output_steps)
            test_real = test_data[i][t:t + output_steps]
            real[t, r] = test_real
            predict[t, r] = pre
        index_all[t] = station_sample
        error_all.append(error_index)
        valid_num[t] = num_sample - len(error_index)
    return real, predict, index_all, error_all, valid_num
コード例 #12
0
def test_arma(timeseries):
    """Return the one-step-ahead forecast of an ARMA(7, 2) fit.

    Warnings raised while fitting are suppressed. The order is fixed;
    automatic order selection is not performed.
    """
    with warnings.catch_warnings():
        warnings.filterwarnings("ignore")
        fitted = ARMA(timeseries, (7, 2)).fit()
    point_forecasts = fitted.forecast(1)[0]
    return point_forecasts[0]
コード例 #13
0
def get_prediction(train_data, p, q):
    """Return the one-step-ahead forecast of an ARMA(p, q) fit.

    Warnings raised while fitting are suppressed and the optimiser output
    is turned off (``disp=0``).
    """
    order = (p, q)
    with warnings.catch_warnings():
        warnings.filterwarnings("ignore")
        fitted = ARMA(train_data, order).fit(disp=0)
    point_forecasts = fitted.forecast(1)[0]
    return point_forecasts[0]
コード例 #14
0
ファイル: 3_1_1buildModel.py プロジェクト: kamakura233/hhh
        print('在AIC矩阵中去掉[%s,%s]组合,重新进行计算' % (p, q))
        matrix.iloc[p, q] = np.nan
        arimafail = arma
        continue
    else:
        print(p, q)
        print(u'模型ARMA(%s,%s)符合白噪声检验' % (p, q))
        break

# In[6]:

arma.summary()  # per the original author, summary() raises when p, q are 0, 0

# In[7]:

# forecast(5) is unpacked into point forecasts, standard errors and
# confidence intervals for the next 5 periods.
forecast_values, forecasts_standard_error, forecast_confidence_interval = arma.forecast(
    5)
forecast_values

# In[8]:

# Put the forecasts next to the held-out actual values and give both
# columns readable names.
predictdata = pd.DataFrame(xtest_value)
predictdata.insert(1, 'CWXT_DB:184:C:\\_predict', forecast_values)
predictdata.rename(columns={
    'CWXT_DB:184:C:\\': u'实际值',
    # '\\_' spells out the backslash; the bare '\_' used before is an
    # invalid escape sequence (same value, but warned about by Python).
    'CWXT_DB:184:C:\\_predict': u'预测值'
},
                   inplace=True)
predictdata.info()

# In[10]:
コード例 #15
0
 def arma_forecast(self, ts, p, q):
     """Fit ARMA(p, q) on ``ts``; return the one-step forecast and a report.

     Returns:
         Tuple of the first element of ``forecast(1)`` and the object
         returned by ``summary2()``.
     """
     fitted = ARMA(ts, order=(p, q)).fit(disp=-1)
     point_forecast = fitted.forecast(1)[0]
     report = fitted.summary2()
     return point_forecast, report
コード例 #16
0
# Fit ARMA(1, 1) on the twice-differenced series, then undo each transform
# in reverse order to compare the predictions with the original series.
model = ARMA(ts_diff_2, order=(1, 1))
result_arma = model.fit(disp=-1, method='css')

predict_ts = result_arma.predict()
# Undo the first-order difference
diff_shift_ts = ts_diff_1.shift(1)
diff_recover_1 = predict_ts.add(diff_shift_ts)
# Undo the other first-order difference
rol_shift_ts = rol_mean.shift(1)
diff_recover = diff_recover_1.add(rol_shift_ts)
# Undo the moving average
rol_sum = ts_log.rolling(window=11).sum()
rol_recover = diff_recover * 12 - rol_sum.shift(1)
# Undo the log transform
log_recover = np.exp(rol_recover)
log_recover.dropna(inplace=True)

# print(...) with a single argument behaves the same on Python 2 and 3.
print(log_recover)

ts = ts[log_recover.index]  # keep only the records that have a prediction
plt.figure(facecolor='white')
log_recover.plot(color='blue', label='Predict')
ts.plot(color='red', label='Original')
plt.legend(loc='best')
plt.title('RMSE: %.4f' % np.sqrt(sum((log_recover - ts)**2) / ts.size))
plt.show()

print(test_stationarity.proper_model(ts_log))

# forecast() is a method of the fitted results object, not of the unfitted
# ARMA model that `model` refers to.
print(result_arma.forecast())
コード例 #17
0
# 将训练好的模型保存到train_model.m中
joblib.dump(model, joblib_path)

model_name="statsmodels_ES_model.pkl"
pickle_path=os.path.join(model_root_path,model_name)
# 将训练好的模型保存到train_model.pkl中
pickle.dump(model,open(pickle_path,"wb"))
'''
############################################################################
# Test ARMA: fit on a small synthetic daily series, then persist the fitted
# model with both joblib and pickle.
index = pd.date_range('5/1/2018', periods=20, freq='D')
ts = pd.Series([1.0, 2, 3, 4, 3, 6, 3, 7, 3, 5, 1, 2, 3, 4, 3, 6, 3, 7, 3, 5],
               index=index)
model = ARMA(ts, order=(2, 1))
model = model.fit(disp=-1, method='css')
y_fit = model.predict()
y_hat = model.forecast(4)[0]
print(y_fit)
print(y_hat)

model_root_path = "C://Users//Kang//Desktop//model_management//saved_models"

model_name = "statsmodels_ARMA_model.m"
joblib_path = os.path.join(model_root_path, model_name)
# Save the trained model to the .m file
joblib.dump(model, joblib_path)

model_name = "statsmodels_ARMA_model.pkl"
pickle_path = os.path.join(model_root_path, model_name)
# Save the trained model to the .pkl file; the context manager closes the
# handle even if pickling fails.
with open(pickle_path, "wb") as pickle_file:
    pickle.dump(model, pickle_file)
コード例 #18
0
ファイル: arma.py プロジェクト: Karagul/CQF-2
dfa1 = dfa[:500].copy()

#################

all_num = 0
predict_list = []
vol_list = []
except_num = 0
for i in range(len(dfa1) - 5):
    try:
        pq = st.arma_order_select_ic(list(dfa1['r'][i:i + 5]),
                                     max_ar=3,
                                     max_ma=3,
                                     ic=['bic']).bic_min_order
        arma = ARMA(list(dfa1['r'][i:i + 5]), pq).fit(disp=False)
        output1 = arma.forecast()
        garch = arch_model(dfa1['r'][i:i + 5],
                           vol='Garch',
                           p=1,
                           o=0,
                           q=1,
                           dist='Normal').fit()
        output2 = garch.forecast()

        vol = np.sqrt(output2.variance.tail(1).values)
        vol_list = np.append(vol_list, vol)

        if vol >= dfa1['std'][i + 4]:
            output = output1[0] - 2 * vol
        else:
            output = output1[0] + 8 * vol
コード例 #19
0
D_data.plot()  # time-series plot
plt.show()
plot_acf(D_data).show()  # autocorrelation plot
plt.show()
plot_pacf(D_data).show()  # partial autocorrelation plot
plt.show()
print(u'1阶差分序列的ADF检验结果为:', ADF(D_data[u'dst差分']))
print(u'差分序列的白噪声检验结果为:', acorr_ljungbox(D_data, lags=1))

data[u'dst'] = data[u'dst'].astype(float)
# Search orders up to one tenth of the sample size in each direction.
pmax = int(len(data) / 10)
qmax = int(len(data) / 10)
bic_matrix = []
for p in range(pmax + 1):
    tmp = []
    for q in range(qmax + 1):
        try:
            tmp.append(ARMA(data, (p, q)).fit().bic)
        except Exception:
            # Orders that cannot be fitted are left empty; a bare except
            # would also have swallowed KeyboardInterrupt during the search.
            tmp.append(None)
    bic_matrix.append(tmp)
bic_matrix = pd.DataFrame(bic_matrix)  # the minimum can be read off this table
#  print(bic_matrix)
# stack() flattens the table so idxmin() yields the (p, q) pair directly.
p, q = bic_matrix.stack().idxmin()
print(u'bic最小的P值和q值为:%s、%s' % (p, q))

model = ARMA(data, (p, q)).fit()
model.summary2()  # produce a model report
# 5-step forecast; per the original author it returns the predictions,
# standard errors and confidence intervals.
forecast = model.forecast(5)
print(forecast)
コード例 #20
0
# Evaluate ARMA(2, 2) on `run_times` randomly chosen (series, offset) pairs.
# `error_count`, `loss` and `index_all` are expected to be initialised
# before this point.
error_index = np.zeros(run_times)
test_target = np.zeros([run_times, output_steps])
test_prediction = np.zeros([run_times, output_steps])
for r in range(run_times):
    print('run ' + str(r))
    i = np.random.randint(data.shape[0])
    j = np.random.randint(test_data.shape[-1] - output_steps)
    train_df = pd.DataFrame(data[i][j:split[0] + split[1] + j])
    train_df.index = pd.DatetimeIndex(timestamps[j:split[0] + split[1] + j])
    try:
        results = ARMA(train_df, order=(2, 2)).fit(trend='nc', disp=-1)
    except Exception:
        # Record the failed run and move on; a bare except would also have
        # swallowed KeyboardInterrupt.
        error_index[error_count] = r
        error_count += 1
        continue
    pre, _, _ = results.forecast(output_steps)
    test_real = test_data[i][j:j + output_steps]
    index_all[r] = [i, j]
    test_target[r] = test_real
    test_prediction[r] = pre
    loss += np.sum(np.square(pre - test_real))
print('================ calculate rmse for test data ============')
#n_rmse_val = np.sqrt(np.sum(np.square(val_predict - val_real))*1.0/np.prod(val_real.shape))
#n_rmse_test = np.sqrt(np.sum(np.square(test_predict - test_real))*1.0/np.prod(test_real.shape))
#rmse_val = pre_process.real_loss(n_rmse_val)
#rmse_test = pre_process.real_loss(n_rmse_test)
#print('val loss is ' + str(n_rmse_val) + ' , ' + str(rmse_val))
#print('test loss is ' + str(n_rmse_test) + ' , ' + str(rmse_test))
#print('val loss is ' + str(n_rmse_val))
print('run times: ' + str(run_times))
print('error count: ' + str(error_count))
コード例 #21
0
def ts_arma(ts, p, q, start,end):
    """Fit ARMA(p, q) on ``ts`` and return predictions from start to end.

    Prints the 5-step forecast and the model summary along the way.
    """
    fitted = ARMA(ts, order=(p, q)).fit(disp = -1)
    five_step_forecast = fitted.forecast(5)[0]
    print("未来五年:", five_step_forecast)
    ts_predict_arma = fitted.predict(start, end)
    report = fitted.summary()
    print(report)
    return ts_predict_arma
コード例 #22
0
# find the lag with the smallest aic
lag = min(aics, key=aics.get)
print('Optimal lag = {}'.format(lag))

# train-test procedure using moving window
series_len = len(series_monthly)
train_len = len(train)
test_len = len(test)
# Series.append no longer exists in pandas, so the one-step predictions are
# collected in a list and concatenated once after the loop.
predicted_parts = []
coefficients = []
confidence_intervals = [[], []]
for i in range(test_len):
    print('Train - test iteration: i = {} from {}'.format(i + 1, test_len))
    dynamic_train = series_monthly.iloc[i:i + train_len]
    model = ARMA(dynamic_train, order=(lag, 0)).fit(**PARAMS)
    results = model.forecast(1)
    # results[2] holds the confidence interval of the one-step forecast.
    confidence_intervals[0].append(results[2][0][0])
    confidence_intervals[1].append(results[2][0][1])
    predicted_parts.append(pd.Series(results[0], index=[test.index[i]]))
    coefficients.append(model.params)
# verify_integrity keeps the original check against duplicate index labels.
if predicted_parts:
    y_pred = pd.concat(predicted_parts, verify_integrity=True)
else:
    y_pred = pd.Series([], dtype=float)

# plot test-predicted data
f = plt.figure()
plt.plot(test, color='blue')
plt.plot(y_pred, color='orange')
plt.fill_between(test.index,
                 confidence_intervals[0],
                 confidence_intervals[1],
                 color='lightgrey')
plt.gcf().set_size_inches(10, plt.gcf().get_size_inches()[1])
コード例 #23
0
# Model fitting

from statsmodels.tsa.arima_model import ARMA

# Fit ARMA(4, 2) to the "Column2" series of Ct and print the report.
Ct_ARMA = ARMA(Ct["Column2"], order=(4,2)).fit()
print(Ct_ARMA.summary())


# In[116]:


# Plot the full data, the in-sample fitted values and the 6-step forecast.
plt.plot(TSdata['Column1'], TSdata["Column2"], "o-")
plt.plot(Ct["Column1"][0:53], Ct_ARMA.fittedvalues, 'o-')

foresee = Ct_ARMA.forecast(6)[0].tolist()
# NOTE(review): the x slice 53:66 matches the 6 forecast values only if
# TSdata has 59 rows - confirm, otherwise the lengths differ.
plt.plot(TSdata["Column1"][53:66],foresee,'*-')
plt.show()


# In[118]:


# Show the held-out rows next to the forecast values.
print(TSdata[53:66])
print(foresee)


# In[ ]:


コード例 #24
0
ファイル: arima_stocks.py プロジェクト: gmahjub/stealthebasis
ar1.summary()

# per the original author, ar1 (fitted above, outside this excerpt) is an
# ARMA model with p=3, q=1

# next we want to plot the fitted values from the ARMA
# model against the actual values in udiff
# the udiff values are the log return values.
plt.figure(figsize=(12, 8))
plt.plot(udiff.values, color='blue')
preds = ar1.fittedvalues
plt.plot(preds, color='red')
plt.show()

# next, let's make a 2 step ahead forecast, and plot it
steps = 2
forecast = ar1.forecast(steps=steps)[0]
plt.figure(figsize=(12, 8))
plt.plot(udiff.values, color='blue')
preds = ar1.fittedvalues
plt.plot(preds, color='red')
# green connector from the last fitted value to the first forecast value;
# the hard-coded "+ 3" gives exactly two x positions
plt.plot(pd.DataFrame(np.array([preds[-1], forecast[0]]).T,
                      index=range(
                          len(udiff.values) + 1,
                          len(udiff.values) + 3)),
         color='green')
# the forecast itself, placed right after the end of the observed series
plt.plot(pd.DataFrame(forecast,
                      index=range(
                          len(udiff.values) + 1,
                          len(udiff.values) + 1 + steps)),
         color='green')
plt.title('Display the predictions with the ARMA model')
コード例 #25
0
ファイル: everyday.py プロジェクト: WeijiaZhang/kdd2017
        diffed_ts = diff_ts(dta_log, d=[12, 1])
        # diffed_ts = diff_ts(dta,d=[12,1])

        # test_stationarity.testStationarity(diffed_ts)
        # test_stationarity.draw_acf_pacf(diffed_ts,l=30)
        model = arima_model(diffed_ts)

        model.get_proper_model()
        print 'bic:', model.bic, 'p:', model.p, 'q:', model.q
        print model.properModel.forecast()[0]
        # print model.forecast_next_day_value(type='day')

        model2 = ARMA(diffed_ts, (model.p, 1, model.q)).fit()
        model2.summary2()
        # predict_sunspots = model2.predict('2061','2071',dynamic=True)
        a = model2.forecast(10)[0][9]
        a_ts = predict_diff_recover(a, d=[12, 1])

        # log_a = a_ts
        log_a = np.exp(a_ts)

        print log_a
        # pdb.set_trace()

        subdata.list[j].append(log_a)

        log_a = None
    np.savez('/home/wuxing/KDD/predict_only13.npz', subdata.list, j)
    print('--------------------------------' + str(j))

pdb.set_trace()
コード例 #26
0
def func1(data,order):
    """Fit ARMA(order) on ``data`` and return its 35-step forecast.

    The fitted results object is also stored in the module-level ``model``.
    """
    global model
    model = ARMA(data, order=order).fit(disp=0)
    forecast_output = model.forecast(steps=35)
    return forecast_output[0]
コード例 #27
0

# Difference the log series, inspect stationarity, then let arima_model
# pick an order. (Python 2 script: print is used as a statement.)
diffed_ts = diff_ts(dta_log,d=[1,1])
test_stationarity.testStationarity(diffed_ts)
test_stationarity.draw_acf_pacf(diffed_ts,l=31) 
model = arima_model(diffed_ts)
pdb.set_trace()  # NOTE(review): interactive debugger breakpoint left in
model.get_proper_model()
print 'bic:',model.bic,'p:',model.p,'q:',model.q
print model.properModel.forecast()[0]
# print model.forecast_next_day_value(type='day')

# NOTE(review): ARMA is documented to take order=(p, q); the 3-tuple
# (p, 1, q) looks like an ARIMA order - confirm which model was intended.
model2=ARMA(diffed_ts,(model.p,1,model.q)).fit()
model2.summary2()
predict_sunspots = model2.predict('2090','2100',dynamic=True)
# 5-step forecast, mapped back through the differencing and the log.
a = model2.forecast(5)[0]
a_ts = predict_diff_recover(a,d=[1,1])
log_a = np.exp(a_ts)


print log_a
pdb.set_trace()  # NOTE(review): interactive debugger breakpoint left in



model.certain_model(6,0)

# In-sample predictions of the chosen model, recovered to the original scale.
predict_ts = model.properModel.predict()
diff_recover_ts = predict_diff_recover(predict_ts,d=[1,1])
log_recover = np.exp(diff_recover_ts)
コード例 #28
0
ファイル: test_hyper_ts.py プロジェクト: longgb246/MLlearn
from statsmodels.tsa.arima_model import ARMA

# dta = sm.datasets.sunspots.load_pandas().data
# dta.index = pd.Index(sm.tsa.datetools.dates_from_range('1700', '2008'))
# del dta["YEAR"]

# Smoke test: fit MA(2) on the integers 0..9 without a constant term
# (trend='nc') and print the parameters.
a_pd = pd.DataFrame(range(10), columns=['value'])
arma_mod = sm.tsa.ARMA(a_pd, order=(0, 2)).fit(disp=False, trend='nc')
print(arma_mod.params)

# Same data with MA(1), using the directly imported ARMA class.
a = range(10)  # not used in the lines shown here
order = (0, 1)
arma_mod2 = ARMA(a_pd, order=order).fit(disp=False, trend='nc')
print(arma_mod2.params)
# predict_sunspots = arma_mod2.predict(0, 12)
arma_mod2.forecast(1)  # result is discarded unless run interactively

# predict_sunspots = arma_mod.predict('1990', '2012', dynamic=True)
# print(predict_sunspots)


def proper_model(data_ts, maxLag):
    init_bic = float("inf")
    init_p = 0
    init_q = 0
    init_properModel = None
    for p in np.arange(maxLag):
        for q in np.arange(maxLag):
            model = ARMA(data_ts, order=(p, q))
            try:
                results_ARMA = model.fit(disp=-1, method='css')
コード例 #29
0
from db_tools import *
コード例 #30
0
# Stationarity test and correlograms for the series Ct.
ADF(Ct)

plot_acf(Ct,lags=50);
plot_pacf(Ct,lags=50);

# Information-criterion based order selection.
import statsmodels.tsa.stattools as ts
ts.arma_order_select_ic(Ct,max_ar=3,max_ma=3,ic=['aic','bic','hqic'])

# Fit AR(3) and show the report.
from statsmodels.tsa.arima_model import ARMA
Ct_ARMA=ARMA(Ct,order=(3,0)).fit()
Ct_ARMA.summary()

plt.plot(Ct,'o-',Ct_ARMA.fittedvalues);

# Closing prices for 2018-05. Rows are selected with .loc because
# string-based row selection through DataFrame[...] is no longer supported.
Ct_05=pd.DataFrame({' 实际值':TSdata.loc['2018-05'].Close});
# Model forecasts; assumes 2018-05 has exactly 22 rows - TODO confirm.
Ct_05[' 预测值']=Ct_ARMA.forecast(22)[0]
Ct_05[' 绝对误差']=Ct_05[' 实际值']-Ct_05[' 预测值'];
Ct_05[' 相对误差(%)']=Ct_05[' 绝对误差']/Ct_05[' 实际值']*100;
Ct_05

# Chapter 6: Introduction to Big Data Analysis
## 6.1 The concept of big data

## 6.2 Text preprocessing in Python
### Basic string operations
#### String statistics
len('abc')
S=["asfef", "qwerty", "yuiop", "b", "stuff.blah.yech"];
len(S)
[len(s) for s in S]