def get_linear_model_status(code, ptype='f', dtype='d', type='l', start=None, end=None):
    """Tell whether the latest close sits above a fitted linear trend line.

    The close series for *code* is loaded through ``tdd`` (and resampled when
    *dtype* is not ``'d'``), NaNs are dropped, and ``close = a + b * position``
    is fitted with OLS.  Only when the fitted line rises is the last close
    compared with a reference line.

    Args:
        code: Security code, forwarded to ``tdd.get_tdx_append_now_df``.
        ptype: Forwarded unchanged to ``tdd.get_tdx_append_now_df``.
        dtype: ``'d'`` keeps the loaded frame; any other value is forwarded to
            ``tdd.get_tdx_stock_period_to_type`` to resample it.
        type: Reference line, case-insensitive.  ``'m'`` is the fitted line
            itself; ``'l'`` is the fitted line shifted down so that it passes
            through the point with the most negative residual.  (The name
            shadows the builtin; it is kept because callers may pass it by
            keyword.)
        start: Forwarded unchanged to ``tdd.get_tdx_append_now_df``.
        end: Forwarded unchanged to ``tdd.get_tdx_append_now_df``.

    Returns:
        A tuple ``(above, n, diff)``.  ``above`` is True when the last close is
        strictly above the reference line, ``n`` is the number of non-NaN
        closes and ``diff`` is ``last close - reference``.  ``(False, 0, 0)``
        is returned when the fitted line is not rising, when *type* is neither
        ``'m'`` nor ``'l'``, or when fewer than two closes are available.
    """
    df = tdd.get_tdx_append_now_df(code, ptype, start, end).sort_index(ascending=True)
    if dtype != 'd':
        df = tdd.get_tdx_stock_period_to_type(df, dtype).sort_index(ascending=True)

    asset = df['close']
    log.info("df:%s" % asset[:1])
    asset = asset.dropna()

    count = len(asset)
    if count < 2:
        # The trend test below reads Y_hat[1]; a shorter series has no trend.
        return False, 0, 0

    X = np.arange(count)
    model = regression.linear_model.OLS(asset, sm.add_constant(X)).fit()
    # params may come back as a label-indexed Series; read it by position.
    a, b = np.asarray(model.params)[:2]
    log.info("X:%s a:%s b:%s" % (count, a, b))
    Y_hat = X * b + a

    # Last fitted value against the second one: for more than two points this
    # has the sign of the slope; for exactly two points it is never true.
    if not Y_hat[-1] > Y_hat[1]:
        log.debug("d:%s" % Y_hat[1])
        return False, 0, 0

    log.debug("u:%s" % Y_hat[-1])
    # The original format string had no placeholder and raised TypeError here.
    log.debug("price:%s" % asset.iat[-1])

    last_close = asset.iat[-1]
    kind = type.upper()
    if kind == 'M':
        diff = last_close - Y_hat[-1]
    elif kind == 'L':
        # Offset that moves the fitted line onto the lowest point below it.
        i = (asset.values - Y_hat).argmin()
        c_low = Y_hat[i] - asset.values[i]
        diff = last_close - (Y_hat[-1] - c_low)
    else:
        return False, 0, 0
    return bool(diff > 0), count, diff
def longsklearn(code='999999', ptype='f', dtype='d', start=None, end=None):
    """Plot the close of *code* with moving-average and extreme-value overlays.

    Price bars are loaded through ``tdd``; a second frame ``dw`` is derived
    from them with ``tdd.get_tdx_stock_period_to_type(df, dtype)``.  A few
    diagnostics about local highs and lows are printed, then an interactive
    matplotlib window is opened (the call ends in ``show()``).

    Overlays drawn on top of the close series:
      * green line   - 60-bar rolling mean of ``(high + low) / 2`` of the
        loaded frame
      * red line     - 5-bar rolling mean of ``dw.close``
      * blue dots    - 5-bar rolling minimum of ``dw.close``
      * yellow dots  - 5-bar rolling maximum of ``dw.close``
      * red dots     - expanding maximum of ``dw.close`` (``min_periods=5``)
      * green dots   - expanding minimum of ``dw.close`` (``min_periods=5``)

    Args:
        code: Security code, forwarded to ``tdd.get_tdx_append_now_df``.
        ptype: Forwarded unchanged to ``tdd.get_tdx_append_now_df``.
        dtype: Target period forwarded to ``tdd.get_tdx_stock_period_to_type``.
        start: Forwarded unchanged to ``tdd.get_tdx_append_now_df``.
        end: Forwarded unchanged to ``tdd.get_tdx_append_now_df``.

    Returns:
        None.
    """
    df = tdd.get_tdx_append_now_df(code, ptype, start, end).sort_index(ascending=True)
    dw = tdd.get_tdx_stock_period_to_type(df, dtype).sort_index(ascending=True)

    h = df.loc[:, ['open', 'close', 'high', 'low']]
    highp = h['high'].values
    lowp = h['low'].values
    closep = h['close'].values

    # Straight-line fit of the close, evaluated 200 steps past the data.
    # NOTE: yt and estV (below) are computed but not drawn by this variant.
    lr = LinearRegression()
    x = np.atleast_2d(np.linspace(0, len(closep), len(closep))).T
    lr.fit(x, closep)
    xt = np.atleast_2d(np.linspace(0, len(closep) + 200, len(closep) + 200)).T
    yt = lr.predict(xt)

    # Interior bars whose low is a local minimum / whose high is a local maximum.
    interior = range(1, len(highp) - 1)
    bP = [i for i in interior if lowp[i] <= lowp[i - 1] and lowp[i] < lowp[i + 1]]
    bV = [lowp[i] for i in bP]
    uP = [i for i in interior if highp[i] >= highp[i - 1] and highp[i] > highp[i + 1]]
    uV = [highp[i] for i in uP]
    print(highp)
    print("uV:%s" % uV[:1])
    print("uP:%s" % uP[:1])
    print("bV:%s" % bV[:1])
    print("bP:%s" % bP[:1])

    # LIS is defined elsewhere; it is used here as returning (values, positions
    # into its input) - presumably a longest increasing subsequence; verify.
    sV, sP = LIS(uV)
    dV, dP = LIS(bV)
    print("sV:%s" % sV[:1])
    print("sP:%s" % sP[:1])
    print("dV:%s" % dV[:1])
    print("dP:%s" % dP[:1])

    # Map positions inside uV / bV back to bar positions.
    sidx = [uP[j] for j in sP]
    didx = [bP[j] for j in dP]
    print("sidx:%s" % sidx[:1])
    print("didx:%s" % didx[:1])

    lr = LinearRegression()
    lr.fit(np.atleast_2d(np.array(sidx)).T, np.array(sV))
    estV = lr.predict(xt)

    fig = plt.figure(figsize=(16, 10), dpi=72)
    plt.subplots_adjust(left=0.05, bottom=0.08, right=0.95, top=0.95, wspace=0.15, hspace=0.25)
    ax = fig.add_subplot(111)
    plt.grid(True)
    ax = h['close'].plot(ax=ax)

    df['mean'] = (df.high.values + df.low.values) / 2
    print(df['mean'][:1])
    dw = dw.set_index('date')

    # Series.rolling()/expanding() replace the removed pd.rolling_* and
    # pd.expanding_* functions; the second positional argument of the old
    # expanding_* functions was min_periods.
    ax.plot(df.index, df['mean'].rolling(60).mean(), 'g', linewidth=1)
    ax.plot(dw.index, dw.close.rolling(5).mean(), 'r', linewidth=1)
    ax.plot(dw.index, dw.close.rolling(5).min(), 'bo')
    ax.plot(dw.index, dw.close.rolling(5).max(), 'yo')
    ax.plot(dw.index, dw.close.expanding(min_periods=5).max(), 'ro')
    ax.plot(dw.index, dw.close.expanding(min_periods=5).min(), 'go')

    # Bind the returned handlers to locals so they stay referenced while the
    # window is open.
    scale = 1.1
    zp = zoompan.ZoomPan()
    figZoom = zp.zoom_factory(ax, base_scale=scale)
    figPan = zp.pan_factory(ax)
    show()
def longsklearn(code='999999'):
    """Plot the close of *code* with two fitted straight lines.

    Bars are loaded with ``tdd.get_tdx_append_now_df(code, 'f')``.  Two
    ``LinearRegression`` fits are drawn over the close series, both evaluated
    200 steps past the end of the data:

      * green - fit of the close against bar position
      * red   - fit through the points that ``LIS`` selects from the lows of
        "local low" bars (black dots)

    A bar counts as a local low when both its high and its low are ``<=`` the
    previous bar's and ``<`` the next bar's.

    Args:
        code: Security code, forwarded to ``tdd.get_tdx_append_now_df``.

    Returns:
        None.  The call ends in ``show()``.
    """
    df = tdd.get_tdx_append_now_df(code, 'f').sort_index(ascending=True)
    h = df.loc[:, ['open', 'close', 'high', 'low']]
    highp = h['high'].values
    lowp = h['low'].values
    closep = h['close'].values

    # Trend of the close itself.
    lr = LinearRegression()
    x = np.atleast_2d(np.linspace(0, len(closep), len(closep))).T
    lr.fit(x, closep)
    xt = np.atleast_2d(np.linspace(0, len(closep) + 200, len(closep) + 200)).T
    yt = lr.predict(xt)

    # Positions and lows of the interior bars that are local lows.
    bP = [
        i for i in range(1, len(highp) - 1)
        if highp[i] <= highp[i - 1] and highp[i] < highp[i + 1]
        and lowp[i] <= lowp[i - 1] and lowp[i] < lowp[i + 1]
    ]
    bV = [lowp[i] for i in bP]

    # LIS is defined elsewhere; it is used here as returning (values, positions
    # into bV) - presumably a longest increasing subsequence; verify.
    d, p = LIS(bV)
    idx = [bP[j] for j in p]

    # Trend through the selected lows.
    lr = LinearRegression()
    lr.fit(np.atleast_2d(np.array(idx)).T, np.array(d))
    estV = lr.predict(xt)

    fig = plt.figure(figsize=(16, 10), dpi=72)
    plt.subplots_adjust(left=0.05, bottom=0.08, right=0.95, top=0.95, wspace=0.15, hspace=0.25)
    ax = fig.add_subplot(111)
    plt.grid(True)
    ax.plot(closep, linewidth=2)
    ax.plot(idx, d, 'ko')
    ax.plot(xt, estV, '-r', linewidth=5)
    ax.plot(xt, yt, '-g', linewidth=5)

    # Bind the returned handlers to locals so they stay referenced while the
    # window is open.
    scale = 1.1
    zp = zoompan.ZoomPan()
    figZoom = zp.zoom_factory(ax, base_scale=scale)
    figPan = zp.pan_factory(ax)
    show()
def longsklearn(code='999999', ptype='f', dtype='d', start=None, end=None):
    """Plot the close of *code* with moving-average and extreme-value overlays.

    Price bars are loaded through ``tdd``; a second frame ``dw`` is derived
    from them with ``tdd.get_tdx_stock_period_to_type(df, dtype)``.  A few
    diagnostics about local highs and lows are printed, then an interactive
    matplotlib window is opened (the call ends in ``show()``).

    Overlays drawn on top of the close series:
      * green line   - 60-bar rolling mean of ``(high + low) / 2`` of the
        loaded frame
      * red line     - 5-bar rolling mean of ``dw.close``
      * blue dots    - 5-bar rolling minimum of ``dw.close``
      * yellow dots  - 5-bar rolling maximum of ``dw.close``
      * red dots     - expanding maximum of ``dw.close`` (``min_periods=5``)
      * green dots   - expanding minimum of ``dw.close`` (``min_periods=5``)

    Args:
        code: Security code, forwarded to ``tdd.get_tdx_append_now_df``.
        ptype: Forwarded unchanged to ``tdd.get_tdx_append_now_df``.
        dtype: Target period forwarded to ``tdd.get_tdx_stock_period_to_type``.
        start: Forwarded unchanged to ``tdd.get_tdx_append_now_df``.
        end: Forwarded unchanged to ``tdd.get_tdx_append_now_df``.

    Returns:
        None.
    """
    df = tdd.get_tdx_append_now_df(code, ptype, start, end).sort_index(ascending=True)
    dw = tdd.get_tdx_stock_period_to_type(df, dtype).sort_index(ascending=True)

    h = df.loc[:, ['open', 'close', 'high', 'low']]
    highp = h['high'].values
    lowp = h['low'].values
    closep = h['close'].values

    # Straight-line fit of the close, evaluated 200 steps past the data.
    # NOTE: yt and estV (below) are computed but not drawn by this variant.
    lr = LinearRegression()
    x = np.atleast_2d(np.linspace(0, len(closep), len(closep))).T
    lr.fit(x, closep)
    xt = np.atleast_2d(np.linspace(0, len(closep) + 200, len(closep) + 200)).T
    yt = lr.predict(xt)

    # Interior bars whose low is a local minimum / whose high is a local maximum.
    interior = range(1, len(highp) - 1)
    bP = [i for i in interior if lowp[i] <= lowp[i - 1] and lowp[i] < lowp[i + 1]]
    bV = [lowp[i] for i in bP]
    uP = [i for i in interior if highp[i] >= highp[i - 1] and highp[i] > highp[i + 1]]
    uV = [highp[i] for i in uP]
    # Single-argument print(...) calls behave the same on Python 2 and 3.
    print(highp)
    print("uV:%s" % uV[:1])
    print("uP:%s" % uP[:1])
    print("bV:%s" % bV[:1])
    print("bP:%s" % bP[:1])

    # LIS is defined elsewhere; it is used here as returning (values, positions
    # into its input) - presumably a longest increasing subsequence; verify.
    sV, sP = LIS(uV)
    dV, dP = LIS(bV)
    print("sV:%s" % sV[:1])
    print("sP:%s" % sP[:1])
    print("dV:%s" % dV[:1])
    print("dP:%s" % dP[:1])

    # Map positions inside uV / bV back to bar positions.
    sidx = [uP[j] for j in sP]
    didx = [bP[j] for j in dP]
    print("sidx:%s" % sidx[:1])
    print("didx:%s" % didx[:1])

    lr = LinearRegression()
    lr.fit(np.atleast_2d(np.array(sidx)).T, np.array(sV))
    estV = lr.predict(xt)

    fig = plt.figure(figsize=(16, 10), dpi=72)
    plt.subplots_adjust(left=0.05, bottom=0.08, right=0.95, top=0.95, wspace=0.15, hspace=0.25)
    ax = fig.add_subplot(111)
    plt.grid(True)
    ax = h['close'].plot(ax=ax)

    # Element-wise on the arrays: a bare map() would assign an iterator on
    # Python 3 instead of the per-bar values.
    df['mean'] = (df.high.values + df.low.values) / 2
    print(df['mean'][:1])
    dw = dw.set_index('date')

    # Series.rolling()/expanding() replace the removed pd.rolling_* and
    # pd.expanding_* functions; the second positional argument of the old
    # expanding_* functions was min_periods.
    ax.plot(df.index, df['mean'].rolling(60).mean(), 'g', linewidth=1)
    ax.plot(dw.index, dw.close.rolling(5).mean(), 'r', linewidth=1)
    ax.plot(dw.index, dw.close.rolling(5).min(), 'bo')
    ax.plot(dw.index, dw.close.rolling(5).max(), 'yo')
    ax.plot(dw.index, dw.close.expanding(min_periods=5).max(), 'ro')
    ax.plot(dw.index, dw.close.expanding(min_periods=5).min(), 'go')

    # Bind the returned handlers to locals so they stay referenced while the
    # window is open.
    scale = 1.1
    zp = zoompan.ZoomPan()
    figZoom = zp.zoom_factory(ax, base_scale=scale)
    figPan = zp.pan_factory(ax)
    show()
def get_linear_model_histogram(code, ptype="f", dtype="d", start=None, end=None):
    """Draw a six-panel valuation chart around a linear fit of the close.

    The close series for *code* is loaded through ``tdd`` (and resampled when
    *dtype* is not ``"d"``), NaNs are dropped and ``close = a + b * position``
    is fitted with OLS.  The panels of the 3x2 figure are:

      * top-left     - close, fitted line, and the fitted line shifted onto
        the lowest and the highest residual
      * middle-left  - close with the band between those two shifted lines
        divided into five equal strips
      * top-right    - for codes starting with ``999``/``399``: close plus the
        strip boundaries expressed relative to the fitted line; otherwise the
        close and a benchmark close, each divided by its own first value
      * middle-right - close with a regression through the lows ``LIS`` selects
        from local-low bars, and a regression of the close itself
      * bottom-left  - histogram of ``close - fitted`` with the latest value
        marked in red
      * bottom-right - the same histogram in percent of the fitted value

    The benchmark is ``999999`` when the code starts with 5, 6 or 9,
    ``399006`` when it starts with 3, and ``399001`` otherwise.

    Args:
        code: Security code, forwarded to ``tdd.get_tdx_append_now_df``.
        ptype: Forwarded unchanged to ``tdd.get_tdx_append_now_df``.
        dtype: ``"d"`` keeps the loaded frame; any other value is forwarded to
            ``tdd.get_tdx_stock_period_to_type`` to resample it.
        start: Forwarded unchanged to ``tdd.get_tdx_append_now_df``.
        end: Forwarded unchanged to ``tdd.get_tdx_append_now_df``.

    Returns:
        None.  The call ends in ``show()``.
    """
    df = tdd.get_tdx_append_now_df(code, ptype, start, end).sort_index(ascending=True)
    if dtype != "d":
        df = tdd.get_tdx_stock_period_to_type(df, dtype).sort_index(ascending=True)
    asset = df["close"]
    log.info("df:%s" % asset[:1])
    asset = asset.dropna()
    dates = asset.index

    # The benchmark is only drawn for non-index codes, so only load it then.
    # (The original test joined the two negations with "or", which is always
    # true because no code starts with both prefixes.)
    is_index = code.startswith(("999", "399"))
    asset1 = None
    if not is_index:
        lead = code[:1]
        if lead in ("5", "6", "9"):
            code2 = "999999"
        elif lead == "3":
            code2 = "399006"
        else:
            code2 = "399001"
        df1 = tdd.get_tdx_append_now_df(code2, ptype, start, end).sort_index(ascending=True)
        if dtype != "d":
            df1 = tdd.get_tdx_stock_period_to_type(df1, dtype).sort_index(ascending=True)
        asset1 = df1.loc[asset.index, "close"]
        # Scale to the first value; dividing by the scalar (not by the
        # one-element slice asset1[:1]) keeps the result a plain Series.
        asset1 = (asset1 / asset1.iloc[0]).round(2)

    fig = plt.figure(figsize=(16, 10))
    plt.subplots_adjust(left=0.05, bottom=0.08, right=0.95, top=0.95, wspace=0.15, hspace=0.25)
    scale = 1.1

    # ---- top-left: price over time with the fitted line and its bounds ----
    ax1 = fig.add_subplot(321)
    ax1.plot(asset)
    _lmh_label_ticks(ax1, dates)

    # Linear fit of the close against bar position.
    X = np.arange(len(asset))
    model = regression.linear_model.OLS(asset, sm.add_constant(X)).fit()
    # params may come back as a label-indexed Series; read it by position.
    a, b = np.asarray(model.params)[:2]
    log.info("X:%s a:%s b:%s" % (len(asset), a, b))
    Y_hat = X * b + a

    # Actual minus fitted; its extremes bound the value-fluctuation band.
    residual = asset.values - Y_hat
    c_low = -residual.min()    # offset that shifts the line down onto the lowest point
    c_high = -residual.max()   # offset that shifts the line up onto the highest point

    ax1.plot(X, Y_hat, "k", alpha=0.9)
    ax1.plot(X, Y_hat - c_low, "r", alpha=0.9)
    ax1.plot(X, Y_hat - c_high, "r", alpha=0.9)
    ax1.set_xlabel("Date", fontsize=14)
    ax1.set_ylabel("Price", fontsize=14)
    ax1.set_title(code, fontsize=14)
    ax1.grid(True)

    # Bind the returned handlers to locals so they stay referenced while the
    # window is open.
    zp = zoompan.ZoomPan()
    figZoom = zp.zoom_factory(ax1, base_scale=scale)
    figPan = zp.pan_factory(ax1)

    # ---- middle-left: price inside the band split into equal strips ----
    ax2 = fig.add_subplot(323)
    _lmh_label_ticks(ax2, dates)
    _lmh_draw_channel(ax2, X, Y_hat, c_high, c_low)
    ax2.plot(asset)
    ax2.set_xlabel("Date", fontsize=14)
    ax2.set_ylabel("Price", fontsize=14)
    ax2.grid(True)

    # ---- top-right: deviation from the centre line, or benchmark comparison ----
    # Strips above 0 are the overvalued side, strips below 0 the undervalued
    # side, and 0 is the value centre line.
    ax3 = fig.add_subplot(322)
    if is_index:
        ax3.plot(asset)
        # NOTE(review): only the first residual is plotted (a single point), as
        # in the original; plotting the whole residual may have been intended.
        ax3.plot(residual[0])
        _lmh_label_ticks(ax3, dates)
        _lmh_draw_channel(ax3, X, Y_hat, c_high, c_low, baseline=Y_hat)
        ax3.plot(asset)
        ax3.set_xlabel("Date", fontsize=14)
        ax3.set_ylabel("Price-center price", fontsize=14)
    else:
        ax3.plot((asset / asset.iloc[0]).round(2))
        ax3.plot(asset1, "-r", linewidth=2)
    ax3.grid(True)
    # Zoom/pan is attached whichever branch drew the panel.
    zp3 = zoompan.ZoomPan()
    figZoom = zp3.zoom_factory(ax3, base_scale=scale)
    figPan = zp3.pan_factory(ax3)

    # ---- bottom-left: histogram of the absolute deviation (100 bins) ----
    ax4 = fig.add_subplot(325)
    log.info("assert:len:%s %s" % (len(residual), residual[0]))
    _lmh_draw_deviation_hist(ax4, residual, asset.iat[-1] - Y_hat[-1], [code, asset.iat[-1]])

    # ---- bottom-right: histogram of the deviation in percent of fitted ----
    ax5 = fig.add_subplot(326)
    log.info("assert:len:%s %s" % (len(residual), residual[0]))
    distance = residual / Y_hat * 100
    now_distanse = (asset.iat[-1] - Y_hat[-1]) / Y_hat[-1] * 100
    log.debug("dis:%s now:%s" % (distance[:2], now_distanse))
    log.debug("now_distanse:%s" % now_distanse)
    _lmh_draw_deviation_hist(ax5, distance, now_distanse, [code, asset.iat[-1]])

    # ---- middle-right: close with regressions of the close and of its lows ----
    ax6 = fig.add_subplot(324)
    h = df.loc[:, ["open", "close", "high", "low"]]
    highp = h["high"].values
    lowp = h["low"].values
    closep = h["close"].values

    lr = LinearRegression()
    x = np.atleast_2d(np.linspace(0, len(closep), len(closep))).T
    lr.fit(x, closep)
    # Evaluate both fits 200 steps past the end of the data.
    xt = np.atleast_2d(np.linspace(0, len(closep) + 200, len(closep) + 200)).T
    yt = lr.predict(xt)

    # Interior bars whose high and low are both <= the previous bar's and
    # < the next bar's.
    bP = [
        i for i in range(1, len(highp) - 1)
        if highp[i] <= highp[i - 1] and highp[i] < highp[i + 1]
        and lowp[i] <= lowp[i - 1] and lowp[i] < lowp[i + 1]
    ]
    bV = [lowp[i] for i in bP]
    # LIS is defined elsewhere; it is used here as returning (values, positions
    # into bV) - presumably a longest increasing subsequence; verify.
    low_vals, low_pos = LIS(bV)
    idx = [bP[j] for j in low_pos]

    lr = LinearRegression()
    lr.fit(np.atleast_2d(np.array(idx)).T, np.array(low_vals))
    estV = lr.predict(xt)

    ax6.plot(closep, linewidth=2)
    ax6.plot(idx, low_vals, "ko")
    ax6.plot(xt, estV, "-r", linewidth=3)
    ax6.plot(xt, yt, "-g", linewidth=3)
    ax6.grid(True)
    zp2 = zoompan.ZoomPan()
    figZoom = zp2.zoom_factory(ax6, base_scale=scale)
    figPan = zp2.pan_factory(ax6)
    show()


def _lmh_label_ticks(ax, dates):
    """Relabel the x ticks of *ax* (all but the last) with *dates* entries by position."""
    ticks = ax.get_xticks()
    # Tick positions are floats; truncate them so they can be used as indices.
    ax.set_xticklabels([dates[int(t)] for t in ticks[:-1]])


def _lmh_draw_channel(ax, X, Y_hat, c_high, c_low, baseline=0, segments=5):
    """Draw the ``segments + 1`` red lines that split the band into equal strips.

    Each line is ``Y_hat - c - baseline`` for ``c`` evenly spaced from
    *c_high* to *c_low*.  ``np.linspace`` always yields both end lines and
    terminates even when the band has zero width, unlike stepping a float
    until it passes *c_low*.
    """
    for c in np.linspace(c_high, c_low, segments + 1):
        ax.plot(X, Y_hat - c - baseline, "r", alpha=0.9)


def _lmh_draw_deviation_hist(ax, deviation, current, legend_labels):
    """Draw a 100-bin histogram of *deviation* on *ax* with *current* marked in red."""
    pd.Series(deviation).plot(kind="hist", stacked=True, bins=100, ax=ax)
    # No ``hold`` keyword: None was its default, and newer matplotlib rejects it.
    ax.axvline(current, label="1", color="red")
    ax.set_xlabel("Undervalue ------------------------------------------> Overvalue", fontsize=14)
    ax.set_ylabel("Frequency", fontsize=14)
    ax.legend(legend_labels)
    ax.grid(True)