def plot_pre(fn):
    """Plot the original and residual time series read from ``fn``.

    The full series are drawn in pale colours, the subset selected by
    ``cut_ts(t, linsec)`` is overdrawn in saturated colours, outliers are
    circled in red and every jump epoch gets a dashed red vertical line.
    The title is built from the first two dot-separated fields of the
    file's base name.

    Parameters
    ----------
    fn : str
        Path handed to the ``read_*`` helpers; its base name must contain
        at least two dot-separated fields (site and component).
    """
    t = read_t(fn)
    y = read_y(fn)
    yres = read_yres(fn)

    # Whole series as a pale backdrop.
    plt.plot_date(t + _adj_dates, y, 'x', color='lightblue')
    plt.plot_date(t + _adj_dates, yres, 'x', color='lightgreen')

    # Samples selected by the linear sections, in saturated colours.
    linsec = read_linsec(fn)
    ch = cut_ts(t, linsec)
    plt.plot_date(t[ch] + _adj_dates, y[ch], 'x', color='blue',
                  label='original')
    plt.plot_date(t[ch] + _adj_dates, yres[ch], 'x', color='green',
                  label='residual')

    # Outliers: same fill colour as their series, red edge.
    outliers = read_outlier(fn)
    idx = outlier_index(t, outliers)
    plt.plot_date(t[idx] + _adj_dates, y[idx], 'o',
                  mec='red', mew=1, mfc='blue')
    plt.plot_date(t[idx] + _adj_dates, yres[idx], 'o',
                  mec='red', mew=1, mfc='green')

    for jump in read_jumps(fn):
        plt.axvline(jump + _adj_dates, color='red', ls='--')

    # A real boolean instead of the string 'on': the string form only
    # works through a deprecated string-to-bool conversion.
    plt.grid(True)

    fields = basename(fn).split('.')
    site = fields[0]
    cmpt = fields[1]
    plt.title('%s - %s' % (site, cmpt))
def pure_data_plot(self, connect=False, suffix='', cmap=cm.jet, bg=cm.bone(0.3)):
    """Scatter-plot the raw ``(self.x, self.y)`` points.

    Parameters
    ----------
    connect : bool
        Also draw a semi-transparent line through the points.
    suffix : str
        Extra text appended to the output file name (before ``.png``).
    cmap : colormap or None
        When given, each point is coloured by its frequency ``self.f``
        scaled linearly between the first and last frequency. ``None``
        draws plain blue markers.
    bg : colour
        Axes background, applied only when ``cmap`` is ``cm.jet`` or
        ``cm.brg``.

    The figure is shown when ``self.show`` is true, otherwise saved under
    ``self.sdc.plotpath``; it is closed afterwards in both cases.
    """
    ax = plt.axes()
    plt.axhline(y=0, color='grey', zorder=-1)
    plt.axvline(x=0, color='grey', zorder=-2)

    # These two colormaps get a cyan connecting line and the custom
    # background colour.
    bright_cmap = cmap in [cm.jet, cm.brg]

    if cmap is None:
        if connect:
            ax.plot(self.x, self.y, 'b-', lw=2, alpha=0.5)
        ax.scatter(self.x, self.y, marker='o', c='b', s=40)
    else:
        if connect:
            if bright_cmap:
                ax.plot(self.x, self.y, 'c-', lw=2, alpha=0.5, zorder=-1)
            else:
                ax.plot(self.x, self.y, 'b-', lw=2, alpha=0.5)
        f_first = self.f[0]
        f_span = self.f[-1] - f_first
        if f_span == 0:
            # All frequencies identical: avoid dividing by zero and map
            # every point to the start of the colormap.
            f_span = 1.0
        c = [cmap((f - f_first) / f_span) for f in self.f]
        # zorder was the boolean True in the original, i.e. numerically 1.
        ax.scatter(self.x, self.y, marker='o', c=c, edgecolors=c,
                   zorder=1, s=40)

    # Positional limits work on every matplotlib version; the xmin=/xmax=
    # keywords were dropped from newer releases.
    x_max = amax(self.x)
    ax.set_xlim(-0.2 * x_max, 1.2 * x_max)
    ax.set_aspect('equal')

    if bright_cmap:
        # set_axis_bgcolor no longer exists in newer matplotlib; fall back
        # to it only when set_facecolor is unavailable.
        set_background = getattr(ax, 'set_facecolor', None)
        if set_background is None:
            set_background = ax.set_axis_bgcolor
        set_background(bg)

    if self.ZorY == 'Z':
        plt.xlabel(r'resistance $R$ in Ohm')
        plt.ylabel(r'reactance $X$ in Ohm')
    if self.ZorY == 'Y':
        plt.xlabel(r'conductance $G$ in Siemens')
        plt.ylabel(r'susceptance $B$ in Siemens')

    if self.show:
        plt.show()
    else:
        plt.savefig(join(self.sdc.plotpath,
                         'c{}_{}_circle_data'.format(self.sdc.case, self.ZorY)
                         + self.sdc.suffix + self.sdc.outsuffix + suffix + '.png'),
                    dpi=240)
    plt.close()
def calc_Drawdown(self, strat_log_ret_column, num_shares):
    """Compute the maximum drawdown and the longest drawdown period.

    Adds the columns ``equity``, ``cummax`` and ``drawdown`` to
    ``self.res_df`` (modified in place) and plots equity against its
    running maximum, with a vertical line at the time of the maximum
    drawdown.

    Parameters
    ----------
    strat_log_ret_column : str
        Name of the column of ``self.res_df`` holding the strategy's
        log returns.
    num_shares : number
        Scale factor applied to the cumulative return to obtain equity.

    Sets
    ----
    self.max_drawdown
        Largest value of ``cummax - equity``.
    self.t_per_seconds
        Longest gap between two consecutive equity highs. Despite the
        name this is a ``datetime.timedelta`` (kept for compatibility).
    self.t_per_hours : float
        The same gap expressed in hours.

    Raises
    ------
    ValueError
        If fewer than two equity highs exist (no gap to measure).
    """
    risk = self.res_df
    # Cumulative log returns -> gross return, scaled by the position size.
    risk['equity'] = risk[strat_log_ret_column].cumsum().apply(np.exp) * num_shares
    risk['cummax'] = risk['equity'].cummax()
    risk['drawdown'] = risk['cummax'] - risk['equity']

    self.max_drawdown = risk['drawdown'].max()
    t_max = risk['drawdown'].idxmax()

    # A drawdown of exactly zero marks a new equity high.
    highs = risk['drawdown'][risk['drawdown'] == 0]
    # Time between consecutive highs, as datetime.timedelta objects.
    periods = (highs.index[1:].to_pydatetime() -
               highs.index[:-1].to_pydatetime())
    self.t_per_seconds = periods.max()
    # total_seconds() includes whole days; the ``.seconds`` attribute only
    # holds the sub-day remainder and under-reports multi-day periods.
    self.t_per_hours = self.t_per_seconds.total_seconds() / 60 / 60

    risk[['equity', 'cummax']].plot(figsize=(10, 6))
    plt.axvline(t_max, c='r', alpha=0.5)
    plt.title(
        'Max drawdown (vertical line) and drawdown periods (horizontal lines)'
    )
def plot_overview(self, suffix=''):
    """Plot the data points together with the fitted circle and annotations.

    Draws the scatter of ``(self.x, self.y)``, the circle given by
    ``self.center`` / ``self.radius``, a spoke from the origin to each
    characteristic frequency that is not ``None`` (chosen by ``self.ZorY``),
    a text box with the fit parameters, and frequency labels for the points
    returned by ``self.to_be_annotated('triple')``.

    The figure is shown when ``self.show`` is true, otherwise saved under
    ``self.sdc.plotpath`` with ``suffix`` appended to the file name; it is
    closed afterwards.
    """
    xs = self.x
    ys = self.y
    radius = self.radius
    cx = self.center.real
    cy = self.center.imag

    ax = plt.axes()
    plt.scatter(xs, ys, marker='o', c='b', s=40)
    plt.axhline(y=0, color='grey', zorder=-1)
    plt.axvline(x=0, color='grey', zorder=-2)

    # Fitted circle and its centre.
    angle = linspace(0, 2*pi, 201)
    plt.plot(radius*cos(angle) + cx, radius*sin(angle) + cy, 'g-')
    plt.plot([cx], [cy], 'gx', ms=12)

    # Characteristic angles/frequencies depend on the representation.
    if self.ZorY == 'Z':
        philist = [self.phi_a, self.phi_p, self.phi_n]
        flist = [self.fa, self.fp, self.fn]
    elif self.ZorY == 'Y':
        philist = [self.phi_m, self.phi_s, self.phi_r]
        flist = [self.fm, self.fs, self.fr]

    for phi, freq in zip(philist, flist):
        if freq is None:
            continue
        xpos = cx + radius*cos(phi)
        ypos = cy + radius*sin(phi)
        # Place the label 20% further out along the radius.
        xos = 0.2*(xpos - cx)
        yos = 0.2*(ypos - cy)
        plt.plot([0, xpos], [0, ypos], 'co-')
        ax.annotate('{:.3f} Hz'.format(freq),
                    xy=(xpos, ypos), xycoords='data',
                    xytext=(xpos + xos, ypos + yos), textcoords='data',
                    arrowprops=dict(arrowstyle="->", shrinkA=0, shrinkB=10))

    plt.axis('equal')
    if self.ZorY == 'Z':
        plt.xlabel(r'resistance $R$ in Ohm')
        plt.ylabel(r'reactance $X$ in Ohm')
    if self.ZorY == 'Y':
        plt.xlabel(r'conductance $G$ in Siemens')
        plt.ylabel(r'susceptance $B$ in Siemens')
    plt.title("fitting the admittance circle with Powell's method")

    # Text box summarising the fit, anchored below the circle.
    summary = ''.join([
        'best fit (fmin_powell):\n',
        'center at G+iB = {:.5f} + i*{:.8f}\n'.format(cx, cy),
        'radius = {:.5f}; '.format(radius),
        'residue: {:.2e}'.format(self.resid),
    ])
    box = plt.text(-radius, cy - 1.1*radius, summary,
                   fontsize=8, ha='left', va='top')
    box.set_bbox(dict(facecolor='gray', alpha=0.25))

    # Frequency labels for the selected data points.
    idxlist = self.to_be_annotated('triple')
    offsets = self.annotation_offsets(idxlist, factor=0.1, xshift=0.15)
    for k, point in enumerate(idxlist):
        xpos = xs[point]
        ypos = ys[point]
        shift = offsets[k]
        ax.annotate('{:.1f} Hz'.format(self.f[point]),
                    xy=(xpos, ypos), xycoords='data',
                    xytext=(xpos + shift.real, ypos + shift.imag),
                    textcoords='data',
                    arrowprops=dict(arrowstyle="->", shrinkA=0, shrinkB=10))

    if self.show:
        plt.show()
    else:
        outname = 'c{}_fitted_{}_circle'.format(self.sdc.case, self.ZorY) + suffix + '.png'
        plt.savefig(join(self.sdc.plotpath, outname), dpi=240)
    plt.close()
def plot_pre(fn):
    """Plot the original and residual time series read from ``fn``.

    Draws, in this order: both full series in pale colours, the samples
    selected by ``cut_ts(t, linsec)`` in saturated colours, the outliers
    as red-edged circles, and one dashed red vertical line per jump.
    The title is ``"<site> - <cmpt>"``, taken from the first two
    dot-separated fields of the file's base name.

    Parameters
    ----------
    fn : str
        Path handed to the ``read_*`` helpers.
    """
    t = read_t(fn)
    y = read_y(fn)
    yres = read_yres(fn)

    # Pale backdrop: every sample of both series.
    plt.plot_date(t + _adj_dates, y, 'x', color='lightblue')
    plt.plot_date(t + _adj_dates, yres, 'x', color='lightgreen')

    # Saturated overlay: only the samples kept by the linear sections.
    linsec = read_linsec(fn)
    ch = cut_ts(t, linsec)
    plt.plot_date(t[ch] + _adj_dates, y[ch], 'x',
                  color='blue', label='original')
    plt.plot_date(t[ch] + _adj_dates, yres[ch], 'x',
                  color='green', label='residual')

    # Outliers keep their series' fill colour and get a red edge.
    outliers = read_outlier(fn)
    idx = outlier_index(t, outliers)
    plt.plot_date(t[idx] + _adj_dates, y[idx], 'o',
                  mec='red', mew=1, mfc='blue')
    plt.plot_date(t[idx] + _adj_dates, yres[idx], 'o',
                  mec='red', mew=1, mfc='green')

    for jump in read_jumps(fn):
        plt.axvline(jump + _adj_dates, color='red', ls='--')

    # Pass a boolean: the string 'on' depends on a deprecated
    # string-to-bool conversion inside matplotlib.
    plt.grid(True)

    name_fields = basename(fn).split('.')
    site = name_fields[0]
    cmpt = name_fields[1]
    plt.title('%s - %s' % (site, cmpt))
def start_plot(self, w=1.3, connect=False):
    """Create the figure/axes, draw the data and set a square view window.

    The window is centred on the fitted circle when ``self.center`` is
    set; otherwise it is derived from the extent of the data. ``w`` scales
    the half-width of the window relative to the (estimated) radius and
    ``connect`` is forwarded to ``self.plot_data``.
    """
    self.fig = plt.figure()
    self.ax = plt.axes()
    plt.axhline(y=0, color='grey', zorder=-1)
    plt.axvline(x=0, color='grey', zorder=-2)
    self.plot_data(connect=connect)
    self.ax.set_aspect('equal', 'datalim')

    if self.center is None:
        # No fit yet: approximate centre and radius from the data extent.
        y_low = amin(self.y)
        y_high = amax(self.y)
        mid_x = 0.5*amax(self.x)
        mid_y = 0.5*(y_low + y_high)
        half = 0.5*(y_high - y_low)
    else:
        mid_x = self.center.real
        mid_y = self.center.imag
        half = self.radius
    self.ax.axis([mid_x - w*half, mid_x + w*half,
                  mid_y - w*half, mid_y + w*half])

    if self.ZorY == 'Z':
        plt.xlabel(r'resistance $R$ in Ohm')
        plt.ylabel(r'reactance $X$ in Ohm')
    if self.ZorY == 'Y':
        plt.xlabel(r'conductance $G$ in Siemens')
        plt.ylabel(r'susceptance $B$ in Siemens')
def get_guess(xdata, ydata, step, use_zeros, num_peaks):
    """Build the initial parameter guess for a line-plus-Gaussians fit.

    ``ydata`` is split into runs with ``get_runs``; peaks are picked by
    ``get_peaks`` from all runs when ``use_zeros`` is true, otherwise from
    the non-zero runs only. The baseline comes from ``find_line`` and the
    peak widths from ``find_widths``.

    Returns
    -------
    list
        ``[guess, n_runs]`` where ``guess`` is the ``find_line`` result
        extended by ``[x_at_peak, amplitude, width / 4]`` for every peak,
        and ``n_runs`` is ``len(runs)`` if ``use_zeros`` else
        ``len(non_zero_runs)``.
    """
    runs, zero_runs, non_zero_runs, run_map = get_runs(ydata, step)
    candidate_runs = runs if use_zeros else non_zero_runs
    peaks, peak_idx, max_info = get_peaks(ydata, num_peaks, candidate_runs)

    # Gross error handling for the case where the max val isn't detected as
    # a peak (making sure it's added to runs in the correct order).
    if max_info:
        max_idx = max_info[0]
        if run_map[max_idx] >= len(runs):
            # Every entry of ``runs`` is a list of indices, so the new run
            # must be a one-element list rather than a bare index.
            runs.append([max_idx])
        else:
            runs[run_map[max_idx]].append(max_idx)

    # This plots my guesses for the peaks
    if DEBUG:
        for idx in peak_idx:
            plt.axvline(x=idx)

    # Should rethink widths calculation, it's usually about 1/5 of actual,
    # which means the algorithm needs more iterations to get closer.
    widths = find_widths(xdata, peak_idx, runs, run_map)
    guess = find_line(zero_runs, runs, xdata, ydata)

    # This plots my guess for the line
    if DEBUG:
        plt.plot([guess[0] * j + guess[1] for j in xdata], '--')

    for idx, amp in enumerate(peaks):
        center = xdata[peak_idx[idx]]
        width = widths[idx] / 4
        guess += [center, amp, width]
        # Plot my initial guesses for the gaussian(s)
        if DEBUG:
            plt.plot([gaussian(i, center, width, amp) for i in xdata], '--')

    return [guess, len(runs) if use_zeros else len(non_zero_runs)]
def getGuess(xdata, ydata, step, useZeros, numPeaks):
    """Build the initial parameter guess for a line-plus-Gaussians fit.

    ``ydata`` is split into runs with ``getRuns``; peaks are picked by
    ``getPeaks`` from all runs when ``useZeros`` is true, otherwise from
    the non-zero runs only. The baseline comes from ``findLine`` and the
    peak widths from ``findWidths``.

    Returns
    -------
    list
        ``[guess, nRuns]`` where ``guess`` is the ``findLine`` result
        extended by ``[x_at_peak, amplitude, width / 4]`` for every peak,
        and ``nRuns`` is ``len(runs)`` if ``useZeros`` else
        ``len(nonZeroRuns)``.
    """
    runs, zeroRuns, nonZeroRuns, runMap = getRuns(ydata, step)
    candidateRuns = runs if useZeros else nonZeroRuns
    peaks, peakIdx, maxInfo = getPeaks(ydata, numPeaks, candidateRuns)

    # Gross error handling for the case where the max val isn't detected as
    # a peak (making sure it's added to runs in the correct order).
    if maxInfo:
        maxIdx = maxInfo[0]
        if runMap[maxIdx] >= len(runs):
            # Every entry of ``runs`` is a list of indices, so the new run
            # must be a one-element list rather than a bare index.
            runs.append([maxIdx])
        else:
            runs[runMap[maxIdx]].append(maxIdx)

    # This plots my guesses for the peaks
    if DEBUG:
        for idx in peakIdx:
            plt.axvline(x=idx)

    widths = findWidths(xdata, peakIdx, runs, runMap)
    guess = findLine(zeroRuns, runs, xdata, ydata)

    # This plots my guess for the line
    if DEBUG:
        plt.plot([guess[0] * j + guess[1] for j in xdata], '--')

    for idx, amp in enumerate(peaks):
        center = xdata[peakIdx[idx]]
        width = widths[idx] / 4
        guess += [center, amp, width]
        # Plot my initial guesses for the gaussian(s)
        if DEBUG:
            plt.plot([gaussian(i, center, width, amp) for i in xdata], '--')

    return [guess, len(runs) if useZeros else len(nonZeroRuns)]
def _recordRun(run, bucket, runs, zeroRuns, nonZeroRuns):
    """Store ``run`` if it has more than two points; return True if stored."""
    # Three points make a curve!
    if len(run) <= 2:
        return False
    runs.append(run)
    if bucket == 0:
        # Zero runs are remembered by their position in ``runs``...
        zeroRuns.append(len(runs) - 1)
    else:
        # ...whereas non-zero runs are remembered as the run itself.
        nonZeroRuns.append(run)
    return True


def getRuns(data, step):
    """Group consecutive points that fall into the same bucket into runs.

    A run is a list of consecutive indices of ``data`` whose
    ``getBucket(point, step)`` values are equal. Only runs with more than
    two points are kept.

    Returns
    -------
    list
        ``[runs, zeroRuns, nonZeroRuns, runMap]`` where

        * ``runs`` is the list of kept runs,
        * ``zeroRuns`` holds the indices (into ``runs``) of runs whose
          bucket is 0,
        * ``nonZeroRuns`` holds the other kept runs themselves (the same
          list objects as in ``runs``),
        * ``runMap[i]`` is the number of runs recorded when point ``i``
          was visited, i.e. the index in ``runs`` of the run containing
          it (or of the next run, for points in discarded short runs).

        All four lists are empty for empty ``data``.
    """
    zeroRuns, nonZeroRuns, runs, runMap = [], [], [], []
    # ``len`` rather than truthiness so numpy arrays are accepted too.
    if len(data) == 0:
        return [runs, zeroRuns, nonZeroRuns, runMap]

    currRun = []
    currBuck = getBucket(data[0], step)
    runIdx = 0
    for idx, point in enumerate(data):
        newBuck = getBucket(point, step)
        if newBuck == currBuck:
            currRun.append(idx)
        else:
            # Plotting the end of a run
            if DEBUG:
                plt.axvline(x=idx)
            if _recordRun(currRun, currBuck, runs, zeroRuns, nonZeroRuns):
                runIdx += 1
            currRun = [idx]
            currBuck = newBuck
        runMap.append(runIdx)

    # Flush the run that was still open when the data ended.
    _recordRun(currRun, currBuck, runs, zeroRuns, nonZeroRuns)
    return [runs, zeroRuns, nonZeroRuns, runMap]
def get_linear_model_histogram(code, ptype="f", dtype="d", start=None, end=None):
    """Draw a six-panel linear-trend dashboard for one security and show it.

    Panels (3x2 grid):

    * 321 - close price with the OLS trend line and the two parallel lines
      through the largest negative/positive residual;
    * 323 - close price with the band between those lines split into five;
    * 322 - for index codes (prefix ``999``/``399``): price and the band
      lines relative to the trend; otherwise the price normalised to its
      first value against the equally normalised benchmark index;
    * 325 - histogram of ``price - trend`` with the latest value marked;
    * 326 - histogram of the same residual in percent of the trend;
    * 324 - close price, a linear fit of it, and a linear fit through the
      swing lows selected by ``LIS``.

    Parameters
    ----------
    code : str
        Security code; the first digit selects the benchmark index.
    ptype, start, end
        Forwarded unchanged to ``tdd.get_tdx_append_now_df``.
    dtype : str
        ``"d"`` keeps the data as loaded; any other value resamples it via
        ``tdd.get_tdx_stock_period_to_type``.
    """
    df = tdd.get_tdx_append_now_df(code, ptype, start, end).sort_index(ascending=True)
    if not dtype == "d":
        df = tdd.get_tdx_stock_period_to_type(df, dtype).sort_index(ascending=True)
    asset = df["close"]
    log.info("df:%s" % asset[:1])
    asset = asset.dropna()
    dates = asset.index

    # The original test ``not a or not b`` was always true; the benchmark
    # is only needed (and only used below) for non-index codes.
    is_index = code.startswith(("999", "399"))
    if not is_index:
        if code[:1] in ["5", "6", "9"]:
            code2 = "999999"
        elif code[:1] in ["3"]:
            code2 = "399006"
        else:
            code2 = "399001"
        df1 = tdd.get_tdx_append_now_df(code2, ptype, start, end).sort_index(ascending=True)
        if not dtype == "d":
            df1 = tdd.get_tdx_stock_period_to_type(df1, dtype).sort_index(ascending=True)
        asset1 = df1.loc[asset.index, "close"]
        # Normalise the benchmark to its first value.
        asset1 = asset1.apply(lambda x: round(x / asset1[:1], 2))

    # Plot the price over time.
    fig = plt.figure(figsize=(16, 10))
    plt.subplots_adjust(left=0.05, bottom=0.08, right=0.95, top=0.95, wspace=0.15, hspace=0.25)
    ax1 = fig.add_subplot(321)
    ax1.plot(asset)
    ticks = ax1.get_xticks()
    # Label x-axis with dates. Tick positions are floats; cast them so the
    # lookup works on numpy/pandas versions that reject float indices.
    ax1.set_xticklabels([dates[int(i)] for i in ticks[:-1]])

    # Linear fit of price against the sample number.
    X = np.arange(len(asset))
    x = sm.add_constant(X)
    model = regression.linear_model.OLS(asset, x).fit()
    a = model.params[0]
    b = model.params[1]
    log.info("X:%s a:%s b:%s" % (len(asset), a, b))
    Y_hat = X * b + a

    # Actual minus fitted value: the extreme residuals bound the band in
    # which the price has fluctuated around the trend.
    # Shift the trend line down to the most negative residual.
    i = (asset.values.T - Y_hat).argmin()
    c_low = X[i] * b + a - asset.values[i]
    Y_hatlow = X * b + a - c_low
    # Shift the trend line up to the most positive residual.
    i = (asset.values.T - Y_hat).argmax()
    c_high = X[i] * b + a - asset.values[i]
    Y_hathigh = X * b + a - c_high
    plt.plot(X, Y_hat, "k", alpha=0.9)
    plt.plot(X, Y_hatlow, "r", alpha=0.9)
    plt.plot(X, Y_hathigh, "r", alpha=0.9)
    plt.xlabel("Date", fontsize=14)
    plt.ylabel("Price", fontsize=14)
    plt.title(code, fontsize=14)
    plt.grid(True)

    scale = 1.1
    zp = zoompan.ZoomPan()
    figZoom = zp.zoom_factory(ax1, base_scale=scale)
    figPan = zp.pan_factory(ax1)

    ax2 = fig.add_subplot(323)
    ticks = ax2.get_xticks()
    ax2.set_xticklabels([dates[int(i)] for i in ticks[:-1]])
    # Split the band between the two bounds into n equal strips.
    n = 5
    d = (-c_high + c_low) / n
    c = c_high
    while c <= c_low:
        Y = X * b + a - c
        plt.plot(X, Y, "r", alpha=0.9)
        c = c + d
    ax2.plot(asset)
    plt.xlabel("Date", fontsize=14)
    plt.ylabel("Price", fontsize=14)
    plt.grid(True)

    # Distance of the price from the trend (centre) line in its own chart:
    # the band is split evenly; above 0 is overvalued, below 0 undervalued.
    ax3 = fig.add_subplot(322)
    # NOTE(review): this keeps only the first residual (a scalar), so the
    # plt.plot(distance) call below draws a single point - confirm intent.
    distance = (asset.values.T - Y_hat)[0]
    if is_index:
        ax3.plot(asset)
        plt.plot(distance)
        ticks = ax3.get_xticks()
        ax3.set_xticklabels([dates[int(i)] for i in ticks[:-1]])
        n = 5
        d = (-c_high + c_low) / n
        c = c_high
        while c <= c_low:
            Y = X * b + a - c
            plt.plot(X, Y - Y_hat, "r", alpha=0.9)
            c = c + d
        ax3.plot(asset)
        plt.xlabel("Date", fontsize=14)
        plt.ylabel("Price-center price", fontsize=14)
        plt.grid(True)
    else:
        # Price normalised to its first value, against the benchmark.
        as3 = asset.apply(lambda x: round(x / asset[:1], 2))
        ax3.plot(as3)
        ax3.plot(asset1, "-r", linewidth=2)
        plt.grid(True)
        zp3 = zoompan.ZoomPan()
        figZoom = zp3.zoom_factory(ax3, base_scale=scale)
        figPan = zp3.pan_factory(ax3)

    # Histogram of the residuals; 100 bins give a finer picture of how
    # often the price sat at each distance from the trend.
    ax4 = fig.add_subplot(325)
    log.info("assert:len:%s %s" % (len(asset.values.T - Y_hat), (asset.values.T - Y_hat)[0]))
    distance = asset.values.T - Y_hat
    now_distanse = asset.iat[-1] - Y_hat[-1]
    pd.Series(distance).plot(kind="hist", stacked=True, bins=100)
    # ``hold=None`` was dropped: it was the default and the keyword no
    # longer exists in matplotlib 3.
    plt.axvline(now_distanse, label="1", color="red")
    plt.xlabel("Undervalue ------------------------------------------> Overvalue", fontsize=14)
    plt.ylabel("Frequency", fontsize=14)
    plt.legend([code, asset.iat[-1]])
    plt.grid(True)

    # Same histogram with the residual expressed in percent of the trend.
    ax5 = fig.add_subplot(326)
    log.info("assert:len:%s %s" % (len(asset.values.T - Y_hat), (asset.values.T - Y_hat)[0]))
    distance = (asset.values.T - Y_hat) / Y_hat * 100
    now_distanse = (asset.iat[-1] - Y_hat[-1]) / Y_hat[-1] * 100
    log.debug("dis:%s now:%s" % (distance[:2], now_distanse))
    log.debug("now_distanse:%s" % now_distanse)
    pd.Series(distance).plot(kind="hist", stacked=True, bins=100)
    plt.axvline(now_distanse, label="1", color="red")
    plt.xlabel("Undervalue ------------------------------------------> Overvalue", fontsize=14)
    plt.ylabel("Frequency", fontsize=14)
    plt.legend([code, asset.iat[-1]])
    plt.grid(True)

    ax6 = fig.add_subplot(324)
    h = df.loc[:, ["open", "close", "high", "low"]]
    highp = h["high"].values
    lowp = h["low"].values
    closep = h["close"].values
    # Linear fit of the close, extrapolated 200 samples past the data.
    # (A stray ``LinearRegression(..., normalize=False)`` expression was
    # removed here: it built an unused object and fails on scikit-learn
    # releases without the ``normalize`` argument.)
    lr = LinearRegression()
    x = np.atleast_2d(np.linspace(0, len(closep), len(closep))).T
    lr.fit(x, closep)
    xt = np.atleast_2d(np.linspace(0, len(closep) + 200, len(closep) + 200)).T
    yt = lr.predict(xt)
    # Candidate bottoms: bars whose high and low are both not above the
    # previous bar's and strictly below the next bar's.
    bV = []
    bP = []
    for i in range(1, len(highp) - 1):
        if highp[i] <= highp[i - 1] and highp[i] < highp[i + 1] and lowp[i] <= lowp[i - 1] and lowp[i] < lowp[i + 1]:
            bV.append(lowp[i])
            bP.append(i)
    # LIS returns the selected values and their positions within bV.
    d, p = LIS(bV)
    idx = [bP[k] for k in p]
    lr = LinearRegression()
    X = np.atleast_2d(np.array(idx)).T
    Y = np.array(d)
    lr.fit(X, Y)
    estV = lr.predict(xt)
    ax6.plot(closep, linewidth=2)
    ax6.plot(idx, d, "ko")
    ax6.plot(xt, estV, "-r", linewidth=3)
    ax6.plot(xt, yt, "-g", linewidth=3)
    plt.grid(True)
    zp2 = zoompan.ZoomPan()
    figZoom = zp2.zoom_factory(ax6, base_scale=scale)
    figPan = zp2.pan_factory(ax6)
    show()
def b_spline_over_sections(self, sj, j):
    """Evaluate the B-spline at section midpoints, centred on midpoint ``j``.

    ``sj`` is the sequence of section boundaries. The midpoints of
    consecutive boundaries are computed, shifted so that midpoint ``j`` is
    at zero, and passed to ``self.b_spline``.

    Returns the tuple ``(midpoints, values)``.
    """
    midpoints = asarray([(left + right) / 2.
                         for left, right in zip(sj[0:-1], sj[1:])])
    values = self.b_spline(midpoints - midpoints[j])
    return midpoints, values


if __name__ == '__main__':
    # Demo: plot the spline over 20-unit sections, centred on section 30.
    from pylab import plt
    ds = 20
    func = CubicBSplines(ds=ds)
    s = arange(0, 701, 20)
    sm, y = func.b_spline_over_sections(s, 30)
    plt.plot(sm, y, color='red', marker='o')
    # Mark every section boundary.
    for si in s:
        plt.axvline(si, color='gray')
    plt.show()
# Equity highs are the rows where the drawdown is exactly zero.
temp = risk.loc[risk['drawdown'] == 0, 'drawdown']
# Time elapsed between consecutive highs.
periods = (temp.index[1:].to_pydatetime() -
           temp.index[:-1].to_pydatetime())
periods[20:30]

# Equity against its running maximum, with the max-drawdown time marked.
risk[['equity', 'cummax']].plot(figsize=(10, 6))
plt.axvline(t_max, c='r', alpha=0.5)

# Percentiles at which the value-at-risk is evaluated.
percs = np.array([0.01, 0.1, 1., 2.5, 5.0, 10.0])
risk['returns'] = np.log(risk['equity'] / risk['equity'].shift(1))
VaR = scs.scoreatpercentile(equity * risk['returns'], percs)


def print_var():
    """Print a table of confidence levels and the matching value-at-risk.

    Reads the module-level ``percs`` and ``VaR``; each row shows
    ``100 - percentile`` and the negated score.
    """
    print('%16s %16s' % ('Confidence Level', 'Value-at-Risk'))
    print(33 * '-')
    for percentile, score in zip(percs, VaR):
        print('%16.2f %16.3f' % (100 - percentile, -score))
maturity_list = [third_fridays[3]]  # only 18. April 2014 maturity

# start the calibration
parameters = srd_get_parameter_series(pricing_date_list, maturity_list)

# plot the results
for mat in maturity_list:
    # Select the rows of the maturity being plotted. The original always
    # filtered on maturity_list[0], which is only correct while the list
    # has a single entry.
    to_plot = parameters[parameters.maturity == mat].set_index('date')[[
        'kappa', 'theta', 'sigma', 'MSE'
    ]]
    to_plot.plot(subplots=True, color='b', figsize=(10, 12),
                 title='SRD | ' + str(mat)[:10])
    plt.tight_layout()
    plt.savefig('./images/dx_srd_cali_1_%s_.png' % str(mat)[:10])

    # plotting the histogram of the MSE values (over all rows of
    # ``parameters``), with the mean marked by a dashed line
    fig, ax = plt.subplots()
    dat = parameters.MSE
    dat.hist(bins=30, ax=ax)
    plt.axvline(dat.mean(), color='r', ls='dashed', lw=1.5,
                label='mean = %5.4f' % dat.mean())
    plt.legend()
    plt.tight_layout()
    plt.savefig('./images/dx_srd_cali_1_hist_%s_.png' % str(mat)[:10])

# measuring and printing the time needed for the script execution
print('Time in minutes %.2f' % ((time.time() - t0) / 60))
def get_linear_model_histogramDouble(code, ptype='f', dtype='d', start=None, end=None, vtype='close', filter='n',
                                     df=None):
    """Draw a six-panel linear-trend dashboard for one security (non-blocking).

    Panels (3x2 grid):

    * 321 - the ``vtype`` series with the OLS trend line and the two
      parallel lines through the largest negative/positive residual;
    * 323 - the series with the band between those lines split into five;
    * 322 - for index codes (prefix ``999``/``399``): the series and the
      band lines relative to the trend; otherwise the series normalised
      to its first value against the equally normalised benchmark index;
    * 325 - histogram of ``value - trend`` with the latest value marked;
    * 326 - histogram of the same residual in percent of the trend;
    * 324 - close price, a linear fit of it, and a linear fit through the
      swing lows selected by ``LIS``.

    Parameters
    ----------
    code : str
        Security code; the first digit selects the benchmark index.
    ptype, end
        Forwarded unchanged to ``tdd.get_tdx_append_now_df_api``.
    dtype : str
        ``'d'`` keeps the data as loaded; any other value resamples it via
        ``tdd.get_tdx_stock_period_to_type``.
    start
        Start date. When given together with ``filter='y'`` it is replaced
        by the result of ``tdd.get_duration_price_date``.
    vtype : str
        Column of the data frame to analyse.
    filter : str
        ``'y'`` enables the start-date adjustment described above.
    df : DataFrame or None
        Pre-loaded data; loaded through ``tdd`` when ``None``.
    """
    if start is not None and filter == 'y':
        if code not in ['999999', '399006', '399001']:
            index_d, dl = tdd.get_duration_Index_date(dt=start)
            log.debug("index_d:%s dl:%s" % (str(index_d), dl))
        else:
            index_d = cct.day8_to_day10(start)
            log.debug("index_d:%s" % (index_d))
        start = tdd.get_duration_price_date(code, ptype='low', dt=index_d)
        log.debug("start:%s" % (start))
    if df is None:
        df = tdd.get_tdx_append_now_df_api(code, ptype, start, end).sort_index(ascending=True)
    if not dtype == 'd':
        df = tdd.get_tdx_stock_period_to_type(df, dtype).sort_index(ascending=True)
    asset = df[vtype]
    log.info("df:%s" % asset[:1])
    asset = asset.dropna()
    dates = asset.index

    # The original test ``not a or not b`` was always true; the benchmark
    # is only needed (and only used below) for non-index codes.
    is_index = code.startswith(('999', '399'))
    if not is_index:
        if code[:1] in ['5', '6', '9']:
            code2 = '999999'
        elif code[:1] in ['3']:
            code2 = '399006'
        else:
            code2 = '399001'
        df1 = tdd.get_tdx_append_now_df_api(code2, ptype, start, end).sort_index(ascending=True)
        if not dtype == 'd':
            df1 = tdd.get_tdx_stock_period_to_type(df1, dtype).sort_index(ascending=True)
        asset1 = df1.loc[asset.index, vtype]
        # Normalise the benchmark to its first value.
        asset1 = asset1.apply(lambda x: round(x / asset1[:1], 2))

    # Plot the series over time.
    fig = plt.figure(figsize=(16, 10))
    plt.subplots_adjust(left=0.05, bottom=0.08, right=0.95, top=0.95, wspace=0.15, hspace=0.25)
    ax1 = fig.add_subplot(321)
    ax1.plot(asset)
    ticks = ax1.get_xticks()
    # Label x-axis with dates, forcing the last label to be the last
    # sample. Tick positions are floats; cast them so the lookup works on
    # numpy/pandas versions that reject float indices.
    ax1.set_xticklabels([dates[int(i)] for i in (np.append(ticks[:-1], len(asset) - 1))], rotation=15)

    # Linear fit of the series against the sample number.
    X = np.arange(len(asset))
    x = sm.add_constant(X)
    model = regression.linear_model.OLS(asset, x).fit()
    a = model.params[0]
    b = model.params[1]
    log.info("X:%s a:%s b:%s" % (len(asset), a, b))
    Y_hat = X * b + a

    # Actual minus fitted value: the extreme residuals bound the band in
    # which the series has fluctuated around the trend.
    # Shift the trend line down to the most negative residual.
    i = (asset.values.T - Y_hat).argmin()
    c_low = X[i] * b + a - asset.values[i]
    Y_hatlow = X * b + a - c_low
    # Shift the trend line up to the most positive residual.
    i = (asset.values.T - Y_hat).argmax()
    c_high = X[i] * b + a - asset.values[i]
    Y_hathigh = X * b + a - c_high
    plt.plot(X, Y_hat, 'k', alpha=0.9)
    plt.plot(X, Y_hatlow, 'r', alpha=0.9)
    plt.plot(X, Y_hathigh, 'r', alpha=0.9)
    plt.ylabel('Price', fontsize=12)
    plt.title(code + " | " + str(dates[-1])[:11], fontsize=14)
    plt.legend([asset.iat[-1]], fontsize=12, loc=4)
    plt.grid(True)

    scale = 1.1
    zp = zoompan.ZoomPan()
    figZoom = zp.zoom_factory(ax1, base_scale=scale)
    figPan = zp.pan_factory(ax1)

    ax2 = fig.add_subplot(323)
    # Re-uses the tick positions taken from the first panel.
    ax2.set_xticklabels([dates[int(i)] for i in (np.append(ticks[:-1], len(asset) - 1))], rotation=15)
    # Split the band between the two bounds into n equal strips.
    n = 5
    d = (-c_high + c_low) / n
    c = c_high
    while c <= c_low:
        Y = X * b + a - c
        plt.plot(X, Y, 'r', alpha=0.9)
        c = c + d
    ax2.plot(asset)
    plt.ylabel('Price', fontsize=12)
    plt.grid(True)

    # Distance of the series from the trend (centre) line in its own
    # chart: the band is split evenly; above 0 is overvalued, below 0
    # undervalued.
    ax3 = fig.add_subplot(322)
    # NOTE(review): this keeps only the first residual (a scalar), so the
    # plt.plot(distance) call below draws a single point - confirm intent.
    distance = (asset.values.T - Y_hat)[0]
    if is_index:
        ax3.plot(asset)
        plt.plot(distance)
        ticks = ax3.get_xticks()
        ax3.set_xticklabels([dates[int(i)] for i in (np.append(ticks[:-1], len(asset) - 1))], rotation=15)
        n = 5
        d = (-c_high + c_low) / n
        c = c_high
        while c <= c_low:
            Y = X * b + a - c
            plt.plot(X, Y - Y_hat, 'r', alpha=0.9)
            c = c + d
        ax3.plot(asset)
        plt.ylabel('Price-center price', fontsize=14)
        plt.grid(True)
    else:
        # Series normalised to its first value, against the benchmark.
        as3 = asset.apply(lambda x: round(x / asset[:1], 2))
        ax3.plot(as3)
        ax3.plot(asset1, '-r', linewidth=2)
        plt.grid(True)
        zp3 = zoompan.ZoomPan()
        figZoom = zp3.zoom_factory(ax3, base_scale=scale)
        figPan = zp3.pan_factory(ax3)

    # Histogram of the residuals; 100 bins give a finer picture of how
    # often the series sat at each distance from the trend.
    ax4 = fig.add_subplot(325)
    log.info("assert:len:%s %s" % (len(asset.values.T - Y_hat), (asset.values.T - Y_hat)[0]))
    distance = (asset.values.T - Y_hat)
    now_distanse = asset.iat[-1] - Y_hat[-1]
    pd.Series(distance).plot(kind='hist', stacked=True, bins=100)
    # ``hold=None`` was dropped: it was the default and the keyword no
    # longer exists in matplotlib 3.
    plt.axvline(now_distanse, label="1", color='red')
    plt.xlabel('Undervalue ------------------------------------------> Overvalue', fontsize=12)
    plt.ylabel('Frequency', fontsize=14)
    plt.legend([code, asset.iat[-1], str(dates[-1])[5:11]], fontsize=12)
    plt.grid(True)

    # Same histogram with the residual expressed in percent of the trend.
    ax5 = fig.add_subplot(326)
    log.info("assert:len:%s %s" % (len(asset.values.T - Y_hat), (asset.values.T - Y_hat)[0]))
    distance = (asset.values.T - Y_hat) / Y_hat * 100
    now_distanse = ((asset.iat[-1] - Y_hat[-1]) / Y_hat[-1] * 100)
    log.debug("dis:%s now:%s" % (distance[:2], now_distanse))
    log.debug("now_distanse:%s" % now_distanse)
    pd.Series(distance).plot(kind='hist', stacked=True, bins=100)
    plt.axvline(now_distanse, label="1", color='red')
    plt.xlabel('Undervalue ------------------------------------------> Overvalue', fontsize=14)
    plt.ylabel('Frequency', fontsize=12)
    plt.legend([code, asset.iat[-1]], fontsize=12)
    plt.grid(True)

    ax6 = fig.add_subplot(324)
    h = df.loc[:, ['open', 'close', 'high', 'low']]
    highp = h['high'].values
    lowp = h['low'].values
    closep = h['close'].values
    # Linear fit of the close, extrapolated 200 samples past the data.
    # (A stray ``LinearRegression(..., normalize=False)`` expression was
    # removed here: it built an unused object and fails on scikit-learn
    # releases without the ``normalize`` argument.)
    lr = LinearRegression()
    x = np.atleast_2d(np.linspace(0, len(closep), len(closep))).T
    lr.fit(x, closep)
    xt = np.atleast_2d(np.linspace(0, len(closep) + 200, len(closep) + 200)).T
    yt = lr.predict(xt)
    # Candidate bottoms: bars whose high and low are both not above the
    # previous bar's and strictly below the next bar's.
    bV = []
    bP = []
    for i in range(1, len(highp) - 1):
        if highp[i] <= highp[i - 1] and highp[i] < highp[i + 1] and lowp[i] <= lowp[i - 1] and lowp[i] < lowp[i + 1]:
            bV.append(lowp[i])
            bP.append(i)
    # LIS returns the selected values and their positions within bV.
    d, p = LIS(bV)
    idx = [bP[k] for k in p]
    lr = LinearRegression()
    X = np.atleast_2d(np.array(idx)).T
    Y = np.array(d)
    lr.fit(X, Y)
    estV = lr.predict(xt)
    ax6.plot(closep, linewidth=2)
    ax6.plot(idx, d, 'ko')
    ax6.plot(xt, estV, '-r', linewidth=3)
    ax6.plot(xt, yt, '-g', linewidth=3)
    plt.grid(True)
    zp2 = zoompan.ZoomPan()
    figZoom = zp2.zoom_factory(ax6, base_scale=scale)
    figPan = zp2.pan_factory(ax6)
    plt.show(block=False)
def run(*args): import pkg_resources if args[0] == 'test': TEST_PATH = pkg_resources.resource_filename('confine', 'TEST/') dir_in = TEST_PATH + 'test.csv' f = 'test' lcc_min = 50 lcc_max = 350 else: f = args[0] dir_in = str(raw_input("Where the file is located in? ")) print "You entered: ------", str(f), ' ------' lcc_min = int( raw_input( "Enter the minimum size of LCC, we recommend a number between 30 and 50: " )) lcc_max = int( raw_input( "Enter the maximum size of LCC, we recommend a number between 300 and 500: " )) print '.....loading data.....' import os import pickle import time start_time = time.time() NET_PATH = pkg_resources.resource_filename('confine', 'NET/') id_to_sym = pickle.load(open(NET_PATH + 'id_to_sym_human.p', 'r')) G = pickle.load(open(NET_PATH + 'PPI_2015_raw.p', 'r')) file = open(dir_in, "r") initial_data = file.read().splitlines() file.close() threshold = 0.05 gene = [] pval = [] for row in initial_data: n = row.strip().split(',') p = float(n[1].strip()) g = int(n[0].strip()) if p <= threshold: gene.append(g) pval.append(p) print 'Number of genes with P.val<0.05: ', len(gene) print '.....Identifying disease module.....' 
data = zip(gene, pval) #--------------------- from func import CONFINE as conf result = conf(data, G, lcc_min, lcc_max) z_list = result[0] pval_cut_list = result[1] sig_Cluster_LCC = result[2] z_score = z_list[result[3]] p_val_cut = pval_cut_list[result[3]] print '--------------------' print 'LCC size: ', len(sig_Cluster_LCC.nodes()) print 'Z-score: ', z_score print 'P.val cut-off: ', p_val_cut print '--------------------' directory_name = f + '_' + str(time.time()) if not os.path.exists(directory_name): os.makedirs(directory_name) b = open(directory_name + '/LCC_' + f + '.txt', "w") for node in sig_Cluster_LCC.nodes(): try: print >> b, str(id_to_sym[int(node)]) + ',' + str(int(node)) except KeyError: print >> b, ' ' + ',' + str(int(node)) b.close() from pylab import plt, matplotlib fig = plt.figure(figsize=(10, 10)) ax = fig.add_subplot(111) #----------------------------------------------------plotting-------- ax.plot(pval_cut_list, z_list, 'o', color='saddlebrown', markersize=4) plt.axvline(x=p_val_cut, color='r', linestyle='--') ax.set_xlabel('P.value cut-off', fontsize=25, fontweight='bold', labelpad=18) ax.set_ylabel('Z-Score', fontsize=25, fontweight='bold', labelpad=18) ax.set_title('LCC' + ' = ' + str(len(sig_Cluster_LCC.nodes())) + ' ,' + ' Z-Score' + ' = ' + str("{0:.3f}".format(round(z_score, 4))), fontsize=20, fontweight='bold') ax.grid(True) plt.ylim(min(z_list) - min(z_list) / 5, max(z_list) + max(z_list) / 3) plt.savefig(directory_name + '/' + f + '.png', dpi=150, bbox_inches='tight') plt.close() print("--- %s seconds ---" % (time.time() - start_time))
fn_vec = frompyfunc(self._b_spline_scalar, 1, 1) res = fn_vec(asarray(s - 2. * self.ds, float)) return asarray(res, float) def b_spline_over_sections(self, sj, j): sms = [] for s1, s2 in zip(sj[0:-1], sj[1:]): sm = (s1 + s2) / 2. sms.append(sm) sms = asarray(sms) y = self.b_spline(sms - sms[j]) return sms, y if __name__ == '__main__': from pylab import plt ds = 20 func = CubicBSplines(ds=ds) s = arange(0, 701, 20) sm, y = func.b_spline_over_sections(s, 30) plt.plot(sm, y, color='red', marker='o') for si in s: plt.axvline(si, color='gray') plt.show()
# In[75]: plt.figure(figsize=(8, 4)) plt.scatter(pvols, prets, c=(prets - 0.01) / pvols, marker='o') # random portfolio composition plt.plot(evols, erets, 'g', lw=4.0) # efficient frontier cx = np.linspace(0.0, 0.3) plt.plot(cx, opt[0] + opt[1] * cx, lw=1.5) # capital market line plt.plot(opt[2], f(opt[2]), 'r*', markersize=15.0) plt.grid(True) plt.axhline(0, color='k', ls='--', lw=2.0) plt.axvline(0, color='k', ls='--', lw=2.0) plt.xlabel('expected volatility') plt.ylabel('expected return') plt.colorbar(label='Sharpe ratio') # tag: portfolio_4 # title: Capital market line and tangency portfolio (star) for risk-free rate of 1% # size: 90 # In[76]: cons = ({'type': 'eq', 'fun': lambda x: statistics(x)[0] - f(opt[2])}, {'type': 'eq', 'fun': lambda x: np.sum(x) - 1}) res = sco.minimize(min_func_port, noa * [1. / noa,], method='SLSQP', bounds=bnds, constraints=cons)
if not isnan(tup[1]) ] plt.plot(plot_alphas, plot_iters, label=u"x0 = 1, x1 = x0 + 0.00001") mi_newton = Newton(entradas=alphas, x0=0.1, x1=0.1 + 0.00001) mi_newton.run() resultado = mi_newton.resultados valores = [res['resultado'] for res in resultado] plot_alphas = [tup[0] for tup in zip(alphas, valores) if not isnan(tup[1])] plot_iters = [ tup[0]['iteraciones'] for tup in zip(resultado, valores) if not isnan(tup[1]) ] plt.plot(plot_alphas, plot_iters, label=u"x0 = 0.1, x1 = x0 + 0.00001") plt.axvline(x=1, linestyle='--', label=u"α = 1") legend() # alphas = [10 ** n for n in range(-10, 20, 1)] # # plt.figure(2) # plt.xlabel(u'α') # plt.ylabel('iteraciones') # plt.semilogx() # # # codigo # # x0s = deepcopy(alphas) # mi_newton = Newton(entradas=alphas, x0s=x0s) # mi_newton.run()
from datetime import date

import numpy as np
from pylab import plt

import viscojapan as vj

# Plot column 2 of one site's '.lres' file against column 0 (days), mark
# 2011-03-11 with a dashed red line, and save the figure as '<site>_<cmpt>.pdf'.
site = 'J550'
cmpt = 'e'

tp = np.loadtxt(
    '../../tsana/pre_fit/linres/{site}.{cmpt}.lres'.format(site=site, cmpt=cmpt))
days = tp[:, 0]
yres = tp[:, 2]

# The day values are shifted by vj.adjust_mjd_for_plot_date before being handed
# to plot_date (presumably an MJD -> Matplotlib date-number offset; confirm).
plt.plot_date(days + vj.adjust_mjd_for_plot_date, yres, 'x')
# Pass a real boolean: Matplotlib no longer interprets 'on'/'off' strings and
# only uses their truthiness.
plt.grid(True)

plt.axvline(date(2011, 3, 11), color='r', ls='--')
plt.ylim([-1, 7])
plt.xlim((date(2010, 1, 1), date(2015, 1, 1)))
plt.gcf().autofmt_xdate()
plt.ylabel('m')
plt.title('%s - %s' % (site, cmpt))
plt.savefig('%s_%s.pdf' % (site, cmpt))
plt.show()
def walk_direction_preheel(ax, ay, az, t, sample_rate,
                           stride_fraction=1.0/8.0, threshold=0.5,
                           order=4, cutoff=5, plot_test=False):
    """
    Estimate local walk (not cardinal) direction with pre-heel strike phase.

    Inspired by Nirupam Roy's B.E. thesis: "WalkCompass:
    Finding Walking Direction Leveraging Smartphone's Inertial Sensors,"
    this program derives the local walk direction vector from the end
    of the primary leg's stride, when it is decelerating in its swing.
    While the WalkCompass relies on clear heel strike signals across the
    accelerometer axes, this program just uses the most prominent strikes,
    and estimates period from the real part of the FFT of the data.

    NOTE::
        This algorithm computes a single walk direction, and could compute
        multiple walk directions prior to detected heel strikes, but does
        NOT estimate walking direction for every time point, like
        walk_direction_attitude().

    Search windows and deceleration phases that would start before the
    first sample are clipped to the start of the recording, and peaks with
    no samples before them are left out of the average.

    Parameters
    ----------
    ax : list or numpy array
        x-axis accelerometer data
    ay : list or numpy array
        y-axis accelerometer data
    az : list or numpy array
        z-axis accelerometer data
    t : list or numpy array
        accelerometer time points
    sample_rate : float
        sample rate of accelerometer reading (Hz)
    stride_fraction : float
        fraction of stride assumed to be deceleration phase of primary leg
    threshold : float
        ratio to the maximum value of the summed acceleration across axes
    order : integer
        passed unchanged to heel_strikes() (presumably the order of its
        smoothing filter -- confirm against heel_strikes)
    cutoff : integer
        passed unchanged to heel_strikes() (presumably the cutoff frequency
        of its smoothing filter -- confirm against heel_strikes)
    plot_test : Boolean
        plot most prominent heel strikes?

    Returns
    -------
    direction : numpy array of three floats
        unit vector of local walk (not cardinal) direction

    Examples
    --------
    >>> from mhealthx.xio import read_accel_json
    >>> from mhealthx.signals import compute_sample_rate
    >>> input_file = '/Users/arno/DriveWork/mhealthx/mpower_sample_data/deviceMotion_walking_outbound.json.items-a2ab9333-6d63-4676-977a-08591a5d837f5221783798792869048.tmp'
    >>> device_motion = True
    >>> start = 150
    >>> t, axyz, gxyz, uxyz, rxyz, sample_rate, duration = read_accel_json(input_file, start, device_motion)
    >>> ax, ay, az = axyz
    >>> from mhealthx.extractors.pyGait import walk_direction_preheel
    >>> threshold = 0.5
    >>> stride_fraction = 1.0/8.0
    >>> order = 4
    >>> cutoff = max([1, sample_rate/10])
    >>> plot_test = True
    >>> direction = walk_direction_preheel(ax, ay, az, t, sample_rate, stride_fraction, threshold, order, cutoff, plot_test)

    """
    import numpy as np

    from mhealthx.extractors.pyGait import heel_strikes
    from mhealthx.signals import compute_interpeak

    # Sum of absolute values across accelerometer axes:
    data = np.abs(ax) + np.abs(ay) + np.abs(az)

    # Find maximum peaks of smoothed data:
    plot_test2 = False
    _, ipeaks_smooth = heel_strikes(data, sample_rate, threshold,
                                    order, cutoff, plot_test2, t)

    # Compute number of samples between peaks using the real part of the FFT:
    interpeak = compute_interpeak(data, sample_rate)
    # The np.int alias was removed in NumPy 1.24; the builtin int is equivalent.
    decel = int(np.round(stride_fraction * interpeak))

    # Find maximum peaks close to maximum peaks of smoothed data.
    # The window start is clipped at 0: a negative slice start would wrap
    # around to the end of the array instead of the start.
    ipeaks = []
    for ipeak_smooth in ipeaks_smooth:
        window_start = max(0, ipeak_smooth - decel)
        window = data[window_start:ipeak_smooth + decel]
        ipeaks.append(window_start + int(np.argmax(window)))

    # Plot peaks and deceleration phase of stride:
    if plot_test:
        from pylab import plt
        if isinstance(t, list):
            tplot = np.asarray(t) - t[0]
        else:
            tplot = np.linspace(0, np.size(ax), np.size(ax))
        idecel = [max(0, ipeak - decel) for ipeak in ipeaks]
        plt.plot(tplot, data, 'k-', tplot[ipeaks], data[ipeaks], 'rs')
        for index in idecel:
            plt.axvline(x=tplot[index])
        plt.title('Maximum stride peaks')
        plt.show()

    # Compute the average vector for each deceleration phase:
    vectors = []
    for ipeak in ipeaks:
        phase_start = max(0, ipeak - decel)
        if phase_start >= ipeak:
            # No samples precede this peak, so there is nothing to average.
            continue
        decel_vectors = np.asarray([[ax[i], ay[i], az[i]]
                                    for i in range(phase_start, ipeak)])
        vectors.append(np.mean(decel_vectors, axis=0))

    # Compute the average deceleration vector and take the opposite direction:
    direction = -1 * np.mean(vectors, axis=0)

    # Return the unit vector in this direction:
    direction /= np.sqrt(direction.dot(direction))

    # Plot vectors:
    if plot_test:
        from mhealthx.utilities import plot_vectors
        dx = [vector[0] for vector in vectors]
        dy = [vector[1] for vector in vectors]
        dz = [vector[2] for vector in vectors]
        hx, hy, hz = direction
        title = 'Average deceleration vectors + estimated walk direction'
        plot_vectors(dx, dy, dz, [hx], [hy], [hz], title)

    return direction
ys1 += [yi,yi]  # tail of a loop whose header lies above this excerpt
# Blue band: from zero up to ys1.
plt.fill_between(ts, ys1, np.zeros_like(ys1), color='blue')

# Each value is duplicated so that ys2 has two entries per element of
# mean_percentage_Rco, mirroring how ys1 is built.
ys2 = []
for yi in mean_percentage_Rco:
    ys2 += [1-yi, 1-yi]
# Green band: from ys2 up to one.
plt.fill_between(ts, ys2, np.ones_like(ys2), color='green')

# Red band: whatever lies between the blue and green bands.
obj = plt.fill_between(ts, ys1, ys2, color='red')

# Pass a real boolean: the string 'off' is truthy, so current Matplotlib
# would switch the grid ON instead of off.
plt.grid(False)

# fill_between bands are labelled through proxy patches of matching colour.
label_patch1 = mpatches.Patch(color='green')
label_patch2 = mpatches.Patch(color='red')
label_patch3 = mpatches.Patch(color='blue')
plt.legend([label_patch1, label_patch2, label_patch3],
           [r'$R^{\bf{co}}$', r'$R^{\bf{aslip}}$', r'$E^{\bf{aslip}}$'],
           bbox_to_anchor=(1.13, 1.01))
#plt.gca().set_xscale('log')

for epoch in epochs:
    plt.axvline(epoch, ls='--', color='gray')

plt.xlabel('days after the mainshock')
plt.ylabel('percentage')
plt.savefig('plots/percentage_components_each_section.png')
plt.savefig('plots/percentage_components_each_section.pdf')
plt.show()
# Advance every path one step at a time: each step multiplies the previous
# level by exp((r - sigma^2 / 2) * dt + sigma * sqrt(dt) * z), with z drawn
# independently per path from the standard normal distribution.
for t in range(1, M + 1):
    expr = ((r - 0.5 * sigma ** 2) * dt
            + sigma * math.sqrt(dt) * np.random.standard_normal(N))
    S_BM[t] = S_BM[t - 1] * np.exp(expr)
del expr

# Final values and the 3- and 5-standard-deviation levels above their mean.
S_T = S_BM[-1]
S_T_mean = np.mean(S_T)
S_T_std = np.std(S_T)
mu_3s = S_T_mean + 3 * S_T_std
mu_5s = S_T_mean + 5 * S_T_std

#............................................................................
# Histogram of final values
#............................................................................
plt.hist(S_T, bins=100, edgecolor='darkgray', color='darkblue')
plt.axvline(S0, linestyle='dashed', alpha=0.8, color='darkred')
plt.axvline(S_T_mean, linestyle='dashed', alpha=0.8, color='red')

# The annotation text is passed positionally: the keyword was renamed from
# `s` to `text` in Matplotlib, so the positional form works on old and new
# versions.  Raw strings keep the TeX backslashes without relying on invalid
# escape sequences.
plt.axvline(mu_3s, linestyle='dashed', alpha=0.8, color='red')
plt.annotate(r'$\mu + 3\sigma = $' + str(round(mu_3s, 1)), xy=(mu_3s, 500))
plt.axvline(mu_5s, linestyle='dashed', alpha=0.8, color='red')
plt.annotate(r'$\mu + 5\sigma = $' + str(round(mu_5s, 1)), xy=(mu_5s, 500))

plt.xlabel('index level')
plt.ylabel('frequency')
plt.title('Geometric Brownian Motion: distribution of $S_T$')

#............................................................................
# Sample of paths
# A Sequential model works with exactly one input tensor and one output
# tensor.  Here: one SimpleRNN layer fed `lags` time steps of a single
# feature, followed by a single linear output unit.
model = Sequential([
    SimpleRNN(500, activation='relu', input_shape=(lags, 1)),
    Dense(1, activation='linear'),
])
model.compile(optimizer='rmsprop', loss='mse', metrics=['mae'])
model.summary()

model.fit(g, epochs=500, steps_per_epoch=10, verbose=False)

# Build one batch holding every lag window of the transformed series over
# [-6*pi, 6*pi] and predict on it.
x = np.linspace(-6 * np.pi, 6 * np.pi, 1000)
d = transform(x)
g_ = TimeseriesGenerator(d, d, length=lags, batch_size=len(d))
f = list(g_)[0][0].reshape((len(d) - lags, lags, 1))
y = model.predict(f, verbose=False)

# Data against predictions; the dashed green lines at -2*pi and 2*pi separate
# the regions captioned in-sample and out-of-sample.
plt.figure(figsize=(10, 6))
plt.plot(x[lags:], d[lags:], label='data', alpha=0.75)
plt.plot(x[lags:], y, 'r.', label='pred', ms=3)
plt.axvline(-2 * np.pi, c='g', ls='--')
plt.axvline(2 * np.pi, c='g', ls='--')
plt.text(-15, 22, 'out-of-sample')
plt.text(-2, 22, 'in-sample')
plt.text(10, 22, 'out-of-sample')
plt.legend()
for lag in range(1, lags + 1):  # body of create_lags; its header lies above this excerpt
    col = 'lag_{}'.format(lag)
    data[col] = data['returns'].shift(lag)
    cols.append(col)


# Add the lag columns, then drop the rows left incomplete by the shifting.
create_lags(data)
data.dropna(inplace=True)

# First two lags against each other, coloured by the return, with dashed red
# lines through zero on both axes.
data.plot.scatter(
    x='lag_1',
    y='lag_2',
    c='returns',
    cmap='coolwarm',
    figsize=(10, 6),
    colorbar=True,
)
plt.axvline(0, c='r', ls='--')
plt.axhline(0, c='r', ls='--')
plt.title('Scatter plot based on features and labels data')

# Linear OLS can now be implemented to learn about any potential linear
# relationships, to predict market movement based on features and to backtest
# such predictions. Two basic approaches are available: using log returns or only
# the direction data as the dependent variable.
from sklearn.linear_model import LinearRegression

model = LinearRegression()

# Regression on log returns directly
fitted = model.fit(data[cols], data['returns'])
data['pos_ols_1'] = fitted.predict(data[cols])

# Regression on direction data (which is of primary interest)