def figure_annual_mean(plot_dir, scenarios, variables, positions, start, end, fig_size, fmt_mean,
                       fmt_smoothing, smoothing_step, linewidth, linewidth_moving, legend_loc,
                       legend_size, xlabel, ymin_limit, ymax_limit, label_size, tick_size,
                       height_ratio, width_ratio, wspace, hspace, store_dir_annual_mean0, pos_names):
    for scenario in scenarios:
        for var in variables:
            y_unit = get_yunit(var)
            ylabel = get_var_name(var)
            grid = gridspec.GridSpec(2, 2, height_ratios=height_ratio, width_ratios=width_ratio,
                                     wspace=wspace, hspace=hspace)
            fig = plt.figure(figsize=fig_size)
            store_dir = store_dir_annual_mean0.format(scenario, var)
            for idx, pos in enumerate(positions):
                pos_name = pos_names[idx]
                file_fldmean = plot_dir.format(scenario, var, pos_name)
                #ymin_limit, ymax_limit = get_vmin_vmax(file_fldmean, None, None)
                fig_txt_mean = pos_name + ' Annual Mean ' + '(' + scenario + ')'
                fig_txt_smoothing = '{} Years Moving Average'.format(smoothing_step)
                ax = fig.add_subplot(grid[idx])
                df = get_data_ts(file_fldmean, start, end)
                df.resample('A', how='mean').plot(style=fmt_mean, label=fig_txt_mean)
                df_mean = df.resample('A', how='mean')
                pds.rolling_mean(df_mean, smoothing_step, center=True).plot(style=fmt_smoothing,
                                                                            label=fig_txt_smoothing,
                                                                            linewidth=linewidth_moving)
                ax.legend(loc=legend_loc, prop={'size': legend_size})
                ax.set_xlabel(xlabel, fontsize=label_size)
                ax.set_ylabel(ylabel + y_unit, fontsize=label_size)
                ax.set_ylim([ymin_limit, ymax_limit])
                ax.tick_params(axis='both', which='major', labelsize=tick_size)
            fig.savefig(store_dir, dpi=300)
def test_fperr_robustness(self):
    # TODO: remove this once python 2.5 out of picture
    if PY3:
        raise nose.SkipTest("doesn't work on python 3")

    # #2114
    data = '\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x1a@\xaa\xaa\xaa\xaa\xaa\xaa\x02@8\x8e\xe38\x8e\xe3\xe8?z\t\xed%\xb4\x97\xd0?\xa2\x0c<\xdd\x9a\x1f\xb6?\x82\xbb\xfa&y\x7f\x9d?\xac\'\xa7\xc4P\xaa\x83?\x90\xdf\xde\xb0k8j?`\xea\xe9u\xf2zQ?*\xe37\x9d\x98N7?\xe2.\xf5&v\x13\x1f?\xec\xc9\xf8\x19\xa4\xb7\x04?\x90b\xf6w\x85\x9f\xeb>\xb5A\xa4\xfaXj\xd2>F\x02\xdb\xf8\xcb\x8d\xb8>.\xac<\xfb\x87^\xa0>\xe8:\xa6\xf9_\xd3\x85>\xfb?\xe2cUU\xfd?\xfc\x7fA\xed8\x8e\xe3?\xa5\xaa\xac\x91\xf6\x12\xca?n\x1cs\xb6\xf9a\xb1?\xe8%D\xf3L-\x97?5\xddZD\x11\xe7~?#>\xe7\x82\x0b\x9ad?\xd9R4Y\x0fxK?;7x;\nP2?N\xf4JO\xb8j\x18?4\xf81\x8a%G\x00?\x9a\xf5\x97\r2\xb4\xe5>\xcd\x9c\xca\xbcB\xf0\xcc>3\x13\x87(\xd7J\xb3>\x99\x19\xb4\xe0\x1e\xb9\x99>ff\xcd\x95\x14&\x81>\x88\x88\xbc\xc7p\xddf>`\x0b\xa6_\x96|N>@\xb2n\xea\x0eS4>U\x98\x938i\x19\x1b>\x8eeb\xd0\xf0\x10\x02>\xbd\xdc-k\x96\x16\xe8=(\x93\x1e\xf2\x0e\x0f\xd0=\xe0n\xd3Bii\xb5=*\xe9\x19Y\x8c\x8c\x9c=\xc6\xf0\xbb\x90]\x08\x83=]\x96\xfa\xc0|`i=>d\xfc\xd5\xfd\xeaP=R0\xfb\xc7\xa7\x8e6=\xc2\x95\xf9_\x8a\x13\x1e=\xd6c\xa6\xea\x06\r\x04=r\xda\xdd8\t\xbc\xea<\xf6\xe6\x93\xd0\xb0\xd2\xd1<\x9d\xdeok\x96\xc3\xb7<&~\xea9s\xaf\x9f<UUUUUU\x13@q\x1c\xc7q\x1c\xc7\xf9?\xf6\x12\xdaKh/\xe1?\xf2\xc3"e\xe0\xe9\xc6?\xed\xaf\x831+\x8d\xae?\xf3\x1f\xad\xcb\x1c^\x94?\x15\x1e\xdd\xbd>\xb8\x02@\xc6\xd2&\xfd\xa8\xf5\xe8?\xd9\xe1\x19\xfe\xc5\xa3\xd0?v\x82"\xa8\xb2/\xb6?\x9dX\x835\xee\x94\x9d?h\x90W\xce\x9e\xb8\x83?\x8a\xc0th~Kj?\\\x80\xf8\x9a\xa9\x87Q?%\xab\xa0\xce\x8c_7?1\xe4\x80\x13\x11*\x1f? \x98\x00\r\xb6\xc6\x04?\x80u\xabf\x9d\xb3\xeb>UNrD\xbew\xd2>\x1c\x13C[\xa8\x9f\xb8>\x12b\xd7<pj\xa0>m-\x1fQ@\xe3\x85>\xe6\x91)l\x00/m>Da\xc6\xf2\xaatS>\x05\xd7]\xee\xe3\xf09>'
    arr = np.frombuffer(data, dtype='<f8')
    if sys.byteorder != "little":
        arr = arr.byteswap().newbyteorder()

    result = mom.rolling_sum(arr, 2)
    self.assertTrue((result[1:] >= 0).all())

    result = mom.rolling_mean(arr, 2)
    self.assertTrue((result[1:] >= 0).all())

    result = mom.rolling_var(arr, 2)
    self.assertTrue((result[1:] >= 0).all())

    # #2527, ugh
    arr = np.array([0.00012456, 0.0003, 0])
    result = mom.rolling_mean(arr, 1)
    self.assertTrue(result[-1] >= 0)

    result = mom.rolling_mean(-arr, 1)
    self.assertTrue(result[-1] <= 0)
def figure_monthly_mean_cat(plot_dir, scenarios, variables, positions, start, end, fig_size, fmt_mean,
                            fmt_smoothing, smoothing_step, linewidth, linewidth_moving, legend_loc,
                            legend_size, xlabel, ymin_limit, ymax_limit, label_size, tick_size,
                            height_ratio, width_ratio, wspace, hspace, store_dir_monthly_mean0,
                            pos_names, linewidth_axv, color_axv):
    for scenario in scenarios:
        for var in variables:
            y_unit = get_yunit(var)
            ylabel = get_var_name(var)
            grid = gridspec.GridSpec(2, 2, height_ratios=height_ratio, width_ratios=width_ratio,
                                     wspace=wspace, hspace=hspace)
            fig = plt.figure(figsize=fig_size)
            store_dir = store_dir_monthly_mean0.format(scenario, var)
            for idx, pos in enumerate(positions):
                pos_name = pos_names[idx]
                file_fldmean = plot_dir.format(scenario, var, pos_name)
                fig_txt_mean = pos_name + ' Monthly Mean '
                fig_txt_smoothing = '{} Months Moving Average'.format(smoothing_step)
                ax = fig.add_subplot(grid[idx])
                df = get_data_ts(file_fldmean, start, end)
                df.plot(style=fmt_mean, label=fig_txt_mean)
                pds.rolling_mean(df, smoothing_step, center=True).plot(style=fmt_smoothing,
                                                                       label=fig_txt_smoothing,
                                                                       linewidth=linewidth_moving)
                div = dt.datetime(2005, 12, 31)
                ax.axvline(x=div, linewidth=linewidth_axv, color=color_axv)
                ax.legend(loc=legend_loc, prop={'size': legend_size})
                ax.set_xlabel(xlabel, fontsize=label_size)
                ax.set_ylabel(ylabel + y_unit, fontsize=label_size)
                ax.set_ylim([ymin_limit, ymax_limit])
                ax.tick_params(axis='both', which='major', labelsize=tick_size)
            fig.savefig(store_dir, dpi=300)
def plot_annual_mean_together(outdir_fldmean_month_series, start, end, variables, scenarios,
                              store_dir_annual_mean0, fig_size, smoothing_step, linewidth, legend_loc,
                              legend_size, xlabel, ylabel, ymin_limit, ymax_limit, label_size, tick_size):
    for var in variables:
        if var == 'SOIL_MOIST' or var == 'SWE':
            y_unit = ' (mm)'
        elif var == 'TMAX' or var == 'TMIN' or var == 'TAVG':
            y_unit = ' ($^\circ$C)'
        else:
            y_unit = ' (mm/day)'
        for scenario in scenarios:
            fig = plt.figure(figsize=fig_size)
            store_dir_annual_mean = store_dir_annual_mean0.format(scenario, var)
            ax = fig.add_subplot(111)
            # positions, pos_names and fmts come from the enclosing scope
            for idx, pos in enumerate(positions):
                pos_name = pos_names[idx]
                fig_txt_mean = pos_name + ' Monthly Mean ' + '(' + scenario + ')'
                fig_txt_smoothing = '{} Months Moving Average'.format(smoothing_step)
                fmt = fmts[idx]
                fig_txt = pos_name
                ifile = outdir_fldmean_month_series.format(scenario, var, pos_name)
                df = get_data(ifile, start, end)
                df_mean = df.resample('A', how='mean')
                pds.rolling_mean(df_mean, smoothing_step, center=True).plot(style=fmt, label=fig_txt,
                                                                            linewidth=linewidth)
            ax.legend(loc=legend_loc, prop={'size': legend_size})
            ax.set_xlabel(xlabel, fontsize=label_size)
            ax.set_ylabel(ylabel + y_unit, fontsize=label_size)
            ax.set_ylim([ymin_limit, ymax_limit])
            ax.tick_params(axis='both', which='major', labelsize=tick_size)
            fig.savefig(store_dir_annual_mean, dpi=300)
def emvvalue(em, n, m):
    import numpy as np
    from pandas.stats.moments import rolling_mean
    # emv = moving_average(em, (n if n < len(em) else len(em)-1), type='simple')
    # maemv = moving_average(emv, (m if m < len(emv) else len(emv)-1), type='simple')
    emv = np.nan_to_num(rolling_mean(np.asarray(em), n))
    maemv = np.nan_to_num(rolling_mean(np.asarray(emv), m))
    return emv, maemv
def rename_vix(df):
    df = df.rename(columns={'Adj Close': 'price'})
    df['sma_short'] = st.rolling_mean(df['price'], 20)
    df['sma_long'] = st.rolling_mean(df['price'], 80)
    df = df.fillna(method='ffill')
    df = df[['price', 'sid', 'Open', 'sma_short', 'sma_long']]
    log.info(' \n %s' % df.head())  # original had the format operator inside the string
    return df  #.shift(1)  # To avoid forward looking bias
def findEvents(symbols, startday, endday, verbose=False):
    timeofday = dt.timedelta(hours=16)
    timestamps = du.getNYSEdays(startday, endday, timeofday)
    dataobj = da.DataAccess('Norgate')
    if verbose:
        print __name__ + " reading data"
    adjclose = dataobj.get_data(timestamps, symbols, closefield)
    adjclose = (adjclose.fillna()).fillna(method='backfill')
    adjcloseSPY = dataobj.get_data(timestamps, ['SPY'], closefield)
    adjcloseSPY = (adjcloseSPY.fillna()).fillna(method='backfill')
    if verbose:
        print __name__ + " finding events"

    # for symbol in symbols:
    #     close[symbol][close[symbol] >= 1.0] = np.NAN
    #     for i in range(1, len(close[symbol])):
    #         if np.isnan(close[symbol][i-1]) and close[symbol][i] < 1.0:  # (i-1)th was > $1, and (i)th is < $1
    #             close[symbol][i] = 1.0  # overwriting the price by the bit
    #     close[symbol][close[symbol] < 1.0] = np.NAN
    #print adjclose

    # Get the 20 day moving avg and moving stddev
    movavg = pa.rolling_mean(adjclose, 20, min_periods=20)
    movavgSPY = pa.rolling_mean(adjcloseSPY, 20, min_periods=20)
    movstddev = pa.rolling_std(adjclose, 20, min_periods=20)
    movstddevSPY = pa.rolling_std(adjcloseSPY, 20, min_periods=20)

    upperband = movavg + 2 * movstddev
    upperbandSPY = movavgSPY + 2 * movstddevSPY
    lowerband = movavg - 2 * movstddev
    lowerbandSPY = movavgSPY - 2 * movstddevSPY

    # Compute the bollinger %b indicator for all stocks
    normalizedindicator = 2 * (adjclose - movavg) / (upperband - lowerband)
    #print normalizedindicator
    normalizedindicatorSPY = 2 * (adjcloseSPY - movavgSPY) / (upperbandSPY - lowerbandSPY)
    #print normalizedindicatorSPY

    #bandwidth = (upperband - lowerband)/movavg
    #print bandwidth
    #print upperband

    # Compute the event matrix as follows:
    # Set periods of low volatility to 1.
    # In from the period of low volatility to the period of, say, 15 days following low volatility,
    # if the stock price breaks above the upper band there is a surge; this is a positive event, set to 2.
    # Finally, set all events other than 2 to NaN. Then, set all 2's to 1.
    eventMatrix = adjclose.copy()
    for symbol in symbols:
        for row in range(len(adjclose[:][symbol])):
            eventMatrix[symbol][row] = np.NAN
            if normalizedindicator[symbol][row] - normalizedindicatorSPY['SPY'][row] >= 0.5:
                eventMatrix[symbol][row] = 1
    return eventMatrix
def plot_monthly_mean(ifile, start, end, store_dir, fig_size, fmt_mean, fmt_smoothing, fig_txt_mean,
                      fig_txt_smoothing, smoothing_step, linewidth, legend_loc, legend_size, xlabel,
                      ylabel, y_unit, ymin_limit, ymax_limit, label_size, tick_size, pos_name):
    df = get_data(ifile, start, end)
    df.plot(style=fmt_mean, label=fig_txt_mean)
    pds.rolling_mean(df, smoothing_step, center=True).plot(style=fmt_smoothing,
                                                           label=fig_txt_smoothing, linewidth=2.5)
    ax = plt.gca()  # the original referenced ax without defining it; use the current axes
    ax.legend(loc=legend_loc, prop={'size': legend_size})
    ax.set_xlabel(xlabel, fontsize=label_size)
    ax.set_ylabel(ylabel + y_unit, fontsize=label_size)
    ax.set_ylim([ymin_limit, ymax_limit])
    ax.tick_params(axis='both', which='major', labelsize=tick_size)
def kst(s, r1=10, r2=15, r3=20, r4=30, n1=10, n2=10, n3=10, n4=15, nsig=9):
    rocma1 = moments.rolling_mean(s / s.shift(r1) - 1, n1)
    rocma2 = moments.rolling_mean(s / s.shift(r2) - 1, n2)
    rocma3 = moments.rolling_mean(s / s.shift(r3) - 1, n3)
    rocma4 = moments.rolling_mean(s / s.shift(r4) - 1, n4)
    kst = 100 * (rocma1 + 2 * rocma2 + 3 * rocma3 + 4 * rocma4)
    sig = moments.rolling_mean(kst, nsig)
    return DataFrame(dict(kst=kst, signal=sig))
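# The KST above weights four smoothed rates of change and smooths the sum into a
# signal line. A minimal, hedged usage sketch on a synthetic close series -- the
# random-walk data and the crossover rule below are illustrative assumptions,
# not part of the original snippet:
import numpy as np
from pandas import Series

closes = Series(100 + np.random.randn(300).cumsum())  # illustrative prices only
out = kst(closes)  # DataFrame with 'kst' and 'signal' columns
# A common (assumed) reading: bullish when KST crosses above its signal line.
crossed_up = (out.kst > out.signal) & (out.kst.shift(1) <= out.signal.shift(1))
print(crossed_up.sum(), "upward crossings")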
def stoch(s, nfastk=14, nfullk=3, nfulld=3):
    if not isinstance(s, DataFrame):
        s = DataFrame(dict(high=s, low=s, close=s))
    hmax, lmin = hhv(s, nfastk), llv(s, nfastk)
    fastk = 100 * (s.close - lmin) / (hmax - lmin)
    fullk = moments.rolling_mean(fastk, nfullk)
    fulld = moments.rolling_mean(fullk, nfulld)
    return DataFrame(dict(fastk=fastk, fullk=fullk, fulld=fulld))
def read_quote(quote):
    fe = read_close(quote)
    ratios, financials, fcf, eps, shares_o = load_csv_files(quote)
    ep, fep = parse_eps(eps, max(fe.index))

    fc = fcf.fcf
    fc.index = fcf.pDate
    fc = fc.apply(lambda x: convert(x))
    #fc_pad = pad(fc, fc[-1], max(fe.index))

    shares = shares_o.shares_o
    shares.index = shares_o.pDate
    shares = shares.apply(lambda x: convert(x))
    shares_pad = pad(shares, shares[-1], max(fe.index))

    #past_year_fcf = rolling_sum(fc, 4, min_periods=4)
    fcf_shares = (fc / shares_pad).dropna()
    fcf_growth_rate = calculate_fc_growth(fcf_shares)

    past_year_eps = rolling_sum(ep, 4, min_periods=4)
    calculate_eps_growth(past_year_eps)
    #py_fc_pad = pad(past_year_fc, past_year_fc[-1], max(fe.index))

    fcf_growth_rate = 0.06
    growth = fcf_growth_rate * 0.75 * 100
    mg_df = past_mg_value(past_year_eps, growth=growth)

    #past_2year_eps = rolling_sum(ep, 8, min_periods=8)
    #past_3year_eps = rolling_sum(ep, 12, min_periods=12)
    #past_4year_eps = rolling_sum(ep, 16, min_periods=16)
    #past_5year_eps = rolling_sum(ep, 20, min_periods=20)
    #past_year_eps_ewma = ewma(ep, span=3, min_periods=4)
    #past_5year_eps_ewma = ewma(ep, span=19, min_periods=20)
    #ep.tshift(1, freq='D')  # Need to adjust because earnings happens EOD. Actually you don't, dates aren't exact

    df = DataFrame({'close': fe, 'fep': fep})
    #df['last_qtr_eps'] = fep
    add_series(df, 'Valuemg Sell', mg_df['Valuemg'] * 1.1, max(fe.index))
    add_series(df, 'Valuemg Buy', mg_df['Valuemg'] * 0.75, max(fe.index))

    sub = df[df['Valuemg Sell'] > -1000].copy()
    sub['mavg_50day'] = rolling_mean(sub.close, 50, min_periods=1).shift(1)
    sub['mavg_200day'] = rolling_mean(sub.close, 200, min_periods=1).shift(1)
    sub.plot()
    #sub['ewma_s50'] = ewma(sub.close, span=50)
    #sub['ewma_s20'] = ewma(sub.close, span=20)
    plot_2015(sub, quote)
    return sub
def chaikinad(m, n, advalues, type='simple'):
    import numpy as np
    from pandas.stats.moments import rolling_mean
    # ma = moving_average(advalues,
    #                     (m if m < len(advalues) else len(advalues)),
    #                     type=type)
    # na = moving_average(advalues,
    #                     (n if n < len(advalues) else len(advalues)),
    #                     type=type)
    ma = np.nan_to_num(rolling_mean(np.asarray(advalues), m))
    na = np.nan_to_num(rolling_mean(np.asarray(advalues), n))
    return ma - na
def plot_financials(self):
    df = DataFrame({'close': self.close})
    growth = self.calculate_eps_growthmg(self.yearly_eps())
    print("Plotting financials using growth: %.2f" % growth)
    mg_df = self.past_mg_value(growth=growth)
    self.add_series(df, 'Valuemg Sell', mg_df['Valuemg'] * 1.1)
    self.add_series(df, 'Valuemg Buy', mg_df['Valuemg'] * 0.75)
    sub = df[df['Valuemg Sell'] > -1000].copy()
    sub['mavg_50day'] = rolling_mean(sub.close, 50, min_periods=1).shift(1)
    sub['mavg_200day'] = rolling_mean(sub.close, 200, min_periods=1).shift(1)
    sub.plot()
def plot_barometric_pressure():
    fig, (baro, pt, diff) = plt.subplots(3, 1, sharex=True)
    ## Barometric Data
    allbaro['Baropress'].plot(ax=baro, c='k', label='Barometric Pressure (kPa)')
    allbaro['NDBCbaro'].plot(ax=baro, c='r', label='NDBC NSTP6')
    allbaro['FPbaro'].plot(ax=baro, c='g', label='Weather Station')
    ## PT Data
    PT1['Pressure'].plot(ax=baro, c='b', label='LBJ PT Pressure')
    PT1['stage(cm)'].plot(ax=pt, c='b', label='LBJ PT Stage(cm)')
    ## Difference between PT pressure and Barometric pressure at low stages
    press_diff_baseflow = PT1['Pressure'][PT1['stage(cm)'] < 10] - PT1['barodata']
    m.rolling_mean(press_diff_baseflow, window=96).plot(ax=diff,
        label='Daily Mean difference kPa (PT-Baro)')  ## 96 * 15min = 24 hours
    baro.legend(), pt.legend(), diff.legend()
    return
def test_cmov_mean(self):
    try:
        from scikits.timeseries.lib import cmov_mean
    except ImportError:
        raise nose.SkipTest

    vals = np.random.randn(10)
    xp = cmov_mean(vals, 5)

    rs = mom.rolling_mean(vals, 5, center=True)
    assert_almost_equal(xp.compressed(), rs[2:-2])
    assert_almost_equal(xp.mask, np.isnan(rs))

    xp = Series(rs)
    rs = mom.rolling_mean(Series(vals), 5, center=True)
    assert_series_equal(xp, rs)
def findEvents(symbols, startday, endday, verbose=False):
    timeofday = dt.timedelta(hours=16)
    timestamps = du.getNYSEdays(startday, endday, timeofday)
    dataobj = da.DataAccess('Norgate')
    if verbose:
        print __name__ + " reading data"
    adjclose = dataobj.get_data(timestamps, symbols, closefield)
    adjclose = (adjclose.fillna()).fillna(method='backfill')
    if verbose:
        print __name__ + " finding events"

    # for symbol in symbols:
    #     close[symbol][close[symbol] >= 1.0] = np.NAN
    #     for i in range(1, len(close[symbol])):
    #         if np.isnan(close[symbol][i-1]) and close[symbol][i] < 1.0:  # (i-1)th was > $1, and (i)th is < $1
    #             close[symbol][i] = 1.0  # overwriting the price by the bit
    #     close[symbol][close[symbol] < 1.0] = np.NAN
    #print adjclose

    # Get the 20 day moving avg and moving stddev
    movavg = pa.rolling_mean(adjclose, 20, min_periods=20)
    movstddev = pa.rolling_std(adjclose, 20, min_periods=20)

    # Compute the upper and lower bollinger bands
    upperband = movavg + 2 * movstddev
    lowerband = movavg - 2 * movstddev
    #bandwidth = (upperband - lowerband)/movavg
    #print bandwidth
    #print upperband

    # Compute the event matrix as follows:
    # Set periods of low volatility to 1.
    # In from the period of low volatility to the period of, say, 15 days following low volatility,
    # if the stock price breaks above the upper band there is a surge; this is a positive event, set to 2.
    # Finally, set all events other than 2 to NaN. Then, set all 2's to 1.
    lookaheadperiod = 15
    eventMatrix = adjclose.copy()
    for symbol in symbols:
        for row in range(len(adjclose[:][symbol])):
            eventMatrix[symbol][row] = np.NAN
            if upperband[symbol][row] > 0 and lowerband[symbol][row] > 0 and movavg[symbol][row] > 0:
                if (upperband[symbol][row] - lowerband[symbol][row]) / movavg[symbol][row] < 0.10:
                    eventMatrix[symbol][row] = 1
                else:
                    currow = row - 1
                    numOnes = 0
                    while currow > row - lookaheadperiod and currow >= 0:
                        if eventMatrix[symbol][currow] != 1:
                            break
                        if eventMatrix[symbol][currow] == 1 and adjclose[symbol][row] > upperband[symbol][row]:
                            numOnes = numOnes + 1
                        currow = currow - 1
                    if numOnes >= 5:
                        eventMatrix[symbol][row] = 2
        eventMatrix[symbol][eventMatrix[symbol] != 2] = np.NAN
        eventMatrix[symbol][eventMatrix[symbol] == 2] = 1
    return eventMatrix
def reply_moving_average(start_date, end_date):
    """
    Chart (average reply time with moving average over the last 5 days).

    If you don't know what a moving average is, google it.
    """
    insight_table = join_tables()
    df = insight_table[(insight_table.updated_message >= start_date) &
                       (insight_table.updated_message <= end_date)]
    reply_times = {"date": [], "avg": []}
    for ts in pd.date_range(start_date, end_date, freq="D"):
        reply_times["date"].append(ts.date())
        expert_answers = df[(df.updated_message <= ts.date()) & (df.type == "expert")]
        if expert_answers.empty:
            reply_times["avg"].append(None)
        else:
            avgs = expert_answers.groupby("name").apply(
                lambda g: (g.updated_message.min() - g.updated_ticket.iloc[0]))
            avgs = [a.days for a in avgs]
            reply_times["avg"].append(sum(avgs) / len(avgs))
    result = pd.DataFrame.from_dict(reply_times)
    result = result.fillna(method="ffill")
    result = result.dropna()
    result.avg = rolling_mean(result.avg, 5)
    return result.dropna()
def calc_returns(self):
    """Calculates returns"""
    self.df["ATR"] = average_true_range(self.df)
    mean = rolling_mean(self.df["Adj Close"], self.mean_days)
    self.df["plus"] = mean + self.a * self.df["ATR"]
    self.df["minus"] = mean - self.a * self.df["ATR"]

    self.df["position"] = 0.0

    # figure out when the close is higher than the plus
    # delay by one period
    low_triggered = snf(self.df["Adj Close"] > self.df["plus"])
    num_low_entries = low_triggered.sum()

    # set the position (the period following)
    self.df.loc[low_triggered, "position"] = -1.0

    # figure out when the close is less than plus
    high_triggered = snf(self.df["Adj Close"] < self.df["minus"])
    num_high_entries = high_triggered.sum()
    self.df.loc[high_triggered, "position"] = 1.0

    entries = num_high_entries + num_low_entries

    self.df["period_returns"] = self.df["Adj Close"] - self.df["Adj Close"].shift(1)

    # now here we can calculate the exit based on different strategies
    # may want to pass this function in, in a functional way
    return test_fixed_stop_target(self.df, self.tar_p, self.tar_n)
def forecast_mean_sd(self, returns):
    mean_fcst = pd.Panel({"Forecast": rolling_mean(returns.data, self.window)})
    sd_fcst = pd.Panel({"Forecast": rolling_std(returns.data, self.window)})
    return (mean_fcst, sd_fcst)
def moving_average(self, data, column='reads_per_bp_norm'):
    '''Calculate moving average of slope over defined window.'''
    data['avg_' + column] = rolling_mean(data[column], self.moving_window)
    return data
def bbands(s, n=20, ndev=2):
    mavg = moments.rolling_mean(s, n)
    mstd = moments.rolling_std(s, n)
    hband = mavg + ndev * mstd
    lband = mavg - ndev * mstd
    return DataFrame(dict(ma=mavg, lband=lband, hband=hband))
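# A quick, hedged usage sketch for bbands above. The random-walk prices and the
# derived %b quantity are illustrative assumptions, not part of the original:
import numpy as np
from pandas import Series

prices = Series(100 + np.random.randn(250).cumsum())  # illustrative close prices
bands = bbands(prices, n=20, ndev=2)
# %b-style position of the price within the bands, a common derived quantity
pct_b = (prices - bands.lband) / (bands.hband - bands.lband)
print(bands.dropna().tail())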
def cci(s, n=20, c=0.015):
    if isinstance(s, DataFrame):
        s = s[['high', 'low', 'close']].mean(axis=1)
    mavg = moments.rolling_mean(s, n)
    mdev = moments.rolling_apply(s, n, lambda x: np.fabs(x - x.mean()).mean())
    return (s - mavg) / (c * mdev)
def find_peak_ind(data, width, width_roll_mean=200, roll_max_peaks_threshold=4.0,
                  is_ret_roll_max_peaks=False):
    """
    Calculate the indices of isolated maxima in the data array, usually containing
    the result of a correlation calculation between a timeseries and a pattern.

    Parameters
    ----------
    data : 1d ndarray
        Timeseries, usually containing the result of a correlation calculation
        between a timeseries and a pattern.
    width : int
        The width of an interval in which the maximum is found, i.e. two maxima
        have to be at least width apart to be registered as separate.
    width_roll_mean : int
        The width used for the rolling mean normalisation of the data for better
        identification of pattern matches, as it only looks for narrow peaks.
    roll_max_peaks_threshold : float
        The threshold for when a peak is considered high enough to be added to
        the returned indices. A peak has to be roll_max_peaks_threshold times
        larger in amplitude than the rolling mean to be registered as a valid peak.
    is_ret_roll_max_peaks : bool
        Return roll_max_peaks or not. Default is not.

    Returns
    -------
    peak_inds : list
        List of indices of the peaks in data.
    roll_max_peaks : ndarray, if is_ret_roll_max_peaks
        Rolling maximum of data normalised by its rolling mean.
    """
    roll_mean = mom.rolling_mean(data, width_roll_mean, center=True)
    # plt.figure()
    # plt.plot(data)
    # plt.show()
    roll_mean = 1
    roll_max_peaks = mom.rolling_max(data / roll_mean, width, center=False)
    # -- Calculate the centered rolling max.
    roll_max_peaks_c = mom.rolling_max(data / roll_mean, width, center=True)

    roll_peak_inds, = np.nonzero(roll_max_peaks > roll_max_peaks_threshold)
    peak_inds = []
    for c in roll_peak_inds[1:-1]:
        # -- max is when the left entry in roll_max_peaks is smaller and the right is equal,
        #    and if in the centered roll_max_peaks_c the left (and the right) are the same
        if (roll_max_peaks[c - 1] < roll_max_peaks[c] and
                np.abs(roll_max_peaks[c] - roll_max_peaks[c + 1]) < 0.0001 and
                np.abs(roll_max_peaks[c] - roll_max_peaks_c[c - 1]) < 0.0001):
            peak_inds.append(c)
    if is_ret_roll_max_peaks:
        return peak_inds, roll_max_peaks
    else:
        return peak_inds
def lineplot(name, with_ax=False, avg=10, xlabel=None, ylabel=None, title=None, **style):
    with axman(name, xlabel=xlabel, ylabel=ylabel, title=title) as ax:
        data = DATA[name]
        if with_ax:
            yield (data, ax)
        else:
            yield data
        ax.plot(range(len(data)), data, alpha=0.5, **style)
        if avg:
            ax.plot(rolling_mean(np.asarray(data), avg), alpha=0.5, c='k', lw=2)
def test_rolling_functions_window_non_shrinkage(self):
    # GH 7764
    s = Series(range(4))
    s_expected = Series(np.nan, index=s.index)
    df = DataFrame([[1, 5], [3, 2], [3, 9], [-1, 0]], columns=['A', 'B'])
    df_expected = DataFrame(np.nan, index=df.index, columns=df.columns)
    df_expected_panel = Panel(items=df.index, major_axis=df.columns, minor_axis=df.columns)

    functions = [
        lambda x: mom.rolling_cov(x, x, pairwise=False, window=10, min_periods=5),
        lambda x: mom.rolling_corr(x, x, pairwise=False, window=10, min_periods=5),
        lambda x: mom.rolling_max(x, window=10, min_periods=5),
        lambda x: mom.rolling_min(x, window=10, min_periods=5),
        lambda x: mom.rolling_sum(x, window=10, min_periods=5),
        lambda x: mom.rolling_mean(x, window=10, min_periods=5),
        lambda x: mom.rolling_std(x, window=10, min_periods=5),
        lambda x: mom.rolling_var(x, window=10, min_periods=5),
        lambda x: mom.rolling_skew(x, window=10, min_periods=5),
        lambda x: mom.rolling_kurt(x, window=10, min_periods=5),
        lambda x: mom.rolling_quantile(x, quantile=0.5, window=10, min_periods=5),
        lambda x: mom.rolling_median(x, window=10, min_periods=5),
        lambda x: mom.rolling_apply(x, func=sum, window=10, min_periods=5),
        lambda x: mom.rolling_window(x, win_type='boxcar', window=10, min_periods=5),
    ]
    for f in functions:
        try:
            s_result = f(s)
            assert_series_equal(s_result, s_expected)

            df_result = f(df)
            assert_frame_equal(df_result, df_expected)
        except ImportError:
            # scipy needed for rolling_window
            continue

    functions = [
        lambda x: mom.rolling_cov(x, x, pairwise=True, window=10, min_periods=5),
        lambda x: mom.rolling_corr(x, x, pairwise=True, window=10, min_periods=5),
        # rolling_corr_pairwise is deprecated, so the following line should be deleted
        # when rolling_corr_pairwise is removed.
        lambda x: mom.rolling_corr_pairwise(x, x, window=10, min_periods=5),
    ]
    for f in functions:
        df_result_panel = f(df)
        assert_panel_equal(df_result_panel, df_expected_panel)
def test_legacy_time_rule_arg(self):
    # suppress deprecation warnings
    sys.stderr = StringIO()

    rng = bdate_range('1/1/2000', periods=20)
    ts = Series(np.random.randn(20), index=rng)
    ts = ts.take(np.random.permutation(len(ts))[:12]).sort_index()

    try:
        result = mom.rolling_mean(ts, 1, min_periods=1, freq='B')
        expected = mom.rolling_mean(ts, 1, min_periods=1, time_rule='WEEKDAY')
        tm.assert_series_equal(result, expected)

        result = mom.ewma(ts, span=5, freq='B')
        expected = mom.ewma(ts, span=5, time_rule='WEEKDAY')
        tm.assert_series_equal(result, expected)
    finally:
        sys.stderr = sys.__stderr__
def test_cmov_window_na_min_periods(self):
    try:
        from scikits.timeseries.lib import cmov_window
    except ImportError:
        raise nose.SkipTest

    # min_periods
    vals = Series(np.random.randn(10))
    vals[4] = np.nan
    vals[8] = np.nan

    xp = mom.rolling_mean(vals, 5, min_periods=4, center=True)
    rs = mom.rolling_window(vals, 5, 'boxcar', min_periods=4, center=True)
    assert_series_equal(xp, rs)
def compute_bollinger_band(basic_portfolio, period, source='yahoo', filename=None):
    """
    Compute the bollinger band for a list of stocks.

    @param basic_portfolio: A basic portfolio instance
    @param period: lookback period (in trading days) for the rolling statistics
    @param source: source to get the data
    @param filename: optional path; if given, the plot is saved as PDF instead of shown
    @return: the bollinger values as a DataFrame
    """
    assert isinstance(basic_portfolio, BasicPortfolio)
    stock_close_prices = basic_portfolio.get_stock_close_prices(source)
    basic_portfolio.print_information()
    print 'Lookback period : ', period

    bol_mean = ts.rolling_mean(stock_close_prices, period)
    bol_std = ts.rolling_std(stock_close_prices, period)

    bollinger_band_up = bol_mean + bol_std
    bollinger_band_down = bol_mean - bol_std

    plt.clf()
    plt.plot(stock_close_prices.index, stock_close_prices.values)
    plt.plot(stock_close_prices.index, bollinger_band_up)
    plt.plot(stock_close_prices.index, bollinger_band_down)
    plt.legend(['Stock adjusted price', 'Bollinger band', 'Bollinger band'])
    plt.ylabel('Price')
    plt.xlabel('Date')
    if filename is not None:
        plt.savefig(filename, format='pdf')
    else:
        plt.show()

    bol_val = (stock_close_prices - bol_mean) / bol_std
    val = DataFrame(bol_val, index=stock_close_prices.index,
                    columns=basic_portfolio.tickers)
    # print val[-5:]
    val.to_csv('result/bol.csv')
    # return the bollinger value
    return val
def bollinger_bands(d_data, ldt_timestamps, ls_symbols=None, lookback=20, width=1,
                    plot_boll=False, ls_symbols_plot=None):
    if ls_symbols == None:
        ls_symbols = list(d_data.keys())

    df_close = copy.deepcopy(d_data)
    df_close = df_close[ls_symbols]
    df_mean_bollinger = copy.deepcopy(df_close) * np.NAN
    df_std_bollinger = copy.deepcopy(df_close) * np.NAN
    df_index_bollinger = copy.deepcopy(df_close) * np.NAN

    for c_sym in ls_symbols:
        df_mean_bollinger[c_sym] = pd_stats.rolling_mean(df_close[c_sym], lookback)
        df_std_bollinger[c_sym] = width * pd_stats.rolling_std(df_close[c_sym], lookback)
        df_index_bollinger[c_sym] = (df_close[c_sym] - df_mean_bollinger[c_sym]) / df_std_bollinger[c_sym]

    if plot_boll:
        if ls_symbols_plot == None:
            if len(ls_symbols) <= 5:
                ls_symbols_plot = ls_symbols
            else:
                ls_symbols_plot = ls_symbols[0:5]

        fig = []
        for c_sym in ls_symbols_plot:
            fig.append(plt.figure())
            ax = fig[-1].add_subplot(211)
            ax.plot(ldt_timestamps, df_close[c_sym], 'k')
            ax.plot(ldt_timestamps, df_mean_bollinger[c_sym], 'b')
            ax.plot(ldt_timestamps, df_mean_bollinger[c_sym] - df_std_bollinger[c_sym], 'b--')
            ax.plot(ldt_timestamps, df_mean_bollinger[c_sym] + df_std_bollinger[c_sym], 'b--')
            ax.set_xlim((ldt_timestamps[0], ldt_timestamps[-1]))
            ax.get_xaxis().set_visible(False)
            ax.set_ylabel('Adj. Close')

            ax = fig[-1].add_subplot(212)
            ax.plot(ldt_timestamps, df_index_bollinger)
            ax.plot([ldt_timestamps[0], ldt_timestamps[-1]], [1, 1])
            ax.plot([ldt_timestamps[0], ldt_timestamps[-1]], [-1, -1])
            ax.set_xlim((ldt_timestamps[0], ldt_timestamps[-1]))
            ax.set_xlabel('Time')
            ax.set_ylabel('Bollinger Val.')
        plt.show()

    return df_index_bollinger
def test_centered_axis_validation(self):
    # ok
    mom.rolling_mean(Series(np.ones(10)), 3, center=True, axis=0)
    # bad axis
    self.assertRaises(ValueError, mom.rolling_mean, Series(np.ones(10)), 3, center=True, axis=1)

    # ok ok
    mom.rolling_mean(DataFrame(np.ones((10, 10))), 3, center=True, axis=0)
    mom.rolling_mean(DataFrame(np.ones((10, 10))), 3, center=True, axis=1)
    # bad axis
    self.assertRaises(ValueError, mom.rolling_mean, DataFrame(np.ones((10, 10))), 3, center=True, axis=2)
def aggregated_line_seeds(results, title):
    plt.close()
    sorted_points = np.array(sorted(results, key=itemgetter(1)))
    sorted_time = sorted_points[:, 1] / 60 / 60
    sorted_errors = sorted_points[:, 2]

    if is_regression:
        sorted_errors = np.log10(sorted_errors)

    y_mean = stats.rolling_mean(sorted_errors, 5)
    # y_std = stats.rolling_std(sorted_errors, 5)
    y_upper = stats.rolling_max(sorted_errors, 5)
    y_lower = stats.rolling_min(sorted_errors, 5)

    plt.plot(sorted_time, y_mean, color="red", label="Rolling mean")
    # plt.legend()

    plt.fill_between(sorted_time, y_mean, y_upper, facecolor='gray', interpolate=True, alpha=0.5)
    plt.fill_between(sorted_time, y_lower, y_mean, facecolor='gray', interpolate=True, alpha=0.5)

    plt.xlabel("Time (h)")
    if is_regression:
        plt.ylabel("log(RMSE)")
    else:
        plt.ylabel("% class. error")
        plt.ylim(0, 100)

    plt.margins(0.05, 0.05)
    plt.title(title)
    plt.savefig("%s/plots%s/trajectories-%s.aggregated.png" % (os.environ['AUTOWEKA_PATH'], suffix, title),
                bbox_inches='tight')
def forecast(self, forecast_start_str, forecast_period_in_days, periods_of_data_to_use):
    '''Perform the forecast and return forecast as pandas Series object'''
    # create forecast index
    forecast_index = date_range(forecast_start_str, periods=forecast_period_in_days)
    # Extract only that data which is necessary to make the first moving average calculation
    data_series = self.training_ts.tail(periods_of_data_to_use)
    forecast = Series()

    for time in forecast_index:
        # forecasted value is the last value in the rolling_mean result - all others
        # are NaN because of the forecast window length (iloc[-1] is used here, since
        # positional label -1 is not valid on a datetime index)
        if self.forecast_method == 'ma':
            # Forecast using the simple moving average
            forecast_value = rolling_mean(data_series, periods_of_data_to_use).iloc[-1]
        elif self.forecast_method == 'ewma':
            # forecast using the exponentially weighted moving average
            forecast_value = ewma(data_series, span=periods_of_data_to_use).iloc[-1]
        #print forecast_value
        # remove the first value from the data because it is not needed for the next forecasted value
        data_series = data_series[1:]
        # Append the forecasted value to the data because the forecast is data for the next iteration's MA
        data_series = concat([data_series, Series(forecast_value, index=[time])])
        forecast = concat([forecast, Series(forecast_value, index=[time])])
    return forecast
def ComputeBollingerBands(ls_symbols, startdate, enddate, period, filename=''):
    # Get the data from local repository
    d_data = GetDataLocalYahoo(startdate, enddate, ls_symbols)

    print 'Symbol : ', ls_symbols
    print 'Start date : ', startdate
    print 'End date : ', enddate
    print 'Lookback period : ', period

    df_close = d_data['close']

    bol_mean = ts.rolling_mean(df_close, period)
    bol_std = ts.rolling_std(df_close, period)

    bolband_up = bol_mean + bol_std
    bolband_dw = bol_mean - bol_std

    # Plotting the prices with x-axis=timestamps
    if filename is not '':
        plt.clf()
        plt.plot(df_close.index, df_close.values)
        plt.plot(df_close.index, bolband_up)
        plt.plot(df_close.index, bolband_dw)
        plt.legend(['Stock adjusted price', 'Bollinger band', 'Bollinger band'])
        plt.ylabel('Price')
        plt.xlabel('Date')
        plt.savefig(filename, format='pdf')

    bol_val = (df_close - bol_mean) / bol_std
    val = DataFrame(bol_val, index=df_close.index, columns=ls_symbols)
    # print val[-5:]
    val.to_csv('bol.csv')
    # return the bollinger value
    return val
def _re_bin_changed(self):
    ## Fix, squashing indices
    #self.dataframe = rebin(self.dataframe, self.re_bin, axis=0, avg_fcn='mean')
    self.dataframe = rolling_mean(self.originaldata, self.re_bin)
def strat_maLong_maShort(df=readYahoo('SPY'), maLongDays=10, maShortDays=3,
                         closeCol='Close', highCol='High', lowCol='Low', openCol='Open',
                         signOfTrade=1, printit=True, block=False):
    ''' execute strategy which enters and exits based on Moving Average crossovers
        Example:
            from pystrats.state_strats import strat_maLong_maShort as ss
            dfretfinal = ss()  # strat_maLong_maShort()
            print dfretfinal
            print dfretfinal['ret'].mean()
    '''
    close = np.array(df[closeCol])
    high = np.array(df[highCol])
    low = np.array(df[lowCol])
    open = np.array(df[openCol])
    date = np.array(df['Date'])

    ma10 = rolling_mean(close, maLongDays)
    ma9 = rolling_mean(close, maLongDays - 1)
    ma3 = rolling_mean(close, maShortDays)
    ma2 = rolling_mean(close, maShortDays - 1)

    n = len(df)
    nl = n - 1

    # pMa10 = dsInsert(ma10[0:nl], 0, None)
    # pMa9 = dsInsert(ma9[0:nl], 0, None)
    # pMa3 = dsInsert(ma3[0:nl], 0, None)
    # pMa2 = dsInsert(ma2[0:nl], 0, None)
    pMa10 = np.insert(ma10[0:nl], 0, None)
    pMa9 = np.insert(ma9[0:nl], 0, None)
    pMa3 = np.insert(ma3[0:nl], 0, None)
    pMa2 = np.insert(ma2[0:nl], 0, None)

    pClose = np.insert(close[0:nl], 0, None)
    pHigh = np.insert(high[0:nl], 0, None)
    pLow = np.insert(low[0:nl], 0, None)

    # initialize state vector
    state = np.array([1] * n)

    # loop
    start_i = maLongDays + 1
    for i in range(start_i, n):
        if (pClose[i] < pMa10[i]) & (state[i - 1] == 1) & (high[i] > pMa9[i]):
            state[i] = 2
        elif (state[i - 1] == 2) & (low[i] > pMa2[i]):
            state[i] = 2
        elif (state[i - 1] == 2) & (low[i] <= pMa2[i]):
            state[i] = 1

    pState = np.insert(state[0:nl], 0, 1)

    # create entry conditions
    # 1. initial entry (state 1 to state 2)
    e1_2 = np.array((pState == 1) & (state == 2))
    e2_2 = np.array((pState == 2) & (state == 2))
    e2_1 = np.array((pState == 2) & (state == 1))

    dfret = DataFrame([date, pHigh, pLow, pClose, pMa10, pMa9, pMa3, pMa2]).T
    dfret.columns = ['Date', 'pHigh', 'pLow', 'pClose', 'pMa10', 'pMa9', 'pMa3', 'pMa2']

    # create daily entry prices
    dailyEntryPrices = np.array([0] * n)
    # default entry
    dailyEntryPrices = asb(dailyEntryPrices, pMa9, e1_2)
    useCloseOnEntry = e1_2 & (low > pMa9)
    dailyEntryPrices = asb(dailyEntryPrices, close, useCloseOnEntry)
    dailyEntryPrices = asb(dailyEntryPrices, pClose, e2_2)
    dailyEntryPrices = asb(dailyEntryPrices, pClose, e2_1)
    dfret['entry'] = dailyEntryPrices

    # create DAILY settle prices, which are either 0 or the Close
    # dfret$Close <- close
    dailySettlePrices = np.array([0] * n)
    dailySettlePrices = asb(dailySettlePrices, close, e1_2)  # <- close[w1_2]
    dailySettlePrices = asb(dailySettlePrices, close, e2_2)  # dailySettlePrices[w2_2] <- close[w2_2]
    dailySettlePrices = asb(dailySettlePrices, pMa2, e2_1)   # dailySettlePrices[w2_1] <- pMa2[w2_1]

    # adjust for situations where the high is below the pMa2, so you get out at the close
    useCloseOnExit = e2_1 & (high < pMa2)
    dailySettlePrices = asb(dailySettlePrices, close, useCloseOnExit)  # dailySettlePrices[useCloseOnExit] <- close[useCloseOnExit]

    dfret['exit'] = dailySettlePrices
    dfret['ret'] = dfret['exit'] / dfret['entry'] - 1
    dfret['ret'].fillna(0)

    dfretfinal = dfret.dropna(0)  # dfretfinal <- dfret[-badrows(dfret),]

    if printit:
        retDf = DataFrame({'Date': dfretfinal['Date'], 'ret': dfretfinal['ret']})
        returnsPerformance(retDf, block=block)
    return dfretfinal
def average_true_range(df, N=14):
    """Calculates the ATR by taking a rolling mean over the true range."""
    true_range = calc_true_range(df)
    return rolling_mean(true_range, N)
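# NOTE: calc_true_range is referenced above but not defined in this collection.
# A minimal sketch under the assumption that df carries 'High', 'Low' and 'Close'
# columns (both the helper name and the column names are assumptions):
import pandas as pd

def calc_true_range(df):
    # True range: the largest of (high - low), |high - previous close|,
    # and |low - previous close|.
    prev_close = df['Close'].shift(1)
    ranges = pd.concat([df['High'] - df['Low'],
                        (df['High'] - prev_close).abs(),
                        (df['Low'] - prev_close).abs()], axis=1)
    return ranges.max(axis=1)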
        data.loc[i, 'label'] = data.loc[i + 1, 'close']
    else:
        data.loc[i, 'label'] = data.loc[i, 'close']

# transform pandas to numpy
#data = data.iloc[:, 1:10].values  # take columns 2-10

trX = data.iloc[:, 1:9]
trY = data.iloc[:, 9]
feat_labels = data.columns[1:-1]

# Calculate moving average
from pandas.stats.moments import rolling_mean

date_range = data["date"].values
plt.plot(date_range, data["label"].values, label="close original")
plt.plot(date_range, rolling_mean(data, 5)["label"].values, label="close 5")
plt.plot(date_range, rolling_mean(data, 10)["label"].values, label="close 10")
plt.legend()
plt.show()
plt.gcf().clear()

#from sklearn.model_selection import train_test_split
#trX, teX, trY, teY = train_test_split(
#    X, y, test_size=0.2, random_state=0)

# Assessing Feature Importances with Random Forests
from sklearn.ensemble import RandomForestRegressor

forest = RandomForestRegressor(n_estimators=10000, random_state=0, n_jobs=-1)
forest.fit(trX, trY)
importances = forest.feature_importances_
ldt_timestamps = du.getNYSEdays(dt_start, dt_end, dt.timedelta(hours=16))

print dt.datetime.now().time(), "Read the data"
data_obj = da.DataAccess('Yahoo')
ls_symbols = data_obj.get_symbols_from_list('sp5002012')
cmp_symbols = ['SPY']
ls_keys = 'close'
ldf_data = data_obj.get_data(ldt_timestamps, ls_symbols + cmp_symbols, ls_keys)
ldf_data = ldf_data.fillna(method='ffill')
ldf_data = ldf_data.fillna(method='bfill')
ldf_data = ldf_data.fillna(1.0)

print dt.datetime.now().time(), "Calculating Bollinger's Value"
data_mean = pd.rolling_mean(ldf_data, window=20, min_periods=1)
data_std = pd.rolling_std(ldf_data, window=20, min_periods=1)
bollinger_value = {}
for s in ls_symbols + cmp_symbols:
    bollinger_value[s] = (ldf_data[s][ldt_timestamps] - data_mean[s][ldt_timestamps]) / data_std[s][ldt_timestamps]

print dt.datetime.now().time(), "Finding Events"
total_count = 0
print ldt_timestamps
# f = open('workfile.csv', 'w')
# for s_sym in ls_symbols:
#     print "\t", s_sym
#     temp_count = 0
#     for i in range(1, len(ldt_timestamps)):  # range(19, len(ldt_timestamps) - 20):
import matplotlib.pyplot as plt
import statsmodels.api as sm
from pandas.stats.moments import rolling_mean

data_loader = sm.datasets.sunspots.load_pandas()
df = data_loader.data
year_range = df["YEAR"].values
plt.plot(year_range, df["SUNACTIVITY"].values, label="Original")
plt.plot(year_range, rolling_mean(df, 11)["SUNACTIVITY"].values, label="SMA 11")
plt.plot(year_range, rolling_mean(df, 22)["SUNACTIVITY"].values, label="SMA 22")
plt.legend()
plt.show()
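# pandas.stats.moments was deprecated in pandas 0.18.0 and removed in later
# releases, so snippets like the one above no longer run on current pandas.
# A sketch of the equivalent using the rolling() accessor:
import matplotlib.pyplot as plt
import statsmodels.api as sm

df = sm.datasets.sunspots.load_pandas().data
year_range = df["YEAR"].values
plt.plot(year_range, df["SUNACTIVITY"].values, label="Original")
plt.plot(year_range, df["SUNACTIVITY"].rolling(11).mean().values, label="SMA 11")
plt.plot(year_range, df["SUNACTIVITY"].rolling(22).mean().values, label="SMA 22")
plt.legend()
plt.show()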
def e_greedy(actions, e=0.01):
    """ greedy greedy - always plus some noise """
    for action, rewards in actions.iteritems():
        if value(rewards) > rv:
            rv = int(action + random.gauss(0, e))
    return rv

actions = dict.fromkeys(range(10), [])

import numpy as np
from pandas import DataFrame
from pandas.stats.moments import rolling_mean

df = DataFrame(index=range(100), columns=['0.0', '0.01', '0.1'])
for step in range(100):
    num_times_greedy = dict.fromkeys(range(10), 0.0)
    arms = np.round(np.random.normal(0, 1, 2000) * 10)  # 10 random arms
    value[action].append(value(action))
    selected = greedy(actions, num_times_greedy[a])
    num_times_greedy[selected] += 1
    num_times_greedy[greedy(actions, num_times_greedy[greedy(actions)])] += 1
    df.loc[step, '0.0'] = num_times_greedy[greedy(actions)]
    df.loc[step, '0.01'] = e_greedy(actions, 0.01)
    df.loc[step, '0.1'] = e_greedy(actions, 0.1)
    #print "Step: {}".format(step)

mv_ag_df = rolling_mean(df, 1)
mv_ag_df.plot()
    thisind = np.clip(int(x + 0.5), 0, N - 1)
    return ts_date[thisind].strftime('%Y-%m-%d')

####################################################
#            Bollinger Bands with Pandas           #
####################################################

# Select close price as plotting data
close_px = aapl['Close']

# Parameters for Bollinger Bands
period = 10
std = 2

# Calculation of Bollinger Bands: SMA, Upper and Lower
mavg = pa.rolling_mean(close_px, period)
mstd = pa.rolling_std(close_px, period)
uband = mavg + 2 * mstd
lband = mavg - 2 * mstd

# Exercise: Use Matplotlib to plot stock price
#close_px.plot(label='AAPL', style='k*')
#mavg.plot(label='mavg')
#uband.plot()
#lband.plot()
#plt.legend()
#plt.show()

##################################################
#           Data for Candlestick Chart           #
# Open, Close, High, Low                         #
title = folder.split("/")[-1]
print title
ax1.set_title(title)
# ax.set_xlabel('Time (h)')
# ax.set_ylabel('RMSE')
# ax.set_yscale('log')
# ax.set_xlim(0, 30)
# colors = sns.color_palette("husl", 25)
# for i in range(0, 25):
#     ax.scatter(time_by_seed[i], error_by_seed[i], c=cm.hsv(i/25., 1), s=[30]*len(time_by_seed[i]))
#     ax.scatter(time_by_seed[i], error_by_seed[i], c=[colors[i]]*len(time_by_seed[i]), s=[30]*len(time_by_seed[i]))

ax1.set_xlabel('Time (h)')
ax1.set_ylabel('RMSE')
ax1.set_xlim(-1, 30)

y_mean = stats.rolling_mean(sorted_errors, 5)
y_std = stats.rolling_std(sorted_errors, 5)
# y_upper = y_mean + 2*y_std
y_upper = stats.rolling_max(sorted_errors, 5)
# y_lower = y_mean - 2*y_std
y_lower = stats.rolling_min(sorted_errors, 5)

sorted_data = DataFrame(data=sorted_points, columns=['time', 'binned_time', 'error', 'seed'])
# sns.jointplot("binned_time", "error", sorted_data)
# ax1.scatter(sorted_binned_time, sorted_errors)
ax1.plot(sorted_time, y_mean, color="red", label="Rolling mean")
# ax1.errorbar(sorted_binned_time, sorted_errors, marker='o', ms=8, yerr=3*y_std, ls='dotted', label="Rolling mean")
ax1.legend()
ax1.fill_between(sorted_time, y_mean, y_upper,
def plot_do(files, strait='DS', maxyear=None, savefig=False, figname=None):
    files = sorted(files)

    # make sure all files exist
    for fname in files:
        if not os.path.isfile(fname):
            raise IOError('File not found: ' + fname)

    # keyword arguments for reading the files
    kwargs = dict(key='df')
    if maxyear:
        kwargs['where'] = 'ModelYear<={}'.format(maxyear)

    fig, axx = plt.subplots(nrows=len(fieldsets), ncols=len(fieldsets[0]),
                            sharex='all', sharey='row', figsize=(16, 9))
    spt = fig.suptitle('Strait: {}'.format(strait))

    cases = [os.path.basename(fname).split('.do.h5')[0] for fname in files]

    # loop through files
    for nf, fname in enumerate(files):
        label = cases[nf]
        df = pd.read_hdf(fname, **kwargs)
        df = df.loc[strait]
        for i, fields in enumerate(fieldsets):
            for j, varn in enumerate(fields):
                ax = axx[i, j]
                ax.set_title(varn)
                if varn.startswith('rho_'):
                    # compute density from T,S
                    salt = rolling_mean(df[varn.replace('rho_', 'S')], 365).values
                    temp = rolling_mean(df[varn.replace('rho_', 'T')], 365).values
                    series = gsw.rho(salt, temp, 0) - 1e3
                else:
                    # get series directly from file
                    series = rolling_mean(df[varn], 365).values
                x = df.index.get_level_values('ModelYear')
                ax.plot(x, series, label=label)

                # only once
                if nf == 0:
                    # plot observations
                    try:
                        values = observations[strait][varn]
                        try:
                            obs_handle = ax.axhspan(values[0], values[1], **obs_props)
                        except TypeError:
                            ax.axhline(values, color='0.6666', linewidth=2)
                    except KeyError:
                        pass

    # legend with patch for observations
    handles, labels = axx.flat[0].get_legend_handles_labels()
    obs_handle = mpatches.Patch(**obs_props)
    handles += [obs_handle]
    labels += ['observations']
    fig.subplots_adjust(right=0.8)
    lgd = fig.legend(handles, labels, bbox_to_anchor=(0.82, 0.5),
                     loc='center left', bbox_transform=fig.transFigure)

    # save figure to file
    if savefig or figname:
        figname = figname or 'ovf_props_{}_{}.pdf'.format(strait, '_'.join(cases))
        fig.savefig(figname, bbox_extra_artists=(lgd, spt,), bbox_inches='tight')
    else:
        plt.show()