def getTrainData_2(symbol, startDate, endDate, remove_head=15):
    """Build indicator features and a next-row price target for ``symbol``.

    Prices are loaded with ``util.loadPriceData`` over the calendar range
    ``startDate``..``endDate``.  The last row is excluded from the features
    because it has no following row to use as a target.

    Args:
        symbol: Column name / ticker to load and to predict.
        startDate: First date of the range passed to ``pd.date_range``.
        endDate: Last date of the range passed to ``pd.date_range``.
        remove_head: Number of leading rows dropped from both outputs so the
            warm-up NaN values of the indicators are excluded.  The longest
            window computed here is 15 rows; a smaller value may leave NaN
            values in the SMA15 column.

    Returns:
        A ``(Xtrain, Ytrain)`` tuple.  ``Xtrain`` is a DataFrame with the
        columns CLOSE, EMA5, SMA5, EMA15, SMA15, LOWER_BB1P, UPPER_BB1P,
        LOWER_BB2P, UPPER_BB2P, MACD, SG_MACD, RSI followed by whatever
        columns ``ind.percent_KD`` returns.  ``Ytrain`` is the ``symbol``
        column shifted up by one row (the value of the following row).

    Raises:
        AssertionError: If the feature and target lengths do not line up
            (for example when the loaded price frame is empty).
    """
    dates = pd.date_range(startDate, endDate)
    df = util.loadPriceData([symbol], dates)

    # Positional slice that skips the tail row.  ``.iloc`` is used because
    # the ``.ix`` indexer no longer exists in current pandas releases.
    close = df.iloc[:-1]
    assert len(df) == len(close) + 1

    # Moving averages.  The 15-period variants use a 15-row window and the
    # SMA columns are genuine simple (rolling mean) averages, so the four
    # columns are no longer copies of the 5-period EMA.
    ema5 = ind.ema(close, periods=5)
    sma5 = close.rolling(window=5).mean()
    ema15 = ind.ema(close, periods=15)
    sma15 = close.rolling(window=15).mean()

    bb1p = ind.get_BBANDS(close, symbol, periods=14, mul=1)
    bb2p = ind.get_BBANDS(close, symbol, periods=14, mul=2)
    _, _, macd = ind.average_convergence(close)
    signal_macd = ind.signal_MACD(macd)
    rsi = ind.rsi(close)
    ohlcv = util.load_OHLCV(symbol, dates)
    percent_KD = ind.percent_KD(ohlcv)

    # Rename without ``inplace`` so neither ``df`` nor the indicator frames
    # returned by ``ind`` are mutated behind the caller's back.
    bb1p = bb1p.rename(columns={'LOWER': 'LOWER_BB1P', 'UPPER': 'UPPER_BB1P'})
    bb2p = bb2p.rename(columns={'LOWER': 'LOWER_BB2P', 'UPPER': 'UPPER_BB2P'})
    features = [
        close.rename(columns={symbol: 'CLOSE'})['CLOSE'],
        ema5.rename(columns={symbol: 'EMA5'})['EMA5'],
        sma5.rename(columns={symbol: 'SMA5'})['SMA5'],
        ema15.rename(columns={symbol: 'EMA15'})['EMA15'],
        sma15.rename(columns={symbol: 'SMA15'})['SMA15'],
        bb1p['LOWER_BB1P'],
        bb1p['UPPER_BB1P'],
        bb2p['LOWER_BB2P'],
        bb2p['UPPER_BB2P'],
        macd.rename(columns={symbol: 'MACD'})['MACD'],
        signal_macd.rename(columns={symbol: 'SG_MACD'})['SG_MACD'],
        rsi.rename(columns={symbol: 'RSI'})['RSI'],
    ]

    Xtrain = pd.DataFrame(index=close.index)
    for feature in features:
        Xtrain = Xtrain.join(feature)
    Xtrain = Xtrain.join(percent_KD)

    # Target: only the symbol column is kept (any other column in ``df`` is
    # skipped) and shifted so each row holds the following row's value; the
    # trailing NaN produced by the shift is dropped.
    Ytrain = df[symbol].shift(-1).dropna()
    assert len(Ytrain) == len(Xtrain)

    # Skip the head rows to avoid indicator warm-up NaN values.
    return Xtrain.iloc[remove_head:], Ytrain.iloc[remove_head:]
def generate_onesecurity_onefreq(filepath):
    """Compute indicator series for one price file and write them as CSV.

    Input (example from the original header comment):
        filepath = './ts/Future/IF/IF_15s.ts'

        datestimes, volrmb, open, high, low, close, avgprice, bid, ask
        2014-12-09 09:15:00, 880059660.000, 3306.800, 3307.200, 3306.800,
        3307.200, 3307.331, 3306.900, 3308.100

    Output (example from the original header comment):
        './ics/Future/IF/IF_15s.ics'

        datestimes, typ, ema50, ema27, rsi14, cci10, tr, kpi
        2005-01-04, 0.2367, 0.2360, 0.2360, 0.5000, 0.0000, 0.0420, 0.2360

    The file is read with its first column parsed as dates and used as the
    index; only the 'close', 'high' and 'low' columns are consumed.  The
    actual output column labels are whatever the ``ic`` helpers return.

    NOTE(review): ``s_prodtype`` is not defined in this function; it is
    presumably a module-level name - confirm it is set before calling.

    Args:
        filepath: '/'-separated path to a price file whose name has the
            form '<product>_<frequency>.<ext>'.
    """
    # Product: file-name part before the first '_'.
    # Frequency: text between the last '_' of the path and the next '.'.
    file_name = filepath.rsplit('/', 1)[-1]
    s_prod = file_name.split('_', 1)[0]
    s_freq = filepath.rsplit('_', 1)[-1].split('.', 1)[0]

    out_path = ('./ics/' + s_prodtype + '/' + s_prod + '/'
                + s_prod + '_' + s_freq + '.ics')

    logger.info("generate_onesecurity_onefreq(%s,%s) start", s_prod, s_freq)

    prices = pd.read_csv(filepath, parse_dates=[0], index_col=0)
    close, high, low = prices['close'], prices['high'], prices['low']

    # ``typ`` is computed first because it also feeds the CCI calculation.
    typ = ic.typ(high, low, close)
    indicators = [
        typ,
        ic.ema(close, 50),
        ic.ema(close, 27),
        ic.rsi(close),
        ic.cci(typ, close, 10),
        ic.tr(high, low, close),
        ic.kpi(close),
    ]

    # Column-wise concatenation, written with four decimal places.
    pd.concat(indicators, axis=1).to_csv(out_path, float_format='%.4f')

    logger.info("generate_onesecurity_onefreq(%s,%s) done", s_prod, s_freq)
def getTrainData_3(symbol, startDate, endDate, remove_head=15):
    """Build normalised indicator features and an up/down target.

    Prices are loaded with ``util.loadPriceData`` over the calendar range
    ``startDate``..``endDate``.  The last row is excluded from the features
    because it has no following row to derive a target from.

    Args:
        symbol: Column name / ticker to load and to predict.
        startDate: First date of the range passed to ``pd.date_range``.
        endDate: Last date of the range passed to ``pd.date_range``.
        remove_head: Number of leading rows dropped from both outputs so the
            warm-up NaN values of the indicators are excluded.

    Returns:
        A ``(Xnorm, Ytrain)`` tuple.  ``Xnorm`` is a DataFrame holding the
        output of ``preprocessing.Normalizer`` applied to the columns CLOSE,
        EMA5, SMA5, LOWER_BB1P, UPPER_BB1P, MACD, RSI and whatever columns
        ``ind.percent_KD`` returns.  ``Ytrain`` is 1 where the following
        row's ``ind.daily_returns`` value is positive and 0 otherwise.

    Raises:
        AssertionError: If the feature and target lengths do not line up
            (for example when the loaded price frame is empty).
    """
    dates = pd.date_range(startDate, endDate)
    df = util.loadPriceData([symbol], dates)

    # Positional slice that skips the tail row.  ``.iloc`` is used because
    # the ``.ix`` indexer no longer exists in current pandas releases.
    close = df.iloc[:-1]
    assert len(df) == len(close) + 1

    ema5 = ind.ema(close, periods=5)
    # Genuine simple moving average (rolling mean), so SMA5 is no longer a
    # duplicate of the EMA5 column.
    sma5 = close.rolling(window=5).mean()
    bb1p = ind.get_BBANDS(close, symbol, periods=14, mul=1)
    _, _, macd = ind.average_convergence(close)
    rsi = ind.rsi(close)
    ohlcv = util.load_OHLCV(symbol, dates)
    percent_KD = ind.percent_KD(ohlcv)

    # Rename without ``inplace`` so neither ``df`` nor the indicator frames
    # returned by ``ind`` are mutated.
    bb1p = bb1p.rename(columns={'LOWER': 'LOWER_BB1P', 'UPPER': 'UPPER_BB1P'})
    features = [
        close.rename(columns={symbol: 'CLOSE'})['CLOSE'],
        ema5.rename(columns={symbol: 'EMA5'})['EMA5'],
        sma5.rename(columns={symbol: 'SMA5'})['SMA5'],
        bb1p['LOWER_BB1P'],
        bb1p['UPPER_BB1P'],
        macd.rename(columns={symbol: 'MACD'})['MACD'],
        rsi.rename(columns={symbol: 'RSI'})['RSI'],
    ]

    Xtrain = pd.DataFrame(index=close.index)
    for feature in features:
        Xtrain = Xtrain.join(feature)
    Xtrain = Xtrain.join(percent_KD)

    # Target: daily returns of the symbol column only, shifted so each row
    # describes the following row (tomorrow); the trailing NaN produced by
    # the shift is dropped.
    daily = ind.daily_returns(df[symbol]).shift(-1).dropna()
    Ytrain = 1 * (daily > 0.0)  # 1 for a positive return, otherwise 0
    assert len(Ytrain) == len(Xtrain)

    # Skip the head rows to avoid indicator warm-up NaN values.
    Xtrain = Xtrain.iloc[remove_head:]
    Ytrain = Ytrain.iloc[remove_head:]

    # Normalise the features.
    # NOTE(review): presumably ``preprocessing`` is sklearn.preprocessing,
    # whose Normalizer rescales each row (sample) - confirm.
    normalizer = preprocessing.Normalizer().fit(Xtrain)
    Xnorm = normalizer.transform(Xtrain)
    # NOTE(review): the rebuilt frame gets a default integer index, so it no
    # longer shares the date index of ``Ytrain``; kept unchanged for
    # backward compatibility - confirm whether callers rely on it.
    Xnorm = pd.DataFrame(Xnorm, columns=Xtrain.columns)
    return Xnorm, Ytrain