def getTrainData_2(symbol, startDate, endDate, remove_head=15):
    """Build indicator features and a next-row price target for ``symbol``.

    Prices are loaded with ``util.loadPriceData`` for the calendar range
    ``startDate``..``endDate``.  The last price row is left out of the
    features because it has no following row to act as its target.

    Args:
        symbol: Column name of the instrument in the loaded price frame.
        startDate: First date handed to ``pd.date_range``.
        endDate: Last date handed to ``pd.date_range``.
        remove_head: Number of leading rows dropped from both outputs so
            that the indicator warm-up rows (NaN values) are skipped.

    Returns:
        Tuple ``(Xtrain, Ytrain)``.  ``Xtrain`` is a DataFrame holding the
        columns CLOSE, EMA5, SMA5, EMA15, SMA15, LOWER_BB1P, UPPER_BB1P,
        LOWER_BB2P, UPPER_BB2P, MACD, SG_MACD, RSI plus whatever columns
        ``ind.percent_KD`` returns.  ``Ytrain`` is ``df[symbol]`` shifted
        one row earlier (the next row's price).

    Raises:
        AssertionError: If the feature and target lengths disagree.
    """
    dates = pd.date_range(startDate, endDate)
    df = util.loadPriceData([symbol], dates)

    # Positional slice: every row but the last.  ``.iloc`` replaces the
    # ``.ix`` indexer, which no longer exists in pandas >= 1.0.
    close = df.iloc[0:len(df) - 1]
    assert len(df) == len(close) + 1

    ema5 = ind.ema(close, periods=5)
    # NOTE(review): the "SMA" features are produced by ind.ema, so they
    # duplicate the EMA columns.  No simple-moving-average helper is visible
    # from this function; swap it in here if ``ind`` provides one.
    sma5 = ind.ema(close, periods=5)
    # The 15-period features previously used periods=5, which made them
    # copies of the 5-period columns.
    ema15 = ind.ema(close, periods=15)
    sma15 = ind.ema(close, periods=15)
    bb1p = ind.get_BBANDS(close, symbol, periods=14, mul=1)
    bb2p = ind.get_BBANDS(close, symbol, periods=14, mul=2)
    _, _, macd = ind.average_convergence(close)
    signal_macd = ind.signal_MACD(macd)
    rsi = ind.rsi(close)
    ohlcv = util.load_OHLCV(symbol, dates)
    percent_KD = ind.percent_KD(ohlcv)

    # Pick the relevant column of each indicator and give it its feature
    # name.  Series.rename returns a new object, so neither ``df`` nor the
    # indicator frames are mutated.
    features = [
        close[symbol].rename('CLOSE'),
        ema5[symbol].rename('EMA5'),
        sma5[symbol].rename('SMA5'),
        ema15[symbol].rename('EMA15'),
        sma15[symbol].rename('SMA15'),
        bb1p['LOWER'].rename('LOWER_BB1P'),
        bb1p['UPPER'].rename('UPPER_BB1P'),
        bb2p['LOWER'].rename('LOWER_BB2P'),
        bb2p['UPPER'].rename('UPPER_BB2P'),
        macd[symbol].rename('MACD'),
        signal_macd[symbol].rename('SG_MACD'),
        rsi[symbol].rename('RSI'),
    ]

    Xtrain = pd.DataFrame(index=close.index)
    for feature in features:
        Xtrain = Xtrain.join(feature)
    Xtrain = Xtrain.join(percent_KD)

    # Target: the symbol's price one row ahead.  Shifting leaves a NaN in
    # the last row, which dropna removes.
    Ytrain = df[symbol].shift(-1).dropna()
    assert len(Ytrain) == len(Xtrain)

    # Skip the head rows to avoid NaN values from the indicator warm-up.
    Xtrain = Xtrain.iloc[remove_head:]
    Ytrain = Ytrain.iloc[remove_head:]
    return Xtrain, Ytrain
def getTrainData_4(symbol, startDate, endDate, periods=14, remove_head=19):
    """Build indicator features and a binary up-trend label for ``symbol``.

    The label comes from ``ind.isUpTrend(df, symbol, periods=periods)``.
    The last ``periods`` rows of the price and volume data are excluded
    from the features so that their length matches the label.

    Args:
        symbol: Column name of the instrument in the loaded frames.
        startDate: First date handed to ``pd.date_range``.
        endDate: Last date handed to ``pd.date_range``.
        periods: Number of trailing rows cut from the inputs; also passed
            to ``ind.isUpTrend`` as its look-ahead window.
        remove_head: Number of leading rows dropped from both outputs so
            that the indicator warm-up rows (NaN values) are skipped.

    Returns:
        Tuple ``(Xtrain, Ydigit)``.  ``Xtrain`` is a DataFrame with CLOSE,
        the UPPER/LOWER Bollinger bands, EMA26, EMA12, RSI, %K, OBV and SET
        columns, with remaining NaN values replaced by 0.  ``Ydigit`` is the
        up-trend flag as integers (1 for True, 0 for False).

    Raises:
        AssertionError: If the sliced volume and price lengths differ, or
            if the feature and label lengths differ.
    """
    dates = pd.date_range(startDate, endDate)
    df = util.loadPriceData([symbol], dates)

    # Drop the latest ``periods`` rows (positional slice; ``.iloc`` replaces
    # the ``.ix`` indexer, which no longer exists in pandas >= 1.0).
    df_sliced = df.iloc[0:len(df) - periods]
    price_close = pd.DataFrame(df_sliced[symbol])
    # Named ``set_frame`` rather than ``set`` to avoid shadowing the builtin.
    set_frame = pd.DataFrame(df_sliced['SET'])

    bbands = ind.get_BBANDS(price_close, symbol)
    ema26, ema12, _ = ind.average_convergence(price_close)
    rsi = ind.rsi(price_close)
    ohlcv = util.load_OHLCV(symbol, dates)
    percent_KD = ind.percent_KD(ohlcv)

    volume = util.loadVolumeData(symbol, dates)
    # Same trailing cut as for the prices.
    volume_sliced = volume.iloc[0:len(volume) - periods]
    assert len(volume_sliced) == len(price_close)
    volume_sliced = pd.DataFrame(volume_sliced[symbol])
    obv = ind.OBV(volume_sliced, price_close)

    # Rename without mutating the indicator frames, then join everything
    # onto the close-price frame in a fixed column order.
    feature_blocks = [
        bbands['UPPER'],
        bbands['LOWER'],
        ema26.rename(columns={symbol: 'EMA26'}),
        ema12.rename(columns={symbol: 'EMA12'}),
        rsi.rename(columns={symbol: 'RSI'}),
        percent_KD['%K'],
        obv.rename(columns={symbol: 'OBV'}),
        set_frame,
    ]
    Xtrain = price_close.rename(columns={symbol: 'CLOSE'})
    for block in feature_blocks:
        Xtrain = Xtrain.join(block)

    upTrend = ind.isUpTrend(df, symbol, periods=periods)
    # Multiplying by 1 converts True to 1 (up trend) and False to 0.
    Ydigit = 1 * upTrend[symbol]
    assert len(Xtrain) == len(Ydigit)

    # Skip the head rows to avoid NaN values, then zero-fill whatever NaN
    # remains.  Reassigning instead of using inplace=True avoids writing
    # into a slice of the joined frame.
    Xtrain = Xtrain.iloc[remove_head:].fillna(0)
    Ydigit = Ydigit.iloc[remove_head:]
    return Xtrain, Ydigit
def getTrainData_3(symbol, startDate, endDate, remove_head=15):
    """Build normalized indicator features and a next-row direction label.

    Prices are loaded with ``util.loadPriceData`` for the calendar range
    ``startDate``..``endDate``.  The last price row is left out of the
    features because it has no following row to act as its target.

    Args:
        symbol: Column name of the instrument in the loaded price frame.
        startDate: First date handed to ``pd.date_range``.
        endDate: Last date handed to ``pd.date_range``.
        remove_head: Number of leading rows dropped from both outputs so
            that the indicator warm-up rows (NaN values) are skipped.

    Returns:
        Tuple ``(Xnorm, Ytrain)``.  ``Xnorm`` is a DataFrame with the
        columns CLOSE, EMA5, SMA5, LOWER_BB1P, UPPER_BB1P, MACD, RSI plus
        whatever columns ``ind.percent_KD`` returns, transformed by
        ``preprocessing.Normalizer``.  It keeps the row index of the
        un-normalized features so it stays aligned with ``Ytrain``.
        ``Ytrain`` is 1 where the next row's daily return is positive and
        0 otherwise.

    Raises:
        AssertionError: If the feature and label lengths disagree.
    """
    dates = pd.date_range(startDate, endDate)
    df = util.loadPriceData([symbol], dates)

    # Positional slice: every row but the last.  ``.iloc`` replaces the
    # ``.ix`` indexer, which no longer exists in pandas >= 1.0.
    close = df.iloc[0:len(df) - 1]
    assert len(df) == len(close) + 1

    ema5 = ind.ema(close, periods=5)
    # NOTE(review): the "SMA5" feature is produced by ind.ema, so it
    # duplicates EMA5.  No simple-moving-average helper is visible from
    # this function; swap it in here if ``ind`` provides one.
    sma5 = ind.ema(close, periods=5)
    bb1p = ind.get_BBANDS(close, symbol, periods=14, mul=1)
    _, _, macd = ind.average_convergence(close)
    rsi = ind.rsi(close)
    ohlcv = util.load_OHLCV(symbol, dates)
    percent_KD = ind.percent_KD(ohlcv)

    # Pick the relevant column of each indicator and give it its feature
    # name.  Series.rename returns a new object, so neither ``df`` nor the
    # indicator frames are mutated.
    features = [
        close[symbol].rename('CLOSE'),
        ema5[symbol].rename('EMA5'),
        sma5[symbol].rename('SMA5'),
        bb1p['LOWER'].rename('LOWER_BB1P'),
        bb1p['UPPER'].rename('UPPER_BB1P'),
        macd[symbol].rename('MACD'),
        rsi[symbol].rename('RSI'),
    ]

    Xtrain = pd.DataFrame(index=close.index)
    for feature in features:
        Xtrain = Xtrain.join(feature)
    Xtrain = Xtrain.join(percent_KD)

    # Label: sign of tomorrow's daily return.  Shifting leaves a NaN in
    # the last row, which dropna removes.
    daily = ind.daily_returns(df[symbol])
    daily = daily.shift(-1).dropna()
    # Multiplying by 1 converts True to 1 (positive return) and False to 0.
    Ytrain = 1 * (daily > 0.0)
    assert len(Ytrain) == len(Xtrain)

    # Skip the head rows to avoid NaN values from the indicator warm-up.
    Xtrain = Xtrain.iloc[remove_head:]
    Ytrain = Ytrain.iloc[remove_head:]

    # Normalize the features.  The transform returns a bare array, so the
    # index and column labels are restored explicitly; without ``index=``
    # the frame would get a fresh integer index that no longer matches
    # ``Ytrain``.
    normalizer = preprocessing.Normalizer().fit(Xtrain)
    Xnorm = pd.DataFrame(
        normalizer.transform(Xtrain),
        index=Xtrain.index,
        columns=Xtrain.columns,
    )
    return Xnorm, Ytrain