def plot_industry_roc(startDate, endDate):
    """Print and plot the industry groups/sectors whose mean ROC beats SET.

    Prices for every symbol returned by ``data.getSubIndustrySymbol()`` are
    loaded for the date range, ``ind.roc`` is applied, and the per-column
    mean is taken. A column is selected when its mean ROC is positive and not
    below the mean ROC of the ``'SET'`` column. The selected names are
    printed in descending order of mean ROC and plotted on normalized prices.

    Args:
        startDate: First date of the range (passed to ``pd.date_range``).
        endDate: Last date of the range (passed to ``pd.date_range``).
    """
    dates = pd.date_range(startDate, endDate)
    symbolList = data.getSubIndustrySymbol()
    df = utl.loadPriceData(symbolList, dates)

    df_roc = ind.roc(df)
    df_roc.fillna(0, inplace=True)  # treat missing ROC values as 0

    # Per-column mean. np.mean(DataFrame) is avoided because recent pandas
    # reduces over both axes there and returns a single scalar.
    mean_roc = df_roc.mean()

    # Select with a boolean mask instead of where() + argsort(): argsort on a
    # Series containing NaN does not return usable positions, and comparing
    # against a NaN-masked SET value would silently select nothing.
    keep = (mean_roc > 0) & (mean_roc >= mean_roc['SET'])
    ranked = mean_roc[keep].sort_values(ascending=False)
    nameSymbol = ranked.index.tolist()

    print("List of Industry Group and Sector that ROC > SET (ROC)\n",
          nameSymbol)

    df_norm = utl.normalize_data(df)
    utg.plotStock(df_norm, nameSymbol, startDate, endDate,
                  title='ROC: Industry Group and Sector')
def getTrainData_2(symbol, startDate, endDate, remove_head=15):
    """Build a regression training set: indicator features vs. next-day close.

    Features are computed on every row except the last one; the target is the
    close price of ``symbol`` shifted one row back, so each feature row is
    paired with the following row's close.

    Args:
        symbol: Column name of the instrument in the loaded price data.
        startDate: First date of the range (passed to ``pd.date_range``).
        endDate: Last date of the range (passed to ``pd.date_range``).
        remove_head: Number of leading rows to drop from both outputs, so the
            warm-up NaN values of the indicators are skipped.

    Returns:
        Tuple ``(Xtrain, Ytrain)``: the feature DataFrame and the next-day
        close Series, both with the first ``remove_head`` rows removed.
    """
    dates = pd.date_range(startDate, endDate)
    df = util.loadPriceData([symbol], dates)

    # Drop the last row: it has no "next day" to predict. `.iloc` replaces
    # the removed `.ix` indexer; copy() keeps the in-place rename below from
    # touching a view of `df`.
    close = df.iloc[:-1].copy()
    assert len(df) == len(close) + 1

    ema5 = ind.ema(close, periods=5)
    # NOTE(review): SMA5/SMA15 are produced with ind.ema(periods=5), exactly
    # as in the original code; confirm whether a simple-moving-average helper
    # (and periods=15 for SMA15) was intended.
    sma5 = ind.ema(close, periods=5)
    ema15 = ind.ema(close, periods=15)  # was periods=5 (copy-paste slip)
    sma15 = ind.ema(close, periods=5)
    bb1p = ind.get_BBANDS(close, symbol, periods=14, mul=1)
    bb2p = ind.get_BBANDS(close, symbol, periods=14, mul=2)
    _, _, macd = ind.average_convergence(close)
    signal_macd = ind.signal_MACD(macd)
    rsi = ind.rsi(close)
    ohlcv = util.load_OHLCV(symbol, dates)
    percent_KD = ind.percent_KD(ohlcv)

    # Give every feature a unique column name before joining.
    close.rename(columns={symbol: 'CLOSE'}, inplace=True)
    ema5.rename(columns={symbol: 'EMA5'}, inplace=True)
    sma5.rename(columns={symbol: 'SMA5'}, inplace=True)
    ema15.rename(columns={symbol: 'EMA15'}, inplace=True)
    sma15.rename(columns={symbol: 'SMA15'}, inplace=True)
    bb1p.rename(columns={'LOWER': 'LOWER_BB1P'}, inplace=True)
    bb1p.rename(columns={'UPPER': 'UPPER_BB1P'}, inplace=True)
    bb2p.rename(columns={'LOWER': 'LOWER_BB2P'}, inplace=True)
    bb2p.rename(columns={'UPPER': 'UPPER_BB2P'}, inplace=True)
    macd.rename(columns={symbol: 'MACD'}, inplace=True)
    signal_macd.rename(columns={symbol: 'SG_MACD'}, inplace=True)
    rsi.rename(columns={symbol: 'RSI'}, inplace=True)

    Xtrain = pd.DataFrame(index=close.index)
    features = (
        close['CLOSE'],
        ema5['EMA5'],
        sma5['SMA5'],
        ema15['EMA15'],
        sma15['SMA15'],
        bb1p['LOWER_BB1P'],
        bb1p['UPPER_BB1P'],
        bb2p['LOWER_BB2P'],
        bb2p['UPPER_BB2P'],
        macd['MACD'],
        signal_macd['SG_MACD'],
        rsi['RSI'],
        percent_KD,
    )
    for feature in features:
        Xtrain = Xtrain.join(feature)

    # Target: next row's close of `symbol` only (the SET column is skipped).
    # The shift leaves a NaN in the last row, which is dropped.
    Ytrain = df[symbol].shift(-1).dropna()
    assert len(Ytrain) == len(Xtrain)

    # Skip the head rows to avoid the indicators' warm-up NaN values.
    Xtrain = Xtrain.iloc[remove_head:]
    Ytrain = Ytrain.iloc[remove_head:]
    return Xtrain, Ytrain
def getTrainData_1(symbol, startDate, endDate, periods=14, remove_head=19):
    """Build a classification training set: scaled indicators vs. up-trend flag.

    The last ``periods`` rows are excluded from the features because the
    label is obtained from ``ind.isUpTrend(df, symbol, periods=periods)``.

    Args:
        symbol: Column name of the instrument in the loaded price data.
        startDate: First date of the range (passed to ``pd.date_range``).
        endDate: Last date of the range (passed to ``pd.date_range``).
        periods: Number of trailing rows to exclude; also forwarded to
            ``ind.isUpTrend``.
        remove_head: Number of leading rows to drop from every output, so the
            warm-up NaN values of the indicators are skipped.

    Returns:
        Tuple ``(Xtrain, Ydigit, price_close)``: the feature DataFrame, the
        0/1 label Series and the close-price DataFrame of ``symbol``, each
        with the first ``remove_head`` rows removed.
    """
    dates = pd.date_range(startDate, endDate)
    df = util.loadPriceData([symbol], dates)
    util.fill_missing_values(df)

    # Exclude the latest `periods` rows. `.iloc` replaces the removed `.ix`.
    df_sliced = df.iloc[0:len(df) - periods]
    price_close = pd.DataFrame(df_sliced[symbol])
    set_close = pd.DataFrame(df_sliced['SET'])

    # The /100 and *100 factors rescale the indicators, as in the original.
    rsi = ind.rsi(price_close) / 100
    myRatio = ind.get_myRatio(price_close)
    daily = ind.daily_returns(price_close) * 100
    _, _, macd = ind.average_convergence(price_close)

    ohlcv = util.load_OHLCV(symbol, dates)
    percent_KD = ind.percent_KD(ohlcv) / 100
    c2o = ind.daily_returns_2(ohlcv) * 100

    volume = util.loadVolumeData([symbol], dates)
    volume_sliced = volume.iloc[0:len(volume) - periods]
    assert len(volume_sliced) == len(df_sliced)
    obv = ind.OBV(volume_sliced, df_sliced)
    obv_rsi = ind.rsi(obv) / 100      # RSI of OBV, rescaled
    set_rsi = ind.rsi(set_close) / 100  # RSI of the SET column, rescaled

    # Give every feature a unique column name before joining.
    rsi.rename(columns={symbol: 'RSI'}, inplace=True)
    myRatio.rename(columns={symbol: 'MY'}, inplace=True)
    daily.rename(columns={symbol: 'DAILY'}, inplace=True)
    macd.rename(columns={symbol: 'MACD'}, inplace=True)
    obv_rsi.rename(columns={symbol: 'OBV_RSI'}, inplace=True)
    set_rsi.rename(columns={'SET': 'SET_RSI'}, inplace=True)

    Xtrain = pd.DataFrame(index=df_sliced.index)
    features = (
        rsi['RSI'],
        percent_KD['%K'],
        myRatio['MY'],
        daily['DAILY'],
        macd['MACD'],
        c2o,
        obv_rsi['OBV_RSI'],
        set_rsi,
    )
    for feature in features:
        Xtrain = Xtrain.join(feature)

    upTrend = ind.isUpTrend(df, symbol, periods=periods)
    # Multiplying by 1 converts True/False into 1/0.
    Ydigit = 1 * upTrend[symbol]

    # Skip the head rows to avoid the indicators' warm-up NaN values.
    Xtrain = Xtrain.iloc[remove_head:]
    Ydigit = Ydigit.iloc[remove_head:]
    price_close = price_close.iloc[remove_head:]
    return Xtrain, Ydigit, price_close
def getTrainData_4(symbol, startDate, endDate, periods=14, remove_head=19):
    """Build a classification training set from raw (unscaled) indicators.

    The last ``periods`` rows are excluded from the features because the
    label is obtained from ``ind.isUpTrend(df, symbol, periods=periods)``.

    Args:
        symbol: Column name of the instrument in the loaded price data.
        startDate: First date of the range (passed to ``pd.date_range``).
        endDate: Last date of the range (passed to ``pd.date_range``).
        periods: Number of trailing rows to exclude; also forwarded to
            ``ind.isUpTrend``.
        remove_head: Number of leading rows to drop from both outputs.

    Returns:
        Tuple ``(Xtrain, Ydigit)``: the feature DataFrame (remaining NaN
        replaced by 0) and the 0/1 label Series, both with the first
        ``remove_head`` rows removed.
    """
    dates = pd.date_range(startDate, endDate)
    df = util.loadPriceData([symbol], dates)

    # Exclude the latest `periods` rows. `.iloc` replaces the removed `.ix`.
    df_sliced = df.iloc[0:len(df) - periods]
    price_close = pd.DataFrame(df_sliced[symbol])
    set_close = pd.DataFrame(df_sliced['SET'])

    bbands = ind.get_BBANDS(price_close, symbol)
    ema26, ema12, macd = ind.average_convergence(price_close)
    rsi = ind.rsi(price_close)

    ohlcv = util.load_OHLCV(symbol, dates)
    percent_KD = ind.percent_KD(ohlcv)

    # Pass a list of symbols, matching the other loadVolumeData call site
    # (getTrainData_1).
    volume = util.loadVolumeData([symbol], dates)
    volume_sliced = volume.iloc[0:len(volume) - periods]
    assert len(volume_sliced) == len(price_close)
    volume_sliced = pd.DataFrame(volume_sliced[symbol])
    obv = ind.OBV(volume_sliced, price_close)

    # Give every feature a unique column name before joining.
    price_close.rename(columns={symbol: 'CLOSE'}, inplace=True)
    ema26.rename(columns={symbol: 'EMA26'}, inplace=True)
    ema12.rename(columns={symbol: 'EMA12'}, inplace=True)
    rsi.rename(columns={symbol: 'RSI'}, inplace=True)
    obv.rename(columns={symbol: 'OBV'}, inplace=True)

    Xtrain = price_close
    features = (
        bbands['UPPER'],
        bbands['LOWER'],
        ema26,
        ema12,
        rsi,
        percent_KD['%K'],
        obv,
        set_close,
    )
    for feature in features:
        Xtrain = Xtrain.join(feature)

    upTrend = ind.isUpTrend(df, symbol, periods=periods)
    # Multiplying by 1 converts True/False into 1/0.
    Ydigit = 1 * upTrend[symbol]
    assert len(Xtrain) == len(Ydigit)

    # Skip the head rows, then replace any remaining NaN with 0. fillna
    # returns a new frame, so no slice is modified in place.
    Xtrain = Xtrain.iloc[remove_head:].fillna(0)
    Ydigit = Ydigit.iloc[remove_head:]
    return Xtrain, Ydigit
def prepareDataSet(symbols, dates, csv_dir=DIR_SEC_CSV,
                   output_dir=output_dataset):
    """Write one normalized X data set per symbol plus the Y data set.

    ``output_dir`` is cleared first. For every symbol a frame with the
    ``<CLOSE>`` and ``<VOL>`` columns is built from ``csv_dir``, normalized
    with ``util.normalize_data`` and written to ``<output_dir>/<sym>_X.csv``.
    Afterwards the price data of all symbols is loaded and handed to
    ``writeDataSetY``.

    Args:
        symbols: Iterable of symbol names to process.
        dates: Date range forwarded to the loaders.
        csv_dir: Directory holding the source CSV files.
        output_dir: Directory that receives the generated files; it is
            cleared before anything is written.
    """
    clearDir(output_dir)
    column_names = ['<CLOSE>', '<VOL>']

    for count, sym in enumerate(symbols):
        # Use the `csv_dir` parameter; the previous code referenced an
        # undefined name `csvdir`, which ignored the argument.
        df_X = createDataFrame(sym, dates, column_names, csv_dir)
        df_norm = util.normalize_data(df_X)

        # Original author's note: the file is written without an index.
        writeDataSetX(df_norm, join(output_dir, "{}_X.csv".format(sym)))

        # Progress message every 20 symbols (starting with the first).
        if count % 20 == 0:
            print("Writing total files : {} ....".format(count))

    df_Y = util.loadPriceData(symbols, dates)
    writeDataSetY(df_Y, symbols)
def max_change(startDate, endDate):
    """Print and plot the symbols with the highest recent mean price change.

    Takes the last four rows of the price data, reverses their order, applies
    ``pct_change`` and averages the three resulting rows per symbol. Symbols
    whose mean is greater than 3 are printed in descending order, and the top
    five are plotted on normalized prices.

    NOTE(review): ``pct_change`` yields fractional changes and the rows are
    reversed before it is applied, so ``> 3`` means "more than 300%",
    measured from the later day to the earlier one. That matches the original
    code; confirm whether a 3% threshold in chronological order was intended.

    Args:
        startDate: First date of the range (passed to ``pd.date_range``).
        endDate: Last date of the range (passed to ``pd.date_range``).
    """
    dates = pd.date_range(startDate, endDate)
    symbolList = data.getAllSymbol()
    df = utl.loadPriceData(symbolList, dates)

    # Last four rows, newest first. `.iloc` replaces the removed `.ix`.
    recent = df.iloc[-4:].iloc[::-1]
    changes = recent.pct_change()

    # The first row of pct_change is always NaN, so keep rows 1..3 only.
    day3 = changes.iloc[1:4].fillna(0)
    result = day3.mean()
    print(day3)

    # Rank by label instead of positional indexing into a labelled Series.
    ranked = result.sort_values(ascending=False)
    nameSymbol = ranked[ranked > 3].index.tolist()

    print("List of high percent change\n", nameSymbol)

    df_norm = utl.normalize_data(df)
    utg.plotStock(df_norm, nameSymbol[0:5], startDate, endDate,
                  title='High percent change')
import pickle

import numpy as np
import pandas as pd

import indicator as ind
import dataset as data
import utility as utl
import utilgraph as utg

# Module-level data shared by the functions below: prices of every symbol
# from the fixed start date up to the current date.
startDate = '2017-05-01'
endDate = utl.getCurrentDateStr()
dates = pd.date_range(startDate, endDate)
symbolList = data.getAllSymbol()
df = utl.loadPriceData(symbolList, dates)


def max_sharpe_ratio(periods=5):
    """Rank symbols by their mean rolling Sharpe ratio.

    Uses the module-level ``df`` and ``symbolList``.

    Args:
        periods: Window size forwarded to ``ind.create_dataframe_SR``.

    Returns:
        List of column names whose mean Sharpe ratio is positive, ordered
        from the highest mean to the lowest. Earlier versions computed this
        list but returned nothing.
    """
    df_sr = ind.create_dataframe_SR(df, symbolList, window=periods)
    df_sr.fillna(0, inplace=True)  # missing ratios count as 0

    # Shift the rows up by periods - 1; the shift leaves NaN rows at the
    # tail, which are dropped.
    df_sr = df_sr.shift(-(periods - 1)).dropna(axis=0)

    # Per-column mean. np.mean(DataFrame) is avoided because recent pandas
    # reduces over both axes there and returns a single scalar.
    result = df_sr.mean()

    # Highest mean first; keep only the positive ones.
    ranked = result.sort_values(ascending=False)
    nameSymbol = ranked[ranked > 0].index.tolist()
    return nameSymbol
def getTrainData_3(symbol, startDate, endDate, remove_head=15):
    """Build a classification training set: normalized indicators vs. up/down.

    Features are computed on every row except the last one. The label is 1
    when the following row's daily return of ``symbol`` is positive and 0
    otherwise.

    Args:
        symbol: Column name of the instrument in the loaded price data.
        startDate: First date of the range (passed to ``pd.date_range``).
        endDate: Last date of the range (passed to ``pd.date_range``).
        remove_head: Number of leading rows to drop from both outputs, so the
            warm-up NaN values of the indicators are skipped.

    Returns:
        Tuple ``(Xnorm, Ytrain)``: the feature DataFrame transformed by
        ``preprocessing.Normalizer`` and the 0/1 label Series. ``Xnorm`` keeps
        the row index of the features so it stays aligned with ``Ytrain``.
    """
    dates = pd.date_range(startDate, endDate)
    df = util.loadPriceData([symbol], dates)

    # Drop the last row: it has no "next day" to predict. `.iloc` replaces
    # the removed `.ix` indexer; copy() keeps the in-place rename below from
    # touching a view of `df`.
    close = df.iloc[:-1].copy()
    assert len(df) == len(close) + 1

    ema5 = ind.ema(close, periods=5)
    # NOTE(review): SMA5 is produced with ind.ema(periods=5), exactly as in
    # the original code; confirm whether a simple moving average was intended.
    sma5 = ind.ema(close, periods=5)
    bb1p = ind.get_BBANDS(close, symbol, periods=14, mul=1)
    _, _, macd = ind.average_convergence(close)
    rsi = ind.rsi(close)
    ohlcv = util.load_OHLCV(symbol, dates)
    percent_KD = ind.percent_KD(ohlcv)

    # Give every feature a unique column name before joining.
    close.rename(columns={symbol: 'CLOSE'}, inplace=True)
    ema5.rename(columns={symbol: 'EMA5'}, inplace=True)
    sma5.rename(columns={symbol: 'SMA5'}, inplace=True)
    bb1p.rename(columns={'LOWER': 'LOWER_BB1P'}, inplace=True)
    bb1p.rename(columns={'UPPER': 'UPPER_BB1P'}, inplace=True)
    macd.rename(columns={symbol: 'MACD'}, inplace=True)
    rsi.rename(columns={symbol: 'RSI'}, inplace=True)

    Xtrain = pd.DataFrame(index=close.index)
    features = (
        close['CLOSE'],
        ema5['EMA5'],
        sma5['SMA5'],
        bb1p['LOWER_BB1P'],
        bb1p['UPPER_BB1P'],
        macd['MACD'],
        rsi['RSI'],
        percent_KD,
    )
    for feature in features:
        Xtrain = Xtrain.join(feature)

    # Label: tomorrow's daily return of `symbol` only (SET is skipped). The
    # shift leaves a NaN in the last row, which is dropped.
    daily = ind.daily_returns(df[symbol]).shift(-1).dropna()
    Ytrain = 1 * (daily > 0.0)  # 1 for a positive return, otherwise 0
    assert len(Ytrain) == len(Xtrain)

    # Skip the head rows to avoid the indicators' warm-up NaN values.
    Xtrain = Xtrain.iloc[remove_head:]
    Ytrain = Ytrain.iloc[remove_head:]

    normalizer = preprocessing.Normalizer().fit(Xtrain)
    # Keep the original row index; without it the frame gets a fresh
    # 0..n-1 index that no longer matches Ytrain.
    Xnorm = pd.DataFrame(normalizer.transform(Xtrain),
                         columns=Xtrain.columns, index=Xtrain.index)
    return Xnorm, Ytrain