def getTrainData_1(symbol, startDate, endDate, periods=14, remove_head=19):
    """Build the feature matrix and up-trend labels for one symbol.

    Loads close prices, OHLCV and volume data for ``symbol`` between
    ``startDate`` and ``endDate``, derives several indicators through the
    ``ind`` module and joins them into a single feature frame.

    Args:
        symbol: Column name / ticker to build the data set for.
        startDate: First date passed to ``pd.date_range``.
        endDate: Last date passed to ``pd.date_range``.
        periods: Number of trailing rows removed from the price and volume
            frames before the indicators are computed; also forwarded to
            ``ind.isUpTrend``.
        remove_head: Number of leading rows dropped from every returned
            object (the original comment says this avoids NaN values at the
            head of the indicators).

    Returns:
        Tuple ``(Xtrain, Ydigit, price_close)`` where ``Xtrain`` holds the
        joined features, ``Ydigit`` is ``1 * upTrend[symbol]`` (True -> 1,
        False -> 0) and ``price_close`` is the sliced close price frame.

    Raises:
        AssertionError: If the sliced volume and price frames differ in
            length.
    """
    dates = pd.date_range(startDate, endDate)
    df = util.loadPriceData([symbol], dates)
    util.fill_missing_values(df)

    # Drop the latest `periods` rows (14 by default); they are the horizon
    # used to decide whether a price makes a new high or not.
    # `.ix` was removed in pandas 1.0; `.iloc` is the positional equivalent
    # (assumes the frames are indexed by date, not by integers -- confirm).
    df_sliced = df.iloc[0:len(df) - periods]
    price_close = pd.DataFrame(df_sliced[symbol])
    set_close = pd.DataFrame(df_sliced['SET'])

    rsi = ind.rsi(price_close) / 100  # normalize
    myRatio = ind.get_myRatio(price_close)
    daily = ind.daily_returns(price_close) * 100  # normalize
    _, _, macd = ind.average_convergence(price_close)

    ohlcv = util.load_OHLCV(symbol, dates)
    percent_KD = ind.percent_KD(ohlcv) / 100  # normalize
    c2o = ind.daily_returns_2(ohlcv) * 100  # normalize

    volume = util.loadVolumeData([symbol], dates)
    # Drop the latest `periods` rows so volume lines up with df_sliced.
    volume_sliced = volume.iloc[0:len(volume) - periods]
    assert len(volume_sliced) == len(df_sliced)

    obv = ind.OBV(volume_sliced, df_sliced)
    obv_rsi = ind.rsi(obv) / 100  # momentum of OBV via RSI, normalized
    set_rsi = ind.rsi(set_close) / 100  # momentum of SET via RSI, normalized

    # Give every indicator a distinct column name before joining.
    # NOTE: the Sharpe-ratio feature (ind.create_dataframe_SR -> 'SR') is
    # currently disabled and therefore not computed here.
    rsi = rsi.rename(columns={symbol: 'RSI'})
    myRatio = myRatio.rename(columns={symbol: 'MY'})
    daily = daily.rename(columns={symbol: 'DAILY'})
    macd = macd.rename(columns={symbol: 'MACD'})
    obv_rsi = obv_rsi.rename(columns={symbol: 'OBV_RSI'})
    set_rsi = set_rsi.rename(columns={'SET': 'SET_RSI'})

    # Join order defines the column order of the feature matrix.
    Xtrain = pd.DataFrame(index=df_sliced.index)
    Xtrain = Xtrain.join(rsi['RSI'])
    Xtrain = Xtrain.join(percent_KD['%K'])
    Xtrain = Xtrain.join(myRatio['MY'])
    Xtrain = Xtrain.join(daily['DAILY'])
    Xtrain = Xtrain.join(macd['MACD'])
    Xtrain = Xtrain.join(c2o)
    Xtrain = Xtrain.join(obv_rsi['OBV_RSI'])
    Xtrain = Xtrain.join(set_rsi)

    # NOTE(review): the labels are computed from the unsliced `df`; whether
    # ind.isUpTrend trims the last `periods` rows itself is not visible
    # here -- confirm that Xtrain and Ydigit end up with the same length.
    upTrend = ind.isUpTrend(df, symbol, periods=periods)
    # Multiplying by 1 converts True to 1 (up trend) and False to 0.
    Ydigit = 1 * upTrend[symbol]

    # Skip the head rows to avoid NaN values.
    Xtrain = Xtrain.iloc[remove_head:]
    Ydigit = Ydigit.iloc[remove_head:]
    price_close = price_close.iloc[remove_head:]
    return Xtrain, Ydigit, price_close
def writeDataSetY(df, symbols, predict=0, window=20, output_dir=output_dataset):
    """Write the strong-Sharpe-ratio label table to ``@Dataset_Y.csv``.

    Args:
        df: Price frame; every column is treated as a symbol.
        symbols: Ignored -- the value is overwritten with ``df.columns.values``
            (kept in the signature for existing callers).
        predict: Number of forward days added to the label shift.
        window: Number of backward days used for the label shift.
        output_dir: Directory that receives ``@Dataset_Y.csv``.

    Returns:
        None. The result is written to disk with the index labelled
        ``<DTYYYYMMDD>``.
    """
    # The caller-supplied `symbols` is replaced by all columns of `df`.
    symbols = df.columns.values
    df_sr = ind.create_dataframe_SR(df, symbols)
    df_result = ind.isStrongSR(df_sr)

    # Original author's note: by default, use data of 20 days (backward) to
    # predict 15 days (forward), to know whether the Sharpe ratio stays
    # above 0 for 10 consecutive days or not.
    # NOTE(review): the defaults here are window=20, predict=0, which does
    # not match the "15 days" in that note -- confirm the intended values.
    shift = window + predict - 1
    df_result = df_result.shift(-1 * shift)

    # Shifting up leaves NaN rows at the tail; drop them. `axis` is passed
    # by keyword because pandas >= 2.0 rejects it as a positional argument.
    df_result = df_result.dropna(axis=0)
    df_result.to_csv(join(output_dir, "@Dataset_Y.csv"), index_label='<DTYYYYMMDD>')
def max_sharpe_ratio(periods=5):
    """Rank symbols by mean Sharpe ratio, save the ranking and plot the top 5.

    Reads the module-level ``df``, ``symbolList``, ``startDate`` and
    ``endDate``. Symbols whose mean Sharpe ratio is greater than 0 are
    listed in descending order, pickled to ``symbol_list.p`` in the current
    working directory, printed, and the first five are plotted.

    Args:
        periods: Window passed to ``ind.create_dataframe_SR``; the result is
            also shifted up by ``periods - 1`` rows.

    Returns:
        None.
    """
    df_sr = ind.create_dataframe_SR(df, symbolList, window=periods)
    df_sr.fillna(0, inplace=True)  # fill NaN with 0
    df_sr = df_sr.shift(-(periods - 1))
    # Drop the NaN rows the shift leaves at the tail. `axis` is passed by
    # keyword because pandas >= 2.0 rejects it as a positional argument.
    df_sr.dropna(axis=0, inplace=True)

    # Per-column mean. An explicit axis is used instead of np.mean(df_sr),
    # which reduces over both axes on pandas >= 2.0.
    mean_sr = df_sr.mean(axis=0)
    mean_values = mean_sr.values

    # argsort returns the positions that sort ascending, so the largest
    # mean is last; walk it backwards for a descending ranking.
    ascending_positions = np.argsort(mean_values)
    nameSymbol = [
        mean_sr.index[position]
        for position in ascending_positions[::-1]
        if mean_values[position] > 0
    ]

    # Save to a dump file; the context manager guarantees the handle is
    # flushed and closed.
    with open("symbol_list.p", "wb") as dump_file:
        pickle.dump(nameSymbol, dump_file)
    print("List max sharpe ratio\n", nameSymbol)

    df_norm = utl.normalize_data(df)
    utg.plotStock(df_norm, nameSymbol[0:5], startDate, endDate, 'High sharpe ratio')