def getNeedToUpdateDaysOfFactor(self, code, factor):
    """Return the date range of ``factor`` that still needs computing for ``code``.

    The end of the range is the earlier of the delisting date and the trade
    day preceding today.  The start is the listing date, or the trade day
    following the last stored date when that is later.

    Args:
        code: Security code passed on to ``setIPODate`` and ``getLastDate``.
        factor: Factor name passed on to ``getLastDate``.

    Returns:
        A two-element list ``[startDate, endDate]``.  The start may be later
        than the end; no check for that is made here.
    """
    # setIPODate is relied on to populate self.listDate / self.delistDate.
    self.setIPODate(code)
    firstListed, lastListed = self.listDate, self.delistDate

    todayStr = datetime.datetime.now().strftime("%Y%m%d")
    rangeEnd = min(lastListed,
                   TradedayDataProcess.getPreviousTradeday(todayStr))

    stored = self.getLastDate(code, factor)
    if stored == EMPTY_STRING:
        # Nothing stored yet: start from the listing date.
        rangeStart = firstListed
    else:
        rangeStart = max(firstListed,
                         TradedayDataProcess.getNextTradeday(stored))
    return [rangeStart, rangeEnd]
def __computerFactor(self, code, startDate, endDate):
    """Build a per-trade-day table of index membership and weight for ``code``.

    For each of the three indices (SSE50, HS300, CSI500) the ``exists``
    column of ``getStockBelongs`` and the ``weight`` column of
    ``getStockPropertyInIndex`` are attached to a frame indexed by trade
    date.  Values are aligned on that index.

    Returns:
        DataFrame with columns ``date, is50, is300, is500, weight50,
        weight300, weight500``.
    """
    component = IndexComponentDataProcess()
    # (membership column, weight column, index identifier)
    indexSpecs = (
        ('is50', 'weight50', SSE50),
        ('is300', 'weight300', HS300),
        ('is500', 'weight500', CSI500),
    )

    # All membership look-ups run first, then all weight look-ups.
    membership = [
        component.getStockBelongs(code, indexId, startDate, endDate)
        for _, _, indexId in indexSpecs
    ]
    weights = [
        IndexComponentDataProcess.getStockPropertyInIndex(
            code, indexId, startDate, endDate)
        for _, _, indexId in indexSpecs
    ]

    tradedays = TradedayDataProcess.getTradedays(startDate, endDate)
    frame = pd.DataFrame(data=tradedays).set_index('date', drop=True)

    for (flagColumn, _, _), belongs in zip(indexSpecs, membership):
        frame[flagColumn] = belongs['exists']
    for (_, weightColumn, _), weight in zip(indexSpecs, weights):
        frame[weightColumn] = weight['weight']

    ordered = (['date']
               + [spec[0] for spec in indexSpecs]
               + [spec[1] for spec in indexSpecs])
    return frame.reset_index(drop=False)[ordered]

########################################################################
def analysisPerCode(self, code, startDate, endDate, feature, target):
    """Return the correlation matrix between ``feature`` and ``target`` for one code.

    Factor data for every trade day in ``[startDate, endDate]`` is fetched
    through ``useJobLibToGetFactorDataCodeByCode``.  The shape of the
    fetched frame is printed as a side effect.

    Returns:
        The 2x2 ``DataFrame.corr()`` result for the two requested columns.
    """
    dayList = list(TradedayDataProcess.getTradedays(startDate, endDate))
    # NOTE(review): the literal 100 is forwarded as-is; its meaning is
    # defined by useJobLibToGetFactorDataCodeByCode (batch size? confirm).
    factorFrame = self.useJobLibToGetFactorDataCodeByCode(dayList, 100, code)
    pair = factorFrame[[feature, target]]
    correlation = pair.corr()
    print(factorFrame.shape)
    return correlation
def prepareData(self,codeList,startDate,endDate):
    """Export one factor-only HDF5 file per trade day under ``self.path``.

    For every trade day in ``[startDate, endDate]`` the combined data of
    ``codeList`` is loaded and restricted to ticks in
    ``['093000000', '145700000')``.  The raw tick / order-book columns are
    then dropped and the result is written to ``<self.path>/<day>.h5``
    under key ``data``.  An existing file for the day is replaced.

    The kept columns follow the order of the source frame, with
    ``code, date, tick`` moved to the end.  The previous implementation
    derived them through a ``set``, which made the stored column order
    depend on string hashing and therefore vary between runs.

    Args:
        codeList: Codes forwarded to ``parallelizationGetDataByDate``.
        startDate: First day of the range passed to ``getTradedays``.
        endDate: Last day of the range passed to ``getTradedays``.

    Returns:
        None.  Rows containing NaN are only reported with a warning, and
        write failures are logged and skipped (best effort per day).
    """
    # Raw tick / order-book columns that must not end up in the factor file.
    tickColumns = frozenset([
        'code', 'date', 'tick', 'lastPrice',
        'S1', 'S2', 'S3', 'S4', 'S5', 'S6', 'S7', 'S8', 'S9', 'S10',
        'B1', 'B2', 'B3', 'B4', 'B5', 'B6', 'B7', 'B8', 'B9', 'B10',
        'SV1', 'SV2', 'SV3', 'SV4', 'SV5',
        'SV6', 'SV7', 'SV8', 'SV9', 'SV10',
        'BV1', 'BV2', 'BV3', 'BV4', 'BV5',
        'BV6', 'BV7', 'BV8', 'BV9', 'BV10',
        'volume', 'amount', 'volumeIncrease', 'amountIncrease', 'midPrice',
    ])
    keyColumns = ['code', 'date', 'tick']

    tradedays = TradedayDataProcess.getTradedays(startDate, endDate)
    myfactor = tickFactorsProcess()
    for day in tradedays:
        data = myfactor.parallelizationGetDataByDate(codeList, day)
        data = data[(data['tick'] >= '093000000') & (data['tick'] < '145700000')]

        # Keep source order; dict.fromkeys de-duplicates like the old set did.
        factorColumns = list(dict.fromkeys(
            col for col in data.columns if col not in tickColumns))
        data = data[factorColumns + keyColumns]

        if data.isna().any(axis=1).any():
            logger.warning(f'factorData of date {day} has Nan!!!')

        # Outlier trimming of selected factors is currently disabled:
        #   data = self.dataSelect(data, 0.2)

        # Store the data day by day, replacing any previous export.
        fileName = os.path.join(self.path, str(day) + '.h5')
        if os.path.exists(fileName):
            os.remove(fileName)
        try:
            with pd.HDFStore(fileName,'a',complib='blosc:zstd',append=True,complevel=9) as store:
                store.append('data',data,append=True,format="table",data_columns=data.columns)
        except Exception as excp:
            # Deliberate best effort: one bad day must not abort the export.
            logger.error(f'{fileName} error! {excp}')
def getLastTradedayTickData(self, code, date):
    """Return tick data of the most recent earlier trade day that has any rows.

    Trade days between ``getPreviousTradeday(date, 250)`` and ``date`` are
    walked from newest to oldest, skipping any day that is not strictly
    before ``date``.  The first non-empty result of
    ``getTickShotDataFromInfluxdbServer`` is returned.

    Returns:
        The first non-empty frame found.  Otherwise the last (empty) frame
        fetched, or a fresh empty ``DataFrame`` when no day was queried.
    """
    lookbackStart = TradedayDataProcess.getPreviousTradeday(date, 250)
    candidates = list(TradedayDataProcess.getTradedays(lookbackStart, date))

    snapshot = pd.DataFrame()
    for day in candidates[::-1]:
        if not day < date:
            continue
        # Local-file alternative: TickDataProcess().getDataByDateFromLocalFile(code, day)
        snapshot = TickDataProcess().getTickShotDataFromInfluxdbServer(
            code, day)
        if snapshot.shape[0] > 0:
            break
    return snapshot
def getDataFromLocal(self, path, startDate, endDate):
    """Load the daily data under ``path`` for a date range and concatenate it.

    One ``getDataFromLocalDaily(path, day)`` job is scheduled per trade day
    on joblib's ``multiprocessing`` backend with ``MYJOBS`` workers.

    Returns:
        ``pd.concat`` of the per-day results, in trade-day order.

    Raises:
        ValueError: From ``pd.concat`` when the range contains no trade day.
    """
    dayList = list(TradedayDataProcess.getTradedays(startDate, endDate))
    with parallel_backend("multiprocessing", n_jobs=MYJOBS):
        dailyFrames = Parallel()(
            delayed(self.getDataFromLocalDaily)(path, day)
            for day in dayList)
    return pd.concat(dailyFrames)

########################################################################
def __computerFactor(self, code, startDate, endDate):
    """Build a per-trade-day table of the industry classification of ``code``.

    Returns:
        DataFrame with columns ``date, industry, industryName``.  The last
        two are taken from the ``industry`` and ``name`` columns of
        ``IndustryClassification.getIndustryByCode`` and aligned on the
        date index.  An empty ``DataFrame`` is returned when no
        classification rows are available.
    """
    tradedays = TradedayDataProcess.getTradedays(startDate, endDate)
    frame = pd.DataFrame(data=tradedays).set_index('date', drop=True)

    classification = IndustryClassification.getIndustryByCode(
        code, startDate, endDate)
    if len(classification) == 0:
        return pd.DataFrame()

    frame['industry'] = classification['industry']
    frame['industryName'] = classification['name']
    return frame.reset_index(drop=False)[['date', 'industry', 'industryName']]
def __computerFactor(self, code, startDate, endDate):
    """Build a per-trade-day table of free-float shares and market value.

    Values come from the ``dailyDerivative`` K-line data set and are
    aligned on the trade-date index.

    Returns:
        DataFrame with columns ``date, freeShares, freeMarketValue``, or an
        empty ``DataFrame`` when the derivative data has no rows.
    """
    tradedays = TradedayDataProcess.getTradedays(startDate, endDate)
    frame = pd.DataFrame(data=tradedays).set_index('date', drop=True)

    derivative = KLineDataProcess('dailyDerivative').getDataByDate(
        code, startDate, endDate)
    if len(derivative) == 0:
        return pd.DataFrame()

    # Re-index the fetched frame in place so the assignments align by date.
    derivative.set_index('date', inplace=True)
    for column in ('freeShares', 'freeMarketValue'):
        frame[column] = derivative[column]
    return frame.reset_index(drop=False)[['date', 'freeShares', 'freeMarketValue']]
def getDataFromLocal(self,startDate,endDate):
    """Load the daily data for a date range and concatenate it.

    One ``getDataFromLocalDaily(day)`` job is scheduled per trade day on
    joblib's ``multiprocessing`` backend with ``JobLibUtility.myjobs``
    workers.

    Returns:
        ``pd.concat`` of the per-day results, in trade-day order.

    Raises:
        ValueError: From ``pd.concat`` when the range contains no trade day.
    """
    dayList = list(TradedayDataProcess.getTradedays(startDate, endDate))
    with parallel_backend("multiprocessing", n_jobs=JobLibUtility.myjobs):
        dailyFrames = Parallel()(
            delayed(self.getDataFromLocalDaily)(day) for day in dayList)
    # A sequential variant that opened <self.path>/<day>.h5 through
    # pd.HDFStore for each day used to live here; it was already disabled.
    return pd.concat(dailyFrames)
def __computerFactor(self, code, startDate, endDate):
    """Compute lagged return and rolling volatility factors for ``code``.

    Daily K-line data is loaded from 100 trade days before ``startDate``,
    but never before the listing date, through ``endDate``.  The derived
    columns are:

    * ``yesterdayReturn`` - ``close / preClose - 1`` shifted by one row,
      with the return blanked on rows whose status is ``'停牌'`` and on the
      listing date.
    * ``closeStd20`` - standard deviation of ``yesterdayReturn`` over a
      20-row window (at least 16 observations).
    * ``ts_closeStd20`` - rank of the latest ``closeStd20`` within a 50-row
      window (at least 20 observations), divided by the window length.

    Returns:
        DataFrame with columns ``date, yesterdayReturn, closeStd20,
        ts_closeStd20`` covering the extended range (the warm-up rows are
        not trimmed), or an empty ``DataFrame`` when no K-line rows exist.
    """
    def _lastRankRatio(window):
        # Rank of the newest value inside the window, scaled by window size.
        return pd.Series(window).rank().iloc[-1] / len(window)

    # Go back 100 trade days to have enough history for the rolling windows.
    lookbackStart = TradedayDataProcess.getPreviousTradeday(startDate, 100)
    listDate, _ = super().setIPODate(code)
    lookbackStart = max(lookbackStart, listDate)

    bars = KLineDataProcess('daily').getDataByDate(
        code, lookbackStart, endDate)
    if len(bars) == 0:
        return pd.DataFrame()

    bars['return'] = bars['close'] / bars['preClose'] - 1
    unusable = (bars['status'] == '停牌') | (bars['date'] == listDate)
    bars.loc[unusable, 'return'] = np.nan

    bars['yesterdayReturn'] = bars['return'].shift(1)
    bars['closeStd20'] = (
        bars['yesterdayReturn'].rolling(20, min_periods=16).std())
    bars['ts_closeStd20'] = (
        bars['closeStd20'].rolling(50, min_periods=20)
        .apply(_lastRankRatio, raw=True))

    return bars[['date', 'yesterdayReturn', 'closeStd20', 'ts_closeStd20']]
def singleCode(self, code, startDate, endDate, parameters):
    """Run the private strategy on every trade day of ``code`` and collect the trades.

    Days whose local tick-and-factor file yields no rows are skipped.

    Args:
        code: Security code.
        startDate: First day of the range passed to ``getTradedays``.
        endDate: Last day of the range passed to ``getTradedays``.
        parameters: Not read by this method.

    Returns:
        The concatenation of the per-day strategy results, or an empty
        ``DataFrame`` when no day produced a result.
    """
    dayList = list(TradedayDataProcess().getTradedays(startDate, endDate))
    loader = tickFactorsProcess()
    # Column list handed to the strategy as its second argument.
    select = [
        'code', 'date', 'tick', 'B1', 'S1', 'midIncreasePrevious3m',
        'closeStd20', 'ts_closeStd20', 'ts_buySellForceChange',
        'ts_buySellVolumeRatio5', 'ts_buySellVolumeRatio2', 'preClose',
        'increaseToday', 'weight300', 'weight500'
    ]

    dailyTrades = []
    for day in dayList:
        dayData = loader.getTickDataAndFactorsByDateFromLocalFile(code, day)
        if not dayData.shape[0] > 0:
            continue
        dailyTrades.append(self.__strategy(dayData, select))

    if not dailyTrades:
        return pd.DataFrame()
    return pd.concat(dailyTrades)
def myRNN(self,startDate,endDate,testStart,document):
    """Print the factors most correlated with each forward mid-price target.

    Daily ``<day>.h5`` files under ``LocalFileAddress/document`` are loaded
    and split into a training part (days before ``str(testStart)``) and a
    test part.  The correlation matrix of the training part is rounded to
    three decimals.  For each target column, the entries with absolute
    correlation of at least 0.03 are printed in ascending order.

    Returns:
        None; the method only prints.

    Raises:
        ValueError: From ``pd.concat`` when either part has no day.
    """
    tradedays = TradedayDataProcess.getTradedays(startDate, endDate)
    dataDir = os.path.join(LocalFileAddress, document)

    trainFrames, testFrames = [], []
    for day in tradedays:
        dayFrame = super().getData(os.path.join(dataDir, str(day) + '.h5'))
        bucket = trainFrames if day < str(testStart) else testFrames
        bucket.append(dayFrame)

    trainSet = pd.concat(trainFrames)
    # The test part is assembled but not used any further in this method.
    testSet = pd.concat(testFrames)

    corrMatrix = round(trainSet.corr(), 3)
    targets = (
        'midIncreaseNext1m',
        'midIncreaseMinNext1m',
        'midIncreaseMaxNext1m',
        'midIncreaseMinNext2m',
        'midIncreaseMaxNext2m',
        'midIncreaseMinNext5m',
        'midIncreaseMaxNext5m',
    )
    for target in targets:
        strong = corrMatrix[target].abs() >= 0.03
        print(corrMatrix.loc[strong, target].sort_values())
def singleCode(self, code: str, startDate: str, endDate: str, parameters=[]):
    """Score every trade day of ``code`` with two fixed linear models.

    For each day the tick snapshot, the tick factors and the daily factors
    are merged.  ``maxPredict`` and ``minPredict`` are computed as
    ``features . weights + intercept`` and smoothed with an exponentially
    weighted mean (``span=2``).  The correlation between the predictions
    and the realised 5-minute targets is printed.

    The strategy call is currently disabled, so no trades are collected and
    the method returns an empty ``DataFrame``.  The fee and cash
    post-processing is kept for when the strategy is re-enabled, and is now
    guarded so it never runs on an empty result (it needs ``price``,
    ``direction`` and ``volume`` columns).

    Args:
        code: Security code.
        startDate: First day of the range passed to ``getTradedays``.
        endDate: Last day of the range passed to ``getTradedays``.
        parameters: Ignored; the name is rebound to a fresh dict for each
            day.  The default list is never mutated.

    Returns:
        DataFrame of trades with ``code``, ``fee``, ``cashChange`` and
        ``amount`` added, or an empty ``DataFrame`` when there are none.
    """
    # Model inputs; the order must match the weight vectors below.
    features = [
        'buyForce', 'sellForce', 'buySellForceChange', 'buySellSpread',
        'differenceHighLow3m', 'midToVwap', 'midToVwap3m',
        'midPrice3mIncrease', 'midPriceBV3m', 'midInPrevious3m',
        'midStd60', 'increaseToday', 'closeStd20', 'buySellVolumeRatio2',
        'buySellWeightedVolumeRatio2', 'buySellVolumeRatio5',
        'buySellWeightedVolumeRatio5', 'buySellVolumeRatio10',
        'buySellWeightedVolumeRatio10'
    ]
    maxWeight = np.array([
        0.03218688, -0.0121024, -0.00970715, 0.48172206, 0.42610642,
        0.10048948, -0.05574053, 0.08212702, -0.12357012, -0.00123216,
        0.09529259, 0.00509518, 0.14970625, -0.00291313, 0.00402094,
        -0.00452788, 0.00286216, 0.0020172, -0.00235546
    ])
    minWeight = np.array([
        -0.00385887, -0.01163938, 0.0043455, -0.01114819, -0.34286923,
        0.08314041, 0.00154458, 0.12249813, -0.02194375, -0.00038749,
        -0.02217015, 0.00610296, -0.09264385, -0.0020065, 0.00249547,
        -0.00324293, 0.00501176, 0.00389697, -0.00294958
    ])
    maxIntercept = 0.00079871
    minIntercept = -0.00155935

    days = list(TradedayDataProcess().getTradedays(startDate, endDate))
    tickFactors = tickFactorsProcess()
    tick = TickDataProcess()
    daily = dailyFactorsProcess()
    dailyKLine = KLineDataProcess('daily')

    dailyTrades = []
    for day in days:
        # Local-file alternative: tick.getDataByDateFromLocalFile(code, day)
        tickData = tick.getTickShotDataFromInfluxdbServer(code, day)
        if tickData.shape[0] == 0:
            continue

        data = tickFactors.getTickFactorsOnlyByDateFromLocalFile(code, day)
        data = pd.merge(data,
                        tickData,
                        how='left',
                        left_index=True,
                        right_index=True)

        # Broadcast the single-row daily factors onto every tick row.
        dailyData = daily.getSingleStockDailyFactors(code, day, day)
        for col in dailyData.columns:
            if col not in ['date', 'code']:
                data[col] = dailyData[col].iloc[0]

        dailyKLineData = dailyKLine.getDataByDate(code, day, day)
        data['preClose'] = dailyKLineData['preClose'].iloc[0]
        data['increaseToday'] = data['midPrice'] / data['preClose'] - 1

        # Skip days on which weight300 is NaN for the code.
        if np.isnan(data['weight300'].iloc[0]):
            continue
        maxPosition = round(
            data['weight300'].iloc[0] * 100000000 /
            data['preClose'].iloc[0] / 100, -2)

        A = self.dataSelect(data[features], 0.2).values
        data['maxPredict'] = A.dot(maxWeight) + maxIntercept
        data['minPredict'] = A.dot(minWeight) + minIntercept
        data['maxPredict'] = data['maxPredict'].ewm(span=2,
                                                    ignore_na=True,
                                                    adjust=True).mean()
        data['minPredict'] = data['minPredict'].ewm(span=2,
                                                    ignore_na=True,
                                                    adjust=True).mean()
        data['midPredict'] = (data['maxPredict'] + data['minPredict']) / 2

        m = data[[
            'midIncreaseMinNext5m', 'midIncreaseMaxNext5m', 'maxPredict',
            'minPredict', 'midPredict'
        ]]
        print(m.corr())

        mycolumns = list(tickData.columns)
        mycolumns.append('maxPredict')
        mycolumns.append('minPredict')
        data = data[mycolumns]
        parameters = {
            'maxPosition': maxPosition,
            'longOpen': 0.015,
            'shortOpen': -0.015,
            'longClose': 0.01,
            'shortClose': -0.01,
            'transactionRatio': 0.2
        }
        # Strategy execution is disabled for now:
        #   trade0 = self.strategy(data, parameters)
        #   dailyTrades.append(trade0)

    if not dailyTrades:
        return pd.DataFrame()
    trade = pd.concat(dailyTrades)
    if trade.shape[0] == 0:
        return pd.DataFrame()

    trade['code'] = code
    # Per-unit fee: 0.0001 * price on every trade, plus 0.001 * price on
    # sells (presumably commission and stamp duty - confirm).
    trade['fee'] = trade['price'] * 0.0001
    selectBuy = trade['direction'] == 'buy'
    selectSell = trade['direction'] == 'sell'
    trade.loc[selectSell, 'fee'] = (trade['fee'] +
                                    trade['price'] * 0.001)[selectSell]
    trade.loc[selectBuy, 'cashChange'] = (
        (-trade['price'] - trade['fee']) * trade['volume'])[selectBuy]
    trade.loc[selectSell, 'cashChange'] = (
        (trade['price'] - trade['fee']) * trade['volume'])[selectSell]
    trade['amount'] = trade['price'] * trade['volume']
    return trade
def singleCode(self, code: str, startDate: str, endDate: str, parameters=[]):
    """Back-test ``code`` day by day using predictions of a stored Keras model.

    For each day the local tick data, the tick factors and the daily
    factors are merged.  The model stored at
    ``LocalFileAddress/tmp/dnn001_midIncreaseNext5m.h5`` predicts a
    ``(min, max)`` pair per row.  Both are smoothed with an exponentially
    weighted mean (``span=2``) and handed to ``self.strategy`` together
    with the day's trading parameters.

    The model is loaded from disk only once, on the first day that needs
    it, instead of once per trade day.  When no day qualifies the file is
    never opened.

    Args:
        code: Security code.
        startDate: First day of the range passed to ``getTradedays``.
        endDate: Last day of the range passed to ``getTradedays``.
        parameters: Ignored; the name is rebound to a fresh dict for each
            day.  The default list is never mutated.

    Returns:
        DataFrame of trades with ``code``, ``fee``, ``cashChange`` and
        ``amount`` added, or an empty ``DataFrame`` when there are none.
    """
    # Model inputs, in the column order the array is built with.
    features = [
        'buyForce', 'sellForce', 'buySellForceChange', 'buySellSpread',
        'differenceHighLow3m', 'midToVwap', 'midToVwap3m',
        'midPrice3mIncrease', 'midPriceBV3m', 'midInPrevious3m',
        'midStd60', 'increaseToday', 'closeStd20', 'buySellVolumeRatio2',
        'buySellWeightedVolumeRatio2', 'buySellVolumeRatio5',
        'buySellWeightedVolumeRatio5', 'buySellVolumeRatio10',
        'buySellWeightedVolumeRatio10'
    ]

    days = list(TradedayDataProcess().getTradedays(startDate, endDate))
    tickFactors = tickFactorsProcess()
    tick = TickDataProcess()
    daily = dailyFactorsProcess()
    dailyKLine = KLineDataProcess('daily')
    file = os.path.join(LocalFileAddress, 'tmp',
                        'dnn001_midIncreaseNext5m.h5')
    model = None  # loaded lazily, at most once

    dailyTrades = []
    for day in days:
        tickData = tick.getDataByDateFromLocalFile(code, day)
        if tickData.shape[0] == 0:
            continue

        data = tickFactors.getTickFactorsOnlyByDateFromLocalFile(code, day)
        data = pd.merge(data,
                        tickData,
                        how='left',
                        left_index=True,
                        right_index=True)

        # Broadcast the single-row daily factors onto every tick row.
        dailyData = daily.getSingleStockDailyFactors(code, day, day)
        for col in dailyData.columns:
            if col not in ['date', 'code']:
                data[col] = dailyData[col].iloc[0]

        dailyKLineData = dailyKLine.getDataByDate(code, day, day)
        data['preClose'] = dailyKLineData['preClose'].iloc[0]
        data['increaseToday'] = data['midPrice'] / data['preClose'] - 1

        # Skip days on which weight300 is NaN for the code.
        if np.isnan(data['weight300'].iloc[0]):
            continue
        maxPosition = round(
            data['weight300'].iloc[0] * 100000000 /
            data['preClose'].iloc[0] / 100, -2)

        A = self.dataSelect(data[features], 0.2).values

        warnings.filterwarnings('ignore')
        if model is None:
            model = keras.models.load_model(file)
        predictArray = model.predict(A, verbose=0)

        # Column 0 of the prediction is used as the min, column 1 as the max.
        data['maxPredict'] = predictArray[:, 1]
        data['minPredict'] = predictArray[:, 0]
        data['maxPredict'] = data['maxPredict'].ewm(span=2,
                                                    ignore_na=True,
                                                    adjust=True).mean()
        data['minPredict'] = data['minPredict'].ewm(span=2,
                                                    ignore_na=True,
                                                    adjust=True).mean()
        data['midPredict'] = (data['maxPredict'] + data['minPredict']) / 2

        pd.set_option('display.max_rows', None)

        mycolumns = list(tickData.columns)
        mycolumns.extend([
            'maxPredict', 'minPredict', 'midPredict', 'increaseToday',
            'midInPrevious3m'
        ])
        data = data[mycolumns]
        parameters = {
            'maxPosition': maxPosition,
            'longOpen': 0.015,
            'shortOpen': -0.015,
            'longClose': 0.01,
            'shortClose': -0.01,
            'transactionRatio': 0.1
        }
        dailyTrades.append(self.strategy(data, parameters))

    if not dailyTrades:
        return pd.DataFrame()
    trade = pd.concat(dailyTrades)
    if trade.shape[0] == 0:
        return pd.DataFrame()

    trade['code'] = code
    # Per-unit fee: 0.0001 * price on every trade, plus 0.001 * price on
    # sells (presumably commission and stamp duty - confirm).
    trade['fee'] = trade['price'] * 0.0001
    selectBuy = trade['direction'] == 'buy'
    selectSell = trade['direction'] == 'sell'
    trade.loc[selectSell, 'fee'] = (trade['fee'] +
                                    trade['price'] * 0.001)[selectSell]
    trade.loc[selectBuy, 'cashChange'] = (
        (-trade['price'] - trade['fee']) * trade['volume'])[selectBuy]
    trade.loc[selectSell, 'cashChange'] = (
        (trade['price'] - trade['fee']) * trade['volume'])[selectSell]
    trade['amount'] = trade['price'] * trade['volume']
    return trade