def getTickDataAndFactorsByDateFromLocalFile(self, code, date, factors=TICKFACTORSUSED):
    """Assemble tick factors, tick quotes and daily fields for one stock-day.

    Each entry of ``factors`` is read through ``factorBase.getDataFromLocalFile``
    (keyed by ``entry['factor']``) and left-merged on the index. The tick
    quotes from ``TickDataProcess`` are then left-merged in, every daily
    factor column except ``date``/``code``/``return`` is broadcast onto all
    rows, and ``preClose`` / ``increaseToday`` are derived from the daily
    K-line.

    Args:
        code: Stock identifier passed through to the data readers.
        date: Trading day passed through to the data readers.
        factors: Iterable of mappings, each providing a ``'factor'`` key.

    Returns:
        The merged DataFrame. It is returned as-is (empty) when the merge
        yields no rows. Otherwise rows from the first tick where ``B1`` or
        ``S1`` equals 0 onward are removed (presumably the stock is at a
        price limit from that tick on -- confirm).
    """
    factor_reader = factorBase()
    combined = pd.DataFrame()
    for spec in factors:
        piece = factor_reader.getDataFromLocalFile(code, date, spec['factor'])
        if len(combined) == 0:
            # Nothing collected so far: start from this factor's data.
            combined = piece.copy()
            continue
        if len(piece) != 0:
            combined = pd.merge(combined, piece, how='left',
                                left_index=True, right_index=True)

    quotes = TickDataProcess().getDataByDateFromLocalFile(code, date)
    combined = pd.merge(combined, quotes, how='left',
                        left_index=True, right_index=True)
    if len(combined) == 0:
        return combined

    # Daily factors are single values for the day; copy them onto every tick.
    daily = dailyFactorsProcess().getSingleStockDailyFactors(code, date, date)
    not_copied = ('date', 'code', 'return')
    for name in (c for c in daily.columns if c not in not_copied):
        combined[name] = daily[name].iloc[0]

    kline = KLineDataProcess('daily').getDataByDate(code, date, date)
    combined['preClose'] = kline['preClose'].iloc[0]
    combined['increaseToday'] = combined['midPrice'] / combined['preClose'] - 1

    # Keep only the ticks strictly before the first row with B1 == 0 or S1 == 0.
    limit_rows = combined[combined['B1'].eq(0) | combined['S1'].eq(0)]
    if len(limit_rows) == 0:
        return combined
    first_limit_tick = limit_rows['tick'].iloc[0]
    return combined[combined['tick'] < first_limit_tick]
def updateAllFactorsByCodeAndDate(self, code, date):
    """Compute every missing tick factor for one stock-day.

    For each entry in ``TICKFACTORSNEEDTOUPDATE`` the factor class named by
    ``entry['module']`` / ``entry['class']`` is imported and instantiated.
    When its ``checkLocalFile`` reports the factor as absent, the tick data
    for the day is loaded (at most once per call) and handed to
    ``updateFactor``.

    Args:
        code: Stock identifier; converted with ``str``.
        date: Trading day; converted with ``str``.

    Returns:
        None. Returns early, after logging a warning, when a factor needs
        computing but no tick data is available for the day.
    """
    code, date = str(code), str(date)
    logger.info(f'Compute factors of {code} in {date} start!')

    tick_frame = pd.DataFrame()  # loaded lazily, only if some factor is missing
    for spec in TICKFACTORSNEEDTOUPDATE:
        factor_cls = getattr(importlib.import_module(spec['module']), spec['class'])
        worker = factor_cls()
        already_saved = worker.checkLocalFile(code, date, spec['factor'])
        # Explicit comparison kept: only a result equal to False triggers work.
        if not (already_saved == False):  # noqa: E712
            continue

        if tick_frame.shape[0] == 0:
            tick_frame = TickDataProcess().getDataByDateFromLocalFile(code, date)
            if tick_frame.shape[0] == 0:
                logger.warning(
                    f'There is no tickShots of {code} in {date}')
                return

        worker.updateFactor(code, date, tick_frame)
def saveAllFactorsByCodeAndDate(self, code, date):
    """Persist the factor table of one stock-day to ``<path>/<code>/<date>.h5``.

    The directory name is ``code`` with ``'.'`` replaced by ``'_'`` under
    ``self.path``. If the target file already exists nothing is done.
    Otherwise the factors listed in ``self.factorsUsed`` are loaded,
    left-merged with the tick quotes, enriched with ``increaseToday``,
    filtered, reduced to the non-quote columns and appended to key
    ``'data'`` of the HDF5 file.

    Args:
        code: Stock identifier.
        date: Trading day.

    Returns:
        None. Returns without writing when the file exists, when there is
        no merged data, when no daily K-line row is available, or when no
        rows survive the filters. Write errors are logged, not raised.
    """
    mypath = os.path.join(self.path, str(code).replace('.', '_'))
    HDF5Utility.pathCreate(mypath)
    fileName = os.path.join(mypath, str(date) + '.h5')
    if HDF5Utility.fileCheck(fileName) == True:  # noqa: E712
        # File already exists: nothing to do.
        return

    # Tick factor data.
    mydata = self.getFactorsUsedByDateFromLocalFile(code, date, self.factorsUsed)
    # Tick quote data.
    tickData = TickDataProcess().getDataByDateFromLocalFile(code, date)
    mydata = pd.merge(mydata, tickData, how='left',
                      left_index=True, right_index=True)
    if mydata.shape[0] == 0:
        return

    # Daily K-line data: only preClose is needed, to derive increaseToday.
    dailyKLineData = KLineDataProcess('daily').getDataByDate(code, date, date)
    if dailyKLineData.shape[0] == 0:
        # Without this guard .iloc[0] below would raise IndexError.
        logger.warning(f'There is no daily KLine of {code} in {date}')
        return
    mydata['preClose'] = dailyKLineData['preClose'].iloc[0]
    mydata['increaseToday'] = mydata['midPrice'] / mydata['preClose'] - 1

    # String comparison on 'tick' -- presumably a zero-padded HHMMSSmmm
    # stamp, i.e. keep ticks before 14:57:00.000; TODO confirm the format.
    mydata = mydata[mydata['tick'] < '145700000']

    # Drop the data from the first limit-up/limit-down tick onward
    # (first row where B1 or S1 equals 0).
    ceiling = mydata[(mydata['B1'] == 0) | (mydata['S1'] == 0)]
    if ceiling.shape[0] > 0:
        ceilingTime = ceiling['tick'].iloc[0]
        mydata = mydata[mydata['tick'] < ceilingTime]

    # Keep only derived columns: everything that is neither a raw tick
    # column nor explicitly excluded, in sorted order.
    excludedColumns = [
        'preClose', 'buyVolume2', 'buyVolume5', 'buyVolume10',
        'sellVolume2', 'sellVolume5', 'sellVolume10',
    ]
    dropped = set(tickData.columns).union(excludedColumns)
    mycolumns = sorted(set(mydata.columns).difference(dropped))
    mydata = mydata[mycolumns]
    if mydata.shape[0] == 0:
        return

    logger.info(f'Recording factors of {code} in {date}!')
    try:
        # NOTE(review): the append=True keyword on HDFStore itself is
        # forwarded to the underlying file open call; presumably it has no
        # effect there -- confirm before removing.
        with pd.HDFStore(fileName,
                         'a',
                         complib='blosc:zstd',
                         append=True,
                         complevel=9) as store:
            store.append('data',
                         mydata,
                         append=True,
                         format="table",
                         data_columns=mydata.columns)
    except Exception as excp:
        # Best-effort write: report the failure and carry on.
        logger.error(f'{fileName} error! {excp}')
def singleCode(self, code: str, startDate: str, endDate: str, parameters=None):
    """Run the model-driven intraday strategy for one stock over a date range.

    For every trading day between ``startDate`` and ``endDate`` that has
    tick data, the tick factors, tick quotes and daily factors are combined,
    the Keras model stored in ``tmp/dnn001_midIncreaseNext5m.h5`` predicts a
    (min, max) pair per tick, the predictions are smoothed with an EWM of
    span 2, and ``self.strategy`` is called to produce that day's trades.

    Args:
        code: Stock identifier.
        startDate: First day of the range passed to ``getTradedays``.
        endDate: Last day of the range passed to ``getTradedays``.
        parameters: Accepted for interface compatibility but not read; the
            strategy parameters are built per day inside the loop.

    Returns:
        A DataFrame of all trades with ``code``, ``fee``, ``cashChange`` and
        ``amount`` columns added, or an empty DataFrame when no trade was
        produced.
    """
    days = list(TradedayDataProcess().getTradedays(startDate, endDate))
    tickFactors = tickFactorsProcess()
    tick = TickDataProcess()
    daily = dailyFactorsProcess()
    dailyKLine = KLineDataProcess('daily')
    file = os.path.join(LocalFileAddress, 'tmp',
                        'dnn001_midIncreaseNext5m.h5')

    # Model inputs, in the column order fed to the network.
    features = [
        'buyForce', 'sellForce', 'buySellForceChange', 'buySellSpread',
        'differenceHighLow3m', 'midToVwap', 'midToVwap3m',
        'midPrice3mIncrease', 'midPriceBV3m', 'midInPrevious3m',
        'midStd60', 'increaseToday', 'closeStd20', 'buySellVolumeRatio2',
        'buySellWeightedVolumeRatio2', 'buySellVolumeRatio5',
        'buySellWeightedVolumeRatio5', 'buySellVolumeRatio10',
        'buySellWeightedVolumeRatio10',
    ]
    # Columns handed to the strategy in addition to the raw tick columns.
    strategyExtraColumns = [
        'maxPredict', 'minPredict', 'midPredict', 'increaseToday',
        'midInPrevious3m',
    ]

    model = None  # loaded on first use, then reused for every day
    trades = []
    for day in days:
        tickData = tick.getDataByDateFromLocalFile(code, day)
        if tickData.shape[0] == 0:
            continue

        data = tickFactors.getTickFactorsOnlyByDateFromLocalFile(code, day)
        data = pd.merge(data, tickData, how='left',
                        left_index=True, right_index=True)

        # Daily factors are single values; copy them onto every tick row.
        dailyData = daily.getSingleStockDailyFactors(code, day, day)
        for col in dailyData.columns:
            if col not in ['date', 'code']:
                data[col] = dailyData[col].iloc[0]

        dailyKLineData = dailyKLine.getDataByDate(code, day, day)
        data['preClose'] = dailyKLineData['preClose'].iloc[0]
        data['increaseToday'] = data['midPrice'] / data['preClose'] - 1

        # Skip days on which the stock has no weight300 value.
        if np.isnan(data['weight300'].iloc[0]):
            continue
        # Position cap rounded to the nearest hundred. Presumably: shares
        # for a 100,000,000 portfolio at the stock's percentage index
        # weight -- TODO confirm units of weight300.
        maxPosition = round(
            data['weight300'].iloc[0] * 100000000 /
            data['preClose'].iloc[0] / 100, -2)

        A = self.dataSelect(data[features], 0.2).values

        warnings.filterwarnings('ignore')
        if model is None:
            model = keras.models.load_model(file)
        predictArray = model.predict(A, verbose=0)
        # Output column 0 is the predicted minimum, column 1 the maximum.
        data['maxPredict'] = predictArray[:, 1]
        data['minPredict'] = predictArray[:, 0]
        data['maxPredict'] = data['maxPredict'].ewm(
            span=2, ignore_na=True, adjust=True).mean()
        data['minPredict'] = data['minPredict'].ewm(
            span=2, ignore_na=True, adjust=True).mean()
        data['midPredict'] = (data['maxPredict'] + data['minPredict']) / 2

        # NOTE(review): process-wide pandas display option, apparently left
        # from debugging output; kept so global state is unchanged.
        pd.set_option('display.max_rows', None)

        data = data[list(tickData.columns) + strategyExtraColumns]
        strategyParameters = {
            'maxPosition': maxPosition,
            'longOpen': 0.015,
            'shortOpen': -0.015,
            'longClose': 0.01,
            'shortClose': -0.01,
            'transactionRatio': 0.1,
        }
        trades.append(self.strategy(data, strategyParameters))

    if len(trades) == 0:
        return pd.DataFrame()
    trade = pd.concat(trades)
    if trade.shape[0] == 0:
        return pd.DataFrame()

    trade['code'] = code
    # Fee per share: 0.0001 of price on every trade, plus 0.001 of price on
    # sells (presumably commission and stamp duty -- confirm).
    trade['fee'] = trade['price'] * 0.0001
    selectBuy = trade['direction'] == 'buy'
    selectSell = trade['direction'] == 'sell'
    trade.loc[selectSell, 'fee'] = (trade['fee'] +
                                    trade['price'] * 0.001)[selectSell]
    # Buys spend cash (price + fee); sells receive cash (price - fee).
    trade.loc[selectBuy, 'cashChange'] = (
        (-trade['price'] - trade['fee']) * trade['volume'])[selectBuy]
    trade.loc[selectSell, 'cashChange'] = (
        (trade['price'] - trade['fee']) * trade['volume'])[selectSell]
    trade['amount'] = trade['price'] * trade['volume']
    return trade