def updateFactor(self, factor, removeOld=True, F=1):
    '''
    Append newly computed test results to the stored files of one factor.

    The earliest end date among the factor's stored data is located, the feed
    start (self.start) is moved back from it by a number of business days
    derived from 2 * F periods plus one, and the factor is recomputed and
    tested for every frequency in self.testFreq. Results are then written
    with the H5 writer in "append" mode.

    param factor: factor name; also the module name under cpa.factorPool.factors
    param removeOld: whether to delete the original files
                     (accepted for compatibility; not used by this method)
    param F: rebalancing frequency, used as the lag of the return panel and tester
    return: None. Returns early with a warning when no stored data is found.
    '''
    self.logger.info("************************Updating FactorData for {}************************".format(factor))

    factorReader = h5Reader.H5BatchPanelReader(factorName=factor, frequency=None)
    factorReader.prepareOutputData()
    dateRangeDict = factorReader.getDateRange()  # dict holding the first and last data dates
    if not dateRangeDict:
        # Without stored data there is no end date to continue from
        self.logger.warning("No existing data found for {}. Nothing to update.".format(factor))
        return

    # Element 1 of each value is the end date; take the earliest one
    firstEndTime = min(dateRange[1] for dateRange in dateRangeDict.values()).to_pydatetime()
    # Convert 2F periods into days (self.dataFreq / 86400 -- presumably seconds per bar), then add 1
    nDays = int(np.floor(2 * F * self.dataFreq / 86400)) + 1
    timeDiff = pd.tseries.offsets.BusinessDay(n=nDays)
    self.start = firstEndTime - timeDiff  # time at which data reading starts

    panelFeed = self.getPanelFeed()  # a fresh panelFeed built with the new start
    modulePath = "cpa.factorPool.factors.{}".format(factor)  # factor module path
    module = importlib.import_module(modulePath)
    self.logger.info("The module {} has been imported successfully".format(factor))
    # Factor class of the module, e.g. cpa.factorPool.factors.dmaEwv.Factor
    factorObject = getattr(module, 'Factor')

    # One entry per resample frequency; the dicts also keep the created objects referenced
    resampleFeedDict = {}
    returnDict = {}
    rawFactorDict = {}
    factorTesterDict = {}
    dictOldResultDict = {}
    dictFilePathDict = {}

    for resample in self.testFreq:
        frequencyStr = const.DataFrequency.freq2lable(resample)
        # Read all files in the folder of this frequency
        resampleReader = h5Reader.H5BatchPanelReader(factorName=factor, frequency=frequencyStr)
        resampleReader.prepareOutputData()
        oldResultDict = resampleReader.getTestResult()  # dict holding the stored dataframes
        # Paths of the original H5 files, taken from the per-frequency reader
        # (previously read from the all-frequency reader by mistake)
        filePathDict = resampleReader.getFilePath()

        key = str(resample).split(".")[-1]
        dictOldResultDict[key] = oldResultDict
        dictFilePathDict[key] = filePathDict
        resampleFeedDict[key] = ResampledPanelFeed(panelFeed, resample)
        returnDict[key] = returns.Returns(resampleFeedDict[key], lag=F, maxLen=1024)
        rawFactorDict[key] = factorBase.FactorPanel(resampleFeedDict[key], factorObject)
        factorTesterDict[key] = DefaultFactorTest(panelFeed=resampleFeedDict[key],
                                                  factorPanel=rawFactorDict[key],
                                                  returnPanel=returnDict[key],
                                                  indicators=['IC', 'rankIC', 'beta', 'gpIC', 'tbdf', 'turn',
                                                              'groupRet'],
                                                  lag=F,
                                                  cut=0.1)

    panelFeed.run(2000)

    for key, oldResultDict in dictOldResultDict.items():
        h5PanelWriter = h5Writer.H5PanelWriter(factorTesterDict[key], factor)
        h5PanelWriter.write(mode="append", oldResultDict=oldResultDict)  # write in append mode
def attachCalculator(self, factorCalculatorCls, factorMaxLen=None):
    '''
    Build the factor panel on top of the filtered feed, or the raw feed when no filter is set.

    :param factorCalculatorCls: factor calculator class handed to FactorPanel
    :param factorMaxLen: maximum stored length of the factor panel, handed to FactorPanel
    :return: self, so that calls can be chained
    '''
    # Prefer the filtered feed; fall back to the raw panel feed
    if self.filterdFeed is None:
        sourceFeed = self.rawPanelFeed
    else:
        sourceFeed = self.filterdFeed
    assert sourceFeed is not None

    self.factorPanel = factorBase.FactorPanel(sourceFeed, factorCalculatorCls, factorMaxLen)
    return self
def writeNewFactor(self, F=1):
    '''
    Compute, test and store data files for every newly added factor.

    param F: rebalancing frequency, used as the lag of the return panel and tester
    '''
    self.newFactorList()
    if not self.newFactor:
        # Computation, testing and storage only happen when new factors exist
        return

    for factorName in self.newFactor:
        if factorName == 'broker':
            continue

        self.logger.info(
            "************************ Writing FactorData for {} ************************".format(factorName))
        factorModule = importlib.import_module("cpa.factorPool.factors.{}".format(factorName))
        self.logger.info("The module {} has been imported successfully".format(factorName))

        baseFeed = self.getPanelFeed()  # each new factor gets its own panelFeed

        # Per-frequency containers
        feedByFreq = {}
        returnByFreq = {}
        factorClsByFreq = {}
        panelByFreq = {}
        testerByFreq = {}
        for freq in self.testFreq:
            resampled = ResampledPanelFeed(baseFeed, freq)
            feedByFreq[freq] = resampled
            returnByFreq[freq] = returns.Returns(resampled, lag=F, maxLen=1024)
            factorCls = getattr(factorModule, 'Factor')
            factorClsByFreq[freq] = factorCls
            panelByFreq[freq] = factorBase.FactorPanel(resampled, factorCls)
            testerByFreq[freq] = DefaultFactorTest(resampled,
                                                   panelByFreq[freq],
                                                   returnByFreq[freq],
                                                   indicators=['IC', 'rankIC', 'beta', 'gpIC', 'tbdf', 'turn',
                                                               'groupRet'],
                                                   lag=F,
                                                   cut=0.1,
                                                   fee=self.fee)

        baseFeed.run(2000)

        # Data shorter than the factor-test requirement is not stored
        firstReturns = returnByFreq[self.testFreq[0]]
        if not len(firstReturns) > 2 * F:
            self.logger.warning(
                "The length of the return panel <= 2 * the required lag. Data will not be saved.")
            return

        for freq in self.testFreq:
            writer = h5Writer.H5PanelWriter(testerByFreq[freq], factorName)
            writer.write(mode="new")
# rawFactor = factorBase.FactorPanel(panelFeed, maPanelFactor.Factor, 1024) # panel形式 # F = 30 # 调仓频率 # _return = returns.Returns(panelFeed, lag=F, maxLen=1024) # 以开盘价计算的向前n期收益 # factorTester = DefaultFactorTest(panelFeed, rawFactor, _return, # indicators=['IC', 'rankIC', 'beta', 'gpIC', 'tbdf', 'turn'], # lag=F, # nGroup=10, cut=0.1) # 定义因子评价类 # panelFeed.run(300) # # factorTester.plotAll() # factorTester.plotGroupret() # # factorTester.plotGroupStat() '''resample数据回测''' panelFeed = InlineDataSet.SZ50_MINUTE() resampleFeed = ResampledPanelFeed(panelFeed, bar.Frequency.HOUR) rawFactor = factorBase.FactorPanel(resampleFeed, maPanelFactor.Factor, 1024) # panel形式 F = 1 # 调仓频率 _return = returns.Returns(resampleFeed, lag=F, maxLen=1024) # 以开盘价计算的向前n期收益 factorTester = DefaultFactorTest(resampleFeed, rawFactor, _return, indicators=[ 'IC', 'rankIC', 'beta', 'gpIC', 'tbdf', 'turn', 'groupRet' ], lag=F, nGroup=10, cut=0.1) # 定义因子评价类 resampleFeed.run(3000)
def updateFactor(self, factor, nBizDaysAhead=30):
    '''
    Append newly computed test results to all files in one factor's folder.

    Steps: locate the latest end date of the stored data, restart the feed
    nBizDaysAhead business days before it, recompute and test the factor for
    every resample frequency, move the old files of each frequency into a
    sub-folder named after the old data, write the new H5 files in "append"
    mode, and finally regenerate the report of each frequency.

    param factor: factor name; also the module name under cpa.factorPool.factors
    param nBizDaysAhead: number of business days before the old data's end date at
                         which the new computation starts; adjust to the strategy,
                         e.g. an MA20 strategy on 2h data needs at least 10 business days
    return: None. Returns early with a warning when no stored data is found.
    '''
    self.logger.info("****************** Updating FactorData for {} ******************".format(factor))

    factorReader = h5Reader.H5BatchPanelReader(factorName=factor, frequency=None, allFolders=True)
    factorReader.prepareOutputData()
    dateRangeDict = factorReader.getDateRange()  # dict holding the first and last data dates
    if not dateRangeDict:
        # Without stored data there is no end date to continue from
        self.logger.warning("No existing data found for {}. Nothing to update.".format(factor))
        return

    # Element 1 of each value is the end date; take the latest one
    endDate = max(dateRange[1] for dateRange in dateRangeDict.values()).to_pydatetime()
    timeDiff = pd.tseries.offsets.BusinessDay(n=nBizDaysAhead)  # start n business days before the end date
    self.start = endDate - timeDiff  # time at which the new data starts
    self.logger.info("The end time in the original data is {}\n"
                     "The input time difference is {}\n"
                     "The start time for calculating the new data is {}\n"
                     "The end time for calculating the new data is {}\n"
                     .format(endDate, timeDiff, self.start, self.end))

    panelFeed = self.getPanelFeed()  # a fresh panelFeed built with the new start
    modulePath = "cpa.factorPool.factors.{}".format(factor)  # factor module path
    module = importlib.import_module(modulePath)
    # Factor class of the module, e.g. cpa.factorPool.factors.dmaEwv.Factor
    factorObject = getattr(module, 'Factor')

    # Settings reader of each frequency, kept so that every report is written with
    # its own settings (previously the last frequency's reader was used for all)
    settingReaderDict = {}

    for freqNum, freqStr in zip(self.resampleFreqNum, self.resampleFreqStr):
        folderPath = pathSelector.PathSelector.getFactorFilePath(factorName=factor, factorFrequency=freqStr)

        # Read the parameter values of the factor test from the first .csv file in the folder
        csvFileName = [name for name in os.listdir(folderPath) if name.endswith(".csv")][0]
        csvFilePath = os.path.join(folderPath, csvFileName)
        fields = ["frequency", "lag", "nGroup", "cut", "fee", "poolNum"]
        settingReader = csvReader.CSVPanelReader(filePath=csvFilePath, fields=fields, frequency=freqNum,
                                                 isInstrumentCol=False)
        settingReader.loads()
        settingReaderDict[freqStr] = settingReader

        # Read the H5 files of this frequency
        freqReader = h5Reader.H5BatchPanelReader(factorName=factor, frequency=freqNum, allFolders=False)
        freqReader.prepareOutputData()
        oldResultDict = freqReader.to_frame()  # dict holding the stored dataframes
        filePathDict = freqReader.getFilePath()  # paths of the original H5 files

        # Create the feed, return panel, factor panel and tester of this frequency
        self.dictOldResultDict[freqStr] = oldResultDict
        self.dictFilePathDict[freqStr] = filePathDict
        self.reasampleFeedDict[freqStr] = ResampledPanelFeed(panelFeed, freqNum)
        self._return_Dict[freqStr] = returns.Returns(self.reasampleFeedDict[freqStr], lag=self.lag, maxLen=1024)
        self.rawFactorDict[freqStr] = factorBase.FactorPanel(self.reasampleFeedDict[freqStr], factorObject)
        self.factorTesterDict[freqStr] = DefaultFactorTest(feed=self.reasampleFeedDict[freqStr],
                                                           factorPanel=self.rawFactorDict[freqStr],
                                                           returnPanel=self._return_Dict[freqStr],
                                                           indicators=['IC', 'rankIC', 'beta', 'gpIC', 'tbdf',
                                                                       'turn', 'groupRet'],
                                                           lag=self.lag,
                                                           cut=0.1,
                                                           fee=self.fee)

    panelFeed.run(_print=True)  # panelFeed drives all resample feeds at once

    for freqStr, oldResultDict in self.dictOldResultDict.items():
        # Move the old files into a folder named after a slice of the first old-result key
        # (presumably a timestamp embedded in the key -- confirm against the reader)
        oldDateTime = list(oldResultDict.keys())[0][-16:-3]
        freqFolderPath = pathSelector.PathSelector.getFactorFilePath(factorName=factor, factorFrequency=freqStr)
        destFolderPath = os.path.join(freqFolderPath, oldDateTime)
        os.makedirs(destFolderPath, exist_ok=True)

        # Only plain files are moved; sub-folders from earlier updates stay in place
        fileList = [name for name in os.listdir(freqFolderPath)
                    if os.path.isfile(os.path.join(freqFolderPath, name))]
        for fileName in fileList:
            shutil.move(os.path.join(freqFolderPath, fileName), destFolderPath)

        # Write the new H5 file in append mode
        h5PanelWriter = h5Writer.H5PanelWriter(factorName=factor,
                                               defaultFactorTest=self.factorTesterDict[freqStr])
        h5PanelWriter.write(mode="append", oldResultDict=oldResultDict)

    for freqNum, freqStr in zip(self.resampleFreqNum, self.resampleFreqStr):
        # Write the new report (charts) of this frequency with its own settings
        secondReader = h5Reader.H5BatchPanelReader(factorName=factor, frequency=freqNum)
        secondReader.prepareOutputData()
        reportWriter = ReportWriter(factorName=factor,
                                    h5BatchPanelReader=secondReader,
                                    csvPanelReader=settingReaderDict[freqStr])
        reportWriter.write()
def writeNewFactor(self):
    '''
    Compute, test and store data files and reports for every newly added factor.

    For each factor in self.newFactor (except 'broker') the factor module is
    imported, a fresh panelFeed is created, and one resampled feed, return
    panel, factor panel and tester are built per resample frequency. Absolute
    returns are used unless self.isRelReturn is truthy, in which case returns
    relative to the benchmark panel are computed through an AdvancedFeed.

    The flag is evaluated by truthiness, so values such as numpy booleans or
    0/1 select a branch instead of silently skipping the computation.

    return: None. Returns early, without saving, when the return panel of the
            first frequency is not longer than 2 * self.lag.
    '''
    self.newFactorList()
    if not self.newFactor:
        # Computation, testing and storage only happen when new factors exist
        return

    indicators = ['IC', 'rankIC', 'beta', 'gpIC', 'tbdf', 'turn', 'groupRet']

    for factor in self.newFactor:
        if factor == 'broker':
            continue

        self.logger.info(
            "****************** Writing FactorData for {} ******************".format(factor))
        modulePath = "cpa.factorPool.factors.{}".format(factor)  # factor module path
        module = importlib.import_module(modulePath)
        # Factor class of the module, e.g. cpa.factorPool.factors.dmaEwv.Factor
        factorObject = getattr(module, 'Factor')
        panelFeed = self.getPanelFeed()  # each new factor gets its own panelFeed

        if not self.isRelReturn:
            # Absolute returns: build the objects of every resample frequency
            for freqNum, freqStr in zip(self.resampleFreqNum, self.resampleFreqStr):
                self.reasampleFeedDict[freqStr] = ResampledPanelFeed(panelFeed, freqNum)
                self._return_Dict[freqStr] = returns.Returns(self.reasampleFeedDict[freqStr],
                                                             lag=self.lag,
                                                             maxLen=1024)
                self.rawFactorDict[freqStr] = factorBase.FactorPanel(self.reasampleFeedDict[freqStr],
                                                                     factorObject)
                self.factorTesterDict[freqStr] = DefaultFactorTest(self.reasampleFeedDict[freqStr],
                                                                   self.rawFactorDict[freqStr],
                                                                   self._return_Dict[freqStr],
                                                                   indicators=list(indicators),
                                                                   lag=self.lag,
                                                                   cut=0.1,
                                                                   fee=self.fee)
            panelFeed.run(_print=True)  # panelFeed drives all resample feeds at once
        else:
            # Relative returns: first build the dict of resampled feeds
            for freqNum, freqStr in zip(self.resampleFreqNum, self.resampleFreqStr):
                self.reasampleFeedDict[freqStr] = ResampledPanelFeed(panelFeed, freqNum)

            # Base feed under the key "base", merged with the resampled feeds
            combinedDict = {"base": panelFeed, **self.reasampleFeedDict}
            benchPanel = self.getBenchPanel()  # benchmark index panel
            advFeed = AdvancedFeed(feedDict=combinedDict, panelDict={'bench': benchPanel})

            for freqStr in self.resampleFreqStr:
                self._return_Dict[freqStr] = returns.RelativeReturns(advFeed,
                                                                     isResample=True,
                                                                     resampleType=freqStr,
                                                                     lag=self.lag,
                                                                     maxLen=1024)
                self.rawFactorDict[freqStr] = factorBase.FactorPanel(self.reasampleFeedDict[freqStr],
                                                                     factorObject)
                self.factorTesterDict[freqStr] = DefaultFactorTest(advFeed,
                                                                   self.rawFactorDict[freqStr],
                                                                   self._return_Dict[freqStr],
                                                                   isResample=True,
                                                                   resampleType=freqStr,
                                                                   indicators=list(indicators),
                                                                   lag=self.lag,
                                                                   cut=0.1,
                                                                   fee=self.fee)
            advFeed.run(_print=True)  # the advanced feed drives all resample feeds at once

        # Data shorter than the factor-test requirement is not stored
        if len(self._return_Dict[self.resampleFreqStr[0]]) <= 2 * self.lag:
            self.logger.warning(
                "The length of the return panel <= 2 * the required lag. Data will not be saved.")
            return

        # Write the H5 file and the report of every frequency
        for freqStr in self.resampleFreqStr:
            h5PanelWriter = h5Writer.H5PanelWriter(factor, self.factorTesterDict[freqStr])
            h5PanelWriter.write(mode="new")
            reportWriter = ReportWriter(factorName=factor, defaultFactorTest=self.factorTesterDict[freqStr])
            reportWriter.write()