コード例 #1
0
    def updateFactor(self, factor, removeOld=True, F=1):
        '''
        Append newly computed test results to all files in one factor's folder.

        param factor: factor name; also the module name under cpa.factorPool.factors
        param removeOld: whether to delete the original files
                         (accepted for compatibility; not read by this method)
        param F: rebalancing frequency, used as the lag of the return panels
                 and of the factor tests
        '''
        self.logger.info("************************Updating FactorData for {}************************".format(factor))

        factorReader = h5Reader.H5BatchPanelReader(factorName=factor, frequency=None)
        factorReader.prepareOutputData()
        dateRangeDict = factorReader.getDateRange()  # dict of (first, last) data dates
        # Collect and sort all data end dates; the loop variable is named
        # dateRange so the builtin ``range`` is not shadowed.
        endDateList = sorted([dateRange[1] for dateRange in dateRangeDict.values()])
        firstEndTime = endDateList[0].to_pydatetime()  # earliest of all data end dates
        # Convert 2F bars into days and add one (assumes self.dataFreq is in
        # seconds, given the division by 86400 - TODO confirm). The value is
        # cast to int because date offsets expect an integer n.
        timeDiff = pd.tseries.offsets.BusinessDay(n=int(np.floor(2*F*self.dataFreq/86400)) + 1)
        self.start = firstEndTime - timeDiff  # start time for reading data
        panelFeed = self.getPanelFeed()  # new panelFeed built with the new start

        modulePath = "cpa.factorPool.factors.{}".format(factor)  # factor module path
        module = importlib.import_module(modulePath)
        self.logger.info("The module {} has been imported successfully".format(factor))
        factorObject = getattr(module, 'Factor')  # e.g. cpa.factorPool.factors.dmaEwv.Factor

        resampleFeedDict = {}
        returnDict = {}
        rawFactorDict = {}
        factorTesterDict = {}
        dictOldResultDict = {}
        dictFilePathDict = {}
        for resample in self.testFreq:
            frequencyStr = const.DataFrequency.freq2lable(resample)
            # Reader restricted to the files of this frequency
            resampleReader = h5Reader.H5BatchPanelReader(factorName=factor,
                                                         frequency=frequencyStr)
            resampleReader.prepareOutputData()
            oldResultDict = resampleReader.getTestResult()  # dict of the stored dataframes
            # Paths of the existing H5 files for this frequency. These are taken
            # from the per-frequency reader, not from the all-frequency
            # factorReader, so that each key gets its own file paths.
            filePathDict = resampleReader.getFilePath()

            key = str(resample).split(".")[-1]
            dictOldResultDict[key] = oldResultDict
            dictFilePathDict[key] = filePathDict
            resampleFeedDict[key] = ResampledPanelFeed(panelFeed, resample)
            returnDict[key] = returns.Returns(resampleFeedDict[key], lag=F, maxLen=1024)
            rawFactorDict[key] = factorBase.FactorPanel(resampleFeedDict[key], factorObject)
            factorTesterDict[key] = DefaultFactorTest(panelFeed=resampleFeedDict[key],
                                                      factorPanel=rawFactorDict[key],
                                                      returnPanel=returnDict[key],
                                                      indicators=['IC', 'rankIC', 'beta', 'gpIC', 'tbdf', 'turn', 'groupRet'],
                                                      lag=F,
                                                      cut=0.1)

        # Run the base feed once, after all resampled feeds have been attached to it.
        panelFeed.run(2000)
        for key, oldResultDict in dictOldResultDict.items():
            h5PanelWriter = h5Writer.H5PanelWriter(factorTesterDict[key], factor)
            h5PanelWriter.write(mode="append", oldResultDict=oldResultDict)  # write in append mode
コード例 #2
0
 def attachCalculator(self, factorCalculatorCls, factorMaxLen=None):
     '''
     Build the factor panel on top of the currently available feed.

     The filtered feed (``self.filterdFeed``) is used when it is set;
     otherwise the raw panel feed (``self.rawPanelFeed``) is used.

     :param factorCalculatorCls: factor calculator class handed to ``factorBase.FactorPanel``
     :param factorMaxLen: maximum stored length of the factor, handed to
                          ``factorBase.FactorPanel`` unchanged (``None`` by default)
     :return: ``self``, so that calls can be chained
     :raises AssertionError: if neither the filtered feed nor the raw feed is set
     '''
     inputFeed = self.filterdFeed if self.filterdFeed is not None else self.rawPanelFeed
     if inputFeed is None:
         # Explicit raise instead of ``assert`` so the check also runs under
         # ``python -O``; the exception type stays the same for callers.
         raise AssertionError("attachCalculator requires filterdFeed or rawPanelFeed to be set")
     self.factorPanel = factorBase.FactorPanel(inputFeed,
                                               factorCalculatorCls,
                                               factorMaxLen)
     return self
コード例 #3
0
    def writeNewFactor(self, F=1):
        '''
        Compute, test and store the data files of every newly added factor.

        param F: rebalancing frequency, used as the lag of the return panels
                 and of the factor tests
        '''
        self.newFactorList()
        # Nothing to compute, test or store when there are no new factors.
        if not self.newFactor:
            return

        for factor in self.newFactor:
            if factor == 'broker':
                continue

            self.logger.info(
                "************************ Writing FactorData for {} ************************".format(factor))
            module = importlib.import_module("cpa.factorPool.factors.{}".format(factor))
            self.logger.info("The module {} has been imported successfully".format(factor))

            panelFeed = self.getPanelFeed()  # a fresh panelFeed for each new factor

            # Per-frequency objects, keyed by the entries of self.testFreq
            returnsByFreq = {}
            testerByFreq = {}
            for freq in self.testFreq:
                resampledFeed = ResampledPanelFeed(panelFeed, freq)
                returnsByFreq[freq] = returns.Returns(resampledFeed, lag=F, maxLen=1024)
                factorCls = getattr(module, 'Factor')
                factorPanel = factorBase.FactorPanel(resampledFeed, factorCls)
                testerByFreq[freq] = DefaultFactorTest(
                    resampledFeed, factorPanel, returnsByFreq[freq],
                    indicators=['IC', 'rankIC', 'beta', 'gpIC', 'tbdf', 'turn', 'groupRet'],
                    lag=F, cut=0.1, fee=self.fee)
            panelFeed.run(2000)

            # Do not store anything when the first frequency's return panel is
            # too short for the factor test; this also ends the whole method.
            firstReturns = returnsByFreq[self.testFreq[0]]
            if len(firstReturns) <= 2 * F:
                self.logger.warning(
                    "The length of the return panel <= 2 * the required lag. Data will not be saved.")
                return

            for freq in self.testFreq:
                h5Writer.H5PanelWriter(testerByFreq[freq], factor).write(mode="new")
コード例 #4
0
    # rawFactor = factorBase.FactorPanel(panelFeed, maPanelFactor.Factor, 1024)  # panel form
    # F = 30  # rebalancing frequency
    # _return = returns.Returns(panelFeed, lag=F, maxLen=1024)  # forward n-period return computed from open prices
    # factorTester = DefaultFactorTest(panelFeed, rawFactor, _return,
    #                                  indicators=['IC', 'rankIC', 'beta', 'gpIC', 'tbdf', 'turn'],
    #                                  lag=F,
    #                                  nGroup=10, cut=0.1)  # define the factor evaluation object
    # panelFeed.run(300)
    #
    # factorTester.plotAll()
    # factorTester.plotGroupret()
    # # factorTester.plotGroupStat()
    # Backtest on resampled data: the SZ50 minute data set is resampled to
    # hourly bars, and the factor panel, return panel and factor test are all
    # built on the resampled feed.
    '''resample数据回测'''
    panelFeed = InlineDataSet.SZ50_MINUTE()
    resampleFeed = ResampledPanelFeed(panelFeed, bar.Frequency.HOUR)
    rawFactor = factorBase.FactorPanel(resampleFeed, maPanelFactor.Factor,
                                       1024)  # panel form
    F = 1  # rebalancing frequency
    _return = returns.Returns(resampleFeed, lag=F,
                              maxLen=1024)  # forward n-period return computed from open prices
    factorTester = DefaultFactorTest(resampleFeed,
                                     rawFactor,
                                     _return,
                                     indicators=[
                                         'IC', 'rankIC', 'beta', 'gpIC',
                                         'tbdf', 'turn', 'groupRet'
                                     ],
                                     lag=F,
                                     nGroup=10,
                                     cut=0.1)  # define the factor evaluation object
    resampleFeed.run(3000)
    def updateFactor(self, factor, nBizDaysAhead=30):
        '''
        Append newly computed results to every file in one factor's folder.

        For each resample frequency the method reads the stored test settings
        and old results, recomputes the factor test on a feed that starts
        nBizDaysAhead business days before the latest stored end date, moves
        the old files into a sub-folder, writes the new h5 file in append mode
        and finally rewrites the report.

        param factor: factor name; also the module name under cpa.factorPool.factors
        param nBizDaysAhead: number of business days before the end date of the old data
                             at which the new calculation starts; tune it to the strategy.
                             E.g. a strategy using MA20 on 2h data needs at least
                             10 business days.
        '''
        self.logger.info("****************** Updating FactorData for {} ******************".format(factor))

        factorReader = h5Reader.H5BatchPanelReader(factorName=factor, frequency=None, allFolders=True)
        factorReader.prepareOutputData()
        dateRangeDict = factorReader.getDateRange()  # dict of (first, last) data dates
        # Collect and sort all data end dates; the loop variable is named
        # dateRange so the builtin ``range`` is not shadowed.
        endDateList = sorted([dateRange[1] for dateRange in dateRangeDict.values()])
        endDate = endDateList[-1].to_pydatetime()  # latest of all data end dates
        timeDiff = pd.tseries.offsets.BusinessDay(n=nBizDaysAhead)  # start n business days before the end date
        self.start = endDate - timeDiff  # start time of the new calculation
        self.logger.info("The end time in the original data is {}\n"
                         "The input time difference is {}\n"
                         "The start time for calculating the new data is {}\n"
                         "The end time for calculating the new data is {}\n"
                         .format(endDate, timeDiff, self.start, self.end))
        panelFeed = self.getPanelFeed()  # new panelFeed built with the new start

        modulePath = "cpa.factorPool.factors.{}".format(factor)  # factor module path
        module = importlib.import_module(modulePath)
        factorObject = getattr(module, 'Factor')  # e.g. cpa.factorPool.factors.dmaEwv.Factor

        # One settings reader per frequency. The report loop at the end must use
        # the reader that belongs to its own frequency, not whichever reader
        # happened to be created last in the loop below.
        settingReaderDict = {}
        for freqNum, freqStr in zip(self.resampleFreqNum, self.resampleFreqStr):
            folderPath = pathSelector.PathSelector.getFactorFilePath(factorName=factor, factorFrequency=freqStr)
            # Read the parameter values of the factor test from the first .csv
            # file in the folder (IndexError if the folder holds no .csv file).
            csvFileName = [name for name in os.listdir(folderPath) if name.endswith(".csv")][0]
            csvFilePath = os.path.join(folderPath, csvFileName)
            fields = ["frequency", "lag", "nGroup", "cut", "fee", "poolNum"]
            settingReader = csvReader.CSVPanelReader(filePath=csvFilePath,
                                                     fields=fields,
                                                     frequency=freqNum,
                                                     isInstrumentCol=False)
            settingReader.loads()
            settingReaderDict[freqStr] = settingReader

            # Read the h5 files of this frequency
            freqReader = h5Reader.H5BatchPanelReader(factorName=factor,
                                                     frequency=freqNum,
                                                     allFolders=False)
            freqReader.prepareOutputData()
            oldResultDict = freqReader.to_frame()  # dict of the stored dataframes
            filePathDict = freqReader.getFilePath()  # paths of the existing H5 files

            # Create the per-frequency feed, return panel, factor panel and tester
            self.dictOldResultDict[freqStr] = oldResultDict
            self.dictFilePathDict[freqStr] = filePathDict
            self.reasampleFeedDict[freqStr] = ResampledPanelFeed(panelFeed, freqNum)
            self._return_Dict[freqStr] = returns.Returns(self.reasampleFeedDict[freqStr], lag=self.lag, maxLen=1024)
            self.rawFactorDict[freqStr] = factorBase.FactorPanel(self.reasampleFeedDict[freqStr], factorObject)
            self.factorTesterDict[freqStr] = DefaultFactorTest(feed=self.reasampleFeedDict[freqStr],
                                                               factorPanel=self.rawFactorDict[freqStr],
                                                               returnPanel=self._return_Dict[freqStr],
                                                               indicators=['IC', 'rankIC', 'beta', 'gpIC',
                                                                           'tbdf', 'turn', 'groupRet'],
                                                               lag=self.lag,
                                                               cut=0.1,
                                                               fee=self.fee)

        panelFeed.run(_print=True)  # the single panelFeed drives every resampled feed

        for freqStr, oldResultDict in self.dictOldResultDict.items():
            # Move the old files into a folder named after a slice of the first
            # old-result key (presumably a timestamp embedded in the key - TODO confirm).
            oldDateTime = list(oldResultDict.keys())[0][-16:-3]
            freqFolderPath = pathSelector.PathSelector.getFactorFilePath(factorName=factor, factorFrequency=freqStr)
            destFolderPath = os.path.join(freqFolderPath, oldDateTime)
            os.makedirs(destFolderPath, exist_ok=True)
            # Only plain files are moved; sub-folders (including the destination) stay in place.
            fileList = [name for name in os.listdir(freqFolderPath) if
                        os.path.isfile(os.path.join(freqFolderPath, name))]
            for fileName in fileList:
                sourceFilePath = os.path.join(freqFolderPath, fileName)
                shutil.move(sourceFilePath, destFolderPath)

            # Write the new h5 file
            h5PanelWriter = h5Writer.H5PanelWriter(factorName=factor,
                                                   defaultFactorTest=self.factorTesterDict[freqStr])
            h5PanelWriter.write(mode="append", oldResultDict=oldResultDict)  # write in append mode

        for freqNum, freqStr in zip(self.resampleFreqNum, self.resampleFreqStr):
            # Write the new report files, pairing each frequency's freshly
            # written h5 data with that same frequency's settings.
            secondReader = h5Reader.H5BatchPanelReader(factorName=factor,
                                                       frequency=freqNum)
            secondReader.prepareOutputData()
            reportWriter = ReportWriter(factorName=factor,
                                        h5BatchPanelReader=secondReader,
                                        csvPanelReader=settingReaderDict[freqStr])
            reportWriter.write()
    def writeNewFactor(self):
        '''
        Compute, test and store the data files and reports of every newly added factor.

        Absolute returns are used when self.isRelReturn is False and returns
        relative to the benchmark panel when it is True.
        '''
        self.newFactorList()
        # Nothing to compute, test or store when there are no new factors.
        if not self.newFactor:
            return

        indicatorNames = ('IC', 'rankIC', 'beta', 'gpIC', 'tbdf', 'turn', 'groupRet')

        for factor in self.newFactor:
            if factor == 'broker':
                continue

            self.logger.info(
                "****************** Writing FactorData for {} ******************".format(factor))
            module = importlib.import_module("cpa.factorPool.factors.{}".format(factor))
            factorObject = getattr(module, 'Factor')  # e.g. cpa.factorPool.factors.dmaEwv.Factor
            panelFeed = self.getPanelFeed()  # a fresh panelFeed for each new factor

            if self.isRelReturn is False:
                # Absolute returns: one feed / return panel / factor panel /
                # tester per resample frequency.
                for freqNum, freqStr in zip(self.resampleFreqNum, self.resampleFreqStr):
                    resampledFeed = ResampledPanelFeed(panelFeed, freqNum)
                    self.reasampleFeedDict[freqStr] = resampledFeed
                    returnPanel = returns.Returns(resampledFeed, lag=self.lag, maxLen=1024)
                    self._return_Dict[freqStr] = returnPanel
                    factorPanel = factorBase.FactorPanel(resampledFeed, factorObject)
                    self.rawFactorDict[freqStr] = factorPanel
                    self.factorTesterDict[freqStr] = DefaultFactorTest(
                        resampledFeed,
                        factorPanel,
                        returnPanel,
                        indicators=list(indicatorNames),
                        lag=self.lag,
                        cut=0.1,
                        fee=self.fee)
                panelFeed.run(_print=True)  # the single panelFeed drives every resampled feed

            elif self.isRelReturn is True:
                # Relative returns: first collect the resampled feeds ...
                for freqNum, freqStr in zip(self.resampleFreqNum, self.resampleFreqStr):
                    self.reasampleFeedDict[freqStr] = ResampledPanelFeed(panelFeed, freqNum)
                # ... then combine them with the base feed and the benchmark panel.
                combinedDict = {"base": panelFeed}
                combinedDict.update(self.reasampleFeedDict)
                benchPanel = self.getBenchPanel()
                advFeed = AdvancedFeed(feedDict=combinedDict, panelDict={'bench': benchPanel})

                for freqStr in self.resampleFreqStr:
                    returnPanel = returns.RelativeReturns(advFeed,
                                                          isResample=True,
                                                          resampleType=freqStr,
                                                          lag=self.lag,
                                                          maxLen=1024)
                    self._return_Dict[freqStr] = returnPanel
                    factorPanel = factorBase.FactorPanel(self.reasampleFeedDict[freqStr], factorObject)
                    self.rawFactorDict[freqStr] = factorPanel
                    self.factorTesterDict[freqStr] = DefaultFactorTest(
                        advFeed,
                        factorPanel,
                        returnPanel,
                        isResample=True,
                        resampleType=freqStr,
                        indicators=list(indicatorNames),
                        lag=self.lag,
                        cut=0.1,
                        fee=self.fee)
                advFeed.run(_print=True)  # the advanced feed drives every resampled feed

            # Do not store anything when the first frequency's return panel is
            # too short for the factor test; this also ends the whole method.
            firstReturns = self._return_Dict[self.resampleFreqStr[0]]
            if len(firstReturns) <= 2 * self.lag:
                self.logger.warning(
                    "The length of the return panel <= 2 * the required lag. Data will not be saved.")
                return

            # Write the h5 file and the report for every frequency.
            for freqStr in self.resampleFreqStr:
                tester = self.factorTesterDict[freqStr]
                h5Writer.H5PanelWriter(factor, tester).write(mode="new")
                ReportWriter(factorName=factor, defaultFactorTest=tester).write()