def __init__(self):
    """Set up the Wind field lists, the MySQL handles and the logger."""
    # Security codes whose data is to be fetched.
    self.wsetData = [
        "000001.SH",
        "399300.SZ",
        "000016.SH",
        "000905.SH",
        "000906.SH",
    ]
    # Data fields to request for index quotes.
    self.indexFieldName = [
        "open",
        "high",
        "low",
        "close",
        "volume",
        "amt",
        "chg",
        "pct_chg",
        "turn",
    ]
    # Data fields to request for fund net values.
    self.fundFieldName = ["nav", "NAV_acc", "sec_name"]

    # Two separate MysqlCon objects are created: one hands out the engine,
    # the other the raw connection.
    self.engine = MysqlCon().getMysqlCon(flag='engine')
    self.conn = MysqlCon().getMysqlCon(flag='connect')

    self.PrintInfoDemo = PrintInfo()
def __init__(self):
    """Initialise the date window, the plotting switch and the logger."""
    self.startDate = '2006-01-01'
    # The end date is today's date; a fixed back-test end date
    # ('2017-06-01') was used previously.
    today = date.today()
    self.endDate = today.strftime('%Y-%m-%d')
    # Whether figures should be drawn.
    self.plotFlag = False
    # Logging helper.
    self.PrintInfoDemo = PrintInfo()
def __init__(self):
    """Create the logger, the risk-free rate and the look-back windows."""
    self.PrintInfoDemo = PrintInfo()
    # Risk-free rate.
    self.riskFree = 0

    # Each entry maps a period key to (display label, number of rows);
    # a month is counted as 21 rows and the full history as infinity.
    rowsPerMonth = 21
    rowsPerYear = rowsPerMonth * 12
    self.calcDate = {
        'oneMonth': (u'近一月', rowsPerMonth * 1),
        'ThreeMonths': (u'近三月', rowsPerMonth * 3),
        'SixMonths': (u'近六月', rowsPerMonth * 6),
        'OneYear': (u'近一年', rowsPerYear),
        'TwoYears': (u'近两年', rowsPerYear * 2),
        'ThreeYears': (u'近三年', rowsPerYear * 3),
        'TotalPeriod': (u'成立以来', np.inf),
    }
def __init__(self, dicParam):
    """Read the analysis parameters from ``dicParam`` and build helpers.

    :param dicParam: dict that must contain 'fundCode'; 'netValuePeriod',
        'isPosition', 'startDate' and 'endDate' are optional.
    """
    read = dicParam.get
    self.fundCode = dicParam['fundCode']
    self.netValuePeriod = read('netValuePeriod', '')
    self.isPosition = read('isPosition', False)
    self.startDate = read('startDate', '2015-01-01')
    self.endDate = read('endDate', '2019-01-01')

    # Benchmark index code -> display name.
    self.indexNameDic = {
        '000300.SH': '沪深300',
        '000016.SH': '上证50',
        '000905.SH': '中证500',
        '000906.SH': '中证800',
    }
    self.totalIndexName = [name for name in self.indexNameDic.values()]

    self.engine = MysqlCon().getMysqlCon()
    self.PrintInfoDemo = PrintInfo()
    self.GetDataFromWindAndMySqlDemo = GetDataFromWindAndMySql()
def __init__(self, assetIndex=None, backDate=None):
    """Load the fund pool and remember the asset indices and cut-off date.

    :param assetIndex: asset-class index mapping; a fresh empty dict is
        used when omitted, so instances never share one default object.
    :param backDate: cut-off date as 'YYYY-MM-DD'; defaults to today's date
        evaluated at call time (not at import time).
    """
    self.dicProduct = fundPool.getFundPool()
    self.getInfoFlag = True
    if backDate is None:
        backDate = date.today().strftime('%Y-%m-%d')
    self.backDate = backDate
    # Asset-class indices.
    self.assetIndex = {} if assetIndex is None else assetIndex
    # Logging helper.
    self.PrintInfoDemo = PrintInfo()
def __init__(self):
    """Create the logger and the risk/return Excel calculator."""
    logger = PrintInfo()
    self.PrintInfoDemo = logger
    calculator = CalcRiskReturnToExcel()
    self.CalcRiskReturnToExcelDemo = calculator
class GetDataFromWindAndMySql:
    """Fetch index / fund quotes from Wind and cache them in MySQL.

    ``getHQData`` is the entry point: it first downloads whatever part of
    the requested date range is not yet in MySQL, then reads the range back
    from MySQL.
    """

    def __init__(self):
        # Security codes whose data is to be fetched.
        self.wsetData = ["000001.SH", "399300.SZ", "000016.SH", "000905.SH", "000906.SH"]
        # Data fields to request for index quotes.
        self.indexFieldName = ["open", "high", "low", "close", "volume", "amt", "chg", "pct_chg", "turn"]
        self.fundFieldName = ["nav", "NAV_acc", "sec_name"]
        self.engine = MysqlCon().getMysqlCon(flag='engine')
        self.conn = MysqlCon().getMysqlCon(flag='connect')
        self.PrintInfoDemo = PrintInfo()

    @staticmethod
    def _getTableAndCodeName(tableFlag):
        """Return (table name, code column) for ``tableFlag``.

        Anything other than 'index' is treated as a fund, matching what
        ``getDataFromWind`` and ``getDataFromMySql`` have always done.
        """
        if tableFlag == 'index':
            return 'index_value', 'index_code'
        return 'fund_net_value', 'fund_code'

    def getLackDataToMySql(self, tempCode, startDate, endDate, tableFlag='index'):
        """Download the part of [startDate, endDate] missing from MySQL.

        The stored range is taken as [min(update_time), max(update_time)]
        for ``tempCode``; only the pieces of the request that fall outside
        it are fetched from Wind.

        NOTE(review): the comparisons assume the dates returned by the
        driver are comparable with ``startDate``/``endDate`` (e.g. both are
        'YYYY-MM-DD' strings) - confirm against the table schema.
        """
        tableStr, codeName = self._getTableAndCodeName(tableFlag)
        sqlStr = "select max(update_time),min(update_time) from %s where %s='%s'" % (tableStr, codeName, tempCode)
        cursor = self.conn.cursor()
        try:
            cursor.execute(sqlStr)
            dateStrTuple = cursor.fetchall()[0]
        finally:
            # Always release the cursor, even when the query fails.
            cursor.close()
        maxDate = dateStrTuple[0]
        minDate = dateStrTuple[1]

        if not maxDate:
            # Nothing stored yet: fetch the whole request.
            self.getDataFromWind(tempCode, startDate=startDate, endDate=endDate, tableFlag=tableFlag)
            return

        # The request lies completely outside the stored range.  (The second
        # test used to compare against minDate, which made the last branch
        # below unreachable and re-downloaded data that was already stored.)
        if endDate < minDate or startDate > maxDate:
            self.getDataFromWind(tempCode, startDate=startDate, endDate=endDate, tableFlag=tableFlag)
        elif startDate <= minDate:
            if minDate <= endDate < maxDate:
                self.getDataFromWind(tempCode, startDate=startDate, endDate=minDate, tableFlag=tableFlag)
            elif endDate >= maxDate:
                self.getDataFromWind(tempCode, startDate=startDate, endDate=minDate, tableFlag=tableFlag)
                self.getDataFromWind(tempCode, startDate=maxDate, endDate=endDate, tableFlag=tableFlag)
        elif endDate >= maxDate:
            # The request starts inside the stored range and runs past it.
            self.getDataFromWind(tempCode, startDate=maxDate, endDate=endDate, tableFlag=tableFlag)

    def getDataFromWind(self, tempCode, startDate='2019-04-01', endDate='2019-04-30', tableFlag='index'):
        """Fetch quotes from Wind, append them to MySQL and return them.

        :return: the downloaded DataFrame, or None when Wind reports an error.
        """
        tableStr, codeName = self._getTableAndCodeName(tableFlag)
        if tableFlag == 'index':
            nameDic = {"OPEN": "open_price", "HIGH": "high_price", "LOW": "low_price", "CLOSE": "close_price",
                       "VOLUME": "volume", "AMT": "amt", "CHG": "chg", "PCT_CHG": "pct_chg", "TURN": "turn"}
            fields = self.indexFieldName
        else:
            nameDic = {"NAV": "net_value", "NAV_ACC": "acc_net_value", "SEC_NAME": "fund_name"}
            fields = self.fundFieldName

        wsetdata = w.wsd(codes=tempCode, fields=fields, beginTime=startDate, endTime=endDate)
        if wsetdata.ErrorCode != 0:
            self.PrintInfoDemo.PrintLog("获取行情数据有误,错误代码" + str(wsetdata.ErrorCode))
            return

        tempDf = pd.DataFrame(wsetdata.Data, index=wsetdata.Fields, columns=wsetdata.Times).T
        tempDf[codeName] = tempCode
        tempDf['update_time'] = wsetdata.Times
        tempDf.rename(columns=nameDic, inplace=True)
        tempDf.to_sql(tableStr, con=self.engine, index=False, if_exists='append')
        return tempDf

    def getDataFromMySql(self, tempCode, startDate, endDate, tableFlag='index', nameList=None):
        """Read the columns in ``nameList`` for ``tempCode`` from MySQL.

        :param nameList: columns to select; defaults to ['close_price'].
        :return: DataFrame indexed by update_time, sorted by date.  An empty
            DataFrame is returned when ``nameList`` is empty.
        """
        if nameList is None:
            nameList = ['close_price']
        if not nameList:
            self.PrintInfoDemo.PrintLog('传入获取指数的字段不合法,请检查!')
            # Without columns the query below would be invalid SQL.
            return pd.DataFrame()

        tableStr, codeName = self._getTableAndCodeName(tableFlag)
        sqlStr = "select %s,update_time from %s where %s='%s' and update_time>='%s'" \
                 " and update_time<='%s'" % (','.join(nameList), tableStr, codeName, tempCode, startDate, endDate)
        resultDf = pd.read_sql(sql=sqlStr, con=self.engine)
        # De-duplicate while update_time is still a column: doing it after
        # set_index compares values only and silently drops different dates
        # that happen to carry the same price.
        resultDf = resultDf.drop_duplicates()
        resultDf.set_index(keys='update_time', inplace=True, drop=True)
        resultDf = resultDf.sort_index()
        return resultDf

    def getHQData(self, tempCode, startDate='2019-04-01', endDate='2019-04-30', tableFlag='index', nameList=None):
        """Entry point: make sure the range is cached, then read it from MySQL.

        :param nameList: columns to return; defaults to ['close_price'].
        """
        if nameList is None:
            nameList = ['close_price']
        self.getLackDataToMySql(tempCode, startDate, endDate, tableFlag)
        resultDf = self.getDataFromMySql(tempCode, startDate, endDate, tableFlag=tableFlag, nameList=nameList)
        return resultDf

    def getTradeDay(self, startdate, endDate, Period=''):
        """Return trading days from Wind's ``tdays`` as 'YYYY-MM-DD' strings.

        :param Period: '' day, 'W' week, 'M' month, 'Q' quarter,
            'S' half-year, 'Y' year
        :return: list of date strings, or None when Wind reports an error.
        """
        data = w.tdays(beginTime=startdate, endTime=endDate, options="Period=%s" % Period)
        if data.ErrorCode != 0:
            self.PrintInfoDemo.PrintLog('wind获取交易日期错误,请检查!')
            return
        tradeDayList = [tradeDay.strftime('%Y-%m-%d') for tradeDay in data.Data[0]]
        return tradeDayList
class EstimateValue:
    """Run the net-value based analysis of a single fund.

    ``getMain`` loads the fund's net values plus benchmark, industry and
    style indices, creates the result directory, and hands the data to the
    risk/return and regression calculators.
    """

    def __init__(self, dicParam):
        """
        :param dicParam: dict that must contain 'fundCode'; 'netValuePeriod',
            'isPosition', 'startDate' and 'endDate' are optional.
        """
        self.fundCode = dicParam['fundCode']
        self.netValuePeriod = dicParam.get('netValuePeriod', '')
        self.isPosition = dicParam.get('isPosition', False)
        self.startDate = dicParam.get('startDate', '2015-01-01')
        self.endDate = dicParam.get('endDate', '2019-01-01')
        # Benchmark index code -> display name.
        self.indexNameDic = {
            '000300.SH': '沪深300',
            '000016.SH': '上证50',
            '000905.SH': '中证500',
            '000906.SH': '中证800'
        }
        self.totalIndexName = list(self.indexNameDic.values())
        self.engine = MysqlCon().getMysqlCon()
        self.PrintInfoDemo = PrintInfo()
        self.GetDataFromWindAndMySqlDemo = GetDataFromWindAndMySql()

    def _getFieldDf(self, codeList, startDate, endDate, fieldName='close_price'):
        """Fetch one quote field for every code and join them column-wise.

        Each column of the result is named after its code.
        """
        dfList = []
        for code in codeList:
            tempDf = self.GetDataFromWindAndMySqlDemo.getHQData(
                tempCode=code, startDate=startDate, endDate=endDate, nameList=[fieldName])
            tempDf.rename(columns={fieldName: code}, inplace=True)
            dfList.append(tempDf)
        return pd.concat(dfList, axis=1)

    def getNetValueDataDic(self):
        """Collect the fund net values and all index series needed later.

        :return: dict with keys 'fundName', 'netValuedf', 'indexDf',
            'totalVolumeDf', 'totalIndustryDf', 'industryDic',
            'totalStyleDf' and 'styleDic'.
        """
        indexCodeList = ['000300.SH', '000016.SH', '000905.SH', '000906.SH']
        dicResult = {}

        self.PrintInfoDemo.PrintLog("获取基金净值数据...")
        sqlStr = "select fund_name,update_time,net_value,acc_net_value " \
                 "from fund_net_value where fund_code='%s'" % self.fundCode
        netValuedf = pd.read_sql(sql=sqlStr, con=self.engine, index_col='update_time')
        if netValuedf.empty:
            # Nothing cached for this fund yet: go through the Wind fetcher.
            netValuedf = self.GetDataFromWindAndMySqlDemo.getHQData(
                tempCode=self.fundCode, startDate=self.startDate, endDate=self.endDate,
                tableFlag='fund', nameList=['fund_name', 'acc_net_value'])
        self.PrintInfoDemo.PrintLog("基金净值数据获取成功!")

        self.fundName = netValuedf['fund_name'].unique()[0]
        dicResult['fundName'] = self.fundName
        netValuedf.drop(labels='fund_name', inplace=True, axis=1)
        dicResult['netValuedf'] = netValuedf

        # The index window is the first/last row of the net-value table.
        startDate = netValuedf.index.tolist()[0]
        endDate = netValuedf.index.tolist()[-1]

        # Broad-market indices: close prices and volumes.
        self.PrintInfoDemo.PrintLog("获取大盘指数数据...")
        totalIndexDf = self._getFieldDf(indexCodeList, startDate, endDate, 'close_price')
        totalVolumeDf = self._getFieldDf(indexCodeList, startDate, endDate, 'volume')
        self.PrintInfoDemo.PrintLog("获取大盘指数数据成功!")
        dicResult['indexDf'] = totalIndexDf
        dicResult['totalVolumeDf'] = totalVolumeDf

        # Industry indices.
        industryList = [
            '801210.SI', '801050.SI', '801140.SI', '801020.SI', '801170.SI', '801030.SI', '801150.SI',
            '801010.SI', '801200.SI', '801230.SI', '801770.SI', '801730.SI', '801130.SI', '801880.SI',
            '801180.SI', '801160.SI', '801780.SI', '801890.SI', '801080.SI', '801760.SI', '801790.SI',
            '801710.SI', '801740.SI', '801720.SI', '801750.SI', '801110.SI', '801040.SI', '801120.SI'
        ]
        industryLabel = [
            '休闲服务', '有色金属', '轻工制造', '采掘', '交通运输', '化工', '医药生物', '农林牧渔',
            '商业贸易', '综合', '通信', '电气设备', '纺织服装', '汽车', '房地产', '公用事业',
            '银行', '机械设备', '电子', '传媒', '非银金融', '建筑材料', '国防军工', '建筑装饰',
            '计算机', '家用电器', '钢铁', '食品饮料'
        ]
        industryDic = dict(zip(industryList, industryLabel))
        self.PrintInfoDemo.PrintLog("获取申万一级行业指数数据...")
        dicResult['totalIndustryDf'] = self._getFieldDf(industryList, startDate, endDate, 'close_price')
        dicResult['industryDic'] = industryDic
        self.PrintInfoDemo.PrintLog("获取申万一级行业指数数据成功!")

        # Style indices.
        styleList = [
            '801863.SI', '801822.SI', '801813.SI', '801831.SI', '801812.SI', '801821.SI', '801852.SI',
            '801842.SI', '801843.SI', '801832.SI', '801851.SI', '801853.SI', '801841.SI', '801833.SI',
            '801823.SI', '801811.SI'
        ]
        styleLabel = [
            '新股指数', '中市盈率指数', '小盘指数', '高市净率指数', '中盘指数', '高市盈率指数',
            '微利股指数', '中价股指数', '低价股指数', '中市净率指数', '亏损股指数', '绩优股指数',
            '高价股指数', '低市净率指数', '低市盈率指数', '大盘指数'
        ]
        styleDic = dict(zip(styleList, styleLabel))
        self.PrintInfoDemo.PrintLog("获取风格指数数据...")
        dicResult['totalStyleDf'] = self._getFieldDf(styleList, startDate, endDate, 'close_price')
        dicResult['styleDic'] = styleDic
        self.PrintInfoDemo.PrintLog("获取风格指数数据成功")
        return dicResult

    def getRiskFree(self):
        """Return the per-period risk-free rate (2% a year).

        Weekly net values ('W') use 52 periods a year, anything else 250.
        """
        if self.netValuePeriod == 'W':
            riskFree = 0.02 / 52
        else:
            riskFree = 0.02 / 250
        return riskFree

    def _getFundWithIndexDf(self, dicNetValueResult, indexKey):
        """Inner-join the fund's accumulated net value with an index table.

        The net-value column is renamed to the fund name.
        """
        joinedDf = pd.concat([
            dicNetValueResult['netValuedf']['acc_net_value'],
            dicNetValueResult[indexKey]
        ], axis=1, join='inner')
        joinedDf.rename(columns={'acc_net_value': dicNetValueResult['fundName']}, inplace=True)
        return joinedDf

    def calcAndPlotSaveRiskReturn(self, dicNetValueResult, resultPath):
        """Compute, plot and save the risk/return results for each period.

        :param dicNetValueResult: dict returned by ``getNetValueDataDic``
        :param resultPath: directory prefix the calculators write into
        :return: None
        """
        fundIndexDf = self._getFundWithIndexDf(dicNetValueResult, 'indexDf')
        fundPlotDf = fundIndexDf.rename(columns=self.indexNameDic)

        CalcRiskReturnDemo = CalcRiskReturn()
        self.PrintInfoDemo.PrintLog("计算日频数据相关结论...")
        CalcRiskReturnDemo.calcRiskReturn(fundPlotDf, resultPath)
        marketVolume = dicNetValueResult['totalVolumeDf']
        CalcRiskReturnDemo.plotDayNetValueFigure(fundPlotDf, resultPath, fundName=self.fundName,
                                                 netPeriod=self.netValuePeriod, marketVolume=marketVolume)

        # Forecast window: from the last available date to roughly three
        # months (93 days) later.
        startDate = fundPlotDf.index.tolist()[-1]
        startDate = datetime.strptime(startDate, "%Y-%m-%d")
        endDate = startDate + timedelta(days=31 * 3)
        tradeDayList = self.GetDataFromWindAndMySqlDemo.getTradeDay(
            startdate=startDate, endDate=endDate, Period=self.netValuePeriod)
        CalcRiskReturnDemo.getMentoCaloForecast(fundPlotDf, resultPath, tradeDayList, fundName=self.fundName)

        self.PrintInfoDemo.PrintLog("计算周频数据相关结论...")
        tradeWeekList = self.GetDataFromWindAndMySqlDemo.getTradeDay(
            startdate=fundPlotDf.index.tolist()[0], endDate=fundPlotDf.index.tolist()[-1], Period='W')
        weekFundPlotDf = fundPlotDf.loc[tradeWeekList].dropna(axis=0)
        CalcRiskReturnDemo.plotWeekNetValueFigure(weekFundPlotDf, resultPath, fundName=self.fundName)
        CalcRiskReturnDemo.calcWeekNetValueResult(weekFundPlotDf, resultPath, fundName=self.fundName)

        self.PrintInfoDemo.PrintLog("计算月频数据相关结论...")
        tradeMonthList = self.GetDataFromWindAndMySqlDemo.getTradeDay(
            startdate=fundPlotDf.index.tolist()[0], endDate=fundPlotDf.index.tolist()[-1], Period='M')
        monthFundPlotDf = fundPlotDf.loc[tradeMonthList].dropna(axis=0)
        CalcRiskReturnDemo.plotMonthNetValueFigure(monthFundPlotDf, resultPath, fundName=self.fundName)

        targetDf = fundPlotDf.copy()
        targetDf['无风险利率'] = self.getRiskFree()
        CalcRegressionDemo = CalcRegression()
        self.PrintInfoDemo.PrintLog("计算选股,择时能力相关结论...")
        CalcRegressionDemo.getSelectStockAndTime(targetDf, resultPath, fundName=self.fundName,
                                                 netPeriod=self.netValuePeriod, benchMark='沪深300')

        self.PrintInfoDemo.PrintLog("计算行业,风格回归相关结论...")
        fundIndustryDf = self._getFundWithIndexDf(dicNetValueResult, 'totalIndustryDf')
        fundIndustryDf['无风险利率'] = self.getRiskFree()
        CalcRegressionDemo.getIndustryRegression(
            fundIndustryDf, resultPath, fundName=self.fundName,
            industryDic=dicNetValueResult['industryDic'])

        fundStyleDf = self._getFundWithIndexDf(dicNetValueResult, 'totalStyleDf')
        fundStyleDf['无风险利率'] = self.getRiskFree()
        CalcRegressionDemo.getStyleRegression(
            fundStyleDf, resultPath, fundName=self.fundName,
            industryDic=dicNetValueResult['styleDic'])

    def getSavePath(self):
        """Return the directory for this fund's results, creating it if needed.

        The directory (including the parent result folder) is created when
        it does not exist; previously a missing parent folder made the
        method fail on ``os.listdir``.

        :return: path string ending with a separator
        """
        resultPath = os.getcwd() + r"\\分析结果\\%s\\" % self.fundName
        if not os.path.isdir(resultPath):
            os.makedirs(resultPath)
        return resultPath

    def getMain(self):
        """Run the whole analysis for the configured fund."""
        # Fund net values and index data.
        dicNetValueResult = self.getNetValueDataDic()
        # Directory the analysis results are saved into.
        resultPath = self.getSavePath()
        # Net-value statistics, analysed and saved per period.
        self.calcAndPlotSaveRiskReturn(dicNetValueResult, resultPath)
        JudgeTextDemo = JudgeText()
        JudgeTextDemo.getNetJudgeText(fundCode=self.fundCode, fundName=self.fundName,
                                      totalIndexName=self.totalIndexName)
        self.PrintInfoDemo.PrintLog("计算完成!")
class GetIndexData:
    """Load asset-class index close prices from a local Excel cache, Wind or iFinD."""

    def __init__(self):
        self.PrintInfoDemo = PrintInfo()

    def getData(self, indexCodeList=None, startDate='2006-01-01', endDate='2017-06-01', sourceFlag='wind'):
        """Download close prices from Wind or iFinD and cache them to Excel.

        :param indexCodeList: index codes to download
        :param sourceFlag: 'wind' uses Wind, any other value uses iFinD
        :return: DataFrame with one column per index code; an empty
            DataFrame when the data source reports an error.
        """
        if indexCodeList is None:
            indexCodeList = []
        indexDataDf = pd.DataFrame()
        if sourceFlag == 'wind':
            self.PrintInfoDemo.PrintLog(infostr='wind读取大类指数历史数据 indexDataDf%s' % endDate)
            w.start()
            indexData = w.wsd(codes=indexCodeList, fields=['close'], beginTime=startDate, endTime=endDate)
            if indexData.ErrorCode != 0:
                self.PrintInfoDemo.PrintLog(infostr='wind获取指数数据失败,错误代码: ',
                                            otherInfo=indexData.ErrorCode)
                return indexDataDf
            indexDataDf = pd.DataFrame(indexData.Data, index=indexData.Codes, columns=indexData.Times).T
            self.PrintInfoDemo.PrintLog(
                infostr='wind读取大类指数历史数据成功,写入本地文件indexDataDf%s.xlsx' % endDate)
        else:
            self.PrintInfoDemo.PrintLog(infostr='ifind读取大类指数历史数据 indexDataDf%s ' % endDate)
            # NOTE(review): credentials are hard-coded in source; they should
            # come from configuration or the environment.
            thsLogin = THS_iFinDLogin("zszq5072", "754628")
            if thsLogin not in [0, -201]:
                self.PrintInfoDemo.PrintLog('登录ifind失败,请检查!')
                return indexDataDf

            codeListStr = ','.join(indexCodeList)
            indicators = 'ths_close_price_index'
            indicatorParams = ''
            params = 'Days:Tradedays,Fill:Previous,Interval:D'
            data = THS_DateSerial(codeListStr, indicators, indicatorParams, params, startDate, endDate)
            if data['errorcode'] != 0:
                self.PrintInfoDemo.PrintLog(infostr='ifind获取指数数据失败,错误代码: ',
                                            otherInfo=data['errorcode'])
                # Do not leave the session open on the error path.
                THS_iFinDLogout()
                return indexDataDf
            tData = THS_Trans2DataFrame(data)
            THS_iFinDLogout()

            dfList = []
            # Group by the plain column name so every key is a scalar code
            # (a one-element list yields tuple keys on newer pandas).
            for code, tempdf in tData.groupby('thscode'):
                tempdf.set_index('time', drop=True, inplace=True)
                tempFianlDf = tempdf.rename(columns={indicators: code}).drop(labels=['thscode'], axis=1)
                dfList.append(tempFianlDf)
            indexDataDf = pd.concat(dfList, axis=1, join='inner')
            self.PrintInfoDemo.PrintLog(
                infostr='ifind读取大类指数历史数据成功,写入本地文件indexDataDf%s.xlsx' % endDate)

        excelpath = r"C:\\Users\\lenovo\\PycharmProjects\\fundPortfolio\\GetHistoryData\\indexDataDf" + "%s.xlsx" % endDate
        # to_excel(path) opens, writes and closes the workbook itself;
        # ExcelWriter.save() no longer exists in recent pandas.
        indexDataDf.to_excel(excelpath)
        return indexDataDf

    def getDataWindFind(self, indexCodeList=None, startDate='2006-01-01', endDate='2017-06-01'):
        """Try Wind first and fall back to iFinD when Wind returns nothing."""
        if indexCodeList is None:
            indexCodeList = []
        indexDataDf = self.getData(indexCodeList=indexCodeList, startDate=startDate, endDate=endDate,
                                   sourceFlag='wind')
        if indexDataDf.empty:
            indexDataDf = self.getData(indexCodeList=indexCodeList, startDate=startDate, endDate=endDate,
                                       sourceFlag='ifind')
        return indexDataDf

    def getHisData(self, indexCodeList=None, startDate='2006-01-01', endDate='2017-06-01'):
        """Return history for ``indexCodeList``, preferring the local cache.

        The cache file for ``endDate`` is used when it can be read and
        contains every requested code; otherwise the data is downloaded.

        :return: DataFrame, or None when ``indexCodeList`` is empty.
        """
        if not indexCodeList:
            self.PrintInfoDemo.PrintLog('未传入指数参数,请检查!')
            return
        self.PrintInfoDemo.PrintLog('获取大类历史指数数据: ', otherInfo=indexCodeList)
        self.PrintInfoDemo.PrintLog('数据最新获取日期: ', otherInfo=endDate)

        excelPath = r"C:\\Users\\lenovo\\PycharmProjects\\fundPortfolio\\GetHistoryData\\indexDataDf" + "%s.xlsx" % endDate
        # Only the cache read is guarded, so a failing remote fetch is not
        # attempted a second time and Ctrl-C is not swallowed.
        try:
            localDf = pd.read_excel(excelPath)
        except Exception:
            localDf = None

        indexDataDf = None
        if localDf is not None:
            self.PrintInfoDemo.PrintLog(infostr='本地读取大类指数历史数据 indexDataDf%s ' % endDate)
            lostIndex = [indexCode for indexCode in indexCodeList if indexCode not in localDf]
            if not lostIndex:
                indexDataDf = localDf[indexCodeList]

        if indexDataDf is None:
            indexDataDf = self.getDataWindFind(indexCodeList=indexCodeList, startDate=startDate,
                                               endDate=endDate)
        if indexDataDf.empty:
            self.PrintInfoDemo.PrintLog('获取历史数据失败!')
        return indexDataDf
class GetDataFromWindAndMySql:
    """Fetch index / fund / stock data from Wind and cache it in MySQL."""

    def __init__(self):
        # Security codes whose data is to be fetched.
        self.wsetData = ["000001.SH", "399300.SZ", "000016.SH", "000905.SH", "000906.SH"]
        # Data fields to request for index quotes.
        self.indexFieldName = ["open", "high", "low", "close", "volume", "amt", "chg", "pct_chg", "turn"]
        self.fundFieldName = ["nav", "NAV_acc", "sec_name"]
        self.stockFieldName = [
            "open", "high", "low", "close", "volume", "amt", "turn", "mkt_cap_ard", "pe_ttm", "ps_ttm", "pb_lf"
        ]
        self.engine = MysqlCon().getMysqlCon(flag='engine')
        self.conn = MysqlCon().getMysqlCon(flag='connect')
        self.PrintInfoDemo = PrintInfo()
        self.GetDataToMysqlDemo = GetDataToMysql()

    @staticmethod
    def _getTableAndCodeName(tableFlag):
        """Return (table name, code column) for ``tableFlag``.

        :raises ValueError: for a flag other than 'index', 'fund', 'stock'.
        """
        if tableFlag == 'index':
            return 'index_value', 'index_code'
        if tableFlag == 'fund':
            return 'fund_net_value', 'fund_code'
        if tableFlag == 'stock':
            return 'stock_hq_value', 'stock_code'
        raise ValueError("unsupported tableFlag: %r" % (tableFlag,))

    def _queryMaxMinDate(self, sqlStr):
        """Run a ``select max(..),min(..)`` query and return (maxDate, minDate)."""
        cursor = self.conn.cursor()
        try:
            cursor.execute(sqlStr)
            dateStrTuple = cursor.fetchall()[0]
        finally:
            # Always release the cursor, even when the query fails.
            cursor.close()
        return dateStrTuple[0], dateStrTuple[1]

    def getIndexConstituent(self, indexCode='000300.SH', getDate='2019-06-06'):
        """Return the constituents of ``indexCode`` on ``getDate``.

        MySQL is queried first; when it has no rows the data is fetched from
        Wind, stored through ``GetDataToMysql`` and returned.

        :return: DataFrame (empty when Wind reports an error)
        """
        sqlStr = "select * from index_constituent where index_code='%s' and update_time='%s'" % (
            indexCode, getDate)
        resultDf = pd.read_sql(sql=sqlStr, con=self.engine)
        if not resultDf.empty:
            return resultDf

        wsetdata = w.wset("indexconstituent", "date=%s;windcode=%s" % (getDate, indexCode))
        if wsetdata.ErrorCode != 0:
            self.PrintInfoDemo.PrintLog("获取指数成分股数据有误,错误代码" + str(wsetdata.ErrorCode))
            return pd.DataFrame()

        resultDf = pd.DataFrame(wsetdata.Data, index=wsetdata.Fields).T
        resultDf['date'] = [datetampStr.strftime('%Y-%m-%d') for datetampStr in resultDf['date'].tolist()]
        nameDic = {
            'date': 'adjust_time',
            'wind_code': 'stock_code',
            "sec_name": 'stock_name',
            'i_weight': 'stock_weight'
        }
        resultDf.rename(columns=nameDic, inplace=True)
        resultDf['update_time'] = getDate
        resultDf['index_code'] = indexCode
        self.GetDataToMysqlDemo.GetMain(resultDf, 'index_constituent')
        return resultDf

    def getLackDataToMySql(self, tempCode, startDate, endDate, tableFlag='index'):
        """Download the part of [startDate, endDate] missing from MySQL.

        The stored range is taken as [min(update_time), max(update_time)]
        for ``tempCode``; only the pieces of the request that fall outside
        it are fetched from Wind.

        NOTE(review): the comparisons assume the dates returned by the
        driver are comparable with ``startDate``/``endDate`` (e.g. both are
        'YYYY-MM-DD' strings) - confirm against the table schema.
        """
        tableStr, codeName = self._getTableAndCodeName(tableFlag)
        sqlStr = "select max(update_time),min(update_time) from %s where %s='%s'" % (
            tableStr, codeName, tempCode)
        maxDate, minDate = self._queryMaxMinDate(sqlStr)

        if not maxDate:
            # Nothing stored yet: fetch the whole request.
            self.getDataFromWind(tempCode, startDate=startDate, endDate=endDate, tableFlag=tableFlag)
            return

        # The request lies completely outside the stored range.  (The second
        # test used to compare against minDate, which made the last branch
        # below unreachable and re-downloaded data that was already stored.)
        if endDate < minDate or startDate > maxDate:
            self.getDataFromWind(tempCode, startDate=startDate, endDate=endDate, tableFlag=tableFlag)
        elif startDate <= minDate:
            if minDate <= endDate < maxDate:
                if startDate != minDate:
                    self.getDataFromWind(tempCode, startDate=startDate, endDate=minDate, tableFlag=tableFlag)
            elif endDate >= maxDate:
                self.getDataFromWind(tempCode, startDate=startDate, endDate=minDate, tableFlag=tableFlag)
                if endDate != maxDate:
                    self.getDataFromWind(tempCode, startDate=maxDate, endDate=endDate, tableFlag=tableFlag)
        elif endDate > maxDate:
            # The request starts inside the stored range and runs past it.
            self.getDataFromWind(tempCode, startDate=maxDate, endDate=endDate, tableFlag=tableFlag)

    def getDataFromWind(self, tempCode, startDate='2019-04-01', endDate='2019-04-30', tableFlag='index'):
        """Fetch a date series from Wind, store it in MySQL and return it.

        :return: the downloaded DataFrame, or None when Wind reports an error.
        :raises ValueError: for an unsupported ``tableFlag``.
        """
        tableStr, codeName = self._getTableAndCodeName(tableFlag)
        if tableFlag == 'index':
            nameDic = {
                "OPEN": "open_price", "HIGH": "high_price", "LOW": "low_price", "CLOSE": "close_price",
                "VOLUME": "volume", "AMT": "amt", "CHG": "chg", "PCT_CHG": "pct_chg", "TURN": "turn"
            }
            fields = self.indexFieldName
        elif tableFlag == 'fund':
            nameDic = {"NAV": "net_value", "NAV_ACC": "acc_net_value", "SEC_NAME": "fund_name"}
            fields = self.fundFieldName
        else:
            nameDic = {
                "OPEN": "open_price", "HIGH": "high_price", "LOW": "low_price", "CLOSE": "close_price",
                "VOLUME": "volume", "AMT": "amt", "TURN": "turn", "MKT_CAP_ARD": "market_value",
                "PE_TTM": "pe_ttm", "PS_TTM": "ps_ttm", "PB_LF": "pb_lf"
            }
            fields = self.stockFieldName

        wsetdata = w.wsd(codes=tempCode, fields=fields, beginTime=startDate, endTime=endDate)
        if wsetdata.ErrorCode != 0:
            self.PrintInfoDemo.PrintLog("获取行情数据有误,错误代码" + str(wsetdata.ErrorCode))
            return

        tempDf = pd.DataFrame(wsetdata.Data, index=wsetdata.Fields, columns=wsetdata.Times).T
        tempDf[codeName] = tempCode
        tempDf['update_time'] = wsetdata.Times
        tempDf.rename(columns=nameDic, inplace=True)
        # Dates are stored as 'YYYY-MM-DD' strings.
        tempDf['update_time'] = [dateStr.strftime("%Y-%m-%d") for dateStr in tempDf['update_time'].tolist()]
        self.GetDataToMysqlDemo.GetMain(tempDf, tableStr)
        return tempDf

    def getDataFromMySql(self, tempCode, startDate, endDate, tableFlag='index', nameList=None):
        """Read the columns in ``nameList`` for ``tempCode`` from MySQL.

        :param nameList: columns to select; defaults to ['close_price'].
        :return: DataFrame indexed by update_time, sorted by date.  An empty
            DataFrame is returned when ``nameList`` is empty.
        :raises ValueError: for an unsupported ``tableFlag``.
        """
        if nameList is None:
            nameList = ['close_price']
        if not nameList:
            self.PrintInfoDemo.PrintLog('传入获取指数的字段不合法,请检查!')
            # Without columns the query below would be invalid SQL.
            return pd.DataFrame()

        tableStr, codeName = self._getTableAndCodeName(tableFlag)
        sqlStr = "select %s,update_time from %s where %s='%s' and update_time>='%s'" \
                 " and update_time<='%s'" % (','.join(nameList), tableStr, codeName, tempCode, startDate, endDate)
        resultDf = pd.read_sql(sql=sqlStr, con=self.engine)
        # De-duplicate while update_time is still a column: doing it after
        # set_index compares values only and silently drops different dates
        # that happen to carry the same price.
        resultDf = resultDf.drop_duplicates()
        resultDf.set_index(keys='update_time', inplace=True, drop=True)
        resultDf = resultDf.sort_index()
        return resultDf

    def getCurrentNameData(self, tempCodeList, startDate, endDate, tableFlag='stock', nameStr='close_price'):
        """Return one field for several stocks as a date x code table.

        Missing parts of [startDate, endDate] are downloaded first (judged
        by the min/max stored date over all requested codes), then the
        field is read from MySQL and pivoted to one column per stock code.

        :return: DataFrame indexed by update_time; empty when no rows match.
        :raises ValueError: when ``tableFlag`` is not 'stock' (the only
            table this method knows how to query).
        """
        if tableFlag != 'stock':
            raise ValueError("unsupported tableFlag: %r" % (tableFlag,))

        totalCodeStr = "','".join(tempCodeList)
        sqlStr1 = "select max(update_time),min(update_time) from stock_hq_value where stock_code in ('%s')" % totalCodeStr
        maxDate, minDate = self._queryMaxMinDate(sqlStr1)

        # Work out which date ranges still have to be downloaded.
        lackRanges = []
        if not maxDate:
            lackRanges.append((startDate, endDate))
        elif endDate < minDate or startDate > maxDate:
            lackRanges.append((startDate, endDate))
        elif startDate <= minDate:
            if minDate <= endDate < maxDate:
                lackRanges.append((startDate, minDate))
            elif endDate >= maxDate:
                lackRanges.append((startDate, minDate))
                lackRanges.append((maxDate, endDate))
        elif endDate >= maxDate:
            lackRanges.append((maxDate, endDate))

        for tempCode in tempCodeList:
            for beginDate, stopDate in lackRanges:
                self.getDataFromWind(tempCode, startDate=beginDate, endDate=stopDate, tableFlag=tableFlag)

        # Read the data back even right after a first download (the method
        # used to return None in that case).
        sqlStr = "select %s,update_time,stock_code from stock_hq_value where stock_code in ('%s') and update_time<='%s' " \
                 "and update_time>='%s'" % (nameStr, totalCodeStr, endDate, startDate)
        resultDf = pd.read_sql(sql=sqlStr, con=self.engine)
        dfList = []
        for code, tempDf in resultDf.groupby('stock_code'):
            df = pd.DataFrame(tempDf[nameStr].values, index=tempDf['update_time'], columns=[code])
            dfList.append(df)
        if not dfList:
            # pd.concat refuses an empty list.
            return pd.DataFrame()
        resultDf = pd.concat(dfList, axis=1)
        return resultDf

    def getCurrentDateData(self, tempCodeList, getDate, tableFlag='stock', nameList=None):
        """Return the cross-section of ``nameList`` fields on ``getDate``.

        MySQL is queried first; when it has no rows the snapshot is fetched
        from Wind, stored, and returned.

        :param nameList: columns to return; defaults to ['close_price'].
        :return: DataFrame indexed by stock code (empty on Wind error or
            when there are no quotes for the date).
        :raises ValueError: when ``tableFlag`` is not 'stock'.
        """
        if nameList is None:
            nameList = ['close_price']
        if tableFlag != 'stock':
            raise ValueError("unsupported tableFlag: %r" % (tableFlag,))

        totalCodeStr = "','".join(tempCodeList)
        sqlStr = "select * from stock_hq_value where stock_code in ('%s') and update_time='%s'" % (
            totalCodeStr, getDate)
        resultDf = pd.read_sql(sql=sqlStr, con=self.engine)
        if not resultDf.empty:
            resultDf.set_index('stock_code', drop=True, inplace=True)
            returnDf = resultDf[nameList]
            return returnDf

        codes = tempCodeList
        fields = self.stockFieldName
        tradeDate = getDate
        wssData = w.wss(codes=codes, fields=fields, options="tradeDate=%s;priceAdj=F;cycle=D" % tradeDate)
        if wssData.ErrorCode != 0:
            self.PrintInfoDemo.PrintLog("获取行情数据有误,错误代码" + str(wssData.ErrorCode))
            return pd.DataFrame()

        tempDf = pd.DataFrame(wssData.Data, index=fields, columns=codes).T
        tempDf.dropna(inplace=True)
        if tempDf.empty:
            self.PrintInfoDemo.PrintLog("当前日期%s无行情" % getDate)
            return pd.DataFrame()

        tempDf['update_time'] = getDate
        nameDic = {
            "open": "open_price",
            "high": "high_price",
            "low": "low_price",
            "close": "close_price",
            "mkt_cap_ard": "market_value",
        }
        tempDf.rename(columns=nameDic, inplace=True)
        tempDf['stock_code'] = tempDf.index.tolist()
        self.GetDataToMysqlDemo.GetMain(tempDf, 'stock_hq_value')
        returnDf = tempDf[nameList]
        return returnDf

    def getHQData(self, tempCode, startDate='2019-03-01', endDate='2019-05-30', tableFlag='index',
                  nameList=None):
        """Entry point: make sure the range is cached, then read it from MySQL.

        :param nameList: columns to return; defaults to ['close_price'].
        """
        if nameList is None:
            nameList = ['close_price']
        self.getLackDataToMySql(tempCode, startDate, endDate, tableFlag)
        resultDf = self.getDataFromMySql(tempCode, startDate, endDate, tableFlag=tableFlag, nameList=nameList)
        return resultDf

    def getTradeDay(self, startdate, endDate, Period=''):
        """Return trading days from Wind's ``tdays`` as 'YYYY-MM-DD' strings.

        :param Period: '' day, 'W' week, 'M' month, 'Q' quarter,
            'S' half-year, 'Y' year
        :return: list of date strings, or None when Wind reports an error.
        """
        data = w.tdays(beginTime=startdate, endTime=endDate, options="Period=%s" % Period)
        if data.ErrorCode != 0:
            self.PrintInfoDemo.PrintLog('wind获取交易日期错误,请检查!')
            return
        tradeDayList = [tradeDay.strftime('%Y-%m-%d') for tradeDay in data.Data[0]]
        return tradeDayList
def __init__(self):
    """Create the Wind/MySQL data fetcher and the logger."""
    fetcher = GetDataFromWindAndMySql()
    self.GetDataFromWindAndMySqlDemo = fetcher
    logger = PrintInfo()
    self.PrintInfoDemo = logger
class GetFundPool:
    """Load fund net values and index history from local Excel files, Wind or iFinD."""

    # Root folder of the local history cache.
    _HISTORY_DIR = r"C:\\Users\\lenovo\\PycharmProjects\\FundPoolSelect\\GetDataSource\\HistoryData\\"

    def __init__(self):
        self.PrintInfoDemo = PrintInfo()

    @classmethod
    def _fundGroupExcelPath(cls, group):
        """Return the cache file path for one downloaded group of funds.

        Both the Wind and the iFinD branch write below ``HistoryData`` so
        that ``getDataLocal`` can find the files again; the Wind branch used
        to write to a folder outside ``HistoryData`` that was never read.
        """
        return cls._HISTORY_DIR + r"FundNetValueDF\\" + "复权单位净值_Group%s.xlsx" % (str(group))

    def _readExcelDir(self, dirPath, dataFlag):
        """Read every Excel file in ``dirPath``; return None when there is none.

        A missing directory is treated like an empty one.
        """
        totalExcelNameList = os.listdir(dirPath) if os.path.isdir(dirPath) else []
        if not totalExcelNameList:
            self.PrintInfoDemo.PrintLog('本地文件中未找到历史%s数据!' % dataFlag)
            return None
        return [pd.read_excel(dirPath + excelName) for excelName in totalExcelNameList]

    def getDataLocal(self, CodeList=None, dataFlag='Fund', method='NotBench'):
        """Read cached history from the local Excel files.

        :param CodeList: codes to keep (used for ``dataFlag='Fund'`` only)
        :param dataFlag: 'Fund', 'Index', 'InitFund' or 'InitIndex'
        :param method: for 'Index': 'NotBench' reads IndexValueDf.xlsx, any
            other value reads every file in the benchMarkData folder
        :return: DataFrame (empty when nothing is cached); None for an
            unknown ``dataFlag``.
        """
        if CodeList is None:
            CodeList = []
        resultDf = pd.DataFrame()
        localPath = self._HISTORY_DIR

        if dataFlag == 'Fund':
            dfList = self._readExcelDir(localPath + r"FundNetValueDf\\", dataFlag)
            if dfList is None:
                return resultDf
            totalNetValueDf = pd.concat(dfList, axis=1, join='inner')
            targetList = list(set(totalNetValueDf.columns.tolist()).intersection(set(CodeList)))
            self.PrintInfoDemo.PrintLog('本地文件有数量:%s' % str(totalNetValueDf.shape[1]))
            self.PrintInfoDemo.PrintLog('目标数量:%s' % str(len(targetList)))
            resultDf = totalNetValueDf[targetList]
            return resultDf
        elif dataFlag == 'Index':
            if method == 'NotBench':
                localPath = localPath + "IndexValueDf.xlsx"
                try:
                    resultDf = pd.read_excel(localPath)
                except Exception:
                    # A missing or unreadable cache is not fatal: the caller
                    # falls back to downloading.
                    self.PrintInfoDemo.PrintLog('未读取到本地指数历史数据,请检查!')
                return resultDf
            else:
                dfList = self._readExcelDir(localPath + r"benchMarkData\\", dataFlag)
                if dfList is None:
                    return resultDf
                totalIndexValueDf = pd.concat(dfList, axis=1, join='outer')
                return totalIndexValueDf
        elif dataFlag == 'InitFund':
            # Initial fund pool.  Because of request limits on the quant
            # account, this file is produced by hand with the "fund screening"
            # tool of the Wind terminal; a programmatic whole-market screen
            # may replace it later.  Current screen (saved in Wind under
            # "my schemes"): (1) inception date <= 2013-11-19;
            # (2) fund size >= 600 million yuan.
            localPath = localPath + "初始基金池.xlsx"
            resultDf = pd.read_excel(localPath)
            return resultDf
        elif dataFlag == 'InitIndex':
            localPath = localPath + "初始指数池.xlsx"
            resultDf = pd.read_excel(localPath)
            return resultDf

    def getFundNetData(self, fundCodeList=None, startDate='2006-01-01', endDate=None, SourceFlag='Wind'):
        """Return adjusted net values of the funds, from cache, Wind or iFinD.

        :param endDate: last date as 'YYYY-MM-DD'; defaults to today's date,
            evaluated at call time
        :param SourceFlag: 'Wind' uses Wind, any other value uses iFinD
        :return: DataFrame; empty when the list is empty or the download fails.
        """
        if endDate is None:
            endDate = date.today().strftime('%Y-%m-%d')
        if not fundCodeList:
            self.PrintInfoDemo.PrintLog('获取的目标基金代码列表为空,请检查!')
            return pd.DataFrame()

        netValueDf = self.getDataLocal(CodeList=fundCodeList, dataFlag='Fund')
        if not netValueDf.empty:
            return netValueDf

        # Funds are requested in groups of this size.
        everyGrop = 10
        if SourceFlag == 'Wind':
            w.start()
            filed = 'NAV_adj'  # adjusted unit net value
            group = 0
            dfList = []
            for fundNum in range(0, len(fundCodeList), everyGrop):
                group = group + 1
                self.PrintInfoDemo.PrintLog('获取第%s组' % str(group))
                tempCodeList = fundCodeList[fundNum:fundNum + everyGrop]
                tempNetValue = w.wsd(codes=tempCodeList, fields=filed, beginTime=startDate, endTime=endDate,
                                     options='Fill=Previous')
                if tempNetValue.ErrorCode != 0:
                    self.PrintInfoDemo.PrintLog(infostr='wind读取基金净值数据失败,错误代码: ',
                                                otherInfo=tempNetValue.ErrorCode)
                    # Close the session on the error path as well.
                    w.close()
                    return pd.DataFrame()
                tempNetValueDf = pd.DataFrame(tempNetValue.Data, index=tempNetValue.Codes,
                                              columns=tempNetValue.Times).T
                tempNetValueDf.to_excel(self._fundGroupExcelPath(group))
                dfList.append(tempNetValueDf)
            w.close()
            netValueDf = pd.concat(dfList, axis=1, join='outer')
            return netValueDf
        else:
            # NOTE(review): credentials are hard-coded in source; they should
            # come from configuration or the environment.
            thsLogin = THS_iFinDLogin("zszq5072", "754628")
            if thsLogin not in [0, -201]:
                self.PrintInfoDemo.PrintLog('登录ifind失败,请检查!')
                return pd.DataFrame()

            group = 0
            dfNetList = []
            for fundNum in range(0, len(fundCodeList), everyGrop):
                group = group + 1
                self.PrintInfoDemo.PrintLog('获取第%s组' % str(group))
                tempCodeList = fundCodeList[fundNum:fundNum + everyGrop]
                codeListStr = ','.join(tempCodeList)
                indicators = 'adjustedNAV'
                params = 'Interval:D,CPS:1,baseDate:1900-01-01,Currency:YSHB,fill:Previous'
                data = THS_HistoryQuotes(codeListStr, indicators, params, startDate, endDate)
                if data['errorcode'] != 0:
                    self.PrintInfoDemo.PrintLog(infostr='ifind获取指数数据失败,错误代码: ',
                                                otherInfo=data['errorcode'])
                    # Do not leave the session open on the error path.
                    THS_iFinDLogout()
                    return pd.DataFrame()
                tData = THS_Trans2DataFrame(data)
                dfListIn = []
                # Group by the plain column name so every key is a scalar code.
                for code, tempdf in tData.groupby('thscode'):
                    tempdf.set_index('time', drop=True, inplace=True)
                    tempFianlDf = tempdf.rename(columns={indicators: code}).drop(labels=['thscode'], axis=1)
                    dfListIn.append(tempFianlDf)
                tempNetValueDf = pd.concat(dfListIn, axis=1, join='outer')
                tempNetValueDf.to_excel(self._fundGroupExcelPath(group))
                dfNetList.append(tempNetValueDf)
            THS_iFinDLogout()
            netValueDf = pd.concat(dfNetList, axis=1, join='outer')
            return netValueDf

    def getIndexData(self, indexCodeList=None, startDate='2006-01-01', endDate=None, SourceFlag='Wind',
                     method='NotBench'):
        """Return index history, from the local cache, Wind or iFinD.

        :param indexCodeList: index codes
        :param startDate: first date of the history
        :param endDate: last date as 'YYYY-MM-DD'; defaults to today's date,
            evaluated at call time
        :param SourceFlag: 'Wind' uses Wind, any other value uses iFinD
        :param method: forwarded to ``getDataLocal``
        :return: DataFrame; empty when the list is empty or the download fails.
        """
        if endDate is None:
            endDate = date.today().strftime('%Y-%m-%d')
        if not indexCodeList:
            self.PrintInfoDemo.PrintLog('获取的目标指数代码列表为空,请检查!')
            return pd.DataFrame()

        indexDf = self.getDataLocal(CodeList=indexCodeList, dataFlag='Index', method=method)
        if not indexDf.empty:
            return indexDf

        indexExcelPath = self._HISTORY_DIR + "IndexValueDf.xlsx"
        if SourceFlag == 'Wind':
            w.start()
            filed = 'close'
            tempIndexValue = w.wsd(codes=indexCodeList, fields=filed, beginTime=startDate, endTime=endDate,
                                   options='')
            if tempIndexValue.ErrorCode != 0:
                self.PrintInfoDemo.PrintLog(infostr='wind读取指数数据失败,错误代码: ',
                                            otherInfo=tempIndexValue.ErrorCode)
                # Close the session on the error path as well.
                w.close()
                return pd.DataFrame()
            IndexValueDf = pd.DataFrame(tempIndexValue.Data, index=tempIndexValue.Codes,
                                        columns=tempIndexValue.Times).T
            IndexValueDf.to_excel(indexExcelPath)
            self.PrintInfoDemo.PrintLog(infostr='wind读取指数数据成功,存入本地文件 ')
            w.close()
            return IndexValueDf
        else:
            # NOTE(review): credentials are hard-coded in source; they should
            # come from configuration or the environment.
            thsLogin = THS_iFinDLogin("zszq5072", "754628")
            if thsLogin not in [0, -201]:
                self.PrintInfoDemo.PrintLog('登录ifind失败,请检查!')
                return pd.DataFrame()

            codeListStr = ','.join(indexCodeList)
            indicators = 'ths_close_price_index'
            initParams = ''
            params = 'Days:Tradedays,Fill:Previous,Interval:D'
            data = THS_DateSerial(codeListStr, indicators, initParams, params, startDate, endDate)
            if data['errorcode'] != 0:
                self.PrintInfoDemo.PrintLog(infostr='ifind获取指数数据失败,错误代码: ',
                                            otherInfo=data['errorcode'])
                # Do not leave the session open on the error path.
                THS_iFinDLogout()
                return pd.DataFrame()
            tData = THS_Trans2DataFrame(data)
            dfListIn = []
            # Group by the plain column name so every key is a scalar code.
            for code, tempdf in tData.groupby('thscode'):
                tempdf.set_index('time', drop=True, inplace=True)
                tempFianlDf = tempdf.rename(columns={indicators: code}).drop(labels=['thscode'], axis=1)
                dfListIn.append(tempFianlDf)
            IndexValueDf = pd.concat(dfListIn, axis=1, join='outer')
            IndexValueDf.to_excel(indexExcelPath)
            self.PrintInfoDemo.PrintLog(infostr='ifind读取指数数据成功,存入本地文件 ')
            THS_iFinDLogout()
            return IndexValueDf

    def getMain(self, method='NotBench'):
        """Load the fund pool, index pool and their histories.

        Wind is tried first for each download and iFinD is the fallback.

        :return: dict with keys 'fundPoolDf', 'netValueDf', 'indexPoolDf'
            and 'indexValueDf'.
        """
        fundPoolDf = self.getDataLocal(dataFlag='InitFund')
        fundCodeList = fundPoolDf[u'证券代码'].tolist()
        netValueDf = self.getFundNetData(fundCodeList=fundCodeList)
        if netValueDf.empty:
            netValueDf = self.getFundNetData(fundCodeList=fundCodeList, SourceFlag='Ifind')
        self.PrintInfoDemo.PrintLog('获取基金历史净值数据完成! ')

        indexPoolDf = self.getDataLocal(dataFlag='InitIndex')
        indexCodeList = indexPoolDf[u'证券代码'].tolist()
        indexValueDf = self.getIndexData(indexCodeList=indexCodeList, method=method)
        if indexValueDf.empty:
            indexValueDf = self.getIndexData(indexCodeList=indexCodeList, SourceFlag='Ifind')
        self.PrintInfoDemo.PrintLog('获取指数历史数据完成! ')

        dicResult = {
            'fundPoolDf': fundPoolDf,
            'netValueDf': netValueDf,
            'indexPoolDf': indexPoolDf,
            'indexValueDf': indexValueDf,
        }
        return dicResult
class FundAnalyzeBenchMark:
    """Regress every fund against its own benchmark index and rank the funds.

    For each look-back period in ``self.calcDate`` a C-L regression is run
    per fund; the resulting alpha and beta-difference values are ranked and
    summed into a final ranking.  Intermediate results are cached as Excel
    files under ``ANALYZE_DIR``.
    """

    # Folder holding cached inputs and analysis output.
    ANALYZE_DIR = r"C:\\Users\\lenovo\\PycharmProjects\\FundPoolSelect\\GetDataSource\\AnalyzeDAta\\"
    # Sub-folder for the per-period benchmark regression results.
    BENCH_DIR = ANALYZE_DIR + r"Bench\\"

    def __init__(self):
        self.PrintInfoDemo = PrintInfo()
        self.riskFree = 0  # risk-free rate subtracted from every return
        # key -> (label used in outputs, number of trading days looked back).
        # The one-month period is deliberately not part of this analysis.
        self.calcDate = {
            'ThreeMonths': (u'近三月', 21 * 3),
            'SixMonths': (u'近六月', 21 * 6),
            'OneYear': (u'近一年', 21 * 12),
            'TwoYears': (u'近两年', 21 * 12 * 2),
            'ThreeYears': (u'近三年', 21 * 12 * 3),
            'TotalPeriod': (u'成立以来', np.inf),
        }
        # When True, deepAnalyze weights the period ranks with np.exp.
        self.weightFlag = False

    def _fitCLModel(self, returnDf, fundCode, indexCode):
        """Fit one C-L regression on ``returnDf`` without modifying it.

        The fund's excess return is regressed on the positive part (X1) and
        the negative part (X2) of the index's excess return.

        :return: dict with 'RSquare', 'alphaValue' (intercept) and
                 'betaDiff' (coefficient of X1 minus coefficient of X2)
        """
        excessFund = returnDf[fundCode] - self.riskFree
        excessIndex = returnDf[indexCode] - self.riskFree
        X = np.column_stack([
            excessIndex.clip(lower=0).values,  # X1: index excess return, negatives set to 0
            excessIndex.clip(upper=0).values,  # X2: index excess return, positives set to 0
        ])
        y = excessFund.values.reshape(-1)
        reg = LinearRegression().fit(X, y)
        return {
            'RSquare': reg.score(X, y),
            'alphaValue': reg.intercept_,
            'betaDiff': reg.coef_[0] - reg.coef_[1],
        }

    @staticmethod
    def _indexWithStringDates(indexValueDf):
        """Drop all-NaN columns and re-index the frame by 'YYYY-MM-DD' strings."""
        cleaned = indexValueDf.dropna(axis=1, how='all')
        dateList = [dateStr.strftime("%Y-%m-%d") for dateStr in cleaned.index.tolist()]
        return pd.DataFrame(cleaned.values, index=dateList, columns=cleaned.columns)

    @staticmethod
    def _pairReturns(fundSeries, indexSeries):
        """Return simple returns of a fund/index pair, indexed by datetime.

        Both inputs are expected to be indexed by 'YYYY-MM-DD' strings; only
        dates present in both (and without NaN) are kept.
        """
        pairDf = pd.concat([fundSeries, indexSeries], axis=1, join='inner').dropna()
        pairReturn = ((pairDf - pairDf.shift(1)) / pairDf.shift(1)).fillna(0)
        dateList = [datetime.strptime(dateStr, "%Y-%m-%d") for dateStr in pairReturn.index.tolist()]
        return pd.DataFrame(pairReturn.values, index=dateList, columns=pairReturn.columns)

    @staticmethod
    def _periodResultDf(periodResult):
        """Accept either a result DataFrame or the dict built by getAnalyzeToExcel."""
        if isinstance(periodResult, dict):
            return periodResult['resultDf']
        return periodResult

    def getRollingClModel(self, fundIndexReturnDf, fundCode, indexCode):
        """Average the C-L regression over rolling windows of 250 rows.

        With 250 rows or fewer a single regression over the whole frame is
        returned.  The input frame is never modified.

        :return: dict with 'RSquare', 'alphaValue', 'betaDiff'
        """
        rollNum = 250
        rowCount = fundIndexReturnDf.shape[0]
        if rowCount <= rollNum:
            return self._fitCLModel(fundIndexReturnDf, fundCode, indexCode)

        # A step of 0 (fewer than 10 rows beyond the window) would make range() raise.
        step = max(1, int((rowCount - rollNum) / 10))
        # NOTE(review): windows start at row `rollNum`, so the first 250 rows are
        # never used and late windows are shorter than 250 rows - confirm intent.
        windowResults = [
            self._fitCLModel(fundIndexReturnDf.iloc[knum:knum + rollNum], fundCode, indexCode)
            for knum in range(rollNum, rowCount, step)
        ]
        return {
            key: np.mean([result[key] for result in windowResults])
            for key in ('RSquare', 'alphaValue', 'betaDiff')
        }

    def getCLModel(self, fundIndexReturnDf, fundCode, indexCode):
        """Run the C-L regression once over the whole frame.

        :param fundIndexReturnDf: return frame containing both columns
        :param fundCode: column name of the fund returns
        :param indexCode: column name of the index returns
        :return: dict with 'RSquare', 'alphaValue', 'betaDiff'
        """
        return self._fitCLModel(fundIndexReturnDf, fundCode, indexCode)

    def saveDfToExcel(self, tempDf, excelPath):
        """Write ``tempDf`` to ``excelPath``; the writer is closed even on error."""
        with pd.ExcelWriter(excelPath) as writer:
            tempDf.to_excel(writer)

    def getCorrMax(self, netValueDf, indexValueDf):
        """Find, for every fund, the index with the highest positive correlation.

        The result is written to ``corrIndexDf.xlsx``; funds without any
        positively correlated index get ``indexCode = None``.

        :param netValueDf: historical fund net values, one column per fund
        :param indexValueDf: historical index values, one column per index
        """
        dicCorr = {}
        for fundCode in netValueDf:
            bestCorr = 0
            bestIndexCode = None  # reset per fund so a previous fund's match cannot leak
            for indexCode in indexValueDf:
                pairDf = pd.concat([netValueDf[fundCode], indexValueDf[indexCode]],
                                   axis=1, join='inner').dropna()
                corr = pairDf.corr().iloc[0, 1]
                if corr > bestCorr:
                    bestCorr = corr
                    bestIndexCode = indexCode
            dicCorr[fundCode] = {'corrValue': bestCorr, 'indexCode': bestIndexCode}
        corrIndexDf = pd.DataFrame(dicCorr)
        self.saveDfToExcel(corrIndexDf, self.ANALYZE_DIR + "corrIndexDf.xlsx")
        self.PrintInfoDemo.PrintLog('相关基准指数计算完成!')

    def getPeriodAnalyze(self, netValueDf, indexValueDf, Period='ThreeMonths'):
        """Run getAnalyzeToExcel for each look-back period, shortest first.

        Once the net-value history is shorter than a period, that period is
        computed on the full history and all longer periods are skipped.

        :param Period: unused, kept for interface compatibility
        :return: dict mapping period label -> result of getAnalyzeToExcel
        """
        calcResult = {}
        tempIndexValue = self._indexWithStringDates(indexValueDf)
        calcDateSort = sorted(self.calcDate.items(), key=lambda x: x[1][1])
        for _, (periodLabel, valueNum) in calcDateSort:
            self.PrintInfoDemo.PrintLog('当前回归周期为:%s' % periodLabel)
            # np.inf ("since inception") always lands in this branch.
            useWholeHistory = netValueDf.shape[0] < valueNum
            periodNetValueDf = netValueDf if useWholeHistory else netValueDf.iloc[-valueNum:]
            calcResult[periodLabel] = self.getAnalyzeToExcel(periodNetValueDf, tempIndexValue,
                                                             period=periodLabel)
            if useWholeHistory:
                break
        return calcResult

    def getAnalyzeToExcel(self, netValueDf, indexValueDf, period='成立以来'):
        """Regress every fund on its benchmark index and save the result table.

        The benchmark column is looked up as the first six characters of the
        fund code plus 'BI.WI'; funds without such a column are skipped.

        :return: dict with 'resultDf' (RSquare/Alpha/betaDiff per fund),
                 'netValueDf' and 'indexValueDf'
        """
        dicCLRSquare = {}
        dicCLAlpha = {}
        dicClBetaDiff = {}
        totalFunds = netValueDf.shape[1]
        for calcTime, fundCode in enumerate(netValueDf, start=1):
            self.PrintInfoDemo.PrintLog('总回归基金数量:%s,当前基金:%s,剩余回归基金数量:%s' % (
                str(totalFunds), fundCode, str(totalFunds - calcTime)))
            indexCode = fundCode[:6] + 'BI.WI'
            if indexCode not in indexValueDf:
                continue

            fundIndexDf = pd.concat([netValueDf[fundCode], indexValueDf[indexCode]],
                                    axis=1, join='inner').dropna()
            fundIndexReturnDf = ((fundIndexDf - fundIndexDf.shift(1)) / fundIndexDf.shift(1)).fillna(0)
            dicClResult = self.getCLModel(fundIndexReturnDf, fundCode, indexCode)
            dicCLRSquare[fundCode] = dicClResult['RSquare']
            dicCLAlpha[fundCode] = dicClResult['alphaValue']
            dicClBetaDiff[fundCode] = dicClResult['betaDiff']

        RSquareDf = pd.Series(dicCLRSquare)
        RSquareDf.name = 'RSquare'
        alphaValueDf = pd.Series(dicCLAlpha)
        alphaValueDf.name = 'Alpha'
        betaDiffDf = pd.Series(dicClBetaDiff)
        betaDiffDf.name = 'betaDiff'
        resultDf = pd.concat([RSquareDf, alphaValueDf, betaDiffDf], axis=1)
        self.saveDfToExcel(resultDf, self.BENCH_DIR + r"%s_totalDf.xlsx" % period)

        return {
            'resultDf': resultDf,
            'netValueDf': netValueDf,
            'indexValueDf': indexValueDf,
        }

    def getAnyResult(self):
        """Load previously saved analysis results from the local Excel files.

        :return: dict of period label -> result DataFrame plus 'netValueDf'
                 and 'indexValueDf'; an empty dict when anything is missing,
                 so callers never see a half-loaded cache
        """
        dicRegre = {}
        try:
            for periodStr in self.calcDate.values():
                resultPath = self.ANALYZE_DIR + r"Bench\\%s_totalDf.xlsx" % periodStr[0]
                # The files are written with their index in the first column.
                dicRegre[periodStr[0]] = pd.read_excel(resultPath, index_col=0)
            dicRegre['netValueDf'] = pd.read_excel(self.ANALYZE_DIR + "netValueDf.xlsx", index_col=0)
            dicRegre['indexValueDf'] = pd.read_excel(self.ANALYZE_DIR + "indexBenchValueDf.xlsx",
                                                     index_col=0)
        except Exception:
            # Best effort: any unreadable file means "no usable cache".
            self.PrintInfoDemo.PrintLog('未读取到本地回归分析结果数据,请检查!')
            return {}
        return dicRegre

    def deepAnalyze(self, dicAny):
        """Combine the per-period alpha/beta-difference ranks into a final rank.

        ``dicAny[label]`` may be a result DataFrame (as loaded from disk) or
        the dict produced by getAnalyzeToExcel; periods that are absent are
        skipped.  The inputs are not modified.

        :return: DataFrame of per-period total ranks plus 'finalRank',
                 sorted ascending by 'finalRank' (best fund first)
        """
        dicAlphaAndBetaDf = {}
        for periodStr in self.calcDate.values():
            periodLabel = periodStr[0]
            if periodLabel not in dicAny:
                self.PrintInfoDemo.PrintLog('缺少周期 %s 的回归结果,已跳过' % periodLabel)
                continue
            resultDf = self._periodResultDf(dicAny[periodLabel]).copy()
            resultDf['alphaValueRank'] = resultDf['Alpha'].rank(ascending=False)
            resultDf['betaDiffValueRank'] = resultDf['betaDiff'].rank(ascending=False)
            resultDf['totalRank'] = resultDf[['alphaValueRank', 'betaDiffValueRank']].sum(axis=1)
            resultDf.sort_values('totalRank', inplace=True)
            dicAlphaAndBetaDf[periodLabel] = resultDf['totalRank']

        rankDf = pd.DataFrame(dicAlphaAndBetaDf)
        if self.weightFlag:
            calcDateSort = sorted(self.calcDate.items(), key=lambda x: x[1][1])
            # Shorter periods come first in the sort and receive the larger weight.
            dicWeight = {
                periodData[1][0]: np.exp((len(calcDateSort) - position) / 4)
                for position, periodData in enumerate(calcDateSort)
            }
            WeightSe = pd.Series(dicWeight)
            rankDf['finalRank'] = (rankDf * WeightSe).sum(axis=1)
        else:
            rankDf['finalRank'] = rankDf.sum(axis=1)
        rankDf.sort_values('finalRank', inplace=True)
        return rankDf

    def getStatistical(self, dicAny, regressDf):
        """Compute risk/return indicators for the ranked funds and plot them.

        For every period a scatter of annualised excess return versus rank
        position is drawn together with a fitted regression line, saved as a
        PNG and shown.

        :param dicAny: dict holding at least 'netValueDf' and 'indexValueDf'
        :param regressDf: ranking returned by deepAnalyze (index = fund codes)
        :return: dict of DataFrames (funds x periods) keyed 'annualReturn',
                 'annualStd', 'maxDown' and 'sharpRate'
        """
        fundCodeList = regressDf.index.tolist()
        netValueDf = dicAny['netValueDf']
        tempIndexValue = self._indexWithStringDates(dicAny['indexValueDf'])

        CalcRiskReturnToExcelDemo = CalcRiskReturnToExcel()
        periodLabels = [periodStr[0] for periodStr in CalcRiskReturnToExcelDemo.calcDate.values()]
        metricKeys = ('annualReturn', 'annualStd', 'maxDown', 'sharpRate', 'calmaRate')
        dicResult = {label: {key: [] for key in metricKeys} for label in periodLabels}

        for fundCode in fundCodeList:
            indexCode = fundCode[:6] + 'BI.WI'
            tempReturn = self._pairReturns(netValueDf[fundCode], tempIndexValue[indexCode])
            resultIndicator = CalcRiskReturnToExcelDemo.GoMain(tempReturn)
            for label in periodLabels:
                fundStats = resultIndicator.loc[label, fundCode]
                indexStats = resultIndicator.loc[label, indexCode]
                periodResult = dicResult[label]
                # The return is stored relative to the benchmark; the others are absolute.
                periodResult['annualReturn'].append(fundStats['年化收益'] - indexStats['年化收益'])
                periodResult['annualStd'].append(fundStats['年化波动'])
                periodResult['maxDown'].append(fundStats['最大回撤'])
                periodResult['sharpRate'].append(fundStats['夏普比率'])
                periodResult['calmaRate'].append(fundStats['卡玛比率'])

        def metricFrame(metricKey):
            # One column per period, one row per fund.
            return pd.concat(
                [pd.DataFrame(indicator[metricKey], index=fundCodeList, columns=[stPeriod])
                 for stPeriod, indicator in dicResult.items()],
                axis=1)

        annualReturnDf = metricFrame('annualReturn')
        sharpRateDf = metricFrame('sharpRate')
        annualStdDf = metricFrame('annualStd')
        maxDownDf = metricFrame('maxDown')  # was built from 'annualStd' by mistake

        fig = plt.figure(figsize=(16, 9))
        tempDf = annualReturnDf.drop('近一月', axis=1, errors='ignore')
        colNum = 2
        rowNum = int(np.ceil(tempDf.shape[1] / 2))
        X = np.arange(annualReturnDf.shape[0]).reshape((-1, 1))  # rank position of each fund
        for axNum, dateLabel in enumerate(tempDf.columns.tolist(), start=1):
            ax = fig.add_subplot(rowNum, colNum, axNum)
            y = tempDf[dateLabel].values.reshape(-1)
            reg = LinearRegression().fit(X, y)
            plotDf = pd.DataFrame({'X': X.reshape(-1), dateLabel: y, 'LineRegress': reg.predict(X)},
                                  index=tempDf.index)
            plotDf.plot(ax=ax, kind='scatter', x='X', y=dateLabel)
            plotDf['LineRegress'].plot(ax=ax, color='r')
        plt.tight_layout()
        plt.savefig(self.BENCH_DIR + '样本基金收益统计图.png')
        plt.show()

        return {
            'annualReturn': annualReturnDf,
            'annualStd': annualStdDf,
            'maxDown': maxDownDf,
            'sharpRate': sharpRateDf,
        }

    def plotDemo(self, dicAny, regressDf, PlotFig='before'):
        """Plot and tabulate the six best (or six worst) ranked funds.

        Only funds whose since-inception regression has RSquare > 0.3 are
        considered.

        :param PlotFig: 'before' selects the first six funds of the ranking,
                        any other value the last six
        """
        longDf = self._periodResultDf(dicAny[u'成立以来']).copy()
        qualifiedCodes = set(longDf[longDf['RSquare'] > 0.3].index.tolist())
        totalFundCode = [code for code in regressDf.index.tolist() if code in qualifiedCodes]
        regressDf = regressDf.loc[totalFundCode, :]
        if PlotFig == 'before':
            demoRegre = regressDf.iloc[:6]
            nameStr = '前六'
        else:
            nameStr = '后六'
            demoRegre = regressDf.iloc[-6:]

        fundCodeList = demoRegre.index.tolist()
        netValueDf = dicAny['netValueDf']
        tempIndexValue = self._indexWithStringDates(dicAny['indexValueDf'])

        CalcRiskReturnToExcelDemo = CalcRiskReturnToExcel()
        localPath = self.BENCH_DIR
        fig = plt.figure(figsize=(16, 9))
        for plotNum, fundCode in enumerate(fundCodeList, start=1):
            indexCode = fundCode[:6] + 'BI.WI'
            tempReturn = self._pairReturns(netValueDf[fundCode], tempIndexValue[indexCode])
            CalcRiskReturnToExcelDemo.GoMain(tempReturn, toExcelPath=localPath + '%s.xls' % fundCode)
            axNum = fig.add_subplot(3, 2, plotNum)
            (1 + tempReturn).cumprod().plot(ax=axNum)
            axNum.grid()
        plt.tight_layout()
        plt.savefig(localPath + '%s走势图.png' % nameStr)
        plt.show()

        dfList = []
        for code in fundCodeList:
            riskDf = pd.read_excel(localPath + '%s.xls' % code)
            # Forward-fill the period label; assigning back avoids relying on
            # in-place modification through a column selection.
            riskDf['统计周期'] = riskDf['统计周期'].ffill()
            riskDf = riskDf.set_index([u'统计周期', u'指标'])
            dfList.append(riskDf)
        totalDf = pd.concat(dfList, axis=1)
        self.saveDfToExcel(totalDf, localPath + "totalDf_%s.xlsx" % PlotFig)

    def getMain(self):
        """Entry point: load cached results or rebuild them, then rank and plot."""
        dicAny = self.getAnyResult()
        if not dicAny:
            self.PrintInfoDemo.PrintLog('获取基金净值和指数数据...')
            GetFundPoolDemo = GetFundPool()
            self.dicResult = GetFundPoolDemo.getMain(method='Bench')
            if self.dicResult['netValueDf'].empty or self.dicResult['indexValueDf'].empty:
                self.PrintInfoDemo.PrintLog('历史数据获取失败,请检查!')
                return

            self.saveDfToExcel(self.dicResult['netValueDf'], self.ANALYZE_DIR + "netValueDf.xlsx")
            self.saveDfToExcel(self.dicResult['indexValueDf'],
                               self.ANALYZE_DIR + "indexBenchValueDf.xlsx")
            self.PrintInfoDemo.PrintLog('获取基金净值和指数数据成功!')
            dicAny = self.getPeriodAnalyze(self.dicResult['netValueDf'], self.dicResult['indexValueDf'])
            # getStatistical reads these two keys, which getPeriodAnalyze does not provide.
            dicAny['netValueDf'] = self.dicResult['netValueDf']
            dicAny['indexValueDf'] = self.dicResult['indexValueDf']

        regressDf = self.deepAnalyze(dicAny)
        self.getStatistical(dicAny=dicAny, regressDf=regressDf)
def __init__(self):
    """Set the fixed back-test window, the plotting switch and the logger."""
    self.PrintInfoDemo = PrintInfo()  # logging helper
    self.plotFlag = False  # whether to draw charts
    # Back-test window: start date and cut-off date.
    self.startDate, self.endDate = '2006-01-01', '2017-06-01'
# -*- coding: UTF-8 -*- import numpy as np import pandas as pd from scipy.optimize import minimize from PrintInfo import PrintInfo # import matplotlib.pyplot as plt PrintInfoDemo = PrintInfo() def get_smart_weight(returnDf,initX, method='mean_var',wts_adjusted=False,**modelParam): # PrintInfoDemo.PrintLog('IndexAllocation : ',method) ''' 功能:输入协方差矩阵,得到不同优化方法下的权重配置 输入: cov_mat pd.DataFrame,协方差矩阵,index和column均为资产名称 method 优化方法,可选的有min variance、risk parity、max diversification、equal weight 输出: pd.Series index为资产名,values为weight PS: 依赖scipy package ''' cov_mat = returnDf.cov() if not isinstance(cov_mat, pd.DataFrame): raise ValueError('cov_mat should be pandas DataFrame!') omega = np.matrix(cov_mat.values) # 协方差矩阵 def MaxDrawdown(return_list):
class GetProductData:
    """Load fund information and net-value history, preferring a local Excel cache.

    Each getter first tries the cached workbook; when it cannot be read the
    data is requested from Wind and the cache is (re)written.
    """

    def __init__(self):
        self.PrintInfoDemo = PrintInfo()

    def getFundInfo(self, productList=None):
        """Return the basic information of the funds in the pool.

        :param productList: fund codes without the '.OF' suffix
        :return: DataFrame (one row per fund); an empty DataFrame when Wind
                 reports an error; None when ``productList`` is empty
        """
        if not productList:
            self.PrintInfoDemo.PrintLog('未传入指数参数,请检查!')
            return

        cachePath = r"C:\\Users\\lenovo\\PycharmProjects\\fundPortfolio\\GetHistoryData\\fundInfoDf.xlsx"
        try:
            fundInfoDf = pd.read_excel(cachePath)
        except Exception:
            # Best effort: any unreadable cache falls through to Wind below.
            pass
        else:
            self.PrintInfoDemo.PrintLog(infostr='本地读取基金历史信息数据 fundInfoDf')
            return fundInfoDf

        w.start()
        self.PrintInfoDemo.PrintLog(infostr='wind读取基金历史信息数据 fundInfoDf')
        codeList = [code + '.OF' for code in productList]
        filedList = [
            'fund_setupdate', 'fund_fundscale', 'fund_scaleranking',
            'fund_mgrcomp', 'fund_type', 'fund_fundmanager',
            'fund_structuredfundornot', 'fund_firstinvesttype',
            'fund_investtype', 'fund_risklevel', 'fund_similarfundno',
            'fund_manager_geometricavgannualyieldoverbench', 'risk_sharpe',
            'fund_managementfeeratio', 'fund_fullname',
            'fund_custodianfeeratio', 'NAV_periodicannualizedreturn',
            'fund_manager_managerworkingyears', 'fund_benchmark',
            'fund_benchindexcode', 'fund_initial',
        ]
        options = "fundType=3;order=1;returnType=1;startDate=20180813;endDate=20180913;period=2;riskFreeRate=1"
        fundInfo = w.wss(codes=codeList, fields=filedList, options=options)
        if fundInfo.ErrorCode != 0:
            self.PrintInfoDemo.PrintLog(infostr='wind读取基金历史信息数据失败,错误代码:',
                                        otherInfo=fundInfo.ErrorCode)
            return pd.DataFrame()

        fundInfoDf = pd.DataFrame(fundInfo.Data, index=fundInfo.Fields, columns=codeList).T
        # Writing by path lets pandas open and close the workbook itself.
        fundInfoDf.to_excel(cachePath)
        self.PrintInfoDemo.PrintLog(infostr='wind读取基金历史信息数据成功,写入本地文件fundInfoDf.xlsx')
        return fundInfoDf

    def getFundNetValue(self, startTime, productList=None):
        """Return the adjusted net-value history of the funds in the pool.

        :param startTime: first date requested from Wind
        :param productList: fund codes without the '.OF' suffix
        :return: DataFrame (dates x funds); an empty DataFrame when Wind
                 reports an error; None when ``productList`` is empty
        """
        if not productList:
            self.PrintInfoDemo.PrintLog('未传入指数参数,请检查!')
            return

        cachePath = r"C:\\Users\\lenovo\\PycharmProjects\\fundPortfolio\\GetHistoryData\\fundNetValueDF.xlsx"
        try:
            fundNetValueDF = pd.read_excel(cachePath)
        except Exception:
            # Best effort: any unreadable cache falls through to Wind below.
            pass
        else:
            self.PrintInfoDemo.PrintLog(infostr='本地读取基金净值数据 fundNetValueDF')
            return fundNetValueDF

        w.start()
        self.PrintInfoDemo.PrintLog(infostr='wind读取基金净值数据 fundNetValueDF')
        codeList = [code + '.OF' for code in productList]
        filed = 'NAV_adj'  # adjusted unit net value
        fundNetValue = w.wsd(codes=codeList, fields=filed, beginTime=startTime,
                             endTime=datetime.today(), options='Fill=Previous')
        if fundNetValue.ErrorCode != 0:
            self.PrintInfoDemo.PrintLog(infostr='wind读取基金净值数据失败,错误代码: ',
                                        otherInfo=fundNetValue.ErrorCode)
            return pd.DataFrame()

        fundNetValueDf = pd.DataFrame(fundNetValue.Data, index=fundNetValue.Codes,
                                      columns=fundNetValue.Times).T
        # -2 is treated as a missing-value marker in the returned data.
        fundNetValueDf[fundNetValueDf == -2] = np.nan
        fundNetValueDf.to_excel(cachePath)
        self.PrintInfoDemo.PrintLog(infostr='wind读取基金净值数据成功,写入本地文件fundNetValueDF.xlsx ')
        return fundNetValueDf
class AssetAllocationMain:
    """Back-test an asset-class allocation model on a fixed set of indices."""

    def __init__(self):
        self.PrintInfoDemo = PrintInfo()  # logging helper
        self.plotFlag = False  # whether to draw charts
        self.startDate = '2006-01-01'
        # The back-test runs up to today.
        self.endDate = date.today().strftime('%Y-%m-%d')

    def getParam(self):
        """Return the asset-class indices as a code -> display-name mapping."""
        return {
            '000016.SH': u'上证50',
            '000300.SH': u'沪深300',
            '000905.SH': u'中证500',
            'CBA00601.CS': u'中债国债总财富指数',
            'AU9999.SGE': u'黄金9999',
        }

    def calcAssetAllocation(self, method, IndexAllocationParam={}):
        """Re-balance every 21 rows using the previous 250 rows of returns.

        :param method: optimisation method forwarded to IA.get_smart_weight
        :param IndexAllocationParam: when non-empty, its 'AllocationParam'
               entry is forwarded as ``allocationParam``
        :return: (portfolio return Series named 'portfolio',
                  DataFrame of weights with one row per re-balance date)
        """
        returnsDf = self.indexReturnDf
        periodReturns = []  # portfolio performance per holding period
        periodWeights = []  # weights held in each period
        self.PrintInfoDemo.PrintLog(infostr='回测大类资产配置组合...... ')

        # Optional extra keyword for the optimiser.
        extraArgs = {}
        if IndexAllocationParam:
            extraArgs['allocationParam'] = IndexAllocationParam['AllocationParam']

        for k in range(250, returnsDf.shape[0], 21):
            rebalanceDate = returnsDf.index.tolist()[k]
            window = returnsDf.iloc[k - 250:k]
            if k != 250:
                # Later optimisations start from the previous weights.
                initX = weight
            else:
                # The first optimisation starts from equal weights.
                assetCount = window.shape[1]
                initX = pd.Series([1 / assetCount] * assetCount, index=window.columns)

            weight = IA.get_smart_weight(returnDf=window,
                                         method=method,
                                         initX=initX,
                                         wts_adjusted=False,
                                         **extraArgs)
            holdingReturn = (weight * returnsDf.iloc[k:k + 21]).sum(axis=1)
            weight.name = rebalanceDate
            periodReturns.append(holdingReturn)
            periodWeights.append(weight)

        totalPofolio = pd.concat(periodReturns, axis=0)
        totalPofolio.name = 'portfolio'
        weightDf = pd.concat(periodWeights, axis=1).T
        self.PrintInfoDemo.PrintLog(infostr='回测完成! ')
        return totalPofolio, weightDf

    def calcMain(self, method='mean_var', **IndexAllocationParam):
        """Main entry: fetch index history, build returns, run the back-test."""
        self.assetIndex = self.getParam()
        indexCodes = list(self.assetIndex.keys())
        indexDataDf = GetIndexData().getHisData(indexCodeList=indexCodes,
                                                startDate=self.startDate,
                                                endDate=self.endDate)

        # Simple return series of every index.
        previousDf = indexDataDf.shift(1)
        self.indexReturnDf = (indexDataDf - previousDf) / previousDf

        # Back-test the allocation on those returns.
        return self.calcAssetAllocation(method, IndexAllocationParam)
class AssetModelImprove:
    """Study how the ``rate`` parameter of an allocation model affects results.

    Back-tests are run over a grid of ``rate`` values, cached in a pickle
    file, and then summarised in charts and Excel tables.
    """

    # Benchmark column expected in every back-test result.
    BENCH_NAME = u'60.0%沪深300+40.0%中债国债总财富指数'

    def __init__(self):
        self.PrintInfoDemo = PrintInfo()
        self.CalcRiskReturnToExcelDemo = CalcRiskReturnToExcel()

    def ResearchModel(self, calcNum=50, method='target_risk'):
        """Back-test ``calcNum`` evenly spaced rates in [0, 1] and pickle the results.

        :return: dict mapping 'rate=<value>' -> result of fundPortfolio.setMain
        """
        modelResult = {}
        fundPortfolioDemo = fundPortfolio()
        for timeBack, rate in enumerate(np.linspace(start=0, stop=1, num=calcNum), start=1):
            self.PrintInfoDemo.PrintLog('回测第%s次' % str(timeBack))
            modelResult['rate=' + str(rate)] = fundPortfolioDemo.setMain(method=method, rate=rate)

        # `with` closes the file even when pickling fails.
        with open(method + 'modelResult.pkl', 'wb') as pickleFile:
            pickle.dump(modelResult, pickleFile)
        return modelResult

    # Load the locally cached results, or run the back-tests.
    def getData(self, method):
        """Return (risk/return table per rate, return series of every portfolio).

        :return: ``returnAndRiskTotal`` with one float-named column per rate,
                 and ``portFolioDf`` with one column per rate plus the
                 benchmark as last column
        :raises ValueError: when no back-test result is available
        """
        try:
            # NOTE: pickle must only be used on files this program wrote itself.
            with open(method + 'modelResult.pkl', 'rb') as fileResult:
                modelResult = pickle.load(fileResult)
        except Exception:
            # Best effort: a missing or unreadable cache triggers a fresh run.
            modelResult = self.ResearchModel(method=method)

        if not modelResult:
            raise ValueError('modelResult is empty, nothing to analyse')

        dflist = []
        dfReturnAndRisk = {}
        lastKey = None
        for keyRate, rateResult in modelResult.items():
            # rename() returns a copy, so the cached result is left untouched.
            dflist.append(rateResult['pofolioAndBench'][u'投资组合'].rename(keyRate))
            dfReturnAndRisk[keyRate] = rateResult['riskReturndf'][u'投资组合'].to_dict()
            lastKey = keyRate
        # The benchmark is taken from the last result only.
        dflist.append(modelResult[lastKey]['pofolioAndBench'][self.BENCH_NAME])

        returnAndRiskTotal = pd.DataFrame(dfReturnAndRisk)
        # 'rate=0.25' -> 0.25 so the columns can serve as a numeric x axis.
        nameDic = {rateStr: float(rateStr[5:]) for rateStr in returnAndRiskTotal.columns}
        returnAndRiskTotal.rename(columns=nameDic, inplace=True)

        # Historical back-test data of all portfolios.
        portFolioDf = pd.concat(dflist, axis=1)
        return returnAndRiskTotal, portFolioDf

    # Plot every risk/return indicator against rate.
    def researchRiskReturn(self, method, returnAndRiskTotal, newFolder):
        """Save one figure per indicator with one subplot per statistics period.

        ``returnAndRiskTotal`` is expected to have (period, indicator) tuples
        as its row index.
        """
        mulIndexList = returnAndRiskTotal.index.tolist()
        datePeriodList = list(np.unique([indexTu[0] for indexTu in mulIndexList]))
        riskReturnIndexList = list(np.unique([indexTu[1] for indexTu in mulIndexList]))
        figCol = 2
        figRow = int(np.ceil(len(datePeriodList) / figCol))
        for riskReturnIndex in riskReturnIndexList:
            fig = plt.figure(figsize=(16, 9))
            for figTime, datePeriod in enumerate(datePeriodList, start=1):
                fig.tight_layout()
                axNum = fig.add_subplot(figRow, figCol, figTime)
                returnAndRiskTotal.loc[datePeriod].loc[riskReturnIndex].plot(ax=axNum)
                axNum.set_title(datePeriod)
                axNum.set_xlabel('rate')
                axNum.set_ylabel(riskReturnIndex)
            namePosition = newFolder + ('%s.png' % (method + riskReturnIndex))
            plt.savefig(namePosition)

    # Study and plot the back-test history of all portfolios.
    def researchTotalPortfolio(self, portFolioDf, method, newFolder):
        """Pick about five evenly spaced portfolios plus the benchmark and chart them.

        :return: the selected columns of ``portFolioDf``
        """
        fig = plt.figure(figsize=(16, 9))
        ax1 = fig.add_subplot(111)
        columnCount = portFolioDf.shape[1]
        # A step of 0 (fewer than five columns) would make range() raise.
        step = max(1, int(columnCount / 5))
        seletFiveList = list(range(2, columnCount, step))  # take five portfolio classes
        seletFiveAndBenchList = seletFiveList + [-1]  # -1 = benchmark (last column)
        targetDf = portFolioDf.iloc[:, seletFiveAndBenchList]
        self.CalcRiskReturnToExcelDemo.GoMain(targetDf, toExcelPath=newFolder + u'五类风险等级.xls')
        (1 + targetDf).cumprod().plot(ax=ax1)
        ax1.set_title(u'不同rate走势对比图')
        namePosition = newFolder + ('%s.png' % (method + '不同rate下投资组合走势对比图'))
        plt.savefig(namePosition)
        return targetDf

    # Compare the portfolios with the underlying products.
    def researchPortfolioFund(self, targetDf, usefulReturnDf, newFolder, method):
        """Chart every selected portfolio against the products and save the indicators."""
        fig = plt.figure(figsize=(16, 9))
        figCol = 2
        figRow = int(np.ceil(targetDf.shape[1] / figCol))
        figTime = 0
        for rateName in targetDf.columns:
            if rateName.find('rate') == -1:
                continue
            figTime = figTime + 1
            fig.tight_layout()
            tempDf = pd.concat([targetDf[rateName], usefulReturnDf], axis=1, join='inner')
            isLastPortfolio = figTime == targetDf.shape[1] - 1
            if not isLastPortfolio:
                axNum = fig.add_subplot(figRow, figCol, figTime)
                (1 + tempDf).cumprod().plot(ax=axNum, legend=False)
            else:
                if figTime % 2 == 1:
                    # An odd last chart gets a full-width cell on the last row.
                    axNum = fig.add_subplot(figRow, figCol - 1, figRow)
                else:
                    axNum = fig.add_subplot(figRow, figCol, figTime)
                # Only the last chart carries the legend, with a generic 'rate' label.
                upDateName = {colName: 'rate' for colName in tempDf.columns
                              if colName.find('rate') != -1}
                tempDf.rename(columns=upDateName, inplace=True)
                (1 + tempDf).cumprod().plot(ax=axNum)
                box = axNum.get_position()
                axNum.set_position([box.x0, box.y0, box.width * 0.5, box.height])
                axNum.legend(bbox_to_anchor=(1.1, 1), ncol=1)
            nameStr = str(round(float(rateName[5:]), 2))
            axNum.set_title('rate = ' + nameStr)

        namePosition = newFolder + ('%s.png' % (method + u'不同投资组合与产品对比走势图'))
        plt.savefig(namePosition)

        # Historical risk/return indicators of the portfolios and the products.
        fundNameList = [rateName for rateName in targetDf.columns if rateName.find('rate') != -1]
        nameDic = {rateName: rateName[:5] + str(round(float(rateName[5:]), 2))
                   for rateName in fundNameList}
        investAndFundDf = pd.concat([targetDf[fundNameList], usefulReturnDf], axis=1, join='inner')
        investAndFundDf.rename(columns=nameDic, inplace=True)
        self.CalcRiskReturnToExcelDemo.GoMain(investAndFundDf,
                                              toExcelPath=newFolder + u'五类风险等级与底层产品对比.xls')

    def calcResearch(self, method):
        """Run the complete study for ``method`` and store all charts and tables."""
        # Create the folder that receives the results.
        fundPortfolioDemo = fundPortfolio()
        newFolder = fundPortfolioDemo.fileMake(u'用户分层投资组合' + method)

        # Historical net values of the underlying products.
        SetPortfolioDemo = SetPortfolio()
        dicResult, resultDf = SetPortfolioDemo.goMain()
        # Column codes carry a three-character suffix that dicProduct keys lack.
        nameDic = {keyName: SetPortfolioDemo.dicProduct[keyName[:-3]] for keyName in resultDf}
        resultDf.rename(columns=nameDic, inplace=True)
        usefulNetDf = resultDf.dropna(axis=0)
        usefulReturnDf = (usefulNetDf - usefulNetDf.shift(1)) / usefulNetDf.shift(1)
        usefulReturnDf.fillna(0, inplace=True)

        # Historical returns and risk indicators of all portfolios.
        returnAndRiskTotal, portFolioDf = self.getData(method=method)

        # Five risk levels: chart, store and return them.
        targetDf = self.researchTotalPortfolio(portFolioDf, method, newFolder)

        # Five risk levels versus the underlying products.
        self.researchPortfolioFund(targetDf, usefulReturnDf, newFolder, method)

        # Risk/return indicators versus rate.
        self.researchRiskReturn(method=method,
                                returnAndRiskTotal=returnAndRiskTotal,
                                newFolder=newFolder)
class fundPortfolio:
    """Build a fund portfolio on top of an asset-class allocation and back-test it."""

    def __init__(self):
        self.PrintInfoDemo = PrintInfo()  # logging helper
        # Main folder that receives the back-test results.
        self.PathFolder = r'C:\\Users\\lenovo\\Desktop\\资产配置研究\\'

    # Weights of the portfolio for every re-balancing period.
    def getPortfolioWeightDf(self, IndexWeightDf, dicResult, resultDf):
        """Translate asset-class weights into per-fund weights.

        The weights start at the re-balance date just before the first date
        on which every fund has a net value (or at the very first re-balance
        date when there is no earlier one).

        :param IndexWeightDf: asset-class weights, one row per re-balance date
        :param dicResult: asset-class code -> list of fund codes
        :param resultDf: fund net values, one column per fund
        :return: (positionDf, usefulNetDf); None when no re-balance date
                 lies on or after the first usable net-value date
        """
        usefulNetDf = resultDf.dropna(axis=0)
        firstNetDate = usefulNetDf.index.tolist()[0]

        # Position of the first re-balance date with usable net values.
        totalDate = IndexWeightDf.index.tolist()
        firstPos = next(
            (pos for pos, assetDate in enumerate(totalDate) if assetDate >= firstNetDate),
            None)
        if firstPos is None:
            self.PrintInfoDemo.PrintLog(infostr='可用净值日期最小值,大于大类资产可用日期,请检查 ')
            return
        # Clamp at 0: a start position of -1 would select only the last row.
        usefulIndexWeightDf = IndexWeightDf.iloc[max(firstPos - 1, 0):]

        positionList = []
        for adjustDate in usefulIndexWeightDf.index.tolist():  # re-balance dates
            fundWeightDic = {}
            tempDicIndexWeight = usefulIndexWeightDf.loc[adjustDate].to_dict()
            for indexCode, assetWeight in tempDicIndexWeight.items():
                # Spread the asset-class weight over its products.
                fundWeightDic.update(self.getFundWeight(assetWeight, dicResult[indexCode]))
            positionList.append(pd.DataFrame(fundWeightDic, index=[adjustDate]))
        positionDf = pd.concat(positionList, axis=0)  # weights per re-balancing period
        return positionDf, usefulNetDf

    # Spread an asset-class weight over concrete products.
    def getFundWeight(self, assetWeight, fundCodeList, flag='equal'):
        """Split ``assetWeight`` equally over ``fundCodeList``.

        :return: fund code -> weight; an empty dict for any ``flag`` other
                 than 'equal'
        """
        if flag != 'equal':
            return {}
        fundCount = len(fundCodeList)
        return {code: assetWeight / fundCount for code in fundCodeList}

    # Back-test the portfolio.
    def backPofolio(self, positionDf, usefulNetDf):
        """Compute the portfolio return series from per-period weights.

        :return: Series named '投资组合' covering the span between the first
                 net-value date and the last re-balance date
        """
        usefulNetReturnDf = ((usefulNetDf - usefulNetDf.shift(1)) / usefulNetDf.shift(1)).fillna(0)

        portfolioBackList = []
        positionDateList = positionDf.index.tolist()
        for dateNum in range(len(positionDateList) - 1):
            isFirstPeriod = dateNum == 0
            startDate = usefulNetDf.index[0] if isFirstPeriod else positionDateList[dateNum]
            tempNetReturnDf = usefulNetReturnDf.loc[startDate:positionDateList[dateNum + 1]]
            tempPorfolioReturn = (tempNetReturnDf * positionDf.loc[positionDateList[dateNum]]).sum(axis=1)
            # Later periods start on the previous period's end date; drop that duplicate row.
            portfolioBackList.append(tempPorfolioReturn if isFirstPeriod else tempPorfolioReturn[1:])

        portfolioSe = pd.concat(portfolioBackList, axis=0)
        portfolioSe.name = u'投资组合'
        return portfolioSe

    # File management.
    def fileMake(self, newFoldName):
        """Make sure the result folder ``newFoldName`` exists and return its path."""
        if not os.path.exists(self.PathFolder):
            os.makedirs(self.PathFolder)

        newFolder = self.PathFolder + newFoldName + "\\"
        if not os.path.exists(newFolder):
            os.makedirs(newFolder)
        return newFolder

    def getBigAsset(self, method, param):
        """Run the asset-class allocation model.

        :param param: optional dict; its 'rate' entry overrides the default
               allocation parameter of the rate-based methods
        :return: (AssetAllocationMain instance, portfolio returns,
                  asset-class weights, name used for titles and sheets)
        """
        self.PrintInfoDemo.PrintLog(infostr='大类资产配置模型 ', otherInfo=method)
        # Default allocation parameter of the methods that take one.
        defaultRates = {'target_maxdown': 0.3, 'target_risk': 0.3, 'risk_parity': 'equal'}
        defineFlag = method in defaultRates
        if defineFlag:
            AllocationParam = param.get('rate', defaultRates[method]) if param else defaultRates[method]
            nameStr = ' rate= ' + str(AllocationParam)  # chart title and Excel sheet name
        else:
            nameStr = method

        AssetAllocationMainDemo = AssetAllocationMain()
        if defineFlag:
            totalPofolio, IndexWeightDf = AssetAllocationMainDemo.calcMain(
                method=method, AllocationParam=AllocationParam)
        else:
            totalPofolio, IndexWeightDf = AssetAllocationMainDemo.calcMain(method=method)
        self.PrintInfoDemo.PrintLog(infostr='大类资产配置模型初始化完成!')
        return AssetAllocationMainDemo, totalPofolio, IndexWeightDf, nameStr

    def getFundPool(self, AssetAllocationMainDemo, IndexWeightDf):
        """Build the target fund pool and back-test it with the asset-class weights."""
        self.PrintInfoDemo.PrintLog(infostr='生成目标基金产品池...... ')
        SetPortfolioDemo = SetPortfolio(assetIndex=AssetAllocationMainDemo.assetIndex,
                                        backDate=date.today().strftime('%Y-%m-%d'))
        dicResult, resultDf = SetPortfolioDemo.goMain()
        self.PrintInfoDemo.PrintLog(infostr='生成目标基金产品池完成! ')

        # Back-test the product pool with the asset-class weights.
        self.PrintInfoDemo.PrintLog(infostr='目标产品池回测... ')
        positionDf, usefulNetDf = self.getPortfolioWeightDf(IndexWeightDf, dicResult, resultDf)
        portfolioSe = self.backPofolio(positionDf, usefulNetDf)
        self.PrintInfoDemo.PrintLog(infostr='目标产品池回测完成! ')
        return portfolioSe, positionDf, SetPortfolioDemo, usefulNetDf

    def riskAndReturnCalc(self, method, nameStr, pofolioAndBench, newFold):
        """Compute the risk/return indicators, write them to Excel and log them."""
        CalcRiskReturnToExcelDemo = CalcRiskReturnToExcel()
        filePath = newFold + '风险收益指标' + nameStr + '.xls'
        riskReturndf = CalcRiskReturnToExcelDemo.GoMain(pofolioAndBench, toExcelPath=filePath)
        self.PrintInfoDemo.PrintLog(infostr="投资组合风险收益指标: ", otherInfo=riskReturndf)
        return riskReturndf

    def plotFigureResult(self, nameStr, pofolioAndBench, tempPositionDf, newFold, labels):
        """Plot cumulative returns (top) and stacked position weights (bottom)."""
        fig = plt.figure(figsize=(16, 9))
        ax1 = fig.add_subplot(211)
        pofolioAndBenchAcc = (1 + pofolioAndBench).cumprod()
        pofolioAndBenchAcc.plot(ax=ax1)
        box = ax1.get_position()
        ax1.set_position([box.x0, box.y0, box.width * 1.02, box.height])
        ax1.legend(bbox_to_anchor=(1.28, 0.8), ncol=1)
        ax1.grid()
        ax1.set_title(nameStr)

        ax2 = fig.add_subplot(212)
        color = [
            '#36648B', '#458B00', '#7A378B', '#8B0A50', '#8FBC8F', '#B8860B',
            '#FFF68F', '#FFF5EE', '#FFF0F5', '#FFEFDB',
        ]
        datestrList = [datetime.strftime(dateStr, '%Y-%m-%d')
                       for dateStr in tempPositionDf.index.tolist()]
        for i in range(tempPositionDf.shape[1]):
            ax2.bar(datestrList,
                    tempPositionDf.iloc[:, i],
                    color=color[i % len(color)],  # cycle when there are more than ten assets
                    bottom=tempPositionDf.iloc[:, :i].sum(axis=1))  # stack on the previous bars
        box = ax2.get_position()
        ax2.set_position([box.x0, box.y0, box.width * 1.02, box.height])
        ax2.legend(labels=labels, bbox_to_anchor=(1, 0.8), ncol=1)
        for tick in ax2.get_xticklabels():
            tick.set_rotation(90)
        plt.tight_layout()
        plt.savefig(newFold + ('%s.png' % (nameStr)))
        plt.show()

    def setMain(self, method='risk_parity', productFlag=True, **param):
        """Run allocation, optional fund back-test, plotting and indicators.

        :param productFlag: True to back-test the fund pool, False to report
               the asset-class portfolio only
        :return: dict with 'pofolioAndBench', 'riskReturndf', 'positionDf'
        """
        result = {}  # collected results
        AssetAllocationMainDemo, totalPofolio, IndexWeightDf, nameStr = self.getBigAsset(
            method=method, param=param)
        totalPofolio.name = u'大类资产组合'

        # Benchmarks: the two indices and a 60/40 mix of them.
        indexReturnDf = AssetAllocationMainDemo.indexReturnDf
        benchCodes = ['000300.SH', 'CBA00601.CS']
        # rename() without inplace works on a copy instead of a slice of indexReturnDf.
        indexDf1 = indexReturnDf[benchCodes].rename(columns={
            '000300.SH': u'沪深300',
            'CBA00601.CS': u'中债国债总财富指数',
        })
        weightSe = pd.Series([0.6, 0.4], index=benchCodes)
        indexDf2 = (indexReturnDf[benchCodes] * weightSe).sum(axis=1)
        indexDf2.name = u"%s沪深300+%s中债国债总财富指数" % (
            str(weightSe['000300.SH'] * 100) + '%',
            str(weightSe['CBA00601.CS'] * 100) + '%')
        indexDf = pd.concat([indexDf1, indexDf2], axis=1, join='inner').fillna(0)

        newFold = self.fileMake(newFoldName=method)
        if productFlag:
            portfolioSe, positionDf, SetPortfolioDemo, usefulNetDf = self.getFundPool(
                AssetAllocationMainDemo, IndexWeightDf)
            pofolioAndBench = pd.concat([indexDf, portfolioSe, totalPofolio], axis=1, join='inner')
            labels = [SetPortfolioDemo.dicProduct[code[:6]] for code in positionDf.columns.tolist()]
            tempPositionDf = positionDf
        else:
            pofolioAndBench = pd.concat([indexDf, totalPofolio], axis=1, join='inner')
            labels = [AssetAllocationMainDemo.assetIndex[code]
                      for code in IndexWeightDf.columns.tolist()]
            tempPositionDf = IndexWeightDf

        self.plotFigureResult(nameStr, pofolioAndBench, tempPositionDf, newFold, labels)
        riskReturndf = self.riskAndReturnCalc(method=method,
                                              nameStr=nameStr,
                                              pofolioAndBench=pofolioAndBench,
                                              newFold=newFold)

        result['pofolioAndBench'] = pofolioAndBench
        result['riskReturndf'] = riskReturndf
        result['positionDf'] = tempPositionDf
        return result
class FamaFrenchRegression:
    """Build SMB / HML factor returns from index constituents.

    Constituents and quotes are read through ``GetDataFromWindAndMySql``;
    the factor returns and the joined sample are written to Excel files.
    """

    def __init__(self):
        # Data-access helper used for constituents and stock quotes.
        self.GetDataFromWindAndMySqlDemo = GetDataFromWindAndMySql()
        # Logging helper.
        self.PrintInfoDemo = PrintInfo()

    @staticmethod
    def _splitFactorGroups(stockDf):
        """Split one cross-section into the four factor buckets.

        Args:
            stockDf: DataFrame indexed by stock code; the columns
                ``market_value`` and ``pb_lf`` are read here.

        Returns:
            dict with keys ``'SM'`` / ``'BM'`` (market value at or below the
            30th percentile / above the 70th) and ``'LP'`` / ``'HP'``
            (``1 / pb_lf`` at or below its 30th percentile / above its 70th),
            each a list of stock codes.
        """
        marketValue = stockDf['market_value']
        ME30 = np.percentile(marketValue, 30)
        ME70 = np.percentile(marketValue, 70)

        # Only rows whose fields are all strictly positive and present take
        # part in the book-to-price ranking.
        positiveDf = stockDf[stockDf > 0].dropna()
        bookToPrice = 1 / positiveDf['pb_lf']
        BP30 = np.percentile(bookToPrice, 30)
        BP70 = np.percentile(bookToPrice, 70)

        return {
            'SM': stockDf[marketValue <= ME30].index.tolist(),
            'BM': stockDf[marketValue > ME70].index.tolist(),
            'LP': bookToPrice[bookToPrice <= BP30].index.tolist(),
            'HP': bookToPrice[bookToPrice > BP70].index.tolist(),
        }

    @staticmethod
    def _groupMeanReturn(closeDf, preDate, curDate, codes):
        """Return the equal-weighted mean simple return of ``codes``.

        Args:
            closeDf: Close prices, rows labelled by date and columns by code.
            preDate: Row label of the previous observation.
            curDate: Row label of the current observation.
            codes: Stock codes in the bucket.

        Returns:
            Mean of ``(close[curDate] - close[preDate]) / close[preDate]``
            over ``codes``, ignoring NaN.
        """
        # ``.ix`` was removed in pandas 1.0. ``reindex`` keeps the lookup
        # label-based and turns codes without a price column into NaN
        # (skipped by ``mean``) rather than raising KeyError.
        prices = closeDf.reindex(columns=codes)
        preClose = prices.loc[preDate]
        curClose = prices.loc[curDate]
        return ((curClose - preClose) / preClose).mean()

    def getFacrotReturn(self, resultPath, dateList, indexCode):
        """Compute SMB and HML between consecutive dates and save them.

        Args:
            resultPath: Output folder prefix (concatenated as-is with the
                file name).
            dateList: Observation dates, in the form the data helper accepts;
                the first and last entries bound the quote request.
            indexCode: Code of the index whose constituents form the universe.

        Returns:
            DataFrame with one row per date (from the second usable date
            onward) and the columns ``SMB`` and ``HML``.
        """
        totalCodeSet = set()
        dicTempResult = {}
        self.PrintInfoDemo.PrintLog("获取宽基指数成分股,并计算产品起止日期内规模因子,账面市值因子的收益")
        self.PrintInfoDemo.PrintLog(
            "为减少接口频繁请求成分股数据导致掉线,这里每次调用后采用睡眠函数,间隔0.2秒..")
        for dateStr in dateList:
            universeDf = self.GetDataFromWindAndMySqlDemo.getIndexConstituent(
                indexCode=indexCode, getDate=dateStr)
            codeList = universeDf['stock_code'].tolist()
            totalCodeSet.update(codeList)
            tempStockDf = self.GetDataFromWindAndMySqlDemo.getCurrentDateData(
                tempCodeList=codeList,
                getDate=dateStr,
                tableFlag='stock',
                nameList=['close_price', 'market_value', 'pb_lf'])
            # Throttle after every round of requests, including dates that
            # return no data, as the log message above announces.
            time.sleep(0.2)
            if tempStockDf.empty:
                continue
            dicTempResult[dateStr] = self._splitFactorGroups(tempStockDf)
        self.PrintInfoDemo.PrintLog("产品起止日期内规模因子,账面市值因子的收益计算完成")

        self.PrintInfoDemo.PrintLog("批量获取产品起止日期内的所有成分股行情数据...")
        totalStockCloseDf = self.GetDataFromWindAndMySqlDemo.getCurrentNameData(
            tempCodeList=list(totalCodeSet),
            startDate=dateList[0],
            endDate=dateList[-1],
            tableFlag='stock',
            nameStr='close_price')
        self.PrintInfoDemo.PrintLog("产品起止日期内的所有成分股行情数据获取完成!")

        sortedDates = sorted(dicTempResult)
        dicResult = {}
        # Each date's buckets are valued over the move from the previous
        # usable date to that date.
        for preDateStr, dateStr in zip(sortedDates, sortedDates[1:]):
            meanReturn = {
                groupName: self._groupMeanReturn(totalStockCloseDf,
                                                 preDateStr, dateStr, codes)
                for groupName, codes in dicTempResult[dateStr].items()
            }
            dicResult[dateStr] = {
                'SMB': meanReturn['SM'] - meanReturn['BM'],
                # NOTE(review): this is low-B/P minus high-B/P, the opposite
                # sign of the usual "high minus low"; kept as in the original
                # -- confirm the intended sign before relying on it.
                'HML': meanReturn['LP'] - meanReturn['HP'],
            }

        resultDf = pd.DataFrame(dicResult).T
        resultDf.to_excel(resultPath + '规模因子账面市值因子(%s成分股).xlsx' % indexCode)
        self.PrintInfoDemo.PrintLog("产品起止日期内的SMB,HML收益率计算完成,存入本地!")
        return resultDf

    def calcMain(
        self,
        closePriceSe,
        resultPath,
        indexCode='000016.SH',
    ):
        """Join the product's simple returns with the factor returns and save.

        Args:
            closePriceSe: Price (or net value) series of the product; its
                index supplies the observation dates.
            resultPath: Output folder prefix (concatenated as-is).
            indexCode: Index whose constituents form the factor universe.

        Returns:
            None. The joined sample is written to
            ``resultPath + '三因子样本数据.xlsx'``.
        """
        self.PrintInfoDemo.PrintLog("开始计算fama-french三因子模型,采用的宽基指数为%s" %
                                    indexCode)
        prevClose = closePriceSe.shift(1)
        tempReturn = (closePriceSe - prevClose) / prevClose
        tempReturn.name = closePriceSe.name
        factorReturnDf = self.getFacrotReturn(
            resultPath,
            dateList=tempReturn.index.tolist(),
            indexCode=indexCode)
        calcRusultDf = pd.concat([factorReturnDf, tempReturn],
                                 axis=1,
                                 join='inner')
        calcRusultDf.to_excel(resultPath + '三因子样本数据.xlsx')
def __init__(self):
    """Set up the instance with a ``PrintInfo`` helper on ``PrintInfoDemo``."""
    self.PrintInfoDemo = PrintInfo()
def __init__(self):
    """Set up the logging helper and the root folder for backtest results."""
    # The original also formatted today's date into a local that was never
    # read; that dead computation has been dropped.
    self.PrintInfoDemo = PrintInfo()  # logging helper
    # Root folder where backtest results are stored.
    # NOTE(review): this is a raw string, so every "\\" below is two literal
    # backslashes in the resulting path -- confirm that is intended.
    self.PathFolder = r'C:\\Users\\lenovo\\Desktop\\资产配置研究\\'