def get_stock_fund_univ(dateFrom, dateTo, ifFlexible): # 2001010201000000 偏股混合型, 2001010101000000 普通股票型, 2001010204000000 灵活配置型 # ifFlexible = True表示否包含灵活配置型中的实际偏股型基金 # 这个地方从ChinaMutualFundDescription还是从ChinaMutualFundSector读取作为table a都没关系 # 结果是一样的,因为之前不涉及读取bchmk的数据所以直接从表ChinaMutualFundSector读的 sqlStr = 'select a.F_INFO_WINDCODE, b.S_INFO_SECTORENTRYDT, b.S_INFO_SECTOREXITDT, ' \ 'b.S_INFO_SECTOR, a.F_INFO_BENCHMARK from ChinaMutualFundDescription a ' \ 'left join ChinaMutualFundSector b ' \ 'on a.F_INFO_WINDCODE = b.F_INFO_WINDCODE ' \ 'where b.S_INFO_SECTOR in ' \ '(\'2001010201000000\', \'2001010101000000\', \'2001010204000000\') and ' \ 'b.S_INFO_SECTORENTRYDT <= {0} and ' \ '(b.S_INFO_SECTOREXITDT >= {1} or b.S_INFO_SECTOREXITDT is null) ' \ 'order by F_INFO_WINDCODE, S_INFO_SECTORENTRYDT'.format(dateTo, dateFrom) # entrydt is null and exitdt is null and cur_sign = # 1这种都是只有个代码但还未成立的,没有其他数据就不用选出来了 fundInfo = get_data_sql(sqlStr, 'wind') fundInfo.columns = [ 'Fund_Code', 'Date_In', 'Date_Out', 'Info_Sector', 'Bchmk' ] fundInfo['Date_In'] = pd.to_datetime(fundInfo['Date_In']) fundInfo['Date_Out'] = pd.to_datetime(fundInfo['Date_Out']) # 对零配型基金贴偏股或非偏股标签,把零配中的非股票先排除,再进行后面的时间对应操作 fundInfoStk = fundInfo[fundInfo['Info_Sector'] != '2001010204000000'] if ifFlexible: fundInfoMix = fundInfo[fundInfo['Info_Sector'] == '2001010204000000'] fundInfoMix = attach_stk_label(fundInfoMix)[0] fundInfoMix.drop('Stk_Weight', axis=1, inplace=True) fundInfo = pd.concat([fundInfoStk, fundInfoMix], axis=0, ignore_index=True) else: fundInfo = fundInfoStk.copy() if len(fundInfo) > 0: days = get_trading_days(dateFrom, dateTo) days['Date'] = days.index repNum = fundInfo.shape[0] repDaysNum = days.shape[0] daysRep = pd.concat([days] * repNum, ignore_index=True) daysRep.sort_values('Date', inplace=True) fundPosRep = pd.concat([fundInfo] * repDaysNum, ignore_index=True) daysRep = daysRep.reset_index(drop=True) fundPosRep = fundPosRep.reset_index(drop=True) assert all(daysRep.index == fundPosRep.index) fundPosFull = pd.concat([daysRep, fundPosRep], axis=1) resPos = fundPosFull.loc[ (fundPosFull['Date'] >= fundPosFull['Date_In']) & ((fundPosFull['Date'] <= fundPosFull['Date_Out']) | (pd.isnull(fundPosFull['Date_Out']))), ['Date', 'Fund_Code', 'Info_Sector']] resPos.reset_index(drop=True, inplace=True) else: resPos = pd.DataFrame([], columns=['Date', 'Fund_Code']) return resPos
def get_component_SW(dateFrom, dateTo): from public.getDataSQL import get_data_sql from public.getTradingDays import get_trading_days import pandas as pd # 获取全集,如果需要某个单独行业再自行筛选 exeStr = 'select s_info_windcode, S_CON_WINDCODE, S_CON_INDATE, S_CON_OUTDATE from ' \ 'SWIndexMembers where S_INFO_WINDCODE in ' \ '(select INDUSTRIESALIAS + \'.SI\' from ASHAREINDUSTRIESCODE where ' \ 'LEVELNUM = 2 and INDUSTRIESCODE like \'61%\') and ' \ '(S_CON_INDATE <= {} and (S_CON_OUTDATE >= {} or S_CON_OUTDATE is null))'.format(dateTo, dateFrom) # Note: 为什么这个地方Indate和Outdate都有等于号:纳入是当天的0点就开始,而剔除的话是截止到剔除日期的24:00结束 swComponent = get_data_sql(exeStr, 'wind') swComponent.columns = ['SW_Code', 'Stock_Code', 'In_Date', 'Out_Date'] swComponent['In_Date'] = pd.to_datetime(swComponent['In_Date']) swComponent['Out_Date'] = pd.to_datetime(swComponent['Out_Date']) if len(swComponent) > 0: # 调整指数数据格式为每天持仓 # 先repmat全集出来,再根据时间筛选符合要求的 repNum = swComponent.shape[0] tradingDays = get_trading_days(dateFrom, dateTo) tradingDays['Date'] = tradingDays.index repDaysNum = tradingDays.shape[0] tradingDays = pd.concat([tradingDays] * repNum, ignore_index=True) tradingDays.sort_values('Date', inplace=True) swComponentRep = pd.concat([swComponent] * repDaysNum, ignore_index=True) tradingDays = tradingDays.reset_index(drop=True) swComponentRep = swComponentRep.reset_index(drop=True) assert all(tradingDays.index == swComponentRep.index) swComponentFull = pd.concat([tradingDays, swComponentRep], axis=1) swComponentFull['Valid'] = (swComponentFull['Date'] >= swComponentFull['In_Date']) & \ ((swComponentFull['Date'] <= swComponentFull['Out_Date']) | (swComponentFull['Out_Date'].isnull())) # 当天进当天出 swComponent = swComponentFull[swComponentFull['Valid']] swComponent.reset_index( drop=True, inplace=True) # 数据筛选后必须reset index 不然index还是原来indexPosFull的 # # check NUM # count = indexPos.groupby(by = ['Date'])['Stock_Code'].count() # assert all((count <= 300) & (count >= 295)) swComponent = swComponent[['SW_Code', 'Date', 'Stock_Code']] else: swComponent = pd.DataFrame([], columns=['SW_Code', 'Date', 'Stock_Code']) swComponent.set_index(['Date', 'Stock_Code'], inplace=True) return swComponent
def get_trading_days(dateFrom, dateTo): assert isinstance(dateFrom, str), 'dateFrom should be str' assert isinstance(dateTo, str), 'dateTo should be str' exeStr = 'select TRADE_DAYS from dbo.ASHARECALENDAR where ' \ 'S_INFO_EXCHMARKET = \'SSE\' and TRADE_DAYS >= %s and TRADE_DAYS <= %s' % (dateFrom, dateTo) tradingDays = get_data_sql(exeStr, 'wind') tradingDays.columns = ['Date'] tradingDays['Date'] = pd.to_datetime(tradingDays['Date']) tradingDays = tradingDays.sort_values( 'Date') # 等价于tradingDays.sort_value('Date', inplace=True) tradingDays.set_index('Date', inplace=True) return tradingDays
def get_index_pos(indexCode, dateFrom, dateTo): exeStr = 'select S_INFO_WINDCODE, S_CON_WINDCODE, S_CON_INDATE, S_CON_OUTDATE, CUR_SIGN ' \ 'from dbo.AINDEXMEMBERS where S_INFO_WINDCODE = \'%s\' and' \ ' (S_CON_INDATE <= %s and ( S_CON_OUTDATE > %s or S_CON_OUTDATE is null))' % (indexCode, dateTo, dateFrom) indexPos = get_data_sql(exeStr, 'wind') indexPos.columns = [ 'Index_Code', 'Stock_Code', 'DateIn', 'DateOut', 'Cur_Label' ] indexPos['DateIn'] = pd.to_datetime(indexPos['DateIn']) indexPos['DateOut'] = pd.to_datetime(indexPos['DateOut']) if len(indexPos) > 0: # 调整指数数据格式为每天持仓 # 先repmat全集出来,再根据时间筛选符合要求的 repNum = indexPos.shape[0] tradingDays = get_trading_days(dateFrom, dateTo) tradingDays['Date'] = tradingDays.index repDaysNum = tradingDays.shape[0] tradingDays = pd.concat([tradingDays] * repNum, ignore_index=True) tradingDays.sort_values('Date', inplace=True) indexPosRep = pd.concat([indexPos] * repDaysNum, ignore_index=True) tradingDays = tradingDays.reset_index(drop=True) indexPosRep = indexPosRep.reset_index(drop=True) assert all(tradingDays.index == indexPosRep.index) indexPosFull = pd.concat([tradingDays, indexPosRep], axis=1) indexPosFull['Valid'] = (indexPosFull['Date'] >= indexPosFull['DateIn']) & \ ((indexPosFull['Date'] < indexPosFull['DateOut']) | (indexPosFull['DateOut'].isnull())) # 当天进当天出 indexPos = indexPosFull[indexPosFull['Valid']] indexPos.reset_index( drop=True, inplace=True) # 数据筛选后必须reset index 不然index还是原来indexPosFull的 # # check NUM # count = indexPos.groupby(by = ['Date'])['Stock_Code'].count() # assert all((count <= 300) & (count >= 295)) indexPos = indexPos[['Index_Code', 'Date', 'Stock_Code']] else: indexPos = pd.DataFrame([], columns=['Index_Code', 'Date', 'Stock_Code']) indexPos.set_index(['Date', 'Stock_Code'], inplace=True) # 这个地方index怎么设置是个问题,理论上index应该是唯一的 return (indexPos)
def basic_info_SW(): # 61开头是申万,levelnum=2表示一级 # exeStr = 'select INDUSTRIESCODE, INDUSTRIESNAME, INDUSTRIESALIAS from dbo.ASHAREINDUSTRIESCODE where LEVELNUM = 2 and INDUSTRIESCODE like \'61%\' and USED = 1' exeStr = 'select INDUSTRIESCODE, INDUSTRIESNAME, INDUSTRIESALIAS, USED from dbo.ASHAREINDUSTRIESCODE where LEVELNUM = 2 and INDUSTRIESCODE like \'61%\'' codePara = get_data_sql(exeStr, 'wind') codePara.columns = ['SW_Code', 'SW_Name', 'Wind_Code', 'Used_Label'] # codePara['Wind_Code_New'] = pd.Series(codePara['Wind_Code'] + '.SI') codePara.sort_values(['Used_Label', 'Wind_Code'], inplace=True) codePara.reset_index(drop=True, inplace=True) sectorName = [np.nan] * 6 + [ '消费', '周期上游', '周期中游', '周期中游', '周期上游', '成长', '消费', '消费', '消费', '消费', '消费', '周期中游', '周期中游', '大金融', '消费', '消费', '其他', '周期下游', '周期下游', '成长', '其他', '成长', '成长', '成长', '大金融', '大金融', '周期下游', '周期下游' ] codePara['Sector_Name'] = sectorName return (codePara)
def attach_stk_label(inputTable): assert all([x in inputTable.columns for x in ['Fund_Code', 'Bchmk']]) bchmkList = [ x if pd.isnull(x) else x.split('+') for x in inputTable['Bchmk'] ] bchmkDict = [] for iRow in range(len(bchmkList)): bchmkStringI = bchmkList[iRow] obj = {} if bchmkStringI is not None: for item in bchmkStringI: if len(re.findall(r'^([^*%]*)\*?(?:([1-9]\d?)%)?$', item)) > 0: x, y = re.findall(r'^([^*%]*)\*?(?:([1-9]\d?)%)?$', item)[0] obj[x] = y bchmkDict.append(obj) # 读取指数标签参数表: sqlStr = 'select Bchmk_Name, If_STK from paraBchmkType' indexLabel = get_data_sql(sqlStr, 'lhtz') indexLabel.columns = ['Bchmk_Name', 'If_STK'] indexLabel = indexLabel.set_index('Bchmk_Name')['If_STK'].to_dict() assert len(inputTable) == len(bchmkDict) stkWeight = [] newIndexName = [] sqlTable = pd.DataFrame() for iCode in range(len(bchmkDict)): bchmkI = bchmkDict[iCode] stkLabelI = [indexLabel.get(key) for key in bchmkI.keys()] weightI = [0.0 if w == '' else float(w) for w in bchmkI.values()] if None not in stkLabelI: stkWeightI = sum(np.array(stkLabelI) * np.array(weightI)) else: # 把None先粗略补齐,再把新指数名称保存下来,便于print到日志检查 newIndexName = np.array(list( bchmkI.keys()))[[x is None for x in stkLabelI]] addLabel = [] for i in range(len(newIndexName)): newIndexNameI = newIndexName[i] addLabelI = not ('债' in newIndexNameI or '存款' in newIndexNameI or '年化' in newIndexNameI) addLabel.append(addLabelI) # write to sql sqlTableI = pd.DataFrame( np.array([ newIndexName, addLabel, [ datetime.datetime.strftime( datetime.datetime.today(), '%Y-%m-%d') ] * len(addLabel) ]).transpose(), columns=['Bchmk_Name', 'If_STK', 'Update_Time']) sqlTable = pd.concat([sqlTable, sqlTableI], axis=0, ignore_index=True) addIndex = np.where([x is None for x in stkLabelI])[0] newIndex = list(range(len(addLabel))) for i, j in zip(addIndex, newIndex): stkLabelI[i] = addLabel[j] stkWeightI = sum(np.array(stkLabelI) * np.array(weightI)) stkWeight.append(stkWeightI) sqlTable.drop_duplicates(inplace=True) if len(sqlTable) > 0: engine = create_engine( "mssql+pymssql://lhtz:[email protected]:1433/lhtzdb") conn = engine.connect() sqlTable.to_sql('paraBchmkType', conn, index=False, if_exists='append') conn.close() print('新添加Bchmk_Name:{0} ...'.format(sqlTable)) else: print('没有需要新添加参数的Bchmk_Name...') res = inputTable.copy() res.loc[:, 'Stk_Weight'] = stkWeight res = res[res['Stk_Weight'] >= 50] return res, newIndexName
def get_trading_days2(dateFrom, dateTo, dateType): assert isinstance(dateFrom, str), 'dateFrom should be str' assert isinstance(dateTo, str), 'dateTo should be str' # 因为月末涉及最后一天标签问题,所以往前往后多读一些 dateFrom1 = pd.to_datetime(dateFrom) - pd.Timedelta(365, unit='D') dateTo1 = pd.to_datetime(dateTo) + pd.Timedelta(365, unit='D') dateFrom1 = datetime.strftime(dateFrom1, '%Y%m%d') dateTo1 = datetime.strftime(dateTo1, '%Y%m%d') dateType = dateType.lower() if dateType not in ('"week", "month", "quarter", "halfyear", "year"'): raise ValueError('wrong marktype, must be(' '"week", "month", "quarter", "halfyear", "year")') exeStr = 'select TRADE_DAYS from dbo.ASHARECALENDAR where ' \ 'S_INFO_EXCHMARKET = \'SSE\' and TRADE_DAYS >= %s and TRADE_DAYS <= %s' % (dateFrom1, dateTo1) tradingDays = get_data_sql(exeStr, 'wind') tradingDays.columns = ['Date'] tradingDays['Date'] = pd.to_datetime(tradingDays['Date']) tradingDays.sort_values('Date', inplace=True) tradingDays.reset_index(drop=True, inplace=True) # @2020.04.02貌似可以用x.is_year_end, x.is_quarter_end, x.is_month_end解决3个 # 添加周末、月末、季末、年末标签 # Week_Label需要做一个特殊处理,如果是1月份的Week标签是52,则年份需-1,12月份的Week标签是1,则年份需+1 tradingDays['Year_Num'] = [x.year for x in tradingDays['Date']] tradingDays['Quarter_Num'] = [x.quarter for x in tradingDays['Date']] tradingDays['Month_Num'] = [x.month for x in tradingDays['Date']] tradingDays['Week_Num'] = [x.week for x in tradingDays['Date']] tradingDays.loc[tradingDays['Quarter_Num'] <= 2, 'Halfyear_Num'] = 1 tradingDays.loc[tradingDays['Quarter_Num'] >= 3, 'Halfyear_Num'] = 2 tradingDays['Halfyear_Num'] = tradingDays['Halfyear_Num'].astype(int) # 这里不用for 循环应该怎么调整: for i in range(len(tradingDays)): y = tradingDays['Year_Num'][i] m = tradingDays['Month_Num'][i] w = tradingDays['Week_Num'][i] if m == 1 and w == 52: tradingDays.loc[i, 'Year_Num'] = y - 1 elif m == 12 and w == 1: tradingDays.loc[ i, 'Year_Num'] = y + 1 # 这里直接调整Year_Num没关系,因为后面Year_Label的时候直接用Date.year,Year_Num后来没用了 del y, m, w weekLabel = tradingDays['Year_Num'] * 100 + tradingDays['Week_Num'] tradingDays['Week_Label'] = weekLabel tradingDays['Month_Label'] = [ x.year * 100 + x.month for x in tradingDays['Date'] ] tradingDays['Quarter_Label'] = [ x.year * 100 + x.quarter for x in tradingDays['Date'] ] tradingDays['Year_Label'] = [x.year for x in tradingDays['Date']] tradingDays['Halfyear_Label'] = [ x.year * 100 for x in tradingDays['Date'] ] + tradingDays['Halfyear_Num'] weekEnd = tradingDays.groupby('Week_Label')['Date'].last() monthEnd = tradingDays.groupby('Month_Label')['Date'].last() quarterEnd = tradingDays.groupby('Quarter_Label')['Date'].last() halfyearEnd = tradingDays.groupby('Halfyear_Label')['Date'].last() yearEnd = tradingDays.groupby('Year_Label')['Date'].last() tradingDays.loc[:, 'If_Week_End'] = [ x in weekEnd.values for x in tradingDays['Date'].values ] tradingDays.loc[:, 'If_Month_End'] = [ x in monthEnd.values for x in tradingDays['Date'].values ] tradingDays.loc[:, 'If_Quarter_End'] = [ x in quarterEnd.values for x in tradingDays['Date'].values ] tradingDays.loc[:, 'If_Halfyear_End'] = [ x in halfyearEnd.values for x in tradingDays['Date'].values ] tradingDays.loc[:, 'If_Year_End'] = [ x in yearEnd.values for x in tradingDays['Date'].values ] # 筛选需要的标签 def selectData(i): switcher = { 'week': tradingDays[['Date', 'Week_Label', 'If_Week_End']], 'month': tradingDays[['Date', 'Month_Label', 'If_Month_End']], 'quarter': tradingDays[['Date', 'Quarter_Label', 'If_Quarter_End']], 'halfyear': tradingDays[['Date', 'Halfyear_Label', 'If_Halfyear_End']], 'year': tradingDays[['Date', 'Year_Label', 'If_Year_End']] } return switcher.get(dateType, "Invalid day of week") res = selectData(dateType) res = res[(res['Date'] >= pd.to_datetime(dateFrom)) & (res['Date'] <= pd.to_datetime(dateTo))] res.set_index('Date', inplace=True) return res
def get_new_fund_amount(dateFrom, dateTo, ifFlexible): # 因为要求MA,所以时间往前错一点 dateFromTemp = datetime.datetime.strftime( pd.to_datetime(dateFrom) - datetime.timedelta(50), '%Y%m%d') dateSeq = get_trading_days(dateFromTemp, dateTo) dateSeq = attach_date_label(pd.DataFrame(dateSeq), 'month') dateSeq = attach_date_label(dateSeq, 'week') weekSeq = dateSeq[dateSeq['If_Week_End']] # 这是全部的时间,后面把新发基金的数据merge到这个时间上 # 这个周度和月度是自然周和自然月,只在计算MA的分位数时候作为基准用 monthSeq = dateSeq[dateSeq['If_Month_End']] sqlStr = 'select a.F_INFO_WINDCODE, b.S_INFO_SECTOR, F_INFO_SETUPDATE, ' \ 'a.F_ISSUE_TOTALUNIT * a.F_INFO_PARVALUE as Collection, ' \ 'F_INFO_BENCHMARK from ChinaMutualFundDescription a ' \ 'left join ChinaMutualFundSector b ' \ 'on a.F_INFO_WINDCODE = b.F_INFO_WINDCODE ' \ 'where b.S_INFO_SECTOR in ' \ '(\'2001010201000000\', \'2001010101000000\', \'2001010204000000\')' stkFund = get_data_sql(sqlStr, 'wind') stkFund.columns = [ 'Fund_Code', 'Info_Sector', 'Setup_Date', 'Collection', 'Bchmk' ] stkFund['Setup_Date'] = pd.to_datetime(stkFund['Setup_Date']) stkFund['Collection'] = stkFund['Collection'].astype(float) stkFundStk = stkFund[stkFund['Info_Sector'] != '2001010204000000'] stkFundMix = stkFund[stkFund['Info_Sector'] == '2001010204000000'] if ifFlexible: stkFundMix = attach_stk_label(stkFundMix)[0] stkFundMix.drop('Stk_Weight', axis=1, inplace=True) stkFund = pd.concat([stkFundStk, stkFundMix], axis=0, ignore_index=True) del stkFundStk, stkFundMix else: stkFund = stkFundStk.copy() del stkFundStk stkFund.sort_values('Setup_Date', inplace=True) stkFund['Date'] = stkFund['Setup_Date'] stkFund.set_index('Date', inplace=True) # 07年之前的有些基金setup date是非交易日,不影响最近3年可不处理 stkFundWeek = attach_date_label(stkFund, 'week') stkFundMonth = attach_date_label(stkFund, 'month') weekCount = stkFundWeek.groupby('Week_Label')['Collection'].sum() monthCount = stkFundMonth.groupby('Month_Label')['Collection'].sum() weekCount = weekSeq.merge( pd.DataFrame(weekCount), how='left', on='Week_Label').set_index( weekSeq.index) # 做这一步merge因为有的周没有数据,应该取0处理,月度也需要(虽然月度还没有0的情况) # 这里merge完了Week_Label不知怎么就变成object了,需要调整一下,不然存hdf会报警告 # weekCount['Week_Label'] = weekCount['Week_Label'].astype(int) weekCount = weekCount[['Week_Label', 'Collection']] monthCount = monthSeq.merge(pd.DataFrame(monthCount), how='left', on='Month_Label').set_index(monthSeq.index) # monthCount['Month_Label'] = monthCount['Month_Label'].astype(int) monthCount = monthCount[['Month_Label', 'Collection']] weekCount['Collection'] = weekCount['Collection'].fillna(0) monthCount['Collection'] = monthCount['Collection'].fillna(0) # 筛选出过去3年的部分 dateFrom3Year = datetime.datetime.strftime( pd.to_datetime(dateTo) - datetime.timedelta(365 * 3), '%Y%m%d') weekCount = weekCount[weekCount.index >= pd.to_datetime(dateFrom3Year)] monthCount = monthCount[monthCount.index >= pd.to_datetime( dateFrom3Year)] # 求MA所处分位数的基准序列 画图(除最后一跟MA柱以外的部分) # 下面求5天MA和20天MA stkFundSum = stkFund.groupby('Date')['Collection'].sum() stkFundSum = dateSeq.merge(stkFundSum, how='left', on='Date') stkFundSum = pd.DataFrame(stkFundSum['Collection']) stkFundSum['Collection'] = stkFundSum['Collection'].fillna(0) stkFundSum['MA_5'] = stkFundSum['Collection'].rolling(window=5, min_periods=5).sum() stkFundSum['MA_20'] = stkFundSum['Collection'].rolling( window=20, min_periods=20).sum() # 用来画柱状图的序列 if stkFundSum.index[-1] > weekCount.index[-1]: weekCount = pd.concat([ weekCount[['Collection']], pd.DataFrame(stkFundSum.iloc[len(stkFundSum) - 1, :]).transpose()[[ 'MA_5' ]].rename(columns={'MA_5': 'Collection'}) ], axis=0) else: weekCount = weekCount[['Collection']] if stkFundSum.index[-1] > monthCount.index[-1]: monthCount = pd.concat([ monthCount[['Collection']], pd.DataFrame(stkFundSum.iloc[len(stkFundSum) - 1, :]).transpose()[[ 'MA_20' ]].rename(columns={'MA_20': 'Collection'}) ], axis=0) else: monthCount = monthCount[['Collection']] # 输出4个数值:过去一周和一月新发规模数据(两个数值)和对应在过去三年上的历史分位数(两个数值) res1 = stkFundSum['MA_5'][-1] res2 = stkFundSum['MA_20'][-1] res3 = weekCount['Collection'].rank(pct=True)[-1] res4 = monthCount['Collection'].rank(pct=True)[-1] fundRes = pd.DataFrame( { '过去一周新发规模': '{:.1f}亿'.format(res1), '过去一月新发规模': '{:.1f}亿'.format(res2), '过去一周新发规模分位数': '{:.1%}'.format(res3), '过去一月新发规模分位数': '{:.1%}'.format(res4) }, index=['value']) return [fundRes, weekCount, monthCount]
def get_fund_pos(dateFrom, dateTo, ifFlexible, bchmkCode): fundUniv = get_stock_fund_univ(dateFrom, dateTo, ifFlexible) # 普通股票型+混合偏股型基金全集 # 构造年中和年末日期序列 dateSeqHY = get_trading_days2(dateFrom, dateTo, 'halfyear') dateSeqHY = dateSeqHY[dateSeqHY['If_Halfyear_End']] # 筛选年中和年末的基金universe fundUnivHalfYear fundUnivHY = dateSeqHY.merge(fundUniv, how='left', on='Date', validate='one_to_many') fundUnivHY.drop(['Halfyear_Label', 'If_Halfyear_End'], axis=1, inplace=True) # 季度日期序列 dateSeqQ = get_trading_days2(dateFrom, dateTo, 'quarter') dateSeqQ = dateSeqQ[dateSeqQ['If_Quarter_End']] fundUnivQ = dateSeqQ.merge(fundUniv, how='left', on='Date', validate='one_to_many') fundUnivQ.drop(['Quarter_Label', 'If_Quarter_End'], axis=1, inplace=True) # 获取基金的仓位数据,join到univ上求均值即可 sqlStr = 'select S_INFO_WINDCODE, F_PRT_ENDDATE, F_PRT_STOCKTONAV, F_PRT_NETASSET, F_PRT_STOCKVALUE from ' \ 'ChinaMutualFundAssetPortfolio where F_PRT_ENDDATE >= {0} and F_PRT_ENDDATE <= {1}' \ 'order by S_INFO_WINDCODE, F_PRT_ENDDATE'.format(dateFrom, dateTo) fundPosition = get_data_sql(sqlStr, 'wind') fundPosition.columns = [ 'Fund_Code', 'Date', 'Stock_Pos', 'Fund_Value', 'Stk_Value' ] fundPosition['Date'] = pd.to_datetime(fundPosition['Date']) fundPosition['Stock_Pos'] = fundPosition['Stock_Pos'].astype(float) # Note: fundPosition的日期是报告日期,都是0630这种的,遇到0630不是交易日的就匹配不上 pd.merge_asof只能根据一个key,需要先split,比较麻烦 # 按月份匹配,如果有多于1条的数据,则取日期最新的 fundPosition['Month_Label'] = [ x.year * 100 + x.month for x in fundPosition['Date'] ] fundUnivQ['Month_Label'] = [ x.year * 100 + x.month for x in fundUnivQ['Date'] ] fundPosition.drop_duplicates(['Fund_Code', 'Month_Label'], keep='last', inplace=True) fundPosUnivQ = fundUnivQ.merge(fundPosition, how='left', on=['Month_Label', 'Fund_Code'], validate='one_to_one') # 去掉仓位低于40%的,这种一般是在建仓期 fundPosUnivQ = fundPosUnivQ[(fundPosUnivQ['Stock_Pos'] >= 40) | (np.isnan(fundPosUnivQ['Stock_Pos']))].copy() fundPosUnivQ.sort_values(['Date_x', 'Fund_Code'], inplace=True) fundPosTotalQ = fundPosUnivQ.groupby('Date_x')['Stock_Pos'].mean() fundPosTotalQ.index.names = ['Date'] # 这个总数据没有日期问题,只要有数据的一定是全的,不会只是前10名仓位 # 这个fundPosTotal就是所有偏股型基金总仓位的均值,是季度的,保存下来单独做一个表 posTableT = fundPosTotalQ.iloc[(len(fundPosTotalQ) - 4):len(fundPosTotalQ)] / 100 posTableT.index = [ datetime.datetime.strftime(x, '%Y/%m/%d') for x in posTableT.index ] posTableT = pd.DataFrame(posTableT).transpose() fundPosTotalHY = dateSeqHY.merge(fundPosTotalQ, how='left', on='Date') fundPosTotalHY.drop(['Halfyear_Label', 'If_Halfyear_End'], axis=1, inplace=True) # 基金持仓的分行业仓位 季报公布的是证监会分类标准,所以这里用底仓自己计算 # 分行业持仓,最新一期的有的基金公布年报,有的只公布的四季报,这种需要处理,处理时间????????? # 基金net asset要直接读,不能用行业仓位反除,四舍五入不准 # 先筛选出股票型+偏股型基金的底仓,重复的只保留最新 # 类别code和名称对应表:AShareIndustriesCode,参考用,查询数据不需要这个表 sqlStr = 'select a.S_INFO_WINDCODE, S_INFO_STOCKWINDCODE, a.F_PRT_ENDDATE, F_PRT_STKVALUE, ' \ 'F_PRT_STKVALUETONAV, b.F_PRT_NETASSET from ' \ 'ChinaMutualFundStockPortfolio a ' \ 'left join ChinaMutualFundAssetPortfolio b ' \ 'on a.S_INFO_WINDCODE = b.S_INFO_WINDCODE and a.F_PRT_ENDDATE = b.F_PRT_ENDDATE ' \ 'where a.S_INFO_WINDCODE in (' \ 'select F_INFO_WINDCODE from ChinaMutualFundSector where S_INFO_SECTOR in ' \ '(\'2001010201000000\', \'2001010101000000\', \'2001010204000000\')) ' \ 'and month(a.F_PRT_ENDDATE) in (6, 12) and ' \ 'a.F_PRT_ENDDATE >= {0} and a.F_PRT_ENDDATE <= {1}'.format(dateFrom, dateTo) # 大概筛选一下就可以,比不筛选稍快一点,也不用筛太精确,因为后面还会join到fundUniv上 # 直接用一个sql语句不好写,因为entrydate exitdate一个基金对应多条数据,leftjoin 都给弄上了不好汇总 # 这里面也有同一个报告期出现两次的,数据不一样,以最新日期为准,例如160512.SZ在20110623和20110630的两次只取20110630 # 这个地方sql省不了,如果不写S_INFO_WINDCODE in (select F_INFO_WINDCODE from ChinaMutualFundSector where ' \ # 'S_INFO_SECTOR in (\'2001010201000000\', \'2001010101000000\', \'2001010204000000\'))只join s_info_sector的话会导致有重复数据 # 这里选出来的数据,可能包含了一些中间符合标准,后期退出了的基金,不用担心,因为最后会再join到fundUniv上面 fundPosDetail = get_data_sql(sqlStr, 'wind') fundPosDetail.columns = [ 'Fund_Code', 'Stock_Code', 'Date', 'Stk_Values', 'Stk_Pct', 'Fund_Value' ] fundPosDetail['Date'] = pd.to_datetime(fundPosDetail['Date']) fundPosDetail[['Stk_Values', 'Stk_Pct', 'Fund_Value' ]] = fundPosDetail[['Stk_Values', 'Stk_Pct', 'Fund_Value']].astype(float) # 摘出半年度的时间clear_version (不能直接fundPosDetail.drop_duplicates因为本来就有很多duplicates必须摘出时间序列再剔除) halfYearDate = pd.DataFrame(fundPosDetail['Date'].unique(), columns=['Date']) halfYearDate['Month_Label'] = [ x.year * 100 + x.month for x in halfYearDate['Date'] ] halfYearDate.sort_values(['Date'], inplace=True) halfYearDate.drop_duplicates(['Month_Label'], keep='last', inplace=True) # tmp = fundPosDetail[[x in set(halfYearDate['Date']) for x in fundPosDetail['Date']]].copy() # 奇怪这个地方如果不加set就都是FALSE,而且这种筛选方式太慢 # https://stackoverflow.com/questions/48553824/checking-if-a-date-falls-in-a-dataframe-pandas halfYearDate['Valid'] = True fundPosDetail = fundPosDetail.merge(halfYearDate[['Date', 'Valid']], on='Date', how='left') fundPosDetail.loc[pd.isnull(fundPosDetail['Valid']), 'Valid'] = False fundPosDetail = fundPosDetail[fundPosDetail['Valid']].copy() fundPosDetail.reset_index(drop=True, inplace=True) fundPosDetail['Month_Label'] = [ x.year * 100 + x.month for x in fundPosDetail['Date'] ] fundUnivHY['Month_Label'] = [ x.year * 100 + x.month for x in fundUnivHY['Date'] ] fundPosDetail = fundUnivHY.merge( fundPosDetail, how='left', on=['Month_Label', 'Fund_Code'], validate='one_to_many') # Date_x是交易日,Date_y是报告日 fundPosDetail = fundPosDetail[[ 'Date_x', 'Fund_Code', 'Stock_Code', 'Stk_Values', 'Fund_Value' ]] fundPosDetail.columns = [ 'Date', 'Fund_Code', 'Stock_Code', 'Stk_Values', 'Fund_Value' ] # 贴行业标签计算行业权重 swComponent = get_component_SW(dateFrom, dateTo) fundPosDetail = fundPosDetail.merge(swComponent, how='left', on=['Date', 'Stock_Code']) uniqueNav = fundPosDetail.drop_duplicates(['Date', 'Fund_Code'], keep='first') totalNav = pd.DataFrame( uniqueNav.groupby('Date')['Fund_Value'].sum()) # 每天的基金总市值 fundPosSector = pd.DataFrame( fundPosDetail.groupby(['Date', 'SW_Code'])['Stk_Values'].sum()) # 每天分行业的总市值 fundPosSector['SW_Code'] = fundPosSector.index.get_level_values('SW_Code') fundPosSector = fundPosSector.merge(totalNav, how='left', on='Date') fundPosSector['SW_Pct'] = fundPosSector['Stk_Values'] / \ fundPosSector['Fund_Value'] # 行业标签添加中文名称 swPara = basic_info_SW() swPara['SW_Code'] = swPara['Wind_Code'] + '.SI' fundPosSector = fundPosSector.merge(swPara[['SW_Code', 'SW_Name']], how='left', on='SW_Code').set_index( fundPosSector.index) # @2020.04.17 添加Bchmk权重 sqlStr = 'select S_CON_WINDCODE, TRADE_DT, I_WEIGHT from ' \ 'AIndexHS300FreeWeight where S_INFO_WINDCODE = \'{0}\' and TRADE_DT >= {1} and ' \ 'TRADE_DT <= {2}'.format(bchmkCode, datetime.datetime.strftime(fundPosSector.index.min(), '%Y%m%d'), datetime.datetime.strftime(fundPosSector.index.max(), '%Y%m%d')) bchmkW = get_data_sql(sqlStr, 'wind') bchmkW.columns = ['Stock_Code', 'Date', 'Bchmk_W'] bchmkW['Date'] = pd.to_datetime(bchmkW['Date']) bchmkW['Bchmk_W'] = bchmkW['Bchmk_W'].astype(float) bchmkW = bchmkW.merge(swComponent, how='left', on=['Date', 'Stock_Code']) bchmkW = pd.DataFrame( bchmkW.groupby(['Date', 'SW_Code'])['Bchmk_W'].sum() / 100) fundPosSector = fundPosSector.merge(bchmkW, how='left', on=['Date', 'SW_Code']) fundPosSector['Bchmk_W'] = fundPosSector['Bchmk_W'].fillna(0) # 怎么判断当前的最新一期是否已经更新完数据,通过时间无法判断,只能通过结果反推,如果底仓汇总的结果和总数差异过大(>3%),就说明还没更新完 # 汇总的时候不要用行业汇总,直接用底仓汇总(事实上就算<3%了,一段时间应该也会继续更新缩小,diff最终应该小于1%) diff = pd.Series(fundPosDetail.groupby(['Date'])['Stk_Values'].sum() / totalNav['Fund_Value']) * 100 -\ fundPosTotalHY['Stock_Pos'] if abs(diff[-1]) > 3: fundPosSector = fundPosSector[ fundPosSector.index <= fundPosSector.index.unique()[-2]] fundPosTotal = fundPosTotalHY[ fundPosTotalHY.index <= fundPosTotalHY.index.unique()[-2]] assert all(fundPosSector.index.unique() == fundPosTotal.index.unique()) # 总仓位和各行业配置比例变动,输出表格结果 currPeriod = fundPosSector.index.unique()[-1] lastPeriod = fundPosSector.index.unique()[-2] sectorPctTable = fundPosSector.pivot_table(index='SW_Name', columns=fundPosSector.index, values='SW_Pct', aggfunc='first') bchmkPctTable = fundPosSector.pivot_table(index='SW_Name', columns=fundPosSector.index, values='Bchmk_W', aggfunc='first') assert all(sectorPctTable.index == bchmkPctTable.index) and all( sectorPctTable.columns == bchmkPctTable.columns) excessPctTable = sectorPctTable - bchmkPctTable # 本期 、本期相对上期变化,最终只显示当前还在用的行业 sectorPctTable = sectorPctTable.merge( swPara.loc[swPara['Used_Label'] == 1, ['SW_Name', 'Used_Label']], how='left', on='SW_Name') sectorPctTable.sort_values(['Used_Label', 'SW_Name'], ascending=False, inplace=True) # 把已经不用的行业放在最下面 sectorPctTable.reset_index(drop=True, inplace=True) # 历史仓位变化图 excessPctTable = excessPctTable.reindex(sectorPctTable['SW_Name']) sectorPctTableCurr = sectorPctTable[[ 'SW_Name', lastPeriod, currPeriod, 'Used_Label' ]].copy() bchmkPctTableCurr = excessPctTable[[lastPeriod, currPeriod]].copy() bchmkPctTableCurr.columns = ['上期超配', '本期超配'] sectorPctTableCurr.rename(columns={ lastPeriod: '上期仓位', currPeriod: '本期仓位' }, inplace=True) bchmkPctTableCurr.reset_index(drop=True, inplace=True) sectorPctTableCurr = pd.concat([sectorPctTableCurr, bchmkPctTableCurr], axis=1) sectorPctTableCurr = sectorPctTableCurr[sectorPctTableCurr['Used_Label'] == 1] del sectorPctTableCurr['Used_Label'] # 本期和上期仓位变化这个表就不显示已经废弃的行业了 # 总仓位就单独看,不和行业合到一个表了 posTable = sectorPctTableCurr.copy() posTable['Delta'] = posTable['本期仓位'] - posTable['上期仓位'] posTable['Delta_B'] = posTable['本期超配'] - posTable['上期超配'] del sectorPctTable['Used_Label'] return [posTableT, posTable, sectorPctTable]
from public.getStockFundUniv import get_stock_fund_univ from public.attachSTKLabel import attach_stk_label from public.getSWComponent import get_component_SW from public.basicInfoSW import basic_info_SW from public.attachDateLabel import attach_date_label import datetime ############################################ 融资融券数据 ###################### # 目前只需要两融余额总量、融资买入额这两个数据 dateFrom = '20100331' dateTo = datetime.datetime.strftime(datetime.datetime.today(), '%Y%m%d') sqlStr = 'select TRADE_DT, S_MARSUM_EXCHMARKET, S_MARSUM_TRADINGBALANCE, S_MARSUM_SECLENDINGBALANCE, ' \ 'S_MARSUM_MARGINTRADEBALANCE, S_MARSUM_PURCHWITHBORROWMONEY from ' \ 'AShareMarginTradeSum where TRADE_DT >= {0} and TRADE_DT <= {1} ' \ 'order by TRADE_DT, S_MARSUM_EXCHMARKET'.format(dateFrom, dateTo) marginTD = get_data_sql(sqlStr, 'wind') # margin trading data marginTD.columns = [ 'Date', 'Mkt', 'Buy_Balance', 'Sell_Balance', 'Margin_TD_Balance', 'Buy' ] # Buy代表融资余额,Sell代表融券余额, Margin_TD_Balance是两融余额 marginTD['Date'] = pd.to_datetime(marginTD['Date']) marginTD[['Buy_Balance', 'Sell_Balance', 'Margin_TD_Balance', 'Buy']] = marginTD[[ 'Buy_Balance', 'Sell_Balance', 'Margin_TD_Balance', 'Buy' ]].apply(lambda x: x.astype(float)) marginTD.sort_values(['Mkt', 'Date'], inplace=True) marginTD['Buy_MA'] = marginTD.groupby('Mkt')['Buy'].rolling( window=10, min_periods=10).mean().values marginTD.to_hdf('dataForPlot/marginTDData.hdf', key='marginTD', type='w', format='table')