Python GetDataTotalMain Exemples, GetAndSaveWindData.GetDataTotalMain.GetDataTotalMain Python Exemples

Exemple #1

0

Afficher le fichier

Fichier : GetFinanceReportData.py Projet : sufezl/EstimateValueData

class GetFinanceReportData:
    """Summarise a fund's reported stock holdings by industry.

    Holdings are read through ``get_fund_report_data`` and industry labels
    through ``get_stock_industry`` of a ``GetDataTotalMain`` instance that is
    created with ``data_resource='wind'``.
    """

    def __init__(self, dic_param, file_path=''):
        """Store the query parameters and create the data accessor.

        :param dic_param: mapping read by :meth:`get_main`; it must provide
            the keys ``'fund_code'``, ``'start_date'`` and ``'end_date'``.
        :param file_path: when non-empty, :meth:`get_industry_sta` also
            exports its two result tables to Excel.
        """
        self.dic_param = dic_param
        self.GetDataTotalMainDemo = GetDataTotalMain(data_resource='wind')
        self.file_path = file_path

    @staticmethod
    def _rpt_date_to_str(rpt_date):
        """Turn a report label into a ``YYYYMMDD`` period-end string.

        The first four characters of the label are used as the year. Labels
        containing '中报' map to June 30th, every other label to December 31st.
        """
        suffix = '0630' if '中报' in rpt_date else '1231'
        return rpt_date[:4] + suffix

    def get_industry_sta(self, dic_df):
        """Aggregate holding weights per industry for every report period.

        :param dic_df: mapping of report label -> holdings DataFrame. Each
            frame needs the columns ``'stock_code'``,
            ``'pro_total_stock_inve'`` and ``'pro_net_value'``.
        :return: tuple ``(stock_inves_rate_df, net_value_rate_df)``. Rows are
            the ``YYYYMMDD`` report dates, columns are the industries; cells
            hold the summed weight divided by 100, with 0 where an industry
            is absent for a period.
        """
        dic_stock_weight = {}  # summed 'pro_total_stock_inve' per industry
        dic_net_value_weight = {}  # summed 'pro_net_value' per industry
        for rpt_date, holdings in dic_df.items():
            rpt_date_str = self._rpt_date_to_str(rpt_date)
            industry_df = self.GetDataTotalMainDemo.get_stock_industry(
                industry_flag='中证',
                code_list=holdings['stock_code'].tolist(),
                industryType=1,
                tradeDate=rpt_date_str)
            merged = pd.concat(
                [holdings.set_index('stock_code'), industry_df],
                axis=1, sort=True)
            dic_stock_weight[rpt_date_str] = {}
            dic_net_value_weight[rpt_date_str] = {}
            # The first column of the industry table is the grouping key.
            industry_column = industry_df.columns.tolist()[0]
            for industry, stock_df in merged.groupby(industry_column):
                dic_stock_weight[rpt_date_str][industry] = stock_df[
                    'pro_total_stock_inve'].sum()
                dic_net_value_weight[rpt_date_str][industry] = stock_df[
                    'pro_net_value'].sum()
        stock_inves_rate_df = pd.DataFrame(dic_stock_weight).T.fillna(0) / 100
        net_value_rate_df = pd.DataFrame(dic_net_value_weight).T.fillna(
            0) / 100
        if self.file_path:
            # NOTE(review): file_path only switches the export on; the files
            # are written under fixed names relative to the working
            # directory. Confirm whether they should live under file_path.
            stock_inves_rate_df.to_excel('占股票投资比例.xlsx')
            net_value_rate_df.to_excel("占净值比例.xlsx")

        fig1 = plt.figure(figsize=(16, 9))
        ax_inves = fig1.add_subplot(111)
        # Draw on the prepared axes; without ``ax=`` pandas opens a second
        # figure and the 16x9 one stays empty.
        stock_inves_rate_df.plot(kind='bar', ax=ax_inves)
        plt.show()
        return stock_inves_rate_df, net_value_rate_df

    def get_main(self):
        """Load the fund's report holdings and run the industry statistics.

        :return: the tuple returned by :meth:`get_industry_sta`.
        """
        params = self.dic_param
        fund_contain_stock_df = self.GetDataTotalMainDemo.get_fund_report_data(
            fund_code=params['fund_code'],
            start_date=params['start_date'],
            end_date=params['end_date'])
        dic_df = {
            rpt_date: temp_df
            for rpt_date, temp_df in fund_contain_stock_df.groupby(
                by='rpt_date')
        }
        return self.get_industry_sta(dic_df)

Exemple #2

0

Afficher le fichier

    def getAssetIndexData(self, startDate, endDate):
        """Build the return series of every index listed in ``self.assetIndex``.

        For each code found in the inner mappings of ``self.assetIndex`` the
        ``close_price`` history is requested with
        ``get_hq_data(..., code_style='index')``. The series are placed side
        by side (one column per code), gaps are forward-filled and simple
        period-over-period returns are computed.

        :param startDate: start date forwarded to ``get_hq_data``.
        :param endDate: end date forwarded to ``get_hq_data``.
        :return: DataFrame of returns with one column per index code; the
            first row is NaN because it has no predecessor. The frame is
            also stored on ``self.indexReturnDf``.
        """
        data_source = GetDataTotalMain(data_resource='wind')
        price_frames = []
        for code_names in self.assetIndex.values():
            for code in code_names:
                prices = data_source.get_hq_data(
                    code,
                    start_date=startDate,
                    end_date=endDate,
                    code_style='index')
                # rename() returns a new frame, so the object handed out by
                # the data layer is left untouched.
                price_frames.append(
                    prices.rename(columns={"close_price": code}))

        # ffill() replaces the deprecated fillna(method='pad').
        index_data_df = pd.concat(price_frames, axis=1, sort=True).ffill()

        previous = index_data_df.shift(1)
        indexReturnDf = (index_data_df - previous) / previous
        self.indexReturnDf = indexReturnDf
        return indexReturnDf

Exemple #3

0

Afficher le fichier

Fichier : JudgeFundIndexImprove.py Projet : sufezl/EstimateValueData

    def get_data(self, df):
        """Collect adjusted NAV and tracking-index prices for each fund.

        Funds whose short name ('证券简称') contains the letter 'C' are
        ignored, and so are funds established less than 365 days before
        today. For each remaining fund the 'net_value_adj' series and the
        'close_price' series of its tracking index are requested from the
        fund's establishment date up to ``self.last_date_str``.

        :param df: DataFrame indexed by fund code with the columns
            '证券简称', '跟踪指数代码' and '基金成立日' (datetime-like values).
        :return: dict mapping ``'<fund_code>_<index_code>'`` to a DataFrame
            holding the fund series (column named after the fund code) next
            to the index series (column named after the index code).
        """
        data_source = GetDataTotalMain(data_resource='wind')
        newest_allowed = datetime.today() - timedelta(days=365)
        index_cache = {}
        dic_result_df = {}
        for fund_code in df.index.tolist():
            fund_row = df.loc[fund_code]
            if 'C' in fund_row['证券简称']:
                continue
            established = fund_row['基金成立日']
            if newest_allowed < established:
                continue
            index_code = fund_row['跟踪指数代码']
            start_date = established.strftime("%Y-%m-%d")

            fund_df = data_source.get_hq_data(
                code=fund_code, code_style='fund', start_date=start_date,
                end_date=self.last_date_str, name_list=['net_value_adj'])
            fund_df = fund_df.rename(columns={'net_value_adj': fund_code})

            # The index history depends on the fund's start date, so the
            # cache key includes it. Keying on the index code alone handed a
            # later fund the date window of whichever fund came first.
            cache_key = (index_code, start_date)
            if cache_key not in index_cache:
                index_df = data_source.get_hq_data(
                    code=index_code, code_style='index',
                    start_date=start_date, end_date=self.last_date_str,
                    name_list=['close_price'])
                index_cache[cache_key] = index_df.rename(
                    columns={'close_price': index_code})

            dic_result_df[fund_code + '_' + index_code] = pd.concat(
                [fund_df, index_cache[cache_key]], axis=1, sort=True)
        return dic_result_df

Exemple #4

0

Afficher le fichier

Fichier : GetIndexEst.py Projet : sufezl/EstimateValueData

 def __init__(self):
     """Set the query window, the data accessor and the output folders.

     The window starts 3650 days before today and ends on a fixed date;
     both output folders are built relative to the current working
     directory using backslash separators.
     """
     ten_years_back = datetime.today() - timedelta(days=365 * 10)
     working_dir = os.getcwd()

     self.start_date = ten_years_back.strftime("%Y-%m-%d")
     # The end date is pinned rather than taken from today's date.
     self.end_date = '2020-08-31'
     self.GetDataTotalMainDemo = GetDataTotalMain(data_resource='wind')
     self.file_path = working_dir + '\\GetDataResult\\估值\\'
     self.file_month_path = working_dir + '\\GetDataResult\\月度表现\\'

Exemple #5

0

Afficher le fichier

    def setMain(self, method='risk_parity', productFlag=True, asset_index={}, best_param_dic={},product_name_dic={},fund_type='ETF'):
        """Run one allocation, then export its tables, charts and metrics.

        The allocation result comes from ``self.getBigAsset``; everything is
        cut to dates on or after ``self.startDate`` and compared with the
        '000300.SH' benchmark. With ``productFlag`` set, the fund-level
        portfolio from ``self.getFundPool`` is included and its positions
        are exported; otherwise the index weights are exported.

        :param method: allocation method name, also used as output folder name.
        :param productFlag: include the fund-product portfolio when true.
        :param asset_index: forwarded to ``getBigAsset``.
        :param best_param_dic: forwarded to ``getBigAsset``.
        :param product_name_dic: forwarded to ``getFundPool``.
        :param fund_type: forwarded to ``getFundPool``.
        :return: None
        """
        big_asset = self.getBigAsset(method=method,
                                     asset_index=asset_index,
                                     best_param_dic=best_param_dic)
        allocator, strategy_nav, index_weights, equal_nav, nameStr = big_asset
        strategy_nav.name = u'大类资产组合'
        equal_nav.name = u'等权重组合'

        # Keep only observations from the configured start date onwards.
        target_date = self.startDate
        strategy_nav = strategy_nav[strategy_nav.index >= target_date]
        index_weights = index_weights[index_weights.index >= target_date]
        equal_nav = equal_nav[equal_nav.index >= target_date]
        portfolio_df = pd.concat([strategy_nav, equal_nav], axis=1, sort=True)

        # Benchmark returns: reuse the allocator's series when it has one,
        # otherwise download the prices and derive the returns.
        bench_code = '000300.SH'
        bench_name = '沪深300'
        if bench_code not in allocator.indexReturnDf:
            data_source = GetDataTotalMain(data_resource='wind')
            bench_prices = data_source.get_hq_data(
                bench_code, start_date=self.startDate, end_date=self.endDate,
                code_style='index')
            bench_prices.rename(columns={"close_price": bench_name}, inplace=True)
            bench_returns = bench_prices / bench_prices.shift(1) - 1
        else:
            bench_returns = pd.DataFrame()
            bench_returns[bench_name] = allocator.indexReturnDf[bench_code]

        newFold = self.fileMake(newFoldName=method)
        asset_df = self.get_asset_weight(index_weights, allocator.assetIndex)

        if not productFlag:
            pofolioAndBench = pd.concat([bench_returns, portfolio_df], axis=1, join='inner')
            labels = []
            for code in index_weights.columns.tolist():
                for code_names in allocator.assetIndex.values():
                    if code in code_names:
                        labels.append(code_names[code])
            position_df = index_weights
        else:
            fund_pool = self.getFundPool(allocator, index_weights, portfolio_df, method,
                                         product_name_dic=product_name_dic, fund_type=fund_type)
            portfolioSe, positionDf, dicResult, usefulNetReturnDf = fund_pool

            pofolioAndBench = pd.concat([bench_returns, portfolioSe, portfolio_df], axis=1, join='inner')

            # Flatten {index: {fund_code: fund_name}} into one code -> name map.
            total_fund_name_dic = {}
            for code_names in dicResult.values():
                total_fund_name_dic.update(code_names)
            labels = [total_fund_name_dic[code] for code in positionDf.columns.tolist()]

            net_return_df = usefulNetReturnDf.fillna(0).rename(columns=total_fund_name_dic)
            net_return_df = net_return_df[net_return_df.index >= target_date]
            self.riskAndReturnCalc(method=method, nameStr='基金产品风险收益指标', pofolioAndBench=net_return_df,
                                   newFold=newFold)
            position_df = positionDf

        asset_df.to_excel(newFold + "大类资产仓位表.xlsx")
        position_df.to_excel(newFold + "产品组合仓位表.xlsx")
        self.plotFigureResult(nameStr, pofolioAndBench, position_df, newFold, labels, asset_df)
        self.riskAndReturnCalc(method=method, nameStr=nameStr, pofolioAndBench=pofolioAndBench,
                               newFold=newFold)

Exemple #6

0

Afficher le fichier

    def secondSelect(self, start_date, end_date, product_name_dic={},fund_type='ETF'):
        """Pick one product per index and download the product price series.

        When ``product_name_dic`` is empty a built-in index -> product table
        is used (one table for ``fund_type == 'ETF'``, another for any other
        value). Only the indices present in ``self.assetIndex`` are fetched.

        :param start_date: start date forwarded to ``get_hq_data``.
        :param end_date: end date forwarded to ``get_hq_data``.
        :param product_name_dic: optional ``{index_code: {fund_code: name}}``.
        :param fund_type: 'ETF' reads 'close_price' with code_style
            'etf_fund'; anything else reads 'net_value_adj' with code_style
            'fund'.
        :return: tuple ``(dicResult, result_df)`` - the mapping in use and a
            DataFrame with one column per fund code.
        """
        is_etf = fund_type == 'ETF'

        if product_name_dic:
            dicResult = product_name_dic
        elif is_etf:
            dicResult = {
                '000300.SH': {"510300.SH": "300ETF"},
                '000905.SH': {"510500.SH": "500ETF"},
                'SPX.GI': {"513500.SH": "标普500"},
                'HSI.HI': {"159920.SZ": "恒生ETF"},
                'SPSIOP.SPI': {"162411.SZ": "华宝油气"},
                'AU9999.SGE': {'518800.SH': "黄金ETF"},
                'H00140.SH': {'511010.SH': "国债ETF "},
                '000852.SH': {'512100.SH': "1000ETF "},
            }
        else:
            dicResult = {
                '000300.SH': {"161207.OF": "国投瑞银瑞和沪深300指数"},
                '000905.SH': {"006087.OF": "华泰柏瑞中证500ETF联接C"},
                'SPX.GI': {"050025.OF": "博时标普500ETF联接A"},
                'HSI.HI': {"000071.OF": "华夏恒生ETF联接A"},
                'SPSIOP.SPI': {"007844.OF": "华宝标普油气C人民币"},
                'AU9999.SGE': {'002610.OF': "博时黄金ETF联接A"},
                'H00140.SH': {'160602.OF': "鹏华普天债券A "},
                '000852.SH': {'006487.OF': "广发中证1000C "},
                '000906.SH': {'001588.OF': '天弘中证800A'},
                '399006.SZ': {'110026.OF': '易方达创业板ETF联接A'},
                '399005.SZ': {'161118.OF': '易方达中小板'},
            }

        # Every index of self.assetIndex must have an entry in dicResult.
        total_fund_list = [
            fund_code
            for index_names in self.assetIndex.values()
            for index in index_names
            for fund_code in dicResult[index]
        ]

        data_source = GetDataTotalMain(data_resource='wind')
        value_column = "close_price" if is_etf else "net_value_adj"

        def fetch(fund_code):
            # The ETF request carries no name_list; the fund request does.
            if is_etf:
                return data_source.get_hq_data(fund_code, start_date=start_date, end_date=end_date,
                                               code_style='etf_fund')
            return data_source.get_hq_data(fund_code, start_date=start_date, end_date=end_date,
                                           code_style='fund', name_list=['net_value_adj'])

        frames = []
        for fund_code in total_fund_list:
            frame = fetch(fund_code)
            frame.rename(columns={value_column: fund_code}, inplace=True)
            frames.append(frame)
        result_df = pd.concat(frames, axis=1, sort=True)
        return dicResult, result_df

Exemple #7

0

Afficher le fichier

 def getAssetIndexData(self, assetIndex):
     """Build the return series of every index listed in ``assetIndex``.

     Close prices between ``self.startDate`` and ``self.endDate`` are
     requested with ``get_hq_data(..., code_style='index')`` for each code,
     placed side by side, forward-filled and turned into simple
     period-over-period returns.

     :param assetIndex: mapping whose values are mappings keyed by index
         code; only those keys are used here.
     :return: DataFrame of returns with one column per index code; the
         first row is NaN because it has no predecessor.
     """
     data_source = GetDataTotalMain(data_resource='wind')
     price_frames = []
     for code_names in assetIndex.values():
         for code in code_names:
             prices = data_source.get_hq_data(
                 code,
                 start_date=self.startDate,
                 end_date=self.endDate,
                 code_style='index')
             # rename() returns a new frame, so the object handed out by
             # the data layer is left untouched.
             price_frames.append(
                 prices.rename(columns={"close_price": code}))

     # ffill() replaces the deprecated fillna(method='pad').
     index_data_df = pd.concat(price_frames, axis=1, sort=True).ffill()

     previous = index_data_df.shift(1)
     indexReturnDf = (index_data_df - previous) / previous
     return indexReturnDf

Exemple #8

0

Afficher le fichier

Fichier : GetFinanceReportData.py Projet : sufezl/EstimateValueData

 def __init__(self, dic_param, file_path=''):
     """Keep the caller's settings and create the data accessor.

     :param dic_param: parameter mapping, stored unchanged.
     :param file_path: optional path setting, stored unchanged.
     """
     self.dic_param, self.file_path = dic_param, file_path
     self.GetDataTotalMainDemo = GetDataTotalMain(data_resource='wind')

Exemple #9

0

Afficher le fichier

Fichier : CalcHXBCorr.py Projet : sufezl/EstimateValueData

 def __init__(self):
     """Create the data accessor, configured with ``data_resource='wind'``."""
     data_source = GetDataTotalMain(data_resource='wind')
     self.GetDataTotalMainDemo = data_source

Exemple #10

0

Afficher le fichier

Fichier : CalcHXBCorr.py Projet : sufezl/EstimateValueData

class CalcHXBCorr:
    """Export a per-manager product overview and fund/index correlations.

    All inputs are Excel workbooks read from the working directory; price
    data comes from a ``GetDataTotalMain`` instance created with
    ``data_resource='wind'``.
    """

    # Index codes to correlate against, in output order.
    _INDEX_CODES = ['000300.SH', '000905.SH', '000852.SH', '000935.SH', '000933.SH', '000932.SH', '000936.CSI',
                    '000934.SH', '000931.CSI', '000930.CSI', '000929.CSI', '000937.CSI', '000928.SH']
    # Display name used for each index code in the result table.
    _INDEX_NAMES = {'000300.SH': '沪深300', '000905.SH': '中证500', '000852.SH': '中证1000', '000935.SH': '中证信息',
                    '000933.SH': '中证医药', '000932.SH': '中证消费', '000936.CSI': '中证电信', '000934.SH': '中证金融',
                    '000930.CSI': '中证工业', '000929.CSI': '中证材料', '000937.CSI': '中证公用', '000928.SH': '中证能源',
                    '000931.CSI': '中证可选'}

    def __init__(self):
        """Create the data accessor."""
        self.GetDataTotalMainDemo = GetDataTotalMain(data_resource='wind')

    def get_Data(self):
        """Placeholder; intentionally does nothing."""
        pass

    @staticmethod
    def _fund_index_corr(fund_name, fund_df, index_return_df):
        """Correlate one fund's returns with every index inside its own window.

        :param fund_name: column label for the returned frame.
        :param fund_df: single-column, non-empty return frame of the fund.
        :param index_return_df: index returns, one column per index.
        :return: one-column DataFrame (column ``fund_name``) indexed by the
            index columns, holding the correlation with the fund.
        """
        first_date, last_date = fund_df.index[0], fund_df.index[-1]
        in_window = (index_return_df.index >= first_date) & (index_return_df.index <= last_date)
        combined = pd.concat([fund_df, index_return_df.loc[in_window]], axis=1, sort=True)
        # Row 0 of the matrix is the fund; drop its self-correlation.
        fund_vs_index = combined.corr().iloc[0][1:]
        return pd.DataFrame(fund_vs_index.values, columns=[fund_name], index=fund_vs_index.index.tolist())

    def _export_manager_overview(self):
        """Write one sheet per fund company, largest total fund size first."""
        products = pd.read_excel("被动指数产品.xlsx", index_col=0)
        size_by_manager = products.groupby(by='基金管理人')['基金规模'].sum().sort_values(ascending=False)

        save_path = os.path.join(os.getcwd(), '基金公司管理产品概况.xlsx')
        # The context manager saves and closes the workbook, also on error;
        # ExcelWriter.save() no longer exists in pandas 2.x.
        with pd.ExcelWriter(save_path) as writer:
            for manager_name in size_by_manager.index.tolist():
                products.loc[products['基金管理人'] == manager_name].to_excel(writer, sheet_name=manager_name)

    def _load_fund_returns(self, df):
        """Return ``{fund name: daily return frame}`` for every row of ``df``."""
        dic_df = {}
        for code in df.index.tolist():
            fund_row = df.loc[code]
            nav_df = self.GetDataTotalMainDemo.get_hq_data(
                code, code_style='fund',
                start_date=fund_row['起始日'].strftime("%Y-%m-%d"),
                end_date=fund_row['结尾日'].strftime("%Y-%m-%d"),
                name_list=['acc_net_value'])
            nav_df = nav_df.rename(columns={"acc_net_value": code})
            dic_df[fund_row['名称']] = (nav_df / nav_df.shift(1) - 1).dropna()
        return dic_df

    def _load_index_returns(self, min_date, max_date):
        """Return daily returns of the configured indices, named for display."""
        price_frames = []
        for code in self._INDEX_CODES:
            prices = self.GetDataTotalMainDemo.get_hq_data(code, code_style='index', start_date=min_date,
                                                           end_date=max_date)
            price_frames.append(prices.rename(columns={"close_price": code}))
        # Keep only the dates on which every index has a price.
        index_df = pd.concat(price_frames, axis=1, sort=True).dropna()
        index_return_df = index_df / index_df.shift(1) - 1
        return index_return_df.rename(columns=self._INDEX_NAMES)

    def get_main(self):
        """Produce both Excel reports.

        1. '基金公司管理产品概况.xlsx': products of each fund company.
        2. '基金相关系数.xlsx': correlation of each fund listed in
           '基金发行明细.xlsx' with the configured indices, computed over the
           fund's own date window.
        """
        self._export_manager_overview()

        df = pd.read_excel("基金发行明细.xlsx", sheet_name='Sheet1', index_col=0)
        dic_df = self._load_fund_returns(df)
        index_return_df = self._load_index_returns(
            df['起始日'].min().strftime("%Y-%m-%d"),
            df['结尾日'].max().strftime("%Y-%m-%d"))

        df_list = []
        for fund_name, fund_df in dic_df.items():
            if fund_df.empty:
                # Without return rows there is no window to correlate over.
                print("skip %s: no return data" % fund_name)
                continue
            df_list.append(self._fund_index_corr(fund_name, fund_df, index_return_df))
        final_df = pd.concat(df_list, axis=1, sort=True).T
        final_df.to_excel("基金相关系数.xlsx")

Exemple #11

0

Afficher le fichier

class TalLiJudge:
    """Analyse the funds run by the managers named in a fund-issuance list.

    For each manager the correlation between their funds' daily returns and
    the funds' top-industry holdings are written to Excel workbooks.
    """

    def __init__(self):
        """Create the data accessor."""
        self.GetDataTotalMainDemo = GetDataTotalMain(data_resource='wind')

    @staticmethod
    def _funds_managed_by(total_df, manager):
        """Return the rows of ``total_df`` whose '基金经理' text contains ``manager``.

        Each matching row appears exactly once, in its original order; rows
        with a missing manager value never match.
        """
        is_match = total_df['基金经理'].str.contains(manager, regex=False, na=False)
        return total_df[is_match]

    @staticmethod
    def _write_sheets(path, frames):
        """Write ``{sheet name: DataFrame}`` to one workbook; skip when empty."""
        if not frames:
            # A workbook without any sheet cannot be saved.
            return
        # The context manager saves and closes the workbook, also on error;
        # ExcelWriter.save() no longer exists in pandas 2.x.
        with pd.ExcelWriter(path) as writer:
            for sheet_name, frame in frames.items():
                frame.to_excel(writer, sheet_name=sheet_name)

    def get_data(self):
        """Map each manager to the funds they run.

        Managers are read from the '基金经理' column of '基金发行明细.xlsx'. A
        cell holding one name is matched exactly against '主动权益类基金.xlsx';
        a cell holding several names separated by '、' is split and every
        name is matched as a substring.

        :return: dict of manager name -> DataFrame of that manager's funds
            (possibly empty when nothing matches).
        """
        target_df = pd.read_excel(
            "基金发行明细.xlsx",
            sheet_name='Sheet1',
        )
        total_df = pd.read_excel("主动权益类基金.xlsx")
        dic_df = {}
        for manager_field in target_df['基金经理'].tolist():
            managers = manager_field.split('、')
            if len(managers) == 1:
                dic_df[manager_field] = total_df[total_df['基金经理'] == manager_field]
                continue
            for manager in managers:
                dic_df[manager] = self._funds_managed_by(total_df, manager)
        return dic_df

    def get_calc_result(self, dic_df, rpt_date='20200630'):
        """Compute and export per-manager return correlations and top industries.

        :param dic_df: manager name -> DataFrame with the columns '证券代码',
            '证券简称' and '任职日期' (datetime-like).
        :param rpt_date: report date passed to the ``w.wss`` query as
            ``rptDate``; defaults to the previously hard-coded value.
        :return: None. Two workbooks are written below
            ``<cwd>/HXBFundManager``; a workbook is skipped when there is
            nothing to write into it.
        """
        end_date = (datetime.today() -
                    timedelta(days=1)).strftime("%Y-%m-%d")
        fields = [
            "prt_stockvalue_topindustryname2",
            "prt_stockvalue_topindustrytonav2",
            "prt_stockvalue_topindustrytostock2", "sec_name"
        ]
        # Result field names are looked up in upper case.
        name_dic = {
            "prt_stockvalue_topindustryname2".upper(): "重仓行业名称",
            "prt_stockvalue_topindustrytonav2".upper(): "重仓行业市值占基金资产净值比",
            "prt_stockvalue_topindustrytostock2".upper(): "重仓行业市值占股票投资市值比",
            "sec_name".upper(): "证券简称"
        }

        dic_name_corr_df = {}
        dic_name_poc_df = {}
        for name, fund_df in dic_df.items():
            return_frames = []
            for num in range(fund_df.shape[0]):
                fund_row = fund_df.iloc[num]
                nav_df = self.GetDataTotalMainDemo.get_hq_data(
                    code=fund_row['证券代码'],
                    start_date=fund_row['任职日期'].strftime("%Y-%m-%d"),
                    end_date=end_date,
                    code_style='fund',
                    name_list=['net_value_adj'])
                nav_df = nav_df.rename(
                    columns={'net_value_adj': fund_row['证券简称']})
                return_frames.append((nav_df / nav_df.shift(1) - 1).dropna())
            if not return_frames:
                # The manager has no funds: nothing to correlate or query.
                continue
            temp_total_df = pd.concat(return_frames, axis=1, sort=True)
            temp_total_df.dropna(axis=1, how='all', inplace=True)
            dic_name_corr_df[name] = temp_total_df.corr()

            poc_df_list = []
            for order in range(1, 6):
                wss_data = w.wss(codes=fund_df['证券代码'].tolist(),
                                 fields=fields,
                                 options="rptDate=%s;order=%s" %
                                 (rpt_date, str(order)))
                if wss_data.ErrorCode != 0:
                    print("wind获取因子数据有误，错误代码" + str(wss_data.ErrorCode))
                    continue
                resultDf = pd.DataFrame(wss_data.Data,
                                        index=wss_data.Fields,
                                        columns=wss_data.Codes).T
                resultDf.rename(columns=name_dic, inplace=True)
                resultDf['重仓行业排名'] = order
                poc_df_list.append(resultDf)

            if poc_df_list:
                dic_name_poc_df[name] = pd.concat(poc_df_list, axis=0, sort=True)

        out_dir = os.path.join(os.getcwd(), 'HXBFundManager')
        if not os.path.isdir(out_dir):
            os.makedirs(out_dir)
        self._write_sheets(os.path.join(out_dir, '基金经理管理产品相关性.xlsx'),
                           dic_name_corr_df)
        self._write_sheets(os.path.join(out_dir, '基金经理重仓行业概况.xlsx'),
                           dic_name_poc_df)

    def get_main(self):
        """Load the manager/fund mapping and export the analysis workbooks."""
        dic_df = self.get_data()
        self.get_calc_result(dic_df)

Exemple #12

0

Afficher le fichier

Fichier : industry_recyle.py Projet : hongrubaiding/ETLDataWash

 def __init__(self):
     """Open the database engine, create the data accessor, set the limit."""
     self.engine = MysqlCon().getMysqlCon('engine')
     self.GetDataTotalMainDemo = GetDataTotalMain(data_resource='wind')
     # Stored threshold: twenty million.
     self.industry_trade_limit = 2 * 10 ** 7

Exemple #13

0

Afficher le fichier

Fichier : industry_recyle.py Projet : hongrubaiding/ETLDataWash

class IndustryRecyle:
    """Select one tradable product for each industry/theme index.

    Product and index metadata are read from MySQL (tables
    ``zzindex_product_info`` and ``zzindex_info``); fund sizes come from a
    ``GetDataTotalMain`` instance created with ``data_resource='wind'``.
    """

    # Index codes starting with '0' that take the '.CSI' suffix, not '.SH'.
    _CSI_ZERO_PREFIX_CODES = frozenset([
        '000859', '000860', '000861', '000922', '000963', '000978', '000964',
        '000969', '000961', '000979', '000806'])

    def __init__(self):
        """Open the database engine and create the data accessor."""
        MysqlConDemo = MysqlCon()
        self.engine = MysqlConDemo.getMysqlCon('engine')
        self.GetDataTotalMainDemo = GetDataTotalMain(data_resource='wind')
        # Minimum value (from get_fund_size) an ETF group must reach to be kept.
        self.industry_trade_limit = 20000000

    def remove_deadline_product(self, df):
        """Drop funds listed in the liquidation workbook '清算基金.xlsx'.

        :param df: DataFrame with a 'fund_code' column.
        :return: copy of the remaining rows, indexed by fund code and with
            the 'fund_code' column restored as the last column.
        """
        liquidated = set(pd.read_excel("清算基金.xlsx")['证券代码'].tolist())
        by_code = df.set_index('fund_code', drop=True)
        # A boolean mask keeps each row once even when codes repeat, and
        # copy() makes the column assignment below act on an independent frame.
        result_df = by_code.loc[~by_code.index.isin(liquidated)].copy()
        result_df['fund_code'] = result_df.index.tolist()
        return result_df

    def remove_index_same_name(self, df):
        """Drop rows created by indices that share the same short name.

        When a fund code occurs on several rows, every row whose
        'index_c_fullname' is not contained in the fund name marks that full
        name as wrong; all rows (of any fund) carrying a marked full name are
        removed.

        :param df: DataFrame with the columns 'fund_code', 'fund_name' and
            'index_c_fullname'.
        :return: the filtered DataFrame, or ``df`` itself when nothing is
            marked.
        """
        mismatched_fullnames = set()
        for _, rows in df.groupby('fund_code'):
            if rows.shape[0] <= 1:
                continue
            for full_name, fund_name in zip(rows['index_c_fullname'], rows['fund_name']):
                if full_name not in fund_name:
                    mismatched_fullnames.add(full_name)

        if not mismatched_fullnames:
            return df
        return df[~df['index_c_fullname'].isin(mismatched_fullnames)]

    @staticmethod
    def _etf_code_with_suffix(code):
        """Append the exchange suffix to an ETF code ('5…' -> .SH, '1…' -> .SZ)."""
        if code.startswith('5'):
            return code + '.SH'
        if code.startswith('1'):
            return code + '.SZ'
        return code

    @classmethod
    def _index_code_with_suffix(cls, code):
        """Append the suffix to an index code; unknown patterns stay unchanged."""
        if code.startswith('0'):
            return code + ('.CSI' if code in cls._CSI_ZERO_PREFIX_CODES else '.SH')
        if code.startswith('3'):
            return code + '.SZ'
        if code.startswith('H') or code[:2] in ('93', '95', '99'):
            return code + '.CSI'
        if code == '980017' or code[:2] == 'CN':
            return code + '.CNI'
        return code

    def code_last_add(self, fund_type, df, label=''):
        """Return the codes of column ``label`` with their market suffix added.

        :param fund_type: 'ETF', 'OTC' (always '.OF') or 'index'.
        :param df: DataFrame holding the code column.
        :param label: name of the code column.
        :return: list of suffixed codes in row order.
        :raises ValueError: for any other ``fund_type`` (this used to fail
            with an UnboundLocalError).
        """
        code_list = df[label].tolist()
        if fund_type == 'ETF':
            return [self._etf_code_with_suffix(code) for code in code_list]
        if fund_type == 'OTC':
            return [code + '.OF' for code in code_list]
        if fund_type == 'index':
            return [self._index_code_with_suffix(code) for code in code_list]
        raise ValueError("unsupported fund_type: %r" % (fund_type,))

    def get_index_product_dic(self, df, fund_type='ETF'):
        """Choose one product per index.

        :param df: DataFrame indexed by fund code with the columns
            'fund_code', 'fund_name', 'index_code', 'indx_sname' and
            'establish_date'.
        :param fund_type: 'ETF' picks, per index, the fund with the largest
            value reported by ``get_fund_size``; any other value picks among
            the share classes established on the earliest date.
        :return: tuple ``(index_name_dic, product_name_dic)`` with
            ``{index_code: index short name}`` and
            ``{index_code: {fund_code: fund_name}}``.
        """
        index_name_dic = {}
        product_name_dic = {}
        # presumably a Series indexed by fund code (it is reduced to a scalar
        # with max()) - verify against get_fund_size
        size_df = self.GetDataTotalMainDemo.get_fund_size(code_list=df['fund_code'].tolist())
        if fund_type == 'ETF':
            for index_code, temp_df in df.groupby('index_code'):
                group_sizes = size_df.loc[temp_df.index.tolist()]
                trade_max = group_sizes.max()
                if np.isnan(trade_max) or trade_max < self.industry_trade_limit:
                    continue
                # idxmax() yields the fund code (label); argmax() yields a
                # position in current pandas and would break the lookup below.
                product_code = group_sizes.idxmax()
                index_name_dic[index_code] = temp_df.iloc[0]['indx_sname']
                product_name_dic[index_code] = {product_code: temp_df.loc[product_code]['fund_name']}
            return index_name_dic, product_name_dic

        for index_code, temp_df in df.groupby(by='index_code'):
            earliest = temp_df['establish_date'].min()
            # presumably ISO-formatted date strings - verify against the table
            if earliest >= '2018-01-01':
                continue
            target_df = temp_df[temp_df['establish_date'] == earliest]

            if target_df.shape[0] == 1:
                index_name_dic[index_code] = temp_df.iloc[0]['indx_sname']
                product_name_dic[index_code] = {target_df.iloc[0]['fund_code']: target_df.iloc[0]['fund_name']}
                continue
            if target_df.shape[0] == 3:
                for code in target_df['fund_code'].tolist():
                    if code[:2] == '16':
                        index_name_dic[index_code] = target_df.loc[code]['indx_sname']
                        product_name_dic[index_code] = {
                            target_df.loc[code]['fund_code']: target_df.loc[code]['fund_name']}
                # NOTE(review): the original ended this branch with a
                # `continue` inside the inner loop, which cannot skip the
                # name-based selection below, so that selection may overwrite
                # the '16…' pick. Behaviour kept; confirm the intent.

            index_name_dic[index_code] = temp_df.iloc[0]['indx_sname']
            names = target_df['fund_name'].tolist()
            codes = target_df['fund_code'].tolist()
            for name in names:
                # NOTE(review): the last clause mirrors the original
                # `elif name.find('100A'):`, which is truthy for every name
                # that does not START with '100A'. It therefore works as a
                # catch-all and the last listed name normally wins.
                # `!= -1` was probably intended, but that would leave indices
                # without any product; behaviour kept, confirm the intent.
                selected = (
                    ('分级' in name and 'A' not in name and 'B' not in name)
                    or '联接C' in name or '联接ETFC' in name
                    or '指数C' in name
                    or 'C' in name
                    or '分级B' in name
                    or '金瑞' in name
                    or not name.startswith('100A')
                )
                if selected:
                    product_name_dic[index_code] = {codes[names.index(name)]: name}
        return index_name_dic, product_name_dic

    def get_fund_index(self, fund_type='ETF', style_flag='行业'):
        """Load products and indices from MySQL and pick one product per index.

        :param fund_type: 'ETF' selects rows with product_type "ETF"; any
            other value selects the non-ETF rows.
        :param style_flag: index classification to keep; a value longer than
            two characters selects both '行业' and '主题'.
        :return: the tuple returned by :meth:`get_index_product_dic`.
        """
        # NOTE(review): style_flag is interpolated into the SQL text; only
        # pass trusted values.
        if len(style_flag) > 2:
            style_flag = tuple(['行业', '主题'])
        else:
            style_flag = "('%s')" % style_flag

        if fund_type == 'ETF':
            sqlstr = '''SELECT t1.fund_code,t1.record_time, t1.fund_name,t1.establish_date,t1.indx_sname,t2.class_classify,t2.index_code,t2.index_c_fullname
             FROM zzindex_product_info t1,zzindex_info t2 WHERE t1.product_type="ETF" and t1.indx_sname=t2.indx_sname
             and t2.class_classify in %s''' % str(style_flag)
        else:
            sqlstr = '''SELECT t1.fund_code, t1.record_time,t1.product_type,t1.fund_name,t1.establish_date,t1.indx_sname,t2.index_c_fullname,
            t2.class_classify,t2.index_code FROM zzindex_product_info t1,zzindex_info t2 WHERE t1.product_type!="ETF"
            and t1.indx_sname=t2.indx_sname and t2.class_classify in %s''' % str(style_flag)

        df_init = pd.read_sql(sqlstr, self.engine)
        # Keep only the most recent record(s) of every fund.
        df_list = []
        for fund_code, temp_Df in df_init.groupby(by='fund_code'):
            if temp_Df.shape[0] == 1:
                df_list.append(temp_Df)
            else:
                df_list.append(temp_Df[temp_Df['record_time'] == temp_Df['record_time'].max()])

        df = pd.concat(df_list, axis=0, sort=True)
        df['fund_code'] = self.code_last_add(fund_type=fund_type, df=df, label='fund_code')
        df = self.remove_index_same_name(df)
        df = self.remove_deadline_product(df)
        df['index_code'] = self.code_last_add(fund_type='index', df=df, label='index_code')
        index_name_dic, product_name_dic = self.get_index_product_dic(df, fund_type=fund_type)
        return index_name_dic, product_name_dic

Exemple #14

0

Afficher le fichier

Fichier : IndexSta.py Projet : hongrubaiding/ETLDataWash

    def get_lack_index_style(self, total_df):
        """Fill in the tracked-index style for funds where it is missing.

        Funds whose ``跟踪指数类型`` is NaN are looked up through
        ``get_fund_base_info``; their descriptive columns are overwritten with
        the fetched values, and the style is taken from the index catalogue
        (``get_index_code_info``) or, when the index is not catalogued, from
        the fixed rules in ``_guess_index_style``.

        :param total_df: frame indexed by fund code (without the '.OF'
            suffix) that contains a ``跟踪指数类型`` column.
        :return: the rows that already had a style followed by the completed
            rows. An index no rule recognises is logged and left as None.
        """
        temp_se = total_df['跟踪指数类型']
        have_se = temp_se.dropna()
        have_codes = set(have_se.index.tolist())
        lack_fund_code = [
            code for code in temp_se.index.tolist() if code not in have_codes
        ]
        if not lack_fund_code:
            # Nothing to complete, so skip the remote lookup entirely.
            return total_df.loc[have_se.index.tolist()]

        GetDataTotalMainDemo = GetDataTotalMain(data_resource='wind')
        lack_code = [code + '.OF' for code in lack_fund_code]

        temp_df = GetDataTotalMainDemo.get_fund_base_info(
            fund_code_list=lack_code).sort_values('跟踪指数代码')
        temp_df['mysql_code'] = [
            code.split('.')[0] for code in temp_df.index.tolist()
        ]
        # NOTE(review): '%y' renders a two-digit year; '%Y' may have been
        # intended - confirm against the consumers of this column.
        temp_df['基金成立日'] = [
            datetime.strftime(date_str, '%y-%m-%d')
            for date_str in temp_df['基金成立日'].tolist()
        ]
        temp_df.set_index('mysql_code', inplace=True)

        # Work on a copy so the assignments below never write into a view of
        # the caller's frame.
        taret_lack_df = total_df.loc[lack_fund_code].copy()
        for target_col, source_col in (
                ('基金类型', '基金类型'),
                ('产品类型', '产品类型'),
                ('基金名称', '基金全称'),
                ('基金成立日', '基金成立日'),
                ('跟踪指数代码', '跟踪指数代码'),
        ):
            taret_lack_df[target_col] = temp_df[source_col]

        index_code_list = [
            index_code.split('.')[0]
            for index_code in temp_df['跟踪指数代码'].tolist()
            if isinstance(index_code, str) and index_code
        ]
        index_info_df = self.GetIndexAndProductDemo.get_index_code_info(
            index_code=index_code_list)
        known_codes = set(index_info_df.index.tolist())

        index_style_list = []
        for index_code in taret_lack_df['跟踪指数代码']:
            # None, '' and NaN (fund absent from the fetched data) all mean
            # "no tracked index"; such funds default to '规模'.
            if not (isinstance(index_code, str) and index_code):
                index_style_list.append('规模')
                continue

            temp_code = index_code.split('.')[0]
            if temp_code in known_codes:
                class_classify = index_info_df.loc[temp_code][
                    'class_classify']
                if class_classify == '国证综合指数':
                    class_classify = '规模'
                elif class_classify in ['定制指数', '国证跨境未分类']:
                    class_classify = '主题'
                index_style_list.append(class_classify)
                continue

            style = self._guess_index_style(index_code)
            if style is None:
                self.logger.info("缺失未处理的指数:%s" % index_code)
            # Always append exactly one entry per row so the list stays
            # aligned with taret_lack_df even for unrecognised indices.
            index_style_list.append(style)

        taret_lack_df['跟踪指数类型'] = index_style_list
        result_df = pd.concat(
            [total_df.loc[have_se.index.tolist()], taret_lack_df],
            axis=0,
            sort=True)
        return result_df

    @staticmethod
    def _guess_index_style(index_code):
        """Classify an uncatalogued index by fixed rules; None if none match."""
        parts = index_code.split('.')
        body = parts[0]
        # Codes without an exchange suffix simply match no suffix rule.
        suffix = parts[1] if len(parts) > 1 else ''

        if suffix in ('MI', 'CSI') and body.startswith('7'):
            if body[:6] in ('707918', '714032', '714721', '718465'):
                return '策略'
            return '规模'
        if body in ('399550', '980001', 'CSPSADRP'):
            return '主题'
        if body in ('830009', 'HSI', 'FCAH50', 'SPCQVCP',
                    '136056L') or suffix == 'HI':
            return '规模'
        if body in ('930793', ):
            return '行业'
        if body in ('930840', 'SPACEVCP', 'SPAHLVCP', 'SPCLLHCP'):
            return '策略'
        if body in ('DCESMFI', 'IMCI'):
            return '商品'
        return None

Exemple #15

0

Afficher le fichier

Fichier : GetIndexEst.py Projet : sufezl/EstimateValueData

class GetIndexEst:
    def __init__(self):
        """Set the sample window, the Wind data accessor and the output folders."""
        window_start = datetime.today() - timedelta(days=365 * 10)
        self.start_date = window_start.strftime("%Y-%m-%d")
        # The end of the window is pinned to a fixed date rather than today.
        self.end_date = '2020-08-31'
        self.GetDataTotalMainDemo = GetDataTotalMain(data_resource='wind')
        # Both output folders live under the current working directory.
        working_dir = os.getcwd()
        self.file_path = working_dir + '\\GetDataResult\\估值\\'
        self.file_month_path = working_dir + '\\GetDataResult\\月度表现\\'

    def get_plot_figure(self, dic_df):
        for code, df in dic_df.items():
            temp_fig = plt.figure(figsize=(16, 9))
            temp_ax = temp_fig.add_subplot(111)
            df.plot(ax=temp_ax)
            temp_ax.grid()
            # temp_ax.set_title(u'%sPE走势' % code)
            plt.savefig(self.file_path + '%s估值走势图.png' % code)
            # plt.show()

    def get_regression(self, index_code_list, bench_code_list,
                       total_return_df):

        for index_code in index_code_list:
            list_r2, list_beta, list_tr, list_const = [], [], [], []
            Y = total_return_df[index_code].values
            for bench_code in bench_code_list:
                x = total_return_df[bench_code].values
                x = x.reshape(len(x), 1)
                c = np.ones((len(x), 1))
                X = np.hstack((c, x))
                res = (sm.OLS(Y, X)).fit()
                list_r2.append(res.rsquared)
                list_beta.append(res.params[1])
                list_const.append(res.params[0])

            res_indus = pd.DataFrame([])
            res_indus['指数代码'] = bench_code_list

            res_indus['拟合R方'] = list_r2

            res_indus['beta'] = list_beta
            res_indus['alpha'] = list_const
            res_indus = res_indus.sort_values('拟合R方', ascending=False)
            res_indus.to_excel(self.file_path + '%s风格指数回归结果.xlsx' % index_code,
                               index=False)

            maxR2Code = res_indus['指数代码'].tolist()[0]
            x = total_return_df[maxR2Code].values
            maxR2Alpha = res_indus['alpha'].tolist()[0]
            maxR2Beta = res_indus['beta'].tolist()[0]

            plt.style.use('ggplot')
            plt.figure(figsize=(16, 9))
            plt.scatter(x, Y, s=30, color='blue', label='样本实例')
            plt.plot(x,
                     maxR2Alpha + maxR2Beta * x,
                     linewidth=3,
                     color='red',
                     label='回归线')
            plt.ylabel('宽基指数超额收益')
            plt.xlabel('风格指数超额收益')
            # plt.title('%s拟合效果最好的风格指数：'%index_code +maxR2Code, fontsize=13,
            #           bbox={'facecolor': '0.8', 'pad': 5})
            plt.grid(True)
            plt.legend(loc='upper left')  # 添加图例
            plt.savefig(self.file_path + '%s拟合风格指数效果图.png' % index_code)
            # plt.show()

            plt.style.use('ggplot')
            fig = plt.figure(figsize=(16, 9))
            ax = fig.add_subplot(111)
            indeustryAccDf = (
                1 + total_return_df[[index_code, maxR2Code]]).cumprod()
            indeustryAccDf['指数收益比'] = indeustryAccDf[
                index_code] / indeustryAccDf[maxR2Code]
            indeustryAccDf.plot(ax=ax)
            ax.set_ylabel('累计收益率')
            ax.set_xlabel('时间')
            # ax.set_title('%s拟合效果最好的风格指数：'%index_code + maxR2Code, fontsize=13,
            #              bbox={'facecolor': '0.8', 'pad': 5})
            ax.grid(True)
            ax.legend(loc='down right')  # 添加图例
            plt.savefig(self.file_path + '%s拟合风格指数累计走势对比图.png' % index_code)

    def get_index_regress(self, index_code_list):
        """Compare the given indices with three fixed benchmark indices.

        Close prices for the benchmarks and for ``index_code_list`` are
        fetched over ``self.start_date``..``self.end_date`` and turned into
        period-over-period returns. The correlation matrix is written to
        '相关系数.xlsx' under ``self.file_path`` and the returns are handed to
        ``get_regression``.

        :param index_code_list: codes of the indices to analyse.
        :return: None.
        """
        bench_code_list = ['399314.SZ', '399315.SZ', '399316.SZ']

        def load_close_prices(codes):
            # One column per code, each named after the code it came from.
            frames = []
            for code in codes:
                quotes = self.GetDataTotalMainDemo.get_hq_data(
                    code=code,
                    start_date=self.start_date,
                    end_date=self.end_date,
                    code_style='index')
                quotes.rename(columns={'close_price': code}, inplace=True)
                frames.append(quotes)
            return pd.concat(frames, axis=1, sort=True)

        bench_df = load_close_prices(bench_code_list)
        index_df = load_close_prices(index_code_list)

        price_df = pd.concat([index_df, bench_df], axis=1, sort=True)
        # Simple return versus the previous row; incomplete rows are dropped.
        total_return_df = (price_df / price_df.shift(1) - 1).dropna()
        total_return_df.corr().to_excel(self.file_path + '相关系数.xlsx')

        self.get_regression(index_code_list, bench_code_list, total_return_df)

    def get_index_consit(self, index_code='000913.SH', weight=1):
        """Return the industry weights and constituent details of an index.

        The constituents come from ``get_index_constituent``; per-stock
        fundamentals are requested through ``w.wss`` with fixed trade and
        report dates, then joined to the constituent weights by stock code.

        :param index_code: code of the index to break down.
        :param weight: multiplier applied to every industry weight.
        :return: ``(dic_ind_weight, stock_result_df)``, where
            ``dic_ind_weight`` maps each '申万一级行业' value to
            ``sum(stock_weight) * weight / 100`` and ``stock_result_df`` is
            the joined per-stock frame.
        :raises KeyError: if the constituent frame lacks 'stock_code',
            'stock_weight' or 'stock_name'.
        """
        temp_df = self.GetDataTotalMainDemo.get_index_constituent(
            indexCode=index_code)
        # Let a missing column fail right here with a clear KeyError instead
        # of being swallowed and surfacing later as an unbound variable.
        use_df = temp_df[['stock_code', 'stock_weight',
                          'stock_name']].set_index('stock_code')

        wss_data = w.wss(
            codes=temp_df['stock_code'].tolist(),
            fields=[
                "industry_sw", "mkt_cap_ard", "roe_ttm", "yoyprofit",
                "dividendyield"
            ],
            options=
            "industryType=1;unit=1;tradeDate=20200823;rptDate=20191231;rptYear=2019"
        )
        code_ind_df = pd.DataFrame(wss_data.Data,
                                   index=wss_data.Fields,
                                   columns=wss_data.Codes).T
        # The returned field names are matched in upper case.
        name_dic = {
            "industry_sw".upper(): "申万一级行业",
            "mkt_cap_ard".upper(): "总市值",
            "dividendyield".upper(): "股息率（2019年）",
            "yoyprofit".upper(): "净利润同比增长率",
            "roe_ttm".upper(): "ROE"
        }
        code_ind_df.rename(columns=name_dic, inplace=True)

        stock_result_df = pd.concat([use_df, code_ind_df], sort=True, axis=1)
        dic_ind_weight = {
            ind: stock_df['stock_weight'].sum() * weight / 100
            for ind, stock_df in stock_result_df.groupby('申万一级行业')
        }
        return dic_ind_weight, stock_result_df

    def calc_stock_weight(self, dic_stock_weight, index_se):
        for index_code, temp_df in dic_stock_weight.items():
            temp_df['port_stock_weight'] = temp_df['stock_weight'] * index_se[
                index_code]
        total_stock_df = pd.concat(list(dic_stock_weight.values()),
                                   axis=0,
                                   sort=True)
        total_stock_df['stock_code_label'] = total_stock_df.index.tolist()
        df_list = []
        for code, temp_stock_df in total_stock_df.groupby(
                by='stock_code_label'):
            if temp_stock_df.shape[0] > 1:
                target_df = temp_stock_df.iloc[0]
                target_df['port_stock_weight'] = temp_stock_df[
                    'port_stock_weight'].sum()
                target_df = pd.DataFrame(target_df).T
                df_list.append(target_df)
            else:
                df_list.append(temp_stock_df)
        total_stock_result = pd.concat(df_list, axis=0, sort=True).sort_values(
            by='port_stock_weight', ascending=False)
        name_dic = {'port_stock_weight': '权重', 'stock_name': '简称'}
        total_stock_result.rename(columns=name_dic).to_excel("股票持仓数据.xlsx")

    def get_port_weight(self, index_code_list=[], weight_list=[]):
        temp_se = pd.Series(weight_list, index=index_code_list)
        port_df_list = []
        dic_stock_weight = {}
        for index_code in index_code_list:
            dic_ind_weight, stock_weight_df = self.get_index_consit(
                index_code, weight=temp_se[index_code])
            dic_stock_weight[index_code] = stock_weight_df
            ind_weight_se = pd.Series(dic_ind_weight, name=index_code)
            port_df_list.append(ind_weight_se)

        self.calc_stock_weight(dic_stock_weight, temp_se)
        total_ind = pd.concat(port_df_list, axis=1, sort=True).sum(axis=1)
        total_ind.name = '组合行业暴露'

        bench_code_list = ['000300.SH', '000905.SH']
        bench_code_df_list = []
        for bench_code in bench_code_list:
            dic_bench_weight, _ = self.get_index_consit(bench_code)
            bench_weight_se = pd.Series(dic_bench_weight, name=bench_code)
            bench_code_df_list.append(bench_weight_se)
        bench_code_df = pd.concat(bench_code_df_list, axis=1,
                                  sort=True).rename(columns={
                                      '000300.SH': '沪深300',
                                      '000905.SH': "中证500"
                                  })
        total_df = pd.concat([total_ind, bench_code_df], axis=1,
                             sort=True).fillna(0)
        total_df['相对沪深300'] = total_df['组合行业暴露'] - total_df['沪深300']
        total_df['相对中证500'] = total_df['组合行业暴露'] - total_df['中证500']
        total_df.to_excel("主题OTC组合暴露.xlsx")

    def get_init_param(self):
        code_list1 = [
            '399006.SZ', '399005.SZ', '000852.SH', '399001.SZ', '000905.SH',
            '000300.SH', '000001.SH', '000016.SH'
        ]  # 宽基
        code_list2 = [
            '000935.SH', '000933.SH', '000932.SH', '000936.CSI', '000934.SH',
            '000931.CSI', '000930.CSI', '000929.CSI', '000937.CSI', '000928.SH'
        ]  # 行业
        code_list3 = [
            '990001.CSI', '980017.CNI', '399803.SZ', '399973.SZ', '399441.SZ',
            '931066.CSI', '931087.CSI', '000941.CSI', 'H30318.CSI',
            '931079.CSI', '931071.CSI', '399997.SZ', '399976.SZ', '399362.SZ',
            'H30533.CSI', '399812.SZ', '399974.SZ', '000860.CSI', '000861.CSI',
            '000859.CSI', '000015.SH'
        ]  # 主题
        code_list4 = [
            '399673.SZ', '399293.SZ', '399296.SZ', '399295.SZ', '930758.CSI',
            '399983.SZ', '000984.SH', '000971.SH', '000982.SH', '399990.SZ',
            '399702.SZ', '000050.SH', '931052.CSI', '930838.CSI', 'H30269.CSI',
            '000925.CSI'
        ]  # 策略
        code_list5 = ['399377.SZ', '399348.SZ', '399919.SZ', '000029.SH']  # 风格
        dic_index = {}
        dic_index['宽基'] = code_list1
        dic_index['行业'] = code_list2
        dic_index['主题'] = code_list3
        dic_index['策略'] = code_list4
        dic_index['风格'] = code_list5
        return dic_index

    def get_main(self, ):
        """Export valuation history and monthly performance per index category.

        For every category returned by ``get_init_param`` this method:

        * loads the performance table through ``GetTableData.get_data``;
        * fetches the daily PE (TTM) history of each index, prints where the
          latest value ranks within that history, and writes the history to
          ``self.file_path + '估值<code>.xlsx'``;
        * writes the performance table joined with the latest PE and its
          rank to ``self.file_month_path + '<category>指数月度表现.xlsx'``;
        * draws the valuation charts through ``get_plot_figure``.

        :return: None.
        """
        dic_index_param = self.get_init_param()
        for index_type, index_code_list in dic_index_param.items():
            GetTableDataDemo = GetTableData()
            total_df = GetTableDataDemo.get_data(code_list=index_code_list,
                                                 index_type=index_type)

            dic_df = {}
            dic_PE = {}
            for code in index_code_list:
                df = self.GetDataTotalMainDemo.get_hq_data(
                    code=code,
                    start_date=self.start_date,
                    end_date=self.end_date,
                    code_style='index_daily',
                    dic_param={
                        'fields': 'pe_ttm',
                        'filed_name': 'PE值'
                    })
                df.rename(columns={
                    'update_time': '时间',
                    'factor_value': "PE_TTM"
                },
                          inplace=True)
                # Take the last row by position; plain [-1] is a label lookup
                # whose positional fallback is deprecated in pandas.
                last_value = df['PE_TTM'].iloc[-1]
                # Rank of the latest value within the ascending history,
                # expressed as a fraction of the number of rows.
                percent_num = (
                    df['PE_TTM'].sort_values().tolist().index(last_value) +
                    1) / df.shape[0]
                print('%s当前估值分位数%s' % (code, round(percent_num, 4)))
                df.to_excel(self.file_path + '估值%s.xlsx' % code[:6])
                dic_df[total_df.loc[code]['证券简称']] = df
                dic_PE[code] = {'PE': last_value, 'PE分位数': percent_num}
            pe_df = pd.DataFrame(dic_PE).T
            total_last_df = pd.concat([total_df, pe_df], axis=1, sort=True)

            total_last_df.to_excel(self.file_month_path +
                                   '%s指数月度表现.xlsx' % index_type,
                                   index=False)

            self.get_plot_figure(dic_df)