Exemple #1
0
    def cal_mfc_holding_alpha_exposure_period(self, fund_name,
                                              factor_name_list, beg_date,
                                              end_date):
        """Compute and persist one fund's alpha-factor exposure over a period.

        ``cal_mfc_holding_alpha_exposure_date`` is called for every date
        returned by ``Date().get_trade_date_series(beg_date, end_date)``.
        The per-date results are stacked row-wise, merged into the existing
        ``MfcAlphaExposure_<fund_name>.csv`` (when it exists) through
        ``FactorOperate().pandas_add_row``, restricted to the columns in
        ``factor_name_list`` and written back to the same file.

        Args:
            fund_name: Fund identifier; also part of the output file name.
            factor_name_list: Columns kept in the saved file.
            beg_date: Period start, forwarded to the trade-date helper.
            end_date: Period end, forwarded to the trade-date helper.
        """
        trade_dates = Date().get_trade_date_series(beg_date, end_date)

        # Concatenate once instead of growing a DataFrame inside the loop,
        # which re-copies every previously collected row on each iteration.
        # The leading empty frame keeps the result an (empty) DataFrame when
        # there are no dates; pd.concat silently drops None entries.
        frames = [pd.DataFrame()]
        frames.extend(
            self.cal_mfc_holding_alpha_exposure_date(fund_name,
                                                     factor_name_list, date)
            for date in trade_dates)
        new_data = pd.concat(frames, axis=0)

        out_file = os.path.join(self.exposure_data_path,
                                "MfcAlphaExposure_" + fund_name + '.csv')
        if os.path.exists(out_file):
            # Stored index is forced to str before merging with the new rows.
            data = pd.read_csv(out_file, encoding='gbk', index_col=[0])
            data.index = data.index.map(str)
            data = FactorOperate().pandas_add_row(data, new_data)
        else:
            data = new_data

        data = data[factor_name_list]
        data.to_csv(out_file)
Exemple #2
0
    def load_macro_data_wind(self,
                             macro_code="M0000545",
                             beg_date="19900101",
                             end_date=None):
        """Download one macro series from Wind and merge it into its CSV.

        Args:
            macro_code: Wind EDB code of the series; also the CSV file name.
            beg_date: First date requested (converted by
                ``Date().change_to_str``).
            end_date: Last date requested.  ``None`` (the default) means
                "today", resolved at call time.  The previous default was
                computed once when the function was defined, so a
                long-running process kept using a stale date.
        """
        if end_date is None:
            end_date = datetime.today().strftime("%Y%m%d")

        from WindPy import w
        w.start()

        beg_date = Date().change_to_str(beg_date)
        end_date = Date().change_to_str(end_date)

        # Download: rows of the Wind result are codes, columns are times, so
        # transpose to get one row per date.
        wind_data = w.edb(macro_code, beg_date, end_date, "Fill=Previous")
        new_data = pd.DataFrame(wind_data.Data,
                                columns=wind_data.Times,
                                index=wind_data.Codes).T
        new_data = new_data.dropna()
        new_data.index = new_data.index.map(lambda x: x.strftime('%Y%m%d'))

        print(" Loading Macro Data %s From %s To %s " %
              (macro_code, beg_date, end_date))
        out_file = os.path.join(self.data_path, macro_code + '.csv')

        if os.path.exists(out_file):
            data = pd.read_csv(out_file, encoding='gbk', index_col=[0])
            data.index = data.index.map(str)
            data = FactorOperate().pandas_add_row(data, new_data)
        else:
            print(" File No Exist ", macro_code)
            data = new_data

        data = data.dropna(how='all')
        data.to_csv(out_file)
Exemple #3
0
    def cal_mfc_holding_barra_exposure_period(self, fund_name, beg_date,
                                              end_date):
        """Compute and persist one fund's Barra exposure over a period.

        ``cal_mfc_holding_barra_exposure_date`` is called for every date
        returned by ``Date().get_trade_date_series(beg_date, end_date)``.
        The per-date results are stacked row-wise, merged into the existing
        ``MfcRiskExposure_<fund_name>.csv`` (when it exists) through
        ``FactorOperate().pandas_add_row``, restricted to the STYLE, COUNTRY
        and INDUSTRY factor names reported by ``Barra().get_factor_name`` and
        written back to the same file.

        Args:
            fund_name: Fund identifier; also part of the output file name.
            beg_date: Period start, forwarded to the trade-date helper.
            end_date: Period end, forwarded to the trade-date helper.
        """
        trade_dates = Date().get_trade_date_series(beg_date, end_date)

        # Concatenate once instead of growing a DataFrame inside the loop,
        # which re-copies every previously collected row on each iteration.
        # The leading empty frame keeps the result an (empty) DataFrame when
        # there are no dates; pd.concat silently drops None entries.
        frames = [pd.DataFrame()]
        frames.extend(
            self.cal_mfc_holding_barra_exposure_date(fund_name, date)
            for date in trade_dates)
        new_data = pd.concat(frames, axis=0)

        out_file = os.path.join(self.exposure_data_path,
                                "MfcRiskExposure_" + fund_name + '.csv')
        if os.path.exists(out_file):
            data = pd.read_csv(out_file, encoding='gbk', index_col=[0])
            data.index = data.index.map(str)
            data = FactorOperate().pandas_add_row(data, new_data)
        else:
            data = new_data

        # Keep only (and order by) the Barra factor columns.
        type_list = ["STYLE", "COUNTRY", "INDUSTRY"]
        name = Barra().get_factor_name(type_list=type_list)
        data = data[list(name.NAME_EN.values)]
        data.to_csv(out_file)
Exemple #4
0
    def cal_index_exposure(self,
                           index_code="000300.SH",
                           beg_date="20031231",
                           end_date=None,
                           period="D"):
        """Compute an index's Barra exposure over a period and save it.

        ``cal_index_exposure_date`` is called for every date returned by
        ``Date().get_trade_date_series``; the results are stacked row-wise and
        merged into ``Index_Barra_Exposure_<index_code>.csv``.

        Args:
            index_code: Index identifier; also part of the output file name.
            beg_date: Period start.
            end_date: Period end.  ``None`` (the default) means "today",
                resolved at call time rather than once at definition time.
            period: Sampling period forwarded to the trade-date helper.
        """
        if end_date is None:
            end_date = datetime.today().strftime("%Y%m%d")

        trade_dates = Date().get_trade_date_series(beg_date, end_date,
                                                   period=period)

        # Gather all per-date frames first.  The original bound ``new_data``
        # only inside the loop, so an empty date series raised
        # UnboundLocalError further down.
        frames = [self.cal_index_exposure_date(index_code, date)
                  for date in trade_dates]
        frames = [frame for frame in frames if frame is not None]
        if not frames:
            # Nothing new to merge: leave any existing file untouched.
            print(" No Index Exposure Data ", index_code)
            return

        new_data = pd.concat(frames, axis=0)

        out_file = os.path.join(self.data_path_exposure,
                                "Index_Barra_Exposure_" + index_code + '.csv')
        if os.path.exists(out_file):
            data = pd.read_csv(out_file, encoding='gbk', index_col=[0])
            data.index = data.index.map(str)
            data = FactorOperate().pandas_add_row(data, new_data)
        else:
            data = new_data
        data.to_csv(out_file)
    def cal_fund_holder_risk_alpha_return_quarter(self, fund, end_date):
        """Split a fund's return into Barra factor returns and alpha.

        Uses the fund's exposure from
        ``FundHolderExposureQuarter().get_fund_holder_exposure_quarter_daily``
        (quarterly-report holdings), multiplies it with the Barra factor
        returns, and derives ``StyleReturn``, ``IndustryReturn``,
        ``CountryReturn``, their total ``SumReturn`` and
        ``AlphaReturn = FundReturn - SumReturn``.  The result is merged into
        ``Fund_Holder_Risk_Alpha_Return_Quarter_<fund>_<end_date>.csv``.

        Args:
            fund: Fund code.
            end_date: Last date of the analysis; the start is fixed to
                ``"20040101"``.

        Returns:
            The newly computed DataFrame, or ``None`` when no exposure is
            available for the fund.
        """
        beg_date = "20040101"
        type_list = ['STYLE', 'COUNTRY', 'INDUSTRY']
        fund_exposure = FundHolderExposureQuarter(
        ).get_fund_holder_exposure_quarter_daily(fund, beg_date, end_date)

        # Bail out before loading anything else: without exposure there is
        # nothing to compute, and the remaining loads were wasted I/O.
        if fund_exposure is None:
            return None

        barra_riskfactor_return = Barra().get_factor_return(
            beg_date, end_date, type_list=type_list)
        date_series = Date().get_trade_date_series(beg_date, end_date)
        fund_pct = FundFactor().get_fund_factor("Repair_Nav_Pct",
                                                fund_pool=[fund],
                                                date_list=date_series)
        fund_pct.columns = ["FundReturn"]

        fund_riskfactor_return = barra_riskfactor_return.mul(fund_exposure)
        fund_return = pd.concat([fund_pct, fund_riskfactor_return], axis=1)
        fund_return = fund_return.dropna()

        # One aggregate column per factor group, in the original order.
        group_columns = (("STYLE", "StyleReturn"),
                         ("INDUSTRY", "IndustryReturn"),
                         ("COUNTRY", "CountryReturn"))
        for factor_type, column in group_columns:
            factor_names = list(
                Barra().get_factor_name(
                    type_list=[factor_type])["NAME_EN"].values)
            fund_return[column] = fund_return[factor_names].sum(axis=1)

        aggregate_columns = [column for _, column in group_columns]
        fund_return["SumReturn"] = fund_return[aggregate_columns].sum(axis=1)
        fund_return["AlphaReturn"] = (fund_return["FundReturn"] -
                                      fund_return["SumReturn"])

        data_new = fund_return.dropna()

        # Merge with previously stored results.
        out_path = os.path.join(self.data_path_exposure,
                                'fund_holding_risk_alpha_return_quarter')
        out_file = os.path.join(
            out_path, 'Fund_Holder_Risk_Alpha_Return_Quarter_' + fund + "_" +
            end_date + '.csv')
        print(out_file)

        if os.path.exists(out_file):
            data_old = pd.read_csv(out_file, index_col=[0], encoding='gbk')
            data_old.index = data_old.index.map(str)
            params = FactorOperate().pandas_add_row(data_old, data_new)
        else:
            params = data_new
        params.to_csv(out_file)
        return data_new
Exemple #6
0
    def load_index_factor(self,
                          index_code="000300.SH",
                          beg_date=None,
                          end_date=None,
                          primary=False):
        """Download recent factor data of one index from Wind into its CSV.

        Args:
            index_code: Wind code of the index; also the CSV file name.
            beg_date: First date requested.  When ``None``: if the CSV
                already exists the start is 20 trade-date offsets before
                ``end_date``; otherwise the index "basedate" is queried from
                Wind, falling back to ``'19991231'`` if that query fails.
            end_date: Last date requested.  ``None`` (the default) means
                "today", resolved at call time rather than once at
                definition time.
            primary: When true, ``pe_ttm`` and ``pb_lf`` are requested in
                addition to ``close``.
        """
        if end_date is None:
            end_date = datetime.today().strftime("%Y%m%d")

        from WindPy import w
        w.start()

        out_file = os.path.join(self.data_data_factor, index_code + '.csv')
        if beg_date is None and os.path.exists(out_file):
            beg_date = Date().get_trade_date_offset(end_date, -20)

        if beg_date is None and not os.path.exists(out_file):
            try:
                base_data = w.wsd(index_code, "basedate")
                beg_date = base_data.Data[0][0].strftime("%Y%m%d")
            except Exception:
                # Best effort: fall back to a fixed start date.
                beg_date = '19991231'

        beg_date = Date().change_to_str(beg_date)
        end_date = Date().change_to_str(end_date)
        print(beg_date, end_date, index_code, primary)

        # Download
        if primary:
            index_data = w.wsd(index_code, "close,pe_ttm,pb_lf", beg_date,
                               end_date, "Fill=Previous")
        else:
            index_data = w.wsd(index_code, "close", beg_date, end_date,
                               "Fill=Previous")

        # Rows of the Wind result are fields; transpose to one row per date.
        new_data = pd.DataFrame(index_data.Data,
                                index=index_data.Fields,
                                columns=index_data.Times).T
        new_data.index = new_data.index.map(lambda x: x.strftime('%Y%m%d'))
        print(new_data)

        try:
            new_data['PCT'] = new_data['CLOSE'].pct_change()
            print(" Loading Index Factor ", index_code)

            if os.path.exists(out_file):
                data = pd.read_csv(out_file, encoding='gbk', index_col=[0])
                data.index = data.index.map(str)
                data = FactorOperate().pandas_add_row(data, new_data)
            else:
                print(" File No Exist ", index_code)
                data = new_data
            data = data.dropna(how='all')
            data.to_csv(out_file)
        except Exception as e:
            # Deliberate best effort: report and continue.
            print(e)
            print(" Loading Index Factor Error", index_code)
Exemple #7
0
    def get_fund_factor(self, factor_name, date_list=None, fund_pool=None):
        """Read a stored fund factor table, optionally restricted.

        Args:
            factor_name: Name of the factor; ``<factor_name>.csv`` is read
                from ``self.data_path_factor``.
            date_list: Optional list of row labels (as strings) to keep.
                Labels absent from the file come back as all-NaN rows.
            fund_pool: Optional list of column labels to keep.  Labels absent
                from the file come back as all-NaN columns.

        Returns:
            DataFrame whose index and column labels are strings.
        """
        out_file = os.path.join(self.data_path_factor, factor_name + '.csv')
        data = pd.read_csv(out_file, index_col=[0], encoding='gbk')
        data = FactorOperate().drop_duplicated(data)
        data.index = data.index.map(str)
        data.columns = data.columns.map(str)

        # ``.ix`` was removed in pandas 1.0.  ``reindex`` keeps the behaviour
        # callers rely on: requested labels missing from the file yield NaN
        # instead of raising.
        if date_list is not None:
            data = data.reindex(index=date_list)
        if fund_pool is not None:
            data = data.reindex(columns=fund_pool)

        return data
    def write_factor_h5(self, data, factor_name, path=None, data_type='f'):
        """Write a stock factor table to ``<factor_name>.h5``, merging if present.

        Expected layout (per the original note): index -> code,
        columns -> date.

        Args:
            data: Factor DataFrame.  It is no longer relabelled in place; the
                caller's object is left untouched.
            factor_name: Factor name; determines the file name.
            path: Target directory.  Defaults to
                ``self.get_h5_path(type='my_alpha')``.
            data_type: Forwarded to ``read_factor_h5`` and, when merging, to
                ``HdfMfc().write_hdf_factor`` as ``type``.
        """
        if path is None:
            path = self.get_h5_path(type='my_alpha')

        file = os.path.join(path, '%s.h5' % factor_name)

        # Check the data layout.  A shallow copy gets its own axis slots, so
        # converting the labels to str does not leak into the caller's frame.
        data = data.copy(deep=False)
        data.index = data.index.map(str)
        data.columns = data.columns.map(str)
        # Slicing with [:1] also copes with an empty label; the length check
        # copes with a frame that has no columns at all.
        if len(data.columns) == 0 or data.columns[0][:1] not in ["1", "2"]:
            print(" Data Columns in not Date ")
        data = data.T.dropna(how='all').T

        # Write the H5 data.
        if not os.path.exists(file):
            print(" The File %s Not Exist, Saving ... " % file)
            # NOTE(review): unlike the merge branch, ``type=data_type`` is not
            # passed here - confirm the helper's default is intended.
            HdfMfc().write_hdf_factor(file, data)
        else:
            # pandas_add_row merges by rows, so merge on the transposed
            # (date x code) frames and transpose back.
            old_data = self.read_factor_h5(factor_name, path, data_type)
            old_data = old_data.T
            new_data = data.T
            save_data = FactorOperate().pandas_add_row(old_data=old_data,
                                                       new_data=new_data)
            save_data = save_data.T
            save_data = save_data.T.dropna(how='all').T
            print(" The File %s Exist, Saving... " % file)
            HdfMfc().write_hdf_factor(file, save_data, type=data_type)
Exemple #9
0
    def replace_suspension_with_nan(self, data):
        """Return a copy of ``data`` with NaN wherever TradingStatus equals 1.0.

        The "TradingStatus" factor is read through
        ``StockFactorData().read_factor_h5`` and aligned with the copy via
        ``FactorOperate().make_same_index_columns`` before masking.
        Presumably status 1.0 marks a suspended stock - verify against the
        data definition.
        """
        factor_copy = data.copy()
        trading_status = StockFactorData().read_factor_h5("TradingStatus")
        aligned = FactorOperate().make_same_index_columns(
            [factor_copy, trading_status])
        factor_aligned, status_aligned = aligned

        flagged = status_aligned == 1.0
        factor_aligned[flagged] = np.nan
        return factor_aligned
Exemple #10
0
    def load_ipo_data(self, beg_date):
        """Download IPO data from Wind and merge it into ``ipo_data.xlsx``.

        Fetches name, listing date, issue price, lottery rates and the
        subscription cap for the codes returned by
        ``self.get_new_stock_list(beg_date)``, adds the board-open date,
        price and return from ``self.get_open_date_pct`` and rewrites the
        workbook sheet.

        Args:
            beg_date: Forwarded to ``self.get_new_stock_list``.
        """
        new_stocks = self.get_new_stock_list(beg_date)
        code_str = ','.join(new_stocks.index.values)

        wind_data = w.wss(code_str,
                          "sec_name,ipo_date,ipo_price,ipo_cashratio,ipo_lotteryrate_abc,ipo_otc_cash_pct,ipo_op_uplimit",
                          "instituteType=1")

        # Rows of the Wind result are fields; transpose to one row per code.
        data_pd = pd.DataFrame(wind_data.Data, index=wind_data.Fields,
                               columns=wind_data.Codes).T
        data_pd["IPO_DATE"] = data_pd["IPO_DATE"].map(lambda x: x.strftime('%Y-%m-%d'))
        data_pd.columns = ['股票名称', '上市日期', '发行价格', '网上中签率(%)',
                           '网下A类中签率(%)', '网下总计中签率(%)', '申购上限数量(万股)']
        data_pd['申购上限金额(万元)'] = data_pd["申购上限数量(万股)"] * data_pd['发行价格']

        data_pd = data_pd.dropna()
        data_pd = data_pd.sort_values(by=['上市日期'], ascending=True)

        # The original used the removed ``.ix`` indexer for a positional read
        # and a per-cell write.  Collect the per-stock results row by row and
        # assign whole columns instead (same column order as before).
        open_dates, open_prices, open_pcts = [], [], []
        for code, ipo_date in zip(data_pd.index.values,
                                  data_pd['上市日期'].values):
            open_date, open_pct, open_price = self.get_open_date_pct(code, ipo_date)
            open_dates.append(open_date)
            open_prices.append(open_price)
            open_pcts.append(open_pct)
        data_pd['开板日期'] = open_dates
        data_pd['开板价格'] = open_prices
        data_pd['开板收益'] = open_pcts

        print(data_pd)
        file = os.path.join(self.data_path, 'ipo_data.xlsx')
        if os.path.exists(file):
            # The sheet is written starting at column 1, hence index_col=[1].
            data = pd.read_excel(file, index_col=[1])
            data = data.T.dropna(how='all').T
            concat_data = FactorOperate().pandas_add_row(data, data_pd)
        else:
            # First run: nothing to merge with.
            concat_data = data_pd

        concat_data = concat_data.sort_values(by=['上市日期'], ascending=True)
        excel = WriteExcel(file)
        worksheet = excel.add_worksheet("新股检测")
        excel.write_pandas(concat_data, worksheet, begin_row_number=0, begin_col_number=1,
                           num_format_pd=None, color="orange", fillna=True)
        excel.close()
Exemple #11
0
    def fillna_with_mad_market(self, data):
        """Fill missing factor values with the per-column median.

        A cell is filled only when it is NaN in the factor and its
        "TradingStatus" value is 0.0 or 1.0; every other cell keeps its
        value.  The median is taken down each column (``axis=0``) of the
        aligned factor frame.

        Args:
            data: Factor DataFrame; it is copied, not modified.

        Returns:
            A new DataFrame on the index/columns produced by
            ``FactorOperate().make_same_index_columns``.
        """
        status = StockFactorData().read_factor_h5("TradingStatus")
        factor, status = FactorOperate().make_same_index_columns(
            [data.copy(), status])

        fillable = status.isin([0.0, 1.0]) & factor.isnull()

        # Take the values AFTER alignment.  The original captured
        # ``factor.values`` before aligning, so whenever alignment changed the
        # shape or order the values no longer matched the mask and the index.
        factor_values = factor.values
        column_median = factor.median(axis=0).values

        # Broadcasting repeats the per-column median down every row.
        filled = np.where(fillable.values, column_median[np.newaxis, :],
                          factor_values)
        return pd.DataFrame(filled,
                            index=factor.index,
                            columns=factor.columns)
Exemple #12
0
    def load_fund_factor(self, factor_name, beg_date, end_date):
        """Download a fund factor from the FinDb database (incremental update).

        Raw rows for ``factor_name`` between ``beg_date`` and ``end_date``
        are joined with the fund info table, pivoted into a date x fund-code
        table and merged into ``<factor_name>.csv`` under
        ``self.data_path_factor``.
        """
        beg_date = Date().change_to_str(beg_date)
        end_date = Date().change_to_str(end_date)

        print("Loading Fund Factor %s From %s To %s" %
              (factor_name, beg_date, end_date))
        raw_rows = FinDb().load_raw_data_filter_period(factor_name, beg_date,
                                                       end_date)
        fund_info = FundStatic().get_findb_fund_info()
        # Only the value-column name of the five load parameters is needed.
        _, _, _, _, val_name = FinDb().get_load_findb_param(factor_name)

        merged = pd.merge(raw_rows, fund_info, on="证券内码", how='inner')

        # Long table keyed by (fund code, date); first occurrence wins.
        row_keys = [list(merged['基金代码'].values),
                    list(merged['日期'].values)]
        long_table = pd.DataFrame(merged[val_name].values, index=row_keys)
        long_table = long_table.sort_index()
        long_table = long_table[~long_table.index.duplicated()]

        # Pivot dates into columns, then flip to one row per date.
        wide_table = long_table.unstack()
        wide_table.columns = wide_table.columns.droplevel(level=0)
        new_data = wide_table.T
        new_data = new_data.dropna(how='all')
        new_data.index = new_data.index.map(str)

        out_file = os.path.join(self.data_path_factor, factor_name + '.csv')

        if not os.path.exists(out_file):
            print(" File No Exist ", factor_name)
            data = new_data
        else:
            stored = pd.read_csv(out_file, encoding='gbk', index_col=[0])
            stored.index = stored.index.map(str)
            data = FactorOperate().pandas_add_row(stored, new_data)

        data = data.dropna(how='all')
        data.to_csv(out_file)
Exemple #13
0
    def load_mfc_public_fund_nav(self, beg_date=None, end_date=None):
        """Download adjusted NAV of the public funds from Wind (incremental).

        Funds are the rows of ``MfcGetData().get_mfc_fund_info()`` whose
        ``Type`` is "公募".  Each fund's data is merged into
        ``<data_path>/nav/public_fund/<code>_Nav.csv``.

        Args:
            beg_date: First date requested; defaults to 40 trade-date offsets
                before ``end_date``.
            end_date: Last date requested; defaults to today.
        """
        if end_date is None:
            end_date = Date().change_to_str(datetime.today())
        if beg_date is None:
            beg_date = Date().get_trade_date_offset(end_date, -40)

        from WindPy import w
        w.start()

        fund_info = MfcGetData().get_mfc_fund_info()
        fund_info = fund_info[fund_info.Type == "公募"]

        for fund_code in fund_info.Code.values:

            wind_data = w.wsd(fund_code, "nav,NAV_adj,NAV_acc,NAV_adj_return1",
                              beg_date, end_date, "")
            # Rows of the Wind result are fields; transpose to one row per
            # date.
            nav_data = pd.DataFrame(wind_data.Data,
                                    index=wind_data.Fields,
                                    columns=wind_data.Times).T
            nav_data.index = nav_data.index.map(lambda x: x.strftime('%Y%m%d'))
            new_data = nav_data.dropna(subset=['NAV_ADJ'])
            print(" Load Mfc Public Fund %s Nav " % fund_code)

            # Merge and store.  Path components are joined separately: the
            # original literal "nav\public_fund" relied on the invalid escape
            # "\p" and only formed a directory path on Windows.
            file = os.path.join(self.data_path, "nav", "public_fund",
                                fund_code + "_Nav.csv")
            if os.path.exists(file):
                old_data = pd.read_csv(file, index_col=[0], encoding='gbk')
                old_data.index = old_data.index.map(str)
                nav_data = FactorOperate().pandas_add_row(old_data, new_data)
            else:
                nav_data = new_data

            nav_data.to_csv(file)
Exemple #14
0
    def standardization_free_mv(data, free_mv):
        """Standardize a factor with a market-value-weighted mean.

        The centre is the free-market-value weighted mean and the scale is
        the ordinary (unweighted) standard deviation.  The original note
        attributes this to the Barra risk model and says the free market
        value should be yesterday's, not today's; only the DataFrame branch
        applies that one-column shift itself.

        Args:
            data: ``pd.Series`` (one cross-section) or ``pd.DataFrame``
                (index -> code, columns -> date, as implied by the shift
                along columns).
            free_mv: Free market value with the same kind of layout as
                ``data``.

        Returns:
            The standardized Series/DataFrame, or ``None`` (after printing a
            message) when ``data`` is neither type.
        """
        # isinstance rather than ``type(x) == T`` so subclasses are accepted.
        if isinstance(data, pd.Series):

            data_ser = data.copy()

            # Weights are normalised over the rows where both the factor and
            # the market value are present.
            concat_data = pd.concat([data_ser, free_mv], axis=1)
            concat_data.columns = ['Factor', 'Mv']
            concat_data = concat_data.dropna()
            concat_data['Mv'] = concat_data['Mv'] / concat_data['Mv'].sum()

            mean_weight_free_mv = (concat_data['Mv'] *
                                   concat_data['Factor']).sum()

            std = data_ser.std()
            normal_series = (data_ser - mean_weight_free_mv) / std
            return normal_series

        elif isinstance(data, pd.DataFrame):

            factor = data.copy()
            # Shift by one column so each date uses the previous column's
            # market value.
            free_mv = free_mv.T.shift(1).T
            [factor, free_mv
             ] = FactorOperate().make_same_index_columns([factor, free_mv])
            free_mv = free_mv / free_mv.sum()
            free_mv = free_mv.fillna(0.0)
            free_mv = free_mv.T
            factor = factor.T

            # After the transpose each row is one date.
            mean_weight_free_mv = factor.mul(free_mv).sum(axis=1)

            std = factor.std(axis=1)
            factor = factor.sub(mean_weight_free_mv, axis='index')
            factor = factor.div(std, axis="index")
            factor = factor.T
            return factor

        else:
            print(" Type of Data can not be remove extreme value ")
            return None
Exemple #15
0
    def save_file_excel(self, new_data, type, name, num_format):
        """Append daily rows to the fund's summary workbook (incremental).

        Used for both the daily attribution split and the daily exposure.
        The workbook lives at
        ``<data_path>/<fund_name>/每日汇总/<fund_name>_<type>_<name>.xlsx``.
        When ``new_data`` is empty only a message is printed and nothing is
        written.

        Args:
            new_data: Rows to add.
            type: Label used in the file name.
            name: Label used in the file name.
            num_format: Number format applied to every column.
        """
        summary_dir = os.path.join(self.data_path, self.fund_name, "每日汇总")
        workbook_name = '%s_%s_%s.xlsx' % (self.fund_name, type, name)
        file_name = os.path.join(summary_dir, workbook_name)

        # Guard clause: nothing to write.
        if not len(new_data) > 0:
            print("写入数据为0", file_name)
            return

        if not os.path.exists(summary_dir):
            os.makedirs(summary_dir)
        print("写入", file_name)

        if os.path.exists(file_name):
            stored = pd.read_excel(file_name, index_col=[0])
            stored.index = stored.index.map(str)
            merged = FactorOperate().pandas_add_row(stored, new_data)
        else:
            merged = new_data

        # One-row frame carrying the same number format for every column.
        format_row = pd.DataFrame([],
                                  columns=merged.columns,
                                  index=['format'])
        format_row.loc['format', :] = num_format

        excel = WriteExcel(file_name)
        sheet = excel.add_worksheet(self.fund_name)
        excel.write_pandas(merged,
                           sheet,
                           begin_row_number=0,
                           begin_col_number=0,
                           num_format_pd=format_row,
                           color="red",
                           fillna=True)
        excel.close()
Exemple #16
0
    def cal_fund_holder_exposure_halfyear(self, fund_code, beg_date, end_date):
        """Compute one fund's Barra exposure from its half-year holdings.

        Per the original note, the exposure is computed on the actual (not
        fully invested) position: each stock's factor exposure is weighted by
        its holding weight / 100, and the COUNTRY factor is then overwritten
        with the fund's ``Stock_Ratio`` / 100.  Results are merged into
        ``Fund_Holder_Exposure_HalfYear_<fund_code>.csv``.

        Args:
            fund_code: Fund code, e.g. "000001.OF".
            beg_date: Period start; forced to "20040101" when the output file
                does not exist yet.
            end_date: Period end.

        Returns:
            ``None`` always; returns early without writing when no holdings
            are available for the fund.
        """
        type_list = ['COUNTRY', 'STYLE', 'INDUSTRY']
        barra_name = list(Barra().get_factor_name(type_list)['NAME_EN'].values)
        out_file = os.path.join(
            self.halfyear_exposure_path,
            'Fund_Holder_Exposure_HalfYear_%s.csv' % fund_code)

        if not os.path.exists(out_file):
            beg_date = "20040101"

        date_series = Date().get_normal_date_series(beg_date,
                                                    end_date,
                                                    period='S')
        # Loaded once.  The original fetched the same table again on every
        # loop iteration only to test its length.
        fund_holding = FundHolder().get_fund_stock_weight_halfyear(fund_code)

        if fund_holding is None:
            return None

        # Keep only the report dates that actually have holdings.
        date_series = sorted(set(date_series) & set(fund_holding.columns))
        print(date_series)

        exposure_frames = []
        for date in date_series:

            report_date = Date().get_normal_date_month_end_day(date)
            trade_date = Date().get_trade_date_month_end_day(date)
            print("Calculate HalfYear Holder Exposure %s %s" %
                  (fund_code, report_date))

            barra_exposure = Barra().get_factor_exposure_date(
                trade_date, type_list)

            # Start from an all-NaN row; it stays that way when there is no
            # Barra exposure or no overlapping holdings.
            exposure_add = pd.DataFrame([],
                                        columns=barra_name,
                                        index=[report_date])

            if (barra_exposure is not None) and (len(fund_holding) != 0):
                fund_holding_date = pd.DataFrame(fund_holding[report_date])
                fund_holding_date = fund_holding_date.dropna()
                fund_holding_date = fund_holding_date.sort_values(
                    by=[report_date], ascending=False)
                fund_holding_date.columns = ["Weight"]
                fund_holding_date /= 100.0
                data = pd.concat([fund_holding_date, barra_exposure], axis=1)
                data = data.dropna()

                if len(data) > 0:
                    for factor_name in barra_name:
                        # Weighted sum computed directly: the original wrote
                        # a temporary column into a slice (chained
                        # assignment) and used the removed ``.ix`` indexer.
                        exp = (data['Weight'] * data[factor_name]).sum()
                        exposure_add.loc[report_date, factor_name] = exp

                    # The country exposure is the fund's stock position.
                    country_name = Barra().get_factor_name(
                        ["COUNTRY"])["NAME_EN"].values[0]
                    position = FundFactor().get_fund_factor(
                        "Stock_Ratio",
                        date_list=[report_date],
                        fund_pool=[fund_code])
                    exposure_add.loc[
                        report_date,
                        country_name] = position.values[0][0] / 100

            exposure_frames.append(exposure_add)

        if exposure_frames:
            exposure_new = pd.concat(exposure_frames, axis=0)
        else:
            exposure_new = pd.DataFrame([])

        # Merge with previously stored results.
        if os.path.exists(out_file):
            exposure_old = pd.read_csv(out_file, index_col=[0], encoding='gbk')
            exposure_old.index = exposure_old.index.map(str)
            params = FactorOperate().pandas_add_row(exposure_old, exposure_new)
        else:
            params = exposure_new

        if len(params) > 0:
            params = params[barra_name]
        params.to_csv(out_file)
    def cal_fund_regression_risk_alpha_return_style(self, fund, beg_date,
                                                    end_date):
        """Split a fund's return into style/country factor returns and alpha.

        The exposure comes from
        ``FundRegressionExposureStyle().get_fund_regression_exposure_style``;
        nothing is done when it is ``None``.  Factor returns (divided by 100)
        are multiplied by the exposure, summed into ``SumReturn``, and
        ``AlphaReturn = FundReturn - SumReturn``.  Cumulative columns are
        computed over ``beg_date``..``end_date`` and the result is merged into
        ``Fund_Regression_Risk_Alpha_Style_<fund>.csv``.

        Args:
            fund: Fund code; codes ending in 'OF' are read from the fund
                factor store, anything else is read as an index.
            beg_date: First label of the output slice.
            end_date: Last label of the output slice.
        """
        # Parameters
        exposure_index = FundRegressionExposureStyle(
        ).get_fund_regression_exposure_style(fund)

        if exposure_index is None:
            return

        # Factor returns for the STYLE and COUNTRY factors.
        style_names = Barra().get_factor_name(['STYLE'])['NAME_EN'].values
        barra_name = list(style_names)
        country_names = Barra().get_factor_name(["COUNTRY"])['NAME_EN'].values
        barra_name.extend(list(country_names))

        barra_return = Barra().get_factor_return(
            None, None, type_list=["INDUSTRY", "COUNTRY", "STYLE"])
        barra_return = barra_return[barra_name]
        barra_return /= 100.0

        # Return series of the fund (or of the index standing in for it).
        if fund.endswith('OF'):
            fund_return = FundFactor().get_fund_factor(
                "Repair_Nav_Pct", None, [fund]) / 100.0
        else:
            fund_return = Index().get_index_factor(fund, attr=["PCT"])
        fund_return.columns = ["FundReturn"]

        exposure_index = exposure_index.dropna(how="all")
        index_exposure_return = barra_return.mul(exposure_index)
        index_exposure_return = index_exposure_return.dropna(how="all")

        data = pd.concat([fund_return, index_exposure_return], axis=1)
        data = data.dropna(how="all")
        data = data.loc[index_exposure_return.index, :]
        data = data.dropna(subset=["FundReturn"])
        data["SumReturn"] = data[barra_name].sum(axis=1, skipna=True)
        data["AlphaReturn"] = data["FundReturn"] - data["SumReturn"]
        data = data.loc[beg_date:end_date, :]

        # Compounded returns over the selected window.
        for source, target in (("FundReturn", "CumFundReturn"),
                               ("AlphaReturn", "CumAlphaReturn"),
                               ("SumReturn", "CumSumReturn")):
            data[target] = (data[source] + 1.0).cumprod() - 1.0

        # Merge with previously stored results.
        out_path = os.path.join(self.data_path_exposure,
                                'fund_regression_risk_alpha_return_style')
        out_file = os.path.join(
            out_path, 'Fund_Regression_Risk_Alpha_Style_' + fund + '.csv')

        if not os.path.exists(out_file):
            params = data
        else:
            params_old = pd.read_csv(out_file,
                                     index_col=[0],
                                     encoding='gbk')
            params_old.index = params_old.index.map(str)
            params = FactorOperate().pandas_add_row(params_old, data)
        print(params)
        params.to_csv(out_file)
    def cal_fund_regression_exposure_index(self, fund, beg_date, end_date, period="D"):

        """
        计算一只基金每日对不同指数的暴露
        """

        # 参数
        ####################################################################
        one_index_up_limit = 1.0
        one_index_low_limit = 0.0
        sum_index = 1.0

        beg_date = Date().change_to_str(beg_date)
        end_date = Date().change_to_str(end_date)

        # 取得 指数收益率数据
        ####################################################################
        for i_index in range(len(self.index_code_list)):
            index_code = self.index_code_list[i_index]
            index_return = Index().get_index_factor(index_code, Nattr=["PCT"])
            if i_index == 0:
                index_return = Index().get_index_factor(index_code, attr=["PCT"])
                index_return_all = index_return
            else:
                index_return_all = pd.concat([index_return_all, index_return], axis=1)

        index_return_all.columns = self.index_code_list

        # 取得 基金涨跌幅数据
        ####################################################################
        if fund[len(fund)-2:] == 'OF':
            fund_return = FundFactor().get_fund_factor("Repair_Nav_Pct", None, [fund]) / 100.0
        else:
            fund_return = Index().get_index_factor(fund, attr=["PCT"])
            fund_return.columns = [fund]

        # 合并数据
        ####################################################################
        data = pd.concat([fund_return, index_return_all], axis=1)
        data = data.dropna(subset=[fund])

        # 回归日期
        ####################################################################
        date_series = Date().get_trade_date_series(beg_date, end_date, period=period)
        date_series = list(set(date_series) & set(data.index))
        date_series.sort()

        # 循环优化计算每天的暴露
        ####################################################################

        for i_date in range(0, len(date_series)):

            # 约束回归所需要的数据
            #############################################################################################
            period_end_date = date_series[i_date]
            period_beg_date = Date().get_trade_date_offset(period_end_date, -self.regression_period)

            period_date_series = Date().get_trade_date_series(period_beg_date, period_end_date)
            data_periods = data.ix[period_date_series, :]
            data_periods = data_periods.dropna(subset=[fund])
            data_periods = data_periods.T.dropna(how='all').T
            data_periods = data_periods.T.fillna(data_periods.mean(axis=1)).T
            data_periods = data_periods.dropna()

            # 有约束的回归 可以转换为二次规划
            #############################################################################################
            if len(data_periods) > self.regression_period_min and (len(data_periods.columns) > 1):

                # 平方和最小
                #############################################################################################
                y = data_periods.ix[:, 0].values
                x = data_periods.ix[:, 1:].values

                P = 2 * np.dot(x.T, x)
                Q = -2 * np.dot(x.T, y)

                # 单个指数上下限为 0
                #############################################################################################
                G_up = np.diag(np.ones(x.shape[1]))
                G_low = - np.diag(np.ones(x.shape[1]))
                G = np.row_stack((G_up, G_low))
                h_up = np.row_stack(np.ones((x.shape[1], 1))) * one_index_up_limit
                h_low = - np.row_stack(np.ones((x.shape[1], 1))) * one_index_low_limit
                h = np.row_stack((h_up, h_low))

                #############################################################################################
                A = np.column_stack(np.ones((x.shape[1], 1)))
                b = np.array([sum_index])

                # 开始规划求解
                ############################################################################################
                try:
                    P = matrix(P)
                    Q = matrix(Q)
                    G = matrix(G)
                    h = matrix(h)
                    A = matrix(A)
                    b = matrix(b)
                    result = sol.qp(P, Q, G, h, A, b)
                    params_add = pd.DataFrame(np.array(result['x'][0:]),
                                              columns=[period_end_date], index=data_periods.columns[1:]).T
                    print("########## Fund Regression Index Exposure GF %s %s ##########" % (fund, period_end_date))
                except Exception as e:
                    params_add = pd.DataFrame([], columns=[period_end_date], index=data_periods.columns[1:]).T
                    print("########## Quadratic Programming is InCorrect %s %s ##########" % (fund, period_end_date))
            else:
                params_add = pd.DataFrame([], columns=[period_end_date], index=data_periods.columns[1:]).T
                print("########## Fund Regression Data Len is Too Small %s %s ##########" % (fund, period_end_date))

            if i_date == 0:
                params_new = params_add
            else:
                params_new = pd.concat([params_new, params_add], axis=0)

        # 合并新数据 并存储数据
        ####################################################################
        out_file = os.path.join(self.data_path, self.file_prefix + fund + '.csv')

        if os.path.exists(out_file):
            params_old = pd.read_csv(out_file, index_col=[0], encoding='gbk')
            params_old.index = params_old.index.map(str)
            params = FactorOperate().pandas_add_row(params_old, params_new)
        else:
            params = params_new
        params.to_csv(out_file)
    def __init__(self):
        """ Initialise this instance through each of the four Factor* helper classes. """

        # Same call order as before: standard -> remove value -> operate -> fill na.
        # NOTE(review): presumably these are the base classes of this class - confirm.
        helper_classes = (FactorStandard, FactorRemoveValue,
                          FactorOperate, FactorFillNa)
        for helper_class in helper_classes:
            helper_class.__init__(self)
    def cal_fund_holder_exposure_quarter(self, fund, beg_date, end_date):
        """
        Calculate the quarterly holding exposure of a single fund
        (exposure of the top-ten heavy holdings) and store it as csv.

        For every quarterly date between beg_date and end_date that is also a
        column of the fund's quarterly stock-weight table, each stock weight is
        multiplied by the stock's Barra factor exposure and summed per factor,
        then divided by 100. The country factor is afterwards overwritten with
        the fund's "Stock_Ratio" / 100. The rows are merged into
        Fund_Holder_Exposure_Quarter_<fund>.csv through pandas_add_row.

        :param fund: fund code, used to load the holdings and to name the file
        :param beg_date: first date of the quarterly date range
        :param end_date: last date of the quarterly date range
        :return: None. The result is written to disk. Nothing is written when
                 the fund has no holding data or no quarter falls in the range.
        """

        type_list = ['STYLE', 'COUNTRY', 'INDUSTRY']
        date_series = Date().get_normal_date_series(beg_date,
                                                    end_date,
                                                    period='Q')
        fund_holding = FundHolder().get_fund_stock_weight_quarter(fund)

        if fund_holding is None:
            return None

        date_series = sorted(set(date_series) & set(fund_holding.columns))
        if not date_series:
            # No quarter to calculate: previously this ended in a NameError
            # because the result frame was never created.
            return None

        # The factor names do not depend on the date, so look them up once.
        barra_name = list(Barra().get_factor_name(type_list)['NAME_EN'].values)

        exposure_list = []

        for date in date_series:

            report_date = Date().get_normal_date_month_end_day(date)
            trade_date = Date().get_trade_date_month_end_day(date)
            barra_exposure = Barra().get_factor_exposure_date(
                trade_date, type_list)

            print(
                "########## Calculate Quarter Holder Exposure %s %s ##########"
                % (fund, report_date))

            # One all-NaN row per report date; filled below when data exists.
            exposure_add = pd.DataFrame([],
                                        columns=barra_name,
                                        index=[report_date])

            if barra_exposure is not None:
                fund_holding_date = pd.DataFrame(fund_holding[report_date])
                fund_holding_date = fund_holding_date.dropna()
                fund_holding_date = fund_holding_date.sort_values(
                    by=[report_date], ascending=False)
                fund_holding_date.columns = ["Weight"]

                # Keep only stocks that have both a weight and every exposure.
                data = pd.concat([fund_holding_date, barra_exposure], axis=1)
                data = data.dropna()

                if len(data) > 0:
                    for factor_name in barra_name:
                        # presumably the weights are in percent, hence / 100
                        stock_exposure = data['Weight'] * data[factor_name]
                        exposure_add.loc[report_date, factor_name] = \
                            stock_exposure.sum() / 100.0

                    # The country exposure is replaced by the stock position.
                    country_name = Barra().get_factor_name(
                        ["COUNTRY"])["NAME_EN"].values[0]
                    position = FundFactor().get_fund_factor(
                        "Stock_Ratio",
                        date_list=[report_date],
                        fund_pool=[fund])
                    position = position.values[0][0]
                    exposure_add.loc[report_date, country_name] = position / 100

            exposure_list.append(exposure_add)

        exposure_new = pd.concat(exposure_list, axis=0)

        # Merge the new rows with the stored file
        ####################################################################
        out_path = os.path.join(self.data_path_exposure,
                                'fund_holding_exposure_quarter')
        out_file = os.path.join(
            out_path, 'Fund_Holder_Exposure_Quarter_' + fund + '.csv')

        if os.path.exists(out_file):
            exposure_old = pd.read_csv(out_file, index_col=[0], encoding='gbk')
            exposure_old.index = exposure_old.index.map(str)
            params = FactorOperate().pandas_add_row(exposure_old, exposure_new)
        else:
            params = exposure_new
        params.to_csv(out_file)
    def cal_fund_regression_exposure_style(self,
                                           fund,
                                           beg_date,
                                           end_date,
                                           period="D"):
        """
        Estimate the style exposure of a fund by a bounded regression of its
        return on the Barra STYLE and COUNTRY factor returns.

        For every trade date in the range, the rows of the last
        self.regression_period trade days are used to minimise
        ||y - [1, x] b||^2 as a quadratic programme (cvxopt) under box
        constraints on b. The coefficients without the constant are stored
        as one row per date and merged into
        Fund_Regression_Exposure_Style_<fund>.csv through pandas_add_row.

        :param fund: fund code (ending in 'OF') or index code
        :param beg_date: first date of the calculation range
        :param end_date: last date of the calculation range
        :param period: frequency passed to get_trade_date_series
        :return: None. The result is written to disk. Nothing is written when
                 no date of the range has data.
        """

        # Parameters
        ####################################################################
        up_style_exposure = 1.25
        up_position_exposure = 0.95
        default_low_position_exposure = 0.75
        position_sub = 0.08

        beg_date = Date().change_to_str(beg_date)
        end_date = Date().change_to_str(end_date)

        # Load factor returns and the return of the fund
        ####################################################################
        type_list = ['STYLE', 'COUNTRY']

        barra_name = list(Barra().get_factor_name(type_list)['NAME_EN'].values)
        barra_return = Barra().get_factor_return(None, None, type_list)

        date_series = Date().get_trade_date_series(beg_date,
                                                   end_date,
                                                   period=period)

        if fund.endswith('OF'):
            fund_return = FundFactor().get_fund_factor("Repair_Nav_Pct", None,
                                                       [fund])
        else:
            fund_return = Index().get_index_factor(fund, attr=["PCT"]) * 100
            fund_return.columns = [fund]

        data = pd.concat([fund_return, barra_return], axis=1)
        data = data.dropna()
        print(" Fund Code Total Len %s " % len(data))
        factor_number = len(barra_name)
        stock_ratio = FundFactor().get_fund_factor("Stock_Ratio", None,
                                                   [fund]) / 100

        date_series = sorted(set(date_series) & set(data.index))
        if not date_series:
            # Nothing to regress: previously this ended in a NameError
            # because the result frame was never created.
            return None

        # Inequality system G * b <= h. Stacking +I and -I gives an upper and
        # a lower bound for every coefficient. The constant is the first
        # coefficient (add_constant prepends it).
        # NOTE(review): the position bounds are applied to the last
        # coefficient, assuming the COUNTRY factor is the last column - confirm.
        n_coef = factor_number + 1
        G_bounds = np.vstack((np.eye(n_coef), -np.eye(n_coef)))
        h_up = np.vstack((np.ones((factor_number, 1)) * up_style_exposure,
                          np.array([up_position_exposure])))

        # Loop over the dates; every date uses the data up to that date
        ####################################################################
        params_list = []

        for period_end_date in date_series:

            # Data window of the regression
            ####################################################################
            period_beg_date = Date().get_trade_date_offset(
                period_end_date, -self.regression_period)
            data_end_date = Date().get_trade_date_offset(period_end_date, -0)

            period_date_series = Date().get_trade_date_series(
                period_beg_date, data_end_date)
            # reindex tolerates trade dates that are missing in data
            # (they become NaN rows and are dropped right away).
            data_periods = data.reindex(period_date_series)
            data_periods = data_periods.dropna()

            # Stock position of the fund in the last quarterly report
            #####################################################################################
            quarter_date = Date().get_last_fund_quarter_date(period_end_date)
            stock_ratio_fund = stock_ratio.loc[quarter_date, fund]
            print(
                "########## Calculate Regression Exposure %s %s %s %s %s %s ##########"
                % (fund, period_beg_date, period_end_date, quarter_date,
                   len(data_periods), stock_ratio_fund))

            # All-NaN row, used whenever no coefficients can be estimated.
            empty_row = pd.DataFrame([],
                                     columns=[period_end_date],
                                     index=barra_name).T

            if len(data_periods) > self.regression_period_min:

                y = data_periods.iloc[:, 0].values
                x = data_periods.iloc[:, 1:].values
                x_add = sm.add_constant(x)

                # The lower position bound is derived per date from the last
                # reported stock ratio. It must not carry over to later dates,
                # otherwise the bound could only ever increase within one run.
                low_position_exposure = max(stock_ratio_fund - position_sub,
                                            default_low_position_exposure)
                if np.isnan(low_position_exposure):
                    low_position_exposure = default_low_position_exposure

                # Least squares written as 1/2 b'Pb + Q'b
                P = 2 * np.dot(x_add.T, x_add)
                Q = -2 * np.dot(x_add.T, y)

                h_low = np.vstack((np.ones(
                    (factor_number, 1)) * up_style_exposure,
                                   np.array([-low_position_exposure])))
                h = np.vstack((h_up, h_low))

                P = matrix(P)
                Q = matrix(Q)
                G = matrix(G_bounds)
                h = matrix(h)
                try:
                    result = sol.qp(P, Q, G, h)
                    # Drop the constant, keep one coefficient per factor.
                    params_add = pd.DataFrame(np.array(result['x'][1:]),
                                              columns=[period_end_date],
                                              index=barra_name).T
                except Exception:
                    # Best effort: a failed optimisation gives a NaN row.
                    params_add = empty_row
            else:
                params_add = empty_row

            print(params_add)
            params_list.append(params_add)

        params_new = pd.concat(params_list, axis=0)

        # Merge the new rows with the stored file
        ####################################################################
        out_path = os.path.join(self.data_path_exposure,
                                'fund_regression_exposure_style')
        out_file = os.path.join(
            out_path, 'Fund_Regression_Exposure_Style_' + fund + '.csv')

        if os.path.exists(out_file):
            params_old = pd.read_csv(out_file, index_col=[0], encoding='gbk')
            params_old.index = params_old.index.map(str)
            params = FactorOperate().pandas_add_row(params_old, params_new)
        else:
            params = params_new
        print(params)
        params.to_csv(out_file)
Exemple #22
0
    def cal_fund_regression_exposure_index(self,
                                           reg_code,
                                           beg_date,
                                           end_date,
                                           period="D"):
        """
        Regress the return of one fund on the index returns over a date range.

        The index returns are used to fit the fund return by minimising the
        squared tracking error, under the constraints that the index weights
        sum to 1, no index is shorted, and the turnover against the previous
        weights (sum of absolute weight changes) stays below self.turnover.
        One row of weights plus an "R2" column is produced per date and merged
        into the csv file through pandas_add_row.

        :param reg_code: code of the fund (column of self.asset_pct) to explain
        :param beg_date: first date of the calculation range
        :param end_date: last date of the calculation range
        :param period: frequency passed to get_trade_date_series
        :return: None. Nothing is written when fewer than
                 self.regression_period rows are available or no date of the
                 range has data.
        """

        date_series = Date().get_trade_date_series(beg_date, end_date, period)
        data_beg_date = Date().get_trade_date_offset(beg_date,
                                                     -self.regression_period)
        end_date = Date().change_to_str(end_date)

        code_list = [reg_code]
        code_list.extend(self.index_code_list)

        data = self.asset_pct.loc[data_beg_date:end_date, code_list]
        data = data.dropna(subset=[reg_code])

        if len(data) < self.regression_period:
            return None

        print("Regression %s With %s" % (reg_code, self.index_code_list))
        print("Length of Return Data Is %s " % len(data))

        date_series = sorted(set(date_series) & set(data.index))
        if not date_series:
            # Previously date_series[0] raised an IndexError here.
            return None

        # Weights calculated for the day before the first date
        last_date = Date().get_trade_date_offset(date_series[0], -1)
        params_old = self.get_fund_regression_exposure_index_date(
            reg_code, last_date)
        params_old = params_old.T
        print("old", params_old)

        params_list = []

        for period_end_date in date_series:

            # Data window of the regression (the last regression_period days)
            period_beg_date = Date().get_trade_date_offset(
                period_end_date, -self.regression_period)
            data_end_date = Date().get_trade_date_offset(period_end_date, -0)

            period_date_series = Date().get_trade_date_series(
                period_beg_date, data_end_date)
            # reindex tolerates trade dates that are missing in data; they
            # become NaN rows and are removed by the dropna below. A plain
            # .loc[list] raises KeyError for missing labels on recent pandas.
            data_periods = data.reindex(period_date_series)
            data_periods = data_periods.dropna(subset=[reg_code])
            # Drop indices without any value, then fill the remaining gaps
            # of a day with the mean of that day over all columns.
            data_periods = data_periods.T.dropna(how='all').T
            data_periods = data_periods.T.fillna(data_periods.mean(axis=1)).T
            data_periods = data_periods.dropna()

            print(
                "########## Calculate Regression Exposure %s %s %s %s ##########"
                % (reg_code, period_beg_date, period_end_date,
                   len(data_periods)))

            if len(data_periods) > self.regression_period_min and (len(
                    data_periods.columns) > 1):

                y = data_periods.iloc[:, 0].values
                x = data_periods.iloc[:, 1:].values
                index_codes = data_periods.columns[1:]
                n_index = x.shape[1]

                # Fall back to equal weights when there are no usable previous
                # weights. The R2 column is left out of the sum: it is not a
                # weight and would otherwise distort the 0.5 threshold.
                if params_old.empty or params_old.drop(
                        columns=["R2"], errors="ignore").sum().sum() < 0.5:
                    params_old = pd.DataFrame(n_index * [1 / n_index],
                                              columns=[period_end_date],
                                              index=index_codes).T

                # Align the previous weights with the current index columns;
                # an index without a previous weight starts from 0.
                params_old = params_old.reindex(columns=index_codes)
                params_old = params_old.fillna(0.0)
                weight_old = params_old.values[0]

                # NOTE(review): `x * w` relies on cvxpy treating `*` as a
                # matrix product; newer cvxpy releases recommend `@` - confirm.
                w = cvx.Variable(n_index)
                sigma = y - x * w
                prob = cvx.Problem(cvx.Minimize(cvx.sum_squares(sigma)), [
                    cvx.sum(w) == 1, w >= 0,
                    cvx.sum(cvx.abs(w - weight_old)) <= self.turnover
                ])
                prob.solve()
                print('Solver Status : ', prob.status)

                # R2 of the regression (formula kept as in the original code)
                n_obs = len(y)
                tss = np.sum((y - np.mean(y))**2) / n_obs
                y_res = y - np.dot(x, w.value)
                rss = np.sum(y_res**2) / (n_obs - n_index - 1)
                r2 = 1 - rss / tss

                params_add = pd.DataFrame(w.value,
                                          columns=[period_end_date],
                                          index=index_codes).T
                params_add.loc[period_end_date, "R2"] = r2
                print('new', params_add)
                params_old = params_add

            else:
                # Not enough data: reuse the stored weights of the day before.
                last_date = Date().get_trade_date_offset(period_end_date, -1)
                params_old = self.get_fund_regression_exposure_index_date(
                    reg_code, last_date)
                params_old = params_old.T
                print("old", params_old)
                params_add = params_old

            params_list.append(params_add)

        params_new = pd.concat(params_list, axis=0)

        # Merge the new rows with the stored file
        file = '%s_%s_%s.csv' % (self.file_prefix, self.folder_name, reg_code)
        out_file = os.path.join(self.index_exposure_path, file)

        if os.path.exists(out_file):
            params_file = pd.read_csv(out_file, index_col=[0], encoding='gbk')
            params_file.index = params_file.index.map(str)
            params = FactorOperate().pandas_add_row(params_file, params_new)
        else:
            params = params_new

        params.to_csv(out_file)
    def cal_style_position(self, beg_date, end_date, code):
        """
        Calculate the style position and the stock position of a fund or
        index with a constrained least-squares regression.

        For every trade date the return of `code` is regressed on the index
        returns of the last self.regress_length trade days. The weights are
        non-negative, sum to 1, the weights after the first index must sum to
        a value within [self.stock_ratio_low, self.stock_ratio_up], and the
        turnover against the previous weights is limited. On the first date,
        or after a failed date, the previous weights are assumed to be equal
        and the turnover limit is opened to 2.0. The result is merged into
        RestraintOLSStylePosition_<code>.csv through pandas_add_row.

        :param beg_date: first date of the calculation range
        :param end_date: last date of the calculation range
        :param code: column of self.data_return that is explained
        :return: None. Nothing is written when `code` has no return data or
                 no date of the range has data.
        """

        x_pct = self.data_return[self.index_code_list]
        x_pct = x_pct.dropna(how='all')
        y_pct = pd.DataFrame(self.data_return[code])
        y_pct = y_pct.dropna()
        if y_pct.empty:
            # Previously y_pct.index[0] raised an IndexError here.
            return None

        all_date_series = Date().get_trade_date_series(beg_date,
                                                       end_date,
                                                       period="D")
        y_series = Date().get_trade_date_series(y_pct.index[0],
                                                y_pct.index[-1])
        date_series = sorted(set(y_series) & set(all_date_series))
        if not date_series:
            return None

        error = False
        old_weight = None
        params_list = []

        for i_date, ed_date in enumerate(date_series):

            bg_date = Date().get_trade_date_offset(ed_date,
                                                   -self.regress_length)
            last_date = Date().get_trade_date_offset(ed_date, -1)

            # Only indices with a complete history in the window are used.
            x_pct_period = x_pct.loc[bg_date:ed_date, :]
            x_pct_period = x_pct_period.T.dropna().T
            x_columns = x_pct_period.columns
            data = pd.concat([y_pct, x_pct_period], axis=1)
            data = data.dropna()

            # On the first date or after a failure: open the turnover limit
            # and assume equal weights for the previous day.
            if i_date == 0 or error:
                n = len(x_columns)
                old_weight = pd.DataFrame(n * [1.0 / n],
                                          index=x_columns,
                                          columns=[last_date])
                turnover_daily = 2.0
            else:
                turnover_daily = self.turnover_daily
                # Align with today's indices; reindex tolerates indices that
                # had no weight yesterday (they start from 0).
                old_weight = old_weight.reindex(x_columns)
                old_weight = old_weight.fillna(0.0)

            print("## Cal Regress %s %s %s %s TurnOver %s##" %
                  (code, bg_date, ed_date, data.shape, turnover_daily))

            # Placeholder that is stored when today's regression fails. It
            # replaces the stale result of the previous date that used to be
            # appended a second time (or the NameError on the first date).
            params_add = pd.DataFrame([], columns=[ed_date])

            if len(data) >= self.regress_length_min:
                y = data[code].values
                x = data.iloc[:, 1:].values
                k = x.shape[1]
                old = old_weight.T.values[0]

                try:
                    w = cvx.Variable(k)
                    sigma = y - x * w
                    prob = cvx.Problem(cvx.Minimize(cvx.sum_squares(sigma)), [
                        cvx.sum(w) == 1.0,
                        cvx.sum(w[1:]) >= self.stock_ratio_low,
                        cvx.sum(w[1:]) <= self.stock_ratio_up,
                        cvx.sum(cvx.abs(w - old)) <= turnover_daily, w >= 0
                    ])
                    prob.solve()

                    print('Solver Status : ', prob.status)
                    weight = pd.DataFrame(w.value,
                                          columns=[ed_date],
                                          index=x_columns)
                    stock_sum = weight.loc[self.index_code_list[1:],
                                           ed_date].sum()
                    concat_data = pd.concat([weight, old_weight], axis=1)
                    concat_data = concat_data.dropna()
                    turnover_real = (concat_data[last_date] -
                                     concat_data[ed_date]).abs().sum()

                    weight.loc['StockRatio', ed_date] = stock_sum
                    weight.loc['BondRatio', ed_date] = weight.loc[
                        self.index_code_list[0], ed_date]
                    weight.loc['TurnOverDaily', ed_date] = turnover_real
                    print(weight.T)

                    # Publish the result only once it is complete.
                    params_add = weight
                    old_weight = weight
                    error = False
                except Exception as e:
                    print(ed_date, code, "回归失败", e)
                    error = True
            else:
                print(ed_date, code, "数据长度不够")
                error = True

            params_list.append(params_add)

        # Merge the new rows with the stored file
        ####################################################################
        params_new = pd.concat(params_list, axis=1).T
        out_file = os.path.join(self.data_path,
                                'RestraintOLSStylePosition_%s.csv' % code)

        if os.path.exists(out_file):
            params_old = pd.read_csv(out_file, index_col=[0], encoding='gbk')
            params_old.index = params_old.index.map(str)
            params = FactorOperate().pandas_add_row(params_old, params_new)
        else:
            params = params_new

        params.to_csv(out_file)
    def cal_fund_regression_risk_alpha_return_index(self, fund, beg_date,
                                                    end_date):
        """
        Split the return of a fund into the part explained by its regression
        index exposure and the remaining alpha, and store it as csv.

        The index returns are multiplied element-wise with the stored index
        exposure of the fund; the row sum is "SumReturn" and
        "AlphaReturn" = "FundReturn" - "SumReturn". Cumulative versions of
        the three returns are added for the range beg_date..end_date and the
        table is merged into the csv file through pandas_add_row.

        :param fund: fund code (ending in 'OF') or index code
        :param beg_date: first index label of the stored range
        :param end_date: last index label of the stored range
        :return: None. Nothing is done when the fund has no stored exposure.
        """

        exposure_index = FundRegressionExposureIndex(
        ).get_fund_regression_exposure_index(fund)

        if exposure_index is None:
            return None

        # Load the index returns (one fetch per index) and the fund return
        ####################################################################
        index_return_all = pd.concat([
            Index().get_index_factor(index_code, attr=["PCT"])
            for index_code in self.index_code_list
        ], axis=1)
        index_return_all.columns = self.index_code_list

        if fund.endswith('OF'):
            fund_return = FundFactor().get_fund_factor(
                "Repair_Nav_Pct", None, [fund]) / 100.0
        else:
            fund_return = Index().get_index_factor(fund, attr=["PCT"])
        fund_return.columns = ["FundReturn"]

        # Return explained by the exposure, and the unexplained alpha
        ####################################################################
        exposure_index = exposure_index.dropna(how="all")
        index_exposure_return = index_return_all.mul(exposure_index)
        index_exposure_return = index_exposure_return.dropna(how="all")
        data = pd.concat([fund_return, index_exposure_return], axis=1)
        data = data.dropna(how="all")
        data = data.loc[index_exposure_return.index, :]
        data = data.dropna(subset=["FundReturn"])
        data["SumReturn"] = data[self.index_code_list].sum(axis=1,
                                                           skipna=True)
        data["AlphaReturn"] = data["FundReturn"] - data["SumReturn"]

        # Copy the slice so that the new columns are written to an
        # independent frame instead of a possible view of `data`.
        data = data.loc[beg_date:end_date, :].copy()
        for source, target in (("FundReturn", "CumFundReturn"),
                               ("AlphaReturn", "CumAlphaReturn"),
                               ("SumReturn", "CumSumReturn")):
            data[target] = (data[source] + 1.0).cumprod() - 1.0

        # Merge the new rows with the stored file
        ####################################################################
        out_path = self.data_path
        out_file = os.path.join(out_path, self.file_prefix + fund + '.csv')

        if os.path.exists(out_file):
            params_old = pd.read_csv(out_file,
                                     index_col=[0],
                                     encoding='gbk')
            params_old.index = params_old.index.map(str)
            params = FactorOperate().pandas_add_row(params_old, data)
        else:
            params = data
        print(params)
        params.to_csv(out_file)
Exemple #25
0
    def regress_fund(self, fund_code, beg_date, end_date):
        """
        Regress the fund return on the returns of the stocks held in the last
        quarterly report plus the bond fund index "885062.WI".

        For every weekly date the stock weights are fitted by minimising the
        squared tracking error, under the constraints that the weights sum to
        1, are non-negative, and stay within a turnover limit of the weights
        from the last quarterly report. The limit grows with the number of
        trade days of the regression window (0.8 / 100 per day). One column
        of weights plus an "R2" entry is produced per date; the transposed
        table is merged into <fund_code>.csv through pandas_add_row.

        :param fund_code: fund code, a column of self.fund_pct
        :param beg_date: first date of the calculation range
        :param end_date: last date of the calculation range
        :return: None. Nothing is written when no date of the range has
                 fund return data.
        """

        period = "W"
        date_series = Date().get_trade_date_series(beg_date, end_date, period)

        fund_return = self.fund_pct[fund_code]
        fund_return = fund_return.dropna()
        date_series = sorted(set(date_series) & set(fund_return.index))
        if not date_series:
            # Previously this ended in a NameError because the result frame
            # was never created.
            return None

        # Holdings of the quarterly reports
        quarter_weight = Fund().get_fund_holding_quarter(fund_code)

        # The full return table does not depend on the date: build it once.
        return_data = pd.concat([fund_return, self.stock_pct, self.bold_pct],
                                axis=1)
        return_data['885062.WI'] = return_data['885062.WI'].fillna(0.0)

        params_list = []

        for ed_date in date_series:

            # Regression window: it starts at the last quarterly report date
            # at the earliest, so it is short right after a report and grows
            # up to self.regression_len trade days afterwards.
            ed_date = Date().get_trade_date_offset(ed_date, -0)
            quarter_date = Date().get_last_fund_quarter_date(ed_date)

            bg_date = Date().get_trade_date_offset(ed_date,
                                                   -(self.regression_len - 1))
            bg_date = max(bg_date, quarter_date)
            bg_date = Date().get_trade_date_offset(bg_date, -0)

            date_diff = Date().get_trade_date_diff(bg_date, ed_date)

            try:
                # Holdings of the last quarterly report
                stock_weight = pd.DataFrame(quarter_weight[quarter_date])
                stock_weight = stock_weight.dropna()
                stock_weight.columns = ['Weight']

                # Return data of the window. reindex tolerates trade dates
                # that are missing in the return table.
                regress_date_series = Date().get_trade_date_series(
                    bg_date, ed_date)
                data = return_data.reindex(regress_date_series)
                data = data.T.dropna(thresh=self.regression_min_len).T
                # Fill the gaps of a day with the mean of that day over all
                # columns. fillna aligns a Series with the column labels, so
                # the frame is transposed to align the daily means with the
                # dates (without the transpose nothing was ever filled).
                data = data.T.fillna(data.mean(axis=1)).T

                # Stock pool: reported stocks with return data + bond index
                stock_pool = list(stock_weight.index)
                stock_pool = list(set(stock_pool) & set(data.columns[1:]))
                stock_pool.sort()
                stock_pool.append("885062.WI")

                # Scale the reported weights to the stock ratio, give the
                # rest to the bond index, and renormalise over the pool.
                stock_ratio = self.get_fund_stock_ratio(
                    fund_code, quarter_date)
                stock_weight['Weight'] /= stock_weight['Weight'].sum()
                stock_weight['Weight'] *= stock_ratio
                stock_weight.loc["885062.WI", "Weight"] = 100 - stock_ratio
                stock_weight /= 100.0
                stock_weight = stock_weight.loc[stock_pool, :]
                stock_weight['Weight'] /= stock_weight['Weight'].sum()

                print("## Cal Regress %s %s %s %s %s ##" %
                      (fund_code, quarter_date, bg_date, ed_date, len(data)))

                if (len(data) > self.regression_min_len) and (len(stock_pool) >
                                                              4):

                    y = data[fund_code].values / 100.0
                    x = data[stock_pool].values / 100.0
                    n = len(y)
                    k = x.shape[1]
                    weight_old = stock_weight.T.values[0]
                    turnover = date_diff * 0.8 / 100
                    print("TurnOver %s " % turnover)

                    # Optimisation
                    ##############################################################################
                    w = cvx.Variable(k)
                    sigma = y - x * w
                    prob = cvx.Problem(cvx.Minimize(cvx.sum_squares(sigma)), [
                        cvx.sum(w) == 1.0, w >= 0,
                        cvx.sum(cvx.abs(w - weight_old)) <= turnover
                    ])
                    prob.solve()

                    print('Solver Status : ', prob.status)
                    params_add = pd.DataFrame(w.value,
                                              columns=[ed_date],
                                              index=stock_pool)

                    # R2 of the regression (formula kept as in the original)
                    ##############################################################################
                    tss = np.sum((y - np.mean(y))**2) / n
                    y_res = y - np.dot(x, w.value)
                    rss = np.sum(y_res**2) / (n - k - 1)
                    r2 = 1 - rss / tss
                    params_add.loc["R2", ed_date] = r2

                    print(params_add.T)

                else:
                    params_add = pd.DataFrame([],
                                              columns=[ed_date],
                                              index=stock_pool)
            except Exception as e:
                # Best effort: a failed date is stored as an empty column,
                # but the reason is reported instead of being swallowed.
                print("## Cal Regress Failed %s %s %s ##" %
                      (fund_code, ed_date, e))
                params_add = pd.DataFrame([], columns=[ed_date])

            params_list.append(params_add)

        # Merge the new rows with the stored file
        ####################################################################
        params_new = pd.concat(params_list, axis=1).T
        out_file = os.path.join(self.data_path_exposure, fund_code + '.csv')

        if os.path.exists(out_file):
            params_old = pd.read_csv(out_file, index_col=[0], encoding='gbk')
            params_old.index = params_old.index.map(str)
            params = FactorOperate().pandas_add_row(params_old, params_new)
        else:
            params = params_new

        params.to_csv(out_file)
Exemple #26
0
    def cal_barra_exposure_return(self):
        """Compute and save the fund's Barra excess-exposure attribution.

        The fund exposure and the tracked index exposure are loaded from
        ``self.beg_date_pre`` to ``self.end_date``; their difference is the
        excess exposure. It is multiplied by the Barra factor returns and
        five sheets are written through ``self.save_file_excel``: the fund's
        factor return contribution, the factor returns, the excess exposure,
        the fund exposure and the index exposure.

        Reads ``self.fund_name``, ``self.index_code``, ``self.beg_date_pre``,
        ``self.beg_date`` and ``self.end_date``. Returns nothing.
        """

        # Factor groups used throughout the calculation
        type_list = ["STYLE", "COUNTRY", "INDUSTRY"]

        def english_names(types):
            # English factor names ("NAME_EN") of the requested factor types
            name_table = Barra().get_factor_name(type_list=types)
            return list(name_table["NAME_EN"].values)

        def shift_one_trade_date(day):
            # Offset by 1 trading day (presumably forward -- confirm in Date)
            return Date().get_trade_date_offset(day, 1)

        # Excess exposure = fund exposure - index exposure; rows containing
        # NaN after the subtraction are dropped
        exposure_fund = MfcData().get_mfc_holding_barra_exposure(
            self.fund_name, self.beg_date_pre, self.end_date)
        exposure_index = Index().get_index_exposure(
            self.index_code, self.beg_date_pre, self.end_date, type_list)
        exposure_excess = exposure_fund.sub(exposure_index).dropna()

        # The country factor exposure is forced to 1.0
        exposure_excess[english_names(["COUNTRY"])] = 1.0

        # The previous day's exposure is paired with the following day's
        # factor return, so the exposure dates are shifted by one trading day
        exposure_excess.index = exposure_excess.index.map(shift_one_trade_date)

        # Factor returns over the reporting window
        factor_return = Barra().get_factor_return(
            self.beg_date, self.end_date, type_list)

        # Return contributed by the excess exposure, on a common index/columns
        exposure, factor_return = FactorOperate().make_same_index_columns(
            [exposure_excess, factor_return])
        fund_risk_factor_return = exposure.mul(factor_return)

        # Put the columns in the order given by the factor name table
        fund_risk_factor_return = fund_risk_factor_return[
            english_names(type_list)]

        # Subtotal columns, appended in this order.
        # NOTE(review): "RiskFactor" sums every factor in type_list, which
        # presumably includes the COUNTRY column as well -- confirm intent.
        subtotals = (
            ('Style', ['STYLE']),
            ('Industry', ['INDUSTRY']),
            ('RiskFactor', type_list),
        )
        for label, types in subtotals:
            members = english_names(types)
            fund_risk_factor_return[label] = fund_risk_factor_return[
                members].sum(axis=1)

        # Keep only the rows inside [beg_date, end_date]
        window = slice(self.beg_date, self.end_date)
        fund_risk_factor_return = fund_risk_factor_return.loc[window, :]
        exposure = exposure.loc[window, :]
        factor_return = factor_return.loc[window, :]

        # Scale by 1/100 (presumably percent -> fraction, matching the
        # "0.00%" number format used below -- confirm units)
        fund_risk_factor_return /= 100.0
        factor_return /= 100.0

        # Write every table to the "Barra" output; the fund and index
        # exposures are saved as loaded, without the date-window slicing
        sheets = (
            (fund_risk_factor_return, "基金风格收益", "0.00%"),
            (factor_return, "风格因子收益", "0.00%"),
            (exposure, "基金超额暴露", "0.000"),
            (exposure_fund, "基金暴露", "0.000"),
            (exposure_index, "指数暴露", "0.000"),
        )
        for frame, sheet_name, fmt in sheets:
            self.save_file_excel(frame, "Barra", sheet_name, fmt)