Example #1
0
    def load_macro_data_wind(self,
                             macro_code="M0000545",
                             beg_date="19900101",
                             end_date=None):
        """
        Download one macro series from Wind and merge it into its CSV file.

        The series is requested with ``w.edb`` (``Fill=Previous``), rows that
        contain missing values are dropped, and the result is written to
        ``<self.data_path>/<macro_code>.csv``.  When that file already
        exists, the stored rows and the new rows are combined with
        ``FactorOperate().pandas_add_row`` before writing.

        :param macro_code: code passed to ``w.edb``.
        :param beg_date: first date of the request; normalised with
            ``Date().change_to_str``.
        :param end_date: last date of the request.  ``None`` (the default)
            means "today", resolved at call time.
        """
        # A ``datetime.today()`` default in the signature is evaluated only
        # once, when the function is defined, and silently goes stale in a
        # long-running process; resolve it on every call instead.
        if end_date is None:
            end_date = datetime.today().strftime("%Y%m%d")

        from WindPy import w
        w.start()

        beg_date = Date().change_to_str(beg_date)
        end_date = Date().change_to_str(end_date)

        # Download the data; after the transpose rows are the timestamps
        # from ``Times`` and columns are the entries of ``Codes``.
        wind_data = w.edb(macro_code, beg_date, end_date, "Fill=Previous")
        new_data = pd.DataFrame(wind_data.Data,
                                columns=wind_data.Times,
                                index=wind_data.Codes).T
        new_data = new_data.dropna()
        new_data.index = new_data.index.map(lambda x: x.strftime('%Y%m%d'))

        print(" Loading Macro Data %s From %s To %s " %
              (macro_code, beg_date, end_date))
        out_file = os.path.join(self.data_path, macro_code + '.csv')

        if os.path.exists(out_file):
            # Stored index is read back as text so it matches the
            # '%Y%m%d' strings built above.
            data = pd.read_csv(out_file, encoding='gbk', index_col=[0])
            data.index = data.index.map(str)
            data = FactorOperate().pandas_add_row(data, new_data)
        else:
            print(" File No Exist ", macro_code)
            data = new_data

        data = data.dropna(how='all')
        data.to_csv(out_file)
Example #2
0
    def cal_mfc_holding_barra_exposure_period(self, fund_name, beg_date,
                                              end_date):
        """
        Compute a fund's Barra exposure for each trading day of a period.

        ``self.cal_mfc_holding_barra_exposure_date`` is called once per date
        returned by ``Date().get_trade_date_series(beg_date, end_date)``; the
        daily results are stacked row-wise and merged into
        ``MfcRiskExposure_<fund_name>.csv`` under ``self.exposure_data_path``
        (via ``FactorOperate().pandas_add_row`` when the file exists).  Only
        the STYLE / COUNTRY / INDUSTRY factor columns are written.

        :param fund_name: fund identifier, also used in the output file name.
        :param beg_date: first date of the period.
        :param end_date: last date of the period.
        """
        date_series_daily = Date().get_trade_date_series(beg_date, end_date)

        # Gather the daily frames and concatenate once instead of growing a
        # frame inside the loop (which re-copies every earlier row on each
        # iteration).  The leading empty frame keeps the result an empty
        # DataFrame when there are no dates.
        daily_frames = [pd.DataFrame()]
        for date in date_series_daily:
            daily_frames.append(
                self.cal_mfc_holding_barra_exposure_date(fund_name, date))
        new_data = pd.concat(daily_frames, axis=0)

        out_file = os.path.join(self.exposure_data_path,
                                "MfcRiskExposure_" + fund_name + '.csv')
        if os.path.exists(out_file):
            data = pd.read_csv(out_file, encoding='gbk', index_col=[0])
            data.index = data.index.map(str)
            data = FactorOperate().pandas_add_row(data, new_data)
        else:
            data = new_data

        # Restrict (and order) the columns to the Barra factor names.
        type_list = ["STYLE", "COUNTRY", "INDUSTRY"]
        name = Barra().get_factor_name(type_list=type_list)
        data = data[list(name.NAME_EN.values)]
        data.to_csv(out_file)
Example #3
0
    def cal_mfc_holding_alpha_exposure_period(self, fund_name,
                                              factor_name_list, beg_date,
                                              end_date):
        """
        Compute a fund's alpha-factor exposure for each trading day of a
        period.

        ``self.cal_mfc_holding_alpha_exposure_date`` is called once per date
        returned by ``Date().get_trade_date_series(beg_date, end_date)``; the
        daily results are stacked row-wise and merged into
        ``MfcAlphaExposure_<fund_name>.csv`` under ``self.exposure_data_path``
        (via ``FactorOperate().pandas_add_row`` when the file exists).  Only
        the columns named in ``factor_name_list`` are written.

        :param fund_name: fund identifier, also used in the output file name.
        :param factor_name_list: factor names passed to the per-date
            calculation and used to select the output columns.
        :param beg_date: first date of the period.
        :param end_date: last date of the period.
        """
        date_series_daily = Date().get_trade_date_series(beg_date, end_date)

        # Gather the daily frames and concatenate once instead of growing a
        # frame inside the loop.  The leading empty frame keeps the result an
        # empty DataFrame when there are no dates.
        daily_frames = [pd.DataFrame()]
        for date in date_series_daily:
            daily_frames.append(
                self.cal_mfc_holding_alpha_exposure_date(
                    fund_name, factor_name_list, date))
        new_data = pd.concat(daily_frames, axis=0)

        out_file = os.path.join(self.exposure_data_path,
                                "MfcAlphaExposure_" + fund_name + '.csv')
        if os.path.exists(out_file):
            data = pd.read_csv(out_file, encoding='gbk', index_col=[0])
            data.index = data.index.map(str)
            data = FactorOperate().pandas_add_row(data, new_data)
        else:
            data = new_data

        data = data[factor_name_list]
        data.to_csv(out_file)
Example #4
0
    def cal_index_exposure(self,
                           index_code="000300.SH",
                           beg_date="20031231",
                           end_date=None,
                           period="D"):
        """
        Compute an index's Barra exposure over a period and store it.

        ``self.cal_index_exposure_date`` is called once per date returned by
        ``Date().get_trade_date_series``; the results are stacked row-wise
        and merged into ``Index_Barra_Exposure_<index_code>.csv`` under
        ``self.data_path_exposure`` (via ``FactorOperate().pandas_add_row``
        when the file exists).

        :param index_code: index identifier, also used in the file name.
        :param beg_date: first date of the period.
        :param end_date: last date of the period.  ``None`` (the default)
            means "today", resolved at call time.
        :param period: passed through to ``get_trade_date_series``.
        :return: ``None``.  Nothing is written when the period contains no
            dates.
        """
        # A ``datetime.today()`` default in the signature is evaluated once
        # at definition time and goes stale; resolve it per call.
        if end_date is None:
            end_date = datetime.today().strftime("%Y%m%d")

        date_series_daily = Date().get_trade_date_series(beg_date, end_date, period=period)

        daily_frames = [self.cal_index_exposure_date(index_code, date)
                        for date in date_series_daily]

        # Previously an empty date series left ``new_data`` unbound and the
        # method died with UnboundLocalError; there is simply nothing to add.
        if not daily_frames:
            return None
        new_data = pd.concat(daily_frames, axis=0)

        out_file = os.path.join(self.data_path_exposure,  "Index_Barra_Exposure_" + index_code + '.csv')
        if os.path.exists(out_file):
            data = pd.read_csv(out_file, encoding='gbk', index_col=[0])
            data.index = data.index.map(str)
            data = FactorOperate().pandas_add_row(data, new_data)
        else:
            data = new_data
        data.to_csv(out_file)
    def cal_fund_holder_risk_alpha_return_quarter(self, fund, end_date):
        """
        Split a fund's return into factor and alpha parts using the style
        exposure of its quarterly-report holdings.

        Each Barra factor return is multiplied by the fund's exposure to that
        factor; the products are summed per factor group into
        ``StyleReturn``, ``IndustryReturn`` and ``CountryReturn``, their total
        is ``SumReturn`` and ``AlphaReturn = FundReturn - SumReturn``.  The
        result is merged into the fund's CSV under
        ``<self.data_path_exposure>/fund_holding_risk_alpha_return_quarter``.

        :param fund: fund code; also part of the output file name.
        :param end_date: last date of the calculation (the start is fixed at
            "20040101"); also part of the output file name.
        :return: the newly computed frame (not the merged file content), or
            ``None`` when no exposure data is available for the fund.
        """
        beg_date = "20040101"
        type_list = ['STYLE', 'COUNTRY', 'INDUSTRY']

        fund_exposure = FundHolderExposureQuarter(
        ).get_fund_holder_exposure_quarter_daily(fund, beg_date, end_date)
        # Bail out before loading factor returns and fund NAV data that
        # would only be thrown away.
        if fund_exposure is None:
            return None

        barra_riskfactor_return = Barra().get_factor_return(
            beg_date, end_date, type_list=type_list)
        date_series = Date().get_trade_date_series(beg_date, end_date)
        fund_pct = FundFactor().get_fund_factor("Repair_Nav_Pct",
                                                fund_pool=[fund],
                                                date_list=date_series)
        fund_pct.columns = ["FundReturn"]

        fund_riskfactor_return = barra_riskfactor_return.mul(fund_exposure)
        fund_return = pd.concat([fund_pct, fund_riskfactor_return], axis=1)
        fund_return = fund_return.dropna()

        # One aggregate column per factor group.
        group_columns = []
        for factor_type, column in (("STYLE", "StyleReturn"),
                                    ("INDUSTRY", "IndustryReturn"),
                                    ("COUNTRY", "CountryReturn")):
            factor_names = list(
                Barra().get_factor_name(type_list=[factor_type])["NAME_EN"].values)
            fund_return[column] = fund_return[factor_names].sum(axis=1)
            group_columns.append(column)

        fund_return["SumReturn"] = fund_return[group_columns].sum(axis=1)
        fund_return["AlphaReturn"] = (fund_return["FundReturn"] -
                                      fund_return["SumReturn"])

        data_new = fund_return.dropna()

        # Merge the new data into the stored file
        out_path = os.path.join(self.data_path_exposure,
                                'fund_holding_risk_alpha_return_quarter')
        out_file = os.path.join(
            out_path, 'Fund_Holder_Risk_Alpha_Return_Quarter_' + fund + "_" +
            end_date + '.csv')
        print(out_file)

        if os.path.exists(out_file):
            data_old = pd.read_csv(out_file, index_col=[0], encoding='gbk')
            data_old.index = data_old.index.map(str)
            params = FactorOperate().pandas_add_row(data_old, data_new)
        else:
            params = data_new
        params.to_csv(out_file)
        return data_new
Example #6
0
    def load_index_factor(self,
                          index_code="000300.SH",
                          beg_date=None,
                          end_date=None,
                          primary=False):
        """
        Download recent factor data for one index from Wind and merge it
        into ``<self.data_data_factor>/<index_code>.csv``.

        :param index_code: index code passed to ``w.wsd``.
        :param beg_date: first date to request.  When ``None``: 20 trading
            days before ``end_date`` if the output file already exists,
            otherwise the index "basedate" reported by Wind, falling back to
            '19991231' when that lookup fails.
        :param end_date: last date to request.  ``None`` (the default) means
            "today", resolved at call time.
        :param primary: when true, request ``close,pe_ttm,pb_lf``; otherwise
            only ``close``.

        A ``PCT`` column is derived from ``CLOSE`` with ``pct_change``.  Any
        error in the derive/merge/write step is printed rather than raised.
        """
        # A ``datetime.today()`` default in the signature is evaluated once
        # at definition time and goes stale; resolve it per call.
        if end_date is None:
            end_date = datetime.today().strftime("%Y%m%d")

        from WindPy import w
        w.start()

        out_file = os.path.join(self.data_data_factor, index_code + '.csv')
        if beg_date is None:
            if os.path.exists(out_file):
                # Incremental update: only re-download a short recent window.
                beg_date = Date().get_trade_date_offset(end_date, -20)
            else:
                # First download: start from the index base date.  The
                # lookup is best-effort, hence the broad except.
                try:
                    base_data = w.wsd(index_code, "basedate")
                    beg_date = base_data.Data[0][0].strftime("%Y%m%d")
                except Exception:
                    beg_date = '19991231'

        beg_date = Date().change_to_str(beg_date)
        end_date = Date().change_to_str(end_date)
        print(beg_date, end_date, index_code, primary)

        # Download the data
        if primary:
            index_data = w.wsd(index_code, "close,pe_ttm,pb_lf", beg_date,
                               end_date, "Fill=Previous")
        else:
            index_data = w.wsd(index_code, "close", beg_date, end_date,
                               "Fill=Previous")

        new_data = pd.DataFrame(index_data.Data,
                                index=index_data.Fields,
                                columns=index_data.Times).T
        new_data.index = new_data.index.map(lambda x: x.strftime('%Y%m%d'))
        print(new_data)

        # Deliberately broad: a failed download surfaces here (for example
        # as a missing 'CLOSE' column) and is reported instead of aborting
        # the caller.
        try:
            new_data['PCT'] = new_data['CLOSE'].pct_change()
            print(" Loading Index Factor ", index_code)

            if os.path.exists(out_file):
                data = pd.read_csv(out_file, encoding='gbk', index_col=[0])
                data.index = data.index.map(str)
                data = FactorOperate().pandas_add_row(data, new_data)
            else:
                print(" File No Exist ", index_code)
                data = new_data
            data = data.dropna(how='all')
            data.to_csv(out_file)
        except Exception as e:
            print(e)
            print(" Loading Index Factor Error", index_code)
Example #7
0
    def load_fund_factor(self, factor_name, beg_date, end_date):
        """
        Download one fund factor from the FinDb database (incremental
        update) and merge it into ``<self.data_path_factor>/<factor_name>.csv``.

        The raw rows are joined with the fund info table, reshaped so that
        rows are dates and columns are fund codes, and then combined with
        the stored file through ``FactorOperate().pandas_add_row`` when that
        file exists.

        :param factor_name: factor to load; also the output file stem.
        :param beg_date: first date of the request.
        :param end_date: last date of the request.
        """
        start_str = Date().change_to_str(beg_date)
        stop_str = Date().change_to_str(end_date)

        print("Loading Fund Factor %s From %s To %s" %
              (factor_name, start_str, stop_str))
        raw = FinDb().load_raw_data_filter_period(factor_name, start_str,
                                                  stop_str)
        fund_info = FundStatic().get_findb_fund_info()
        # Only the last of the five returned parameters is needed here.
        _, _, _, _, value_column = FinDb().get_load_findb_param(factor_name)

        merged = pd.merge(raw, fund_info, on="证券内码", how='inner')

        # Long format keyed by (fund code, date); keep the first row of any
        # duplicated key after sorting.
        values = merged[value_column].values
        row_keys = [list(merged['基金代码'].values),
                    list(merged['日期'].values)]
        stacked = pd.DataFrame(values, index=row_keys).sort_index()
        stacked = stacked[~stacked.index.duplicated()]

        # Pivot the inner level into columns, then flip so dates are rows.
        wide = stacked.unstack()
        wide.columns = wide.columns.droplevel(level=0)
        fresh = wide.T.dropna(how='all')
        fresh.index = fresh.index.map(str)

        out_file = os.path.join(self.data_path_factor, factor_name + '.csv')

        if not os.path.exists(out_file):
            print(" File No Exist ", factor_name)
            combined = fresh
        else:
            stored = pd.read_csv(out_file, encoding='gbk', index_col=[0])
            stored.index = stored.index.map(str)
            combined = FactorOperate().pandas_add_row(stored, fresh)

        combined.dropna(how='all').to_csv(out_file)
Example #8
0
    def load_mfc_public_fund_nav(self, beg_date=None, end_date=None):
        """
        Download NAV data from Wind for every fund whose ``Type`` is "公募"
        in ``MfcGetData().get_mfc_fund_info()`` and update each fund's CSV
        incrementally.

        For each fund code the fields ``nav, NAV_adj, NAV_acc,
        NAV_adj_return1`` are requested, rows without ``NAV_ADJ`` are
        dropped, and the result is merged into
        ``<self.data_path>/nav/public_fund/<code>_Nav.csv``.

        :param beg_date: first date to request; defaults to 40 trading days
            before ``end_date``.
        :param end_date: last date to request; defaults to today.
        """
        if end_date is None:
            end_date = Date().change_to_str(datetime.today())
        if beg_date is None:
            beg_date = Date().get_trade_date_offset(end_date, -40)

        from WindPy import w
        w.start()

        data = MfcGetData().get_mfc_fund_info()
        data = data[data.Type == "公募"]

        for fund_code in data.Code.values:

            nav_data = w.wsd(fund_code, "nav,NAV_adj,NAV_acc,NAV_adj_return1",
                             beg_date, end_date, "")
            nav_data = pd.DataFrame(nav_data.Data,
                                    index=nav_data.Fields,
                                    columns=nav_data.Times).T
            nav_data.index = nav_data.index.map(lambda x: x.strftime('%Y%m%d'))
            new_data = nav_data.dropna(subset=['NAV_ADJ'])
            print(" Load Mfc Public Fund %s Nav " % fund_code)

            # Merge with the stored data.  The directory parts are passed
            # separately: the former "nav\public_fund" literal relied on the
            # invalid escape sequence "\p" and only formed a sub-directory
            # path on Windows.
            nav_file = os.path.join(self.data_path, "nav", "public_fund",
                                    fund_code + "_Nav.csv")
            if os.path.exists(nav_file):
                old_data = pd.read_csv(nav_file, index_col=[0], encoding='gbk')
                old_data.index = old_data.index.map(str)
                nav_data = FactorOperate().pandas_add_row(old_data, new_data)
            else:
                nav_data = new_data

            nav_data.to_csv(nav_file)
Example #9
0
    def cal_fund_holder_exposure_halfyear(self, fund_code, beg_date, end_date):
        """
        Compute a single fund's Barra exposure from its half-year holdings
        and merge it into ``Fund_Holder_Exposure_HalfYear_<fund_code>.csv``
        under ``self.halfyear_exposure_path``.

        For each report date the exposure to a factor is the sum over held
        stocks of ``weight * stock exposure`` (weights are divided by 100).
        The country factor is then overwritten with the fund's
        ``Stock_Ratio`` / 100, i.e. the exposure reflects the actual (not
        fully invested) stock position.

        :param fund_code: fund code, e.g. "000001.OF".
        :param beg_date: first date; replaced by "20040101" when the output
            file does not exist yet.
        :param end_date: last date.
        :return: ``None``; returns early without writing when no holdings
            are available for the fund.
        """
        type_list = ['COUNTRY', 'STYLE', 'INDUSTRY']
        barra_name = list(Barra().get_factor_name(type_list)['NAME_EN'].values)
        out_file = os.path.join(
            self.halfyear_exposure_path,
            'Fund_Holder_Exposure_HalfYear_%s.csv' % fund_code)

        # No stored history yet: compute from the fixed start date.
        if not os.path.exists(out_file):
            beg_date = "20040101"

        date_series = Date().get_normal_date_series(beg_date,
                                                    end_date,
                                                    period='S')
        fund_holding = FundHolder().get_fund_stock_weight_halfyear(fund_code)

        if fund_holding is None:
            return None

        # Only dates that actually have a holdings column.
        date_series = sorted(set(date_series) & set(fund_holding.columns))
        print(date_series)

        exposure_frames = []
        for date in date_series:

            report_date = Date().get_normal_date_month_end_day(date)
            trade_date = Date().get_trade_date_month_end_day(date)
            print("Calculate HalfYear Holder Exposure %s %s" %
                  (fund_code, report_date))

            barra_exposure = Barra().get_factor_exposure_date(
                trade_date, type_list)

            # Default: one all-NaN row for this report date.
            exposure_add = pd.DataFrame([],
                                        columns=barra_name,
                                        index=[report_date])

            # The holdings table fetched above is reused; the loop used to
            # re-fetch the identical table on every iteration.
            if (barra_exposure is not None) and (len(fund_holding) > 0):
                fund_holding_date = pd.DataFrame(fund_holding[report_date])
                fund_holding_date = fund_holding_date.dropna()
                fund_holding_date = fund_holding_date.sort_values(
                    by=[report_date], ascending=False)
                fund_holding_date.columns = ["Weight"]
                fund_holding_date /= 100.0
                data = pd.concat([fund_holding_date, barra_exposure],
                                 axis=1)
                data = data.dropna()

                if len(data) > 0:
                    for factor_name in barra_name:
                        # Weighted sum computed directly, without adding a
                        # column to a slice of ``data``.
                        exp = (data['Weight'] * data[factor_name]).sum()
                        # ``.ix`` was removed in pandas 1.0; both keys are
                        # labels, so ``.loc`` is the exact replacement.
                        exposure_add.loc[report_date, factor_name] = exp

                    country_name = Barra().get_factor_name(
                        ["COUNTRY"])["NAME_EN"].values[0]
                    position = FundFactor().get_fund_factor(
                        "Stock_Ratio",
                        date_list=[report_date],
                        fund_pool=[fund_code])
                    exposure_add.loc[
                        report_date,
                        country_name] = position.values[0][0] / 100

            exposure_frames.append(exposure_add)

        if exposure_frames:
            exposure_new = pd.concat(exposure_frames, axis=0)
        else:
            exposure_new = pd.DataFrame([])

        # Merge the new data into the stored file
        if os.path.exists(out_file):
            exposure_old = pd.read_csv(out_file, index_col=[0], encoding='gbk')
            exposure_old.index = exposure_old.index.map(str)
            params = FactorOperate().pandas_add_row(exposure_old, exposure_new)
        else:
            params = exposure_new

        if len(params) > 0:
            params = params[barra_name]
        params.to_csv(out_file)
    def cal_fund_regression_exposure_index(self, fund, beg_date, end_date, period="D"):

        """
        Estimate a fund's exposure to each index in ``self.index_code_list``
        for every regression date in a period.

        For each date, the fund's returns over the preceding
        ``self.regression_period`` trading days are regressed on the index
        returns by least squares, subject to every weight lying in [0, 1]
        and the weights summing to 1.  The problem is solved as a quadratic
        programme with ``sol.qp``.  Dates with too few observations, or on
        which the solver fails, get an all-NaN row.  The results are merged
        into ``<self.data_path>/<self.file_prefix><fund>.csv``.

        :param fund: fund code (ending in 'OF') or an index code.
        :param beg_date: first regression date.
        :param end_date: last regression date.
        :param period: passed to ``Date().get_trade_date_series``.
        :return: ``None``; nothing is written when there is no regression
            date with data.
        """

        # Parameters: per-index weight bounds and the required weight total
        one_index_up_limit = 1.0
        one_index_low_limit = 0.0
        sum_index = 1.0

        beg_date = Date().change_to_str(beg_date)
        end_date = Date().change_to_str(end_date)

        # Index return data, one column per index code.  (The fetch used to
        # be issued with a misspelled ``Nattr=`` keyword for every index.)
        index_return_all = pd.concat(
            [Index().get_index_factor(index_code, attr=["PCT"])
             for index_code in self.index_code_list], axis=1)
        index_return_all.columns = self.index_code_list

        # Fund return data
        if fund[len(fund)-2:] == 'OF':
            fund_return = FundFactor().get_fund_factor("Repair_Nav_Pct", None, [fund]) / 100.0
        else:
            fund_return = Index().get_index_factor(fund, attr=["PCT"])
            fund_return.columns = [fund]

        # Merge the data; the fund return is the first column
        data = pd.concat([fund_return, index_return_all], axis=1)
        data = data.dropna(subset=[fund])

        # Regression dates: trading dates that have a fund return
        date_series = Date().get_trade_date_series(beg_date, end_date, period=period)
        date_series = sorted(set(date_series) & set(data.index))

        # Solve the constrained regression for every date
        params_frames = []
        for period_end_date in date_series:

            # Data window for this regression
            period_beg_date = Date().get_trade_date_offset(period_end_date, -self.regression_period)
            period_date_series = Date().get_trade_date_series(period_beg_date, period_end_date)

            # ``.ix`` was removed in pandas 1.0.  ``reindex`` keeps its old
            # behaviour for dates absent from ``data`` (NaN rows, dropped on
            # the next line), whereas ``.loc`` would raise KeyError.
            data_periods = data.reindex(index=period_date_series)
            data_periods = data_periods.dropna(subset=[fund])
            data_periods = data_periods.T.dropna(how='all').T
            data_periods = data_periods.T.fillna(data_periods.mean(axis=1)).T
            data_periods = data_periods.dropna()

            # All-NaN row used whenever no weights can be estimated
            empty_row = pd.DataFrame([], columns=[period_end_date], index=data_periods.columns[1:]).T

            # The constrained regression is solved as a quadratic programme
            if len(data_periods) > self.regression_period_min and (len(data_periods.columns) > 1):

                # Least squares: minimise |y - x w|^2  ->  P = 2 x'x, Q = -2 x'y
                y = data_periods.iloc[:, 0].values
                x = data_periods.iloc[:, 1:].values
                n_index = x.shape[1]

                P = 2 * np.dot(x.T, x)
                Q = -2 * np.dot(x.T, y)

                # Bounds on each weight, written as G w <= h:
                #   w <= upper limit   and   -w <= -lower limit
                G = np.vstack((np.eye(n_index), -np.eye(n_index)))
                h = np.vstack((np.ones((n_index, 1)) * one_index_up_limit,
                               -np.ones((n_index, 1)) * one_index_low_limit))

                # Weights sum to ``sum_index``: A w = b
                A = np.ones((1, n_index))
                b = np.array([sum_index])

                # Solve.  Deliberately broad except: a failed solve yields a
                # NaN row for this date instead of aborting the whole run.
                try:
                    P = matrix(P)
                    Q = matrix(Q)
                    G = matrix(G)
                    h = matrix(h)
                    A = matrix(A)
                    b = matrix(b)
                    result = sol.qp(P, Q, G, h, A, b)
                    params_add = pd.DataFrame(np.array(result['x'][0:]),
                                              columns=[period_end_date], index=data_periods.columns[1:]).T
                    print("########## Fund Regression Index Exposure GF %s %s ##########" % (fund, period_end_date))
                except Exception:
                    params_add = empty_row
                    print("########## Quadratic Programming is InCorrect %s %s ##########" % (fund, period_end_date))
            else:
                params_add = empty_row
                print("########## Fund Regression Data Len is Too Small %s %s ##########" % (fund, period_end_date))

            params_frames.append(params_add)

        # Without any regression date there is nothing to store (this used
        # to fail with UnboundLocalError on ``params_new``).
        if not params_frames:
            return None
        params_new = pd.concat(params_frames, axis=0)

        # Merge the new data and store it
        out_file = os.path.join(self.data_path, self.file_prefix + fund + '.csv')

        if os.path.exists(out_file):
            params_old = pd.read_csv(out_file, index_col=[0], encoding='gbk')
            params_old.index = params_old.index.map(str)
            params = FactorOperate().pandas_add_row(params_old, params_new)
        else:
            params = params_new
        params.to_csv(out_file)
    def cal_fund_regression_risk_alpha_return_style(self, fund, beg_date,
                                                    end_date):
        """
        Split a fund's return into a style-factor part and an alpha part
        using its regression-based style exposure.

        Each STYLE / COUNTRY factor return (divided by 100) is multiplied by
        the fund's exposure to it; the row sum is ``SumReturn`` and
        ``AlphaReturn = FundReturn - SumReturn``.  Cumulative versions of the
        three return columns are added for the ``beg_date``..``end_date``
        slice, and the result is merged into the fund's CSV under
        ``<self.data_path_exposure>/fund_regression_risk_alpha_return_style``.

        Nothing is done when no exposure is available for the fund.

        :param fund: fund code (ending in 'OF') or an index code.
        :param beg_date: first row label of the stored slice.
        :param end_date: last row label of the stored slice.
        """
        exposure_index = FundRegressionExposureStyle(
        ).get_fund_regression_exposure_style(fund)

        if exposure_index is None:
            return

        # Factor columns: all style factors followed by the country factor(s)
        style_names = list(Barra().get_factor_name(['STYLE'])['NAME_EN'].values)
        country_names = list(
            Barra().get_factor_name(["COUNTRY"])['NAME_EN'].values)
        barra_name = style_names + country_names

        all_factor_return = Barra().get_factor_return(
            None, None, type_list=["INDUSTRY", "COUNTRY", "STYLE"])
        barra_return = all_factor_return[barra_name] / 100.0

        # Return series of the fund (or of the index given in its place)
        if fund[-2:] == 'OF':
            fund_return = FundFactor().get_fund_factor(
                "Repair_Nav_Pct", None, [fund]) / 100.0
        else:
            fund_return = Index().get_index_factor(fund, attr=["PCT"])
        fund_return.columns = ["FundReturn"]

        exposure_index = exposure_index.dropna(how="all")
        index_exposure_return = barra_return.mul(exposure_index).dropna(
            how="all")

        data = pd.concat([fund_return, index_exposure_return],
                         axis=1).dropna(how="all")
        # Keep only the dates that have an exposure-weighted factor return
        data = data.loc[index_exposure_return.index, :]
        data = data.dropna(subset=["FundReturn"])
        data["SumReturn"] = data[barra_name].sum(axis=1, skipna=True)
        data["AlphaReturn"] = data["FundReturn"] - data["SumReturn"]
        data = data.loc[beg_date:end_date, :]

        # Compounded returns over the stored slice
        for cum_column, source in (("CumFundReturn", "FundReturn"),
                                   ("CumAlphaReturn", "AlphaReturn"),
                                   ("CumSumReturn", "SumReturn")):
            data[cum_column] = (data[source] + 1.0).cumprod() - 1.0

        # Merge the new data into the stored file
        out_path = os.path.join(self.data_path_exposure,
                                'fund_regression_risk_alpha_return_style')
        out_file = os.path.join(
            out_path, 'Fund_Regression_Risk_Alpha_Style_' + fund + '.csv')

        if not os.path.exists(out_file):
            params = data
        else:
            params_old = pd.read_csv(out_file, index_col=[0], encoding='gbk')
            params_old.index = params_old.index.map(str)
            params = FactorOperate().pandas_add_row(params_old, data)
        print(params)
        params.to_csv(out_file)
Example #12
0
    def cal_fund_regression_exposure_index(self,
                                           reg_code,
                                           beg_date,
                                           end_date,
                                           period="D"):
        """
        Regress one fund's returns on the indices in ``self.index_code_list``
        for every regression date in a period.

        For each date the returns of the preceding ``self.regression_period``
        trading days are fitted by minimising the sum of squared residuals,
        subject to: weights sum to 1, weights are non-negative (no shorting),
        and the total absolute change against the previous weights is at
        most ``self.turnover``.  An ``R2`` column is stored next to the
        weights.  When a date has too few observations, the weights stored
        for the previous trading day are carried forward instead.  Results
        are merged into
        ``<self.file_prefix>_<self.folder_name>_<reg_code>.csv`` under
        ``self.index_exposure_path``.

        :param reg_code: column of ``self.asset_pct`` to explain.
        :param beg_date: first regression date.
        :param end_date: last regression date.
        :param period: passed to ``Date().get_trade_date_series``.
        :return: ``None``; returns early without writing when fewer than
            ``self.regression_period`` return rows exist or when no
            regression date has data.
        """

        date_series = Date().get_trade_date_series(beg_date, end_date, period)
        data_beg_date = Date().get_trade_date_offset(beg_date,
                                                     -self.regression_period)
        end_date = Date().change_to_str(end_date)

        code_list = [reg_code]
        code_list.extend(self.index_code_list)

        data = self.asset_pct.loc[data_beg_date:end_date, code_list]
        data = data.dropna(subset=[reg_code])

        if len(data) < self.regression_period:
            return None

        print("Regression %s With %s" % (reg_code, self.index_code_list))
        print("Length of Return Data Is %s " % len(data))

        date_series = sorted(set(date_series) & set(data.index))
        # ``date_series[0]`` below used to raise IndexError when none of the
        # requested dates has return data.
        if not date_series:
            return None

        # Weights computed on the previous trading day
        last_date = Date().get_trade_date_offset(date_series[0], -1)
        params_old = self.get_fund_regression_exposure_index_date(
            reg_code, last_date)
        params_old = params_old.T
        print("old", params_old)

        params_frames = []
        for period_end_date in date_series:

            # Data window for this regression (the previous
            # ``self.regression_period`` trading days)
            period_beg_date = Date().get_trade_date_offset(
                period_end_date, -self.regression_period)
            data_end_date = Date().get_trade_date_offset(period_end_date, -0)

            period_date_series = Date().get_trade_date_series(
                period_beg_date, data_end_date)
            # ``reindex`` yields NaN rows for trading dates missing from
            # ``data`` (dropped on the next line); ``.loc`` with missing
            # labels raises KeyError on pandas >= 1.0.
            data_periods = data.reindex(index=period_date_series)
            data_periods = data_periods.dropna(subset=[reg_code])
            data_periods = data_periods.T.dropna(how='all').T
            data_periods = data_periods.T.fillna(data_periods.mean(axis=1)).T
            data_periods = data_periods.dropna()

            print(
                "########## Calculate Regression Exposure %s %s %s %s ##########"
                % (reg_code, period_beg_date, period_end_date,
                   len(data_periods)))

            if len(data_periods) > self.regression_period_min and (len(
                    data_periods.columns) > 1):

                y = data_periods.iloc[:, 0].values
                x = data_periods.iloc[:, 1:].values
                index_codes = data_periods.columns[1:]
                n_index = x.shape[1]

                # No usable previous weights: start from equal weights.
                if params_old.empty or params_old.sum().sum() < 0.5:
                    params_old = pd.DataFrame(n_index * [1 / n_index],
                                              columns=[period_end_date],
                                              index=index_codes).T

                turnover = self.turnover
                # Align the previous weights with the current regressors;
                # indices without a previous weight become NaN and then 0.
                params_old = params_old.reindex(columns=index_codes)
                params_old = params_old.fillna(0.0)
                weight_old = params_old.values[0]

                weights = cvx.Variable(n_index)
                # ``@`` is the matrix product; ``*`` for matrix
                # multiplication is deprecated in CVXPY.
                residual = y - x @ weights
                prob = cvx.Problem(cvx.Minimize(cvx.sum_squares(residual)), [
                    cvx.sum(weights) == 1, weights >= 0,
                    cvx.sum(cvx.abs(weights - weight_old)) <= turnover
                ])
                prob.solve()
                print('Solver Status : ', prob.status)
                # NOTE(review): ``weights.value`` is presumably None when the
                # solver does not reach a solution, in which case the lines
                # below raise - confirm whether a fallback is wanted.

                # Regression R2
                # NOTE(review): tss is divided by n_obs while rss is divided
                # by n_obs - k - 1; kept as in the original - confirm this is
                # the intended adjustment.
                n_obs = len(y)
                k = x.shape[1]
                tss = np.sum((y - np.mean(y))**2) / n_obs
                y_res = y - np.dot(x, weights.value)
                rss = np.sum(y_res**2) / (n_obs - k - 1)
                r2 = 1 - rss / tss

                params_add = pd.DataFrame(weights.value,
                                          columns=[period_end_date],
                                          index=index_codes).T
                params_add.loc[period_end_date, "R2"] = r2
                print('new', params_add)
                params_old = params_add

            else:
                # Not enough data: carry forward what is stored for the
                # previous trading day.
                last_date = Date().get_trade_date_offset(period_end_date, -1)
                params_old = self.get_fund_regression_exposure_index_date(
                    reg_code, last_date)
                params_old = params_old.T
                print("old", params_old)
                params_add = params_old

            params_frames.append(params_add)

        params_new = pd.concat(params_frames, axis=0)

        # Merge the new data into the stored file
        file = '%s_%s_%s.csv' % (self.file_prefix, self.folder_name, reg_code)
        out_file = os.path.join(self.index_exposure_path, file)

        if os.path.exists(out_file):
            params_old = pd.read_csv(out_file, index_col=[0], encoding='gbk')
            params_old.index = params_old.index.map(str)
            params = FactorOperate().pandas_add_row(params_old, params_new)
        else:
            params = params_new

        params.to_csv(out_file)
    def cal_fund_holder_exposure_quarter(self, fund, beg_date, end_date):
        """Compute one fund's quarterly holding exposure (top-ten holdings).

        For every quarter-end date between ``beg_date`` and ``end_date`` that
        is also a column of the fund's quarterly holding table, the exposure
        to each Barra factor is ``sum(weight * stock exposure) / 100``.  The
        first COUNTRY factor is then set to the fund's ``Stock_Ratio`` / 100.
        The resulting rows are merged into
        ``Fund_Holder_Exposure_Quarter_<fund>.csv`` under
        ``self.data_path_exposure/fund_holding_exposure_quarter``.

        Args:
            fund: Fund code; used to query holdings and to name the file.
            beg_date: Start of the quarter-end date range.
            end_date: End of the quarter-end date range.

        Returns:
            None.  Nothing is written when the fund has no holding table or
            when no quarter in the range has a holding column.
        """

        type_list = ['STYLE', 'COUNTRY', 'INDUSTRY']
        date_series = Date().get_normal_date_series(beg_date,
                                                    end_date,
                                                    period='Q')
        fund_holding = FundHolder().get_fund_stock_weight_quarter(fund)
        if fund_holding is None:
            return None

        # Keep only the quarters for which a holding column exists.
        date_series = sorted(set(date_series) & set(fund_holding.columns))
        if not date_series:
            # Without this guard the merge step below would reference a
            # result that was never created.
            return None

        # The factor names do not depend on the date: look them up once.
        barra_name = list(Barra().get_factor_name(type_list)['NAME_EN'].values)

        exposure_list = []
        for date in date_series:
            report_date = Date().get_normal_date_month_end_day(date)
            trade_date = Date().get_trade_date_month_end_day(date)
            barra_exposure = Barra().get_factor_exposure_date(
                trade_date, type_list)

            print(
                "########## Calculate Quarter Holder Exposure %s %s ##########"
                % (fund, report_date))

            # Start from an all-NaN row; it is kept unchanged when there is
            # no exposure data or no stock common to holdings and exposures.
            exposure_add = pd.DataFrame([],
                                        columns=barra_name,
                                        index=[report_date])

            if barra_exposure is not None:
                fund_holding_date = pd.DataFrame(fund_holding[report_date])
                fund_holding_date = fund_holding_date.dropna()
                fund_holding_date = fund_holding_date.sort_values(
                    by=[report_date], ascending=False)
                fund_holding_date.columns = ["Weight"]
                data = pd.concat([fund_holding_date, barra_exposure], axis=1)
                data = data.dropna()

                if len(data) > 0:
                    weight = data['Weight']
                    for factor_name in barra_name:
                        # Weighted sum computed directly, so no column is
                        # assigned onto a slice of ``data``.
                        stock_exposure = weight * data[factor_name]
                        exposure_add.loc[report_date, factor_name] = (
                            stock_exposure.sum() / 100.0)

                    # The country exposure is taken from the reported stock
                    # ratio of the fund instead of the holdings sum.
                    country_name = Barra().get_factor_name(
                        ["COUNTRY"])["NAME_EN"].values[0]
                    position = FundFactor().get_fund_factor(
                        "Stock_Ratio",
                        date_list=[report_date],
                        fund_pool=[fund])
                    position = position.values[0][0]
                    exposure_add.loc[report_date,
                                     country_name] = position / 100

            exposure_list.append(exposure_add)

        exposure_new = pd.concat(exposure_list, axis=0)

        # Merge the new rows into the existing file, if any.
        out_path = os.path.join(self.data_path_exposure,
                                'fund_holding_exposure_quarter')
        out_file = os.path.join(
            out_path, 'Fund_Holder_Exposure_Quarter_' + fund + '.csv')

        if os.path.exists(out_file):
            exposure_old = pd.read_csv(out_file, index_col=[0], encoding='gbk')
            exposure_old.index = exposure_old.index.map(str)
            params = FactorOperate().pandas_add_row(exposure_old, exposure_new)
        else:
            params = exposure_new
        params.to_csv(out_file)
    def cal_fund_regression_exposure_style(self,
                                           fund,
                                           beg_date,
                                           end_date,
                                           period="D"):
        """Estimate rolling Barra exposures of a fund by bounded least squares.

        For each trade date in the range, the fund return is regressed on the
        Barra STYLE and COUNTRY factor returns over the preceding
        ``self.regression_period`` trade dates.  The fit is a quadratic
        program (``P = 2 X'X``, ``Q = -2 X'y``) with box constraints: every
        coefficient except the last is bounded to +/- 1.25 and the last one
        to ``[low, 0.95]``, where ``low`` is
        ``max(stock ratio of the last fund quarter - 0.08, 0.75)``.
        NOTE(review): the last coefficient is presumably the COUNTRY factor
        (the fund's equity position) - confirm against the column order of
        ``Barra().get_factor_return``.

        Results are merged into ``Fund_Regression_Exposure_Style_<fund>.csv``
        under ``self.data_path_exposure/fund_regression_exposure_style``.

        Args:
            fund: Fund code (ending in ``'OF'``) or an index code.
            beg_date: First date to compute.
            end_date: Last date to compute.
            period: Frequency passed to ``Date().get_trade_date_series``.

        Returns:
            None.  Nothing is written when no date in the range has data.
        """

        # Parameters
        up_style_exposure = 1.25
        up_position_exposure = 0.95
        base_low_position_exposure = 0.75
        position_sub = 0.08

        beg_date = Date().change_to_str(beg_date)
        end_date = Date().change_to_str(end_date)

        # Load factor returns and the fund (or index) returns.
        type_list = ['STYLE', 'COUNTRY']

        barra_name = list(Barra().get_factor_name(type_list)['NAME_EN'].values)
        barra_return = Barra().get_factor_return(None, None, type_list)

        date_series = Date().get_trade_date_series(beg_date,
                                                   end_date,
                                                   period=period)

        if fund[len(fund) - 2:] == 'OF':
            fund_return = FundFactor().get_fund_factor("Repair_Nav_Pct", None,
                                                       [fund])
        else:
            fund_return = Index().get_index_factor(fund, attr=["PCT"]) * 100
            fund_return.columns = [fund]

        data = pd.concat([fund_return, barra_return], axis=1)
        data = data.dropna()
        print(" Fund Code Total Len %s " % len(data))
        factor_number = len(barra_name)
        stock_ratio = FundFactor().get_fund_factor("Stock_Ratio", None,
                                                   [fund]) / 100

        date_series = sorted(set(date_series) & set(data.index))
        if not date_series:
            # Without this guard the merge step below would reference a
            # result that was never created.
            return None

        # Rolling regression: the exposure of a date uses the data up to and
        # including that date.
        params_list = []
        for period_end_date in date_series:

            # Data window of the regression.
            period_beg_date = Date().get_trade_date_offset(
                period_end_date, -self.regression_period)
            data_end_date = Date().get_trade_date_offset(period_end_date, -0)

            period_date_series = Date().get_trade_date_series(
                period_beg_date, data_end_date)
            # reindex tolerates dates missing from ``data`` (they become NaN
            # rows and are dropped), which is what the removed ``.ix`` did.
            data_periods = data.reindex(period_date_series).dropna()

            # Stock ratio of the fund at the last fund quarter date.
            quarter_date = Date().get_last_fund_quarter_date(period_end_date)
            stock_ratio_fund = stock_ratio.loc[quarter_date, fund]
            print(
                "########## Calculate Regression Exposure %s %s %s %s %s %s ##########"
                % (fund, period_beg_date, period_end_date, quarter_date,
                   len(data_periods), stock_ratio_fund))

            # All-NaN row, replaced only when the regression succeeds.
            params_add = pd.DataFrame([],
                                      columns=[period_end_date],
                                      index=barra_name).T

            if len(data_periods) > self.regression_period_min:

                y = data_periods.iloc[:, 0].values
                x = data_periods.iloc[:, 1:].values
                x_add = sm.add_constant(x)

                # Lower bound of the position for THIS date only.  It is
                # derived from the base value each time so that a high stock
                # ratio on one date does not raise the bound of later dates.
                low_position_exposure = max(stock_ratio_fund - position_sub,
                                            base_low_position_exposure)
                if np.isnan(low_position_exposure):
                    low_position_exposure = base_low_position_exposure

                P = 2 * np.dot(x_add.T, x_add)
                Q = -2 * np.dot(x_add.T, y)

                # G b <= h encodes  b <= h_up  and  -b <= h_low.
                G_up = np.diag(np.ones(factor_number + 1))
                G_low = -np.diag(np.ones(factor_number + 1))
                G = np.vstack((G_up, G_low))
                h_up = np.vstack((np.ones(
                    (factor_number, 1)) * up_style_exposure,
                                  np.array([up_position_exposure])))
                h_low = np.vstack((np.ones(
                    (factor_number, 1)) * up_style_exposure,
                                   np.array([-low_position_exposure])))
                h = np.vstack((h_up, h_low))

                P = matrix(P)
                Q = matrix(Q)
                G = matrix(G)
                h = matrix(h)
                try:
                    result = sol.qp(P, Q, G, h)
                    # Drop the first solution entry (the added constant).
                    params_add = pd.DataFrame(np.array(result['x'][1:]),
                                              columns=[period_end_date],
                                              index=barra_name).T
                except Exception as e:
                    # Keep the all-NaN row, but do not hide the reason.
                    print(" Regression Failed %s %s : %s " %
                          (fund, period_end_date, e))

            print(params_add)
            params_list.append(params_add)

        params_new = pd.concat(params_list, axis=0)

        # Merge the new rows into the existing file, if any.
        out_path = os.path.join(self.data_path_exposure,
                                'fund_regression_exposure_style')
        out_file = os.path.join(
            out_path, 'Fund_Regression_Exposure_Style_' + fund + '.csv')

        if os.path.exists(out_file):
            params_old = pd.read_csv(out_file, index_col=[0], encoding='gbk')
            params_old.index = params_old.index.map(str)
            params = FactorOperate().pandas_add_row(params_old, params_new)
        else:
            params = params_new
        print(params)
        params.to_csv(out_file)
    def cal_fund_regression_risk_alpha_return_index(self, fund, beg_date,
                                                    end_date):
        """Split a fund's return into index-explained return and alpha.

        The regression exposures of ``fund`` to every code in
        ``self.index_code_list`` are multiplied element-wise with the index
        returns; their row sum is ``SumReturn`` and
        ``AlphaReturn = FundReturn - SumReturn``.  Cumulative versions of the
        three series are computed over ``beg_date``..``end_date`` and the
        table is merged into ``<self.file_prefix><fund>.csv`` under
        ``self.data_path``.

        Args:
            fund: Fund code (ending in ``'OF'``) or an index code.
            beg_date: First index label of the output window.
            end_date: Last index label of the output window.

        Returns:
            None.  Nothing is written when no exposure data exists for the
            fund.
        """

        exposure_index = FundRegressionExposureIndex(
        ).get_fund_regression_exposure_index(fund)
        if exposure_index is None:
            return None

        # Index returns, one column per index code (each loaded once).
        index_return_all = pd.concat([
            Index().get_index_factor(index_code, attr=["PCT"])
            for index_code in self.index_code_list
        ],
                                     axis=1)
        index_return_all.columns = self.index_code_list

        # Fund returns; the fund NAV change is divided by 100 whereas the
        # index PCT is used as returned.
        if fund.endswith('OF'):
            fund_return = FundFactor().get_fund_factor(
                "Repair_Nav_Pct", None, [fund]) / 100.0
        else:
            fund_return = Index().get_index_factor(fund, attr=["PCT"])
        fund_return.columns = ["FundReturn"]

        exposure_index = exposure_index.dropna(how="all")
        index_exposure_return = index_return_all.mul(exposure_index)
        index_exposure_return = index_exposure_return.dropna(how="all")

        data = pd.concat([fund_return, index_exposure_return], axis=1)
        data = data.dropna(how="all")
        data = data.loc[index_exposure_return.index, :]
        data = data.dropna(subset=["FundReturn"])
        data["SumReturn"] = data[self.index_code_list].sum(axis=1,
                                                           skipna=True)
        data["AlphaReturn"] = data["FundReturn"] - data["SumReturn"]

        # Copy the window so that the cumulative columns are added to an
        # independent frame rather than to a slice of ``data``.
        data = data.loc[beg_date:end_date, :].copy()
        data["CumFundReturn"] = (data["FundReturn"] + 1.0).cumprod() - 1.0
        data["CumAlphaReturn"] = (data["AlphaReturn"] + 1.0).cumprod() - 1.0
        data["CumSumReturn"] = (data["SumReturn"] + 1.0).cumprod() - 1.0

        # Merge the new rows into the existing file, if any.
        out_path = self.data_path
        out_file = os.path.join(out_path, self.file_prefix + fund + '.csv')

        if os.path.exists(out_file):
            params_old = pd.read_csv(out_file,
                                     index_col=[0],
                                     encoding='gbk')
            params_old.index = params_old.index.map(str)
            params = FactorOperate().pandas_add_row(params_old, data)
        else:
            params = data
        print(params)
        params.to_csv(out_file)
    def cal_style_position(self, beg_date, end_date, code):
        """Compute style weights and position of a fund or index.

        Uses a constrained least-squares regression of the returns of
        ``code`` on the returns of ``self.index_code_list`` over the last
        ``self.regress_length`` trade dates, for every trade date in the
        range.  Constraints: weights are non-negative and sum to 1, the sum
        of all weights but the first lies in
        ``[self.stock_ratio_low, self.stock_ratio_up]``, and the absolute
        change versus the previous weights is at most the daily turnover.
        The first weight is stored as ``BondRatio`` and the sum of the others
        as ``StockRatio``.

        Results are merged into ``RestraintOLSStylePosition_<code>.csv``
        under ``self.data_path``.  A date whose regression fails or has too
        little data is written as an all-NaN row.

        Args:
            beg_date: First date to compute.
            end_date: Last date to compute.
            code: Column of ``self.data_return`` to explain.

        Returns:
            None.  Nothing is written when ``code`` has no returns or no
            trade date falls into the range.
        """

        x_pct = self.data_return[self.index_code_list]
        x_pct = x_pct.dropna(how='all')
        y_pct = pd.DataFrame(self.data_return[code])
        y_pct = y_pct.dropna()
        if y_pct.empty:
            # No return history: the first/last index lookup would fail.
            return None

        all_date_series = Date().get_trade_date_series(beg_date,
                                                       end_date,
                                                       period="D")
        y_series = Date().get_trade_date_series(y_pct.index[0],
                                                y_pct.index[-1])
        date_series = sorted(set(y_series) & set(all_date_series))
        if not date_series:
            return None

        error = False
        old_weight = None
        params_list = []

        for i_date, ed_date in enumerate(date_series):

            bg_date = Date().get_trade_date_offset(ed_date,
                                                   -self.regress_length)
            last_date = Date().get_trade_date_offset(ed_date, -1)

            x_pct_period = x_pct.loc[bg_date:ed_date, :]
            # Drop every index that has a missing value inside the window.
            x_pct_period = x_pct_period.T.dropna().T
            x_columns = x_pct_period.columns
            data = pd.concat([y_pct, x_pct_period], axis=1)
            data = data.dropna()

            # On the first date, or after a failed date, release the
            # turnover limit and assume equal previous weights.
            if i_date == 0 or error:
                n = len(x_columns)
                old_weight = pd.DataFrame(n * [1.0 / n],
                                          index=x_columns,
                                          columns=[last_date])
                turnover_daily = 2.0
            else:
                turnover_daily = self.turnover_daily
                # reindex (not .loc) so that an index that was absent from
                # the previous weights gets NaN and then weight 0.
                old_weight = old_weight.reindex(x_columns)
                old_weight = old_weight.fillna(0.0)

            print("## Cal Regress %s %s %s %s TurnOver %s##" %
                  (code, bg_date, ed_date, data.shape, turnover_daily))

            # All-NaN column for this date; replaced only on success so a
            # failed date never re-emits the result of an earlier date.
            params_add = pd.DataFrame(float('nan'),
                                      index=x_columns,
                                      columns=[ed_date])

            if len(data) >= self.regress_length_min:
                y = data[code].values
                x = data.iloc[:, 1:].values
                k = x.shape[1]
                old = old_weight.T.values[0]

                try:
                    w = cvx.Variable(k)
                    sigma = y - x * w
                    prob = cvx.Problem(cvx.Minimize(cvx.sum_squares(sigma)), [
                        cvx.sum(w) == 1.0,
                        cvx.sum(w[1:]) >= self.stock_ratio_low,
                        cvx.sum(w[1:]) <= self.stock_ratio_up,
                        cvx.sum(cvx.abs(w - old)) <= turnover_daily, w >= 0
                    ])
                    prob.solve()

                    print('Solver Status : ', prob.status)
                    weights = pd.DataFrame(w.value,
                                           columns=[ed_date],
                                           index=x_columns)
                    stock_sum = weights.loc[self.index_code_list[1:],
                                            ed_date].sum()
                    concat_data = pd.concat([weights, old_weight], axis=1)
                    concat_data = concat_data.dropna()
                    turnover_real = (concat_data[last_date] -
                                     concat_data[ed_date]).abs().sum()

                    weights.loc['StockRatio', ed_date] = stock_sum
                    weights.loc['BondRatio', ed_date] = weights.loc[
                        self.index_code_list[0], ed_date]
                    weights.loc['TurnOverDaily', ed_date] = turnover_real
                    print(weights.T)

                    # Commit only after every step above has succeeded.
                    params_add = weights
                    old_weight = weights
                    error = False
                except Exception as e:
                    # Report the date being regressed, not the range end.
                    print(ed_date, code, "回归失败", e)
                    error = True
            else:
                print(ed_date, code, "数据长度不够")
                error = True

            params_list.append(params_add)

        # Merge the new rows into the existing file, if any.
        params_new = pd.concat(params_list, axis=1).T
        out_file = os.path.join(self.data_path,
                                'RestraintOLSStylePosition_%s.csv' % code)

        if os.path.exists(out_file):
            params_old = pd.read_csv(out_file, index_col=[0], encoding='gbk')
            params_old.index = params_old.index.map(str)
            params = FactorOperate().pandas_add_row(params_old, params_new)
        else:
            params = params_new

        params.to_csv(out_file)
Example #17
0
    def regress_fund(self, fund_code, beg_date, end_date):
        """Regress fund returns on last quarter's holdings and a bond index.

        For every weekly trade date in the range, the fund return is fitted
        by a non-negative, sum-to-one combination of the returns of the
        stocks reported at the last fund quarter date plus ``885062.WI``.
        The total absolute deviation from the reported weights is limited to
        ``0.8% * (trade dates in the window)``.  The window starts at the
        later of ``self.regression_len - 1`` trade dates back and the quarter
        date.  The weights and an ``R2`` value are merged into
        ``<fund_code>.csv`` under ``self.data_path_exposure``.

        A date for which any step fails is written as an all-NaN row and the
        error is printed.

        Args:
            fund_code: Column of ``self.fund_pct`` to explain.
            beg_date: First date to compute.
            end_date: Last date to compute.

        Returns:
            None.  Nothing is written when no weekly date has a fund return.
        """

        period = "W"
        date_series = Date().get_trade_date_series(beg_date, end_date, period)

        fund_return = self.fund_pct[fund_code]
        fund_return = fund_return.dropna()
        date_series = sorted(set(date_series) & set(fund_return.index))
        if not date_series:
            # Without this guard the merge step below would reference a
            # result that was never created.
            return None

        # Quarterly reported holdings.
        quarter_weight = Fund().get_fund_holding_quarter(fund_code)
        params_list = []

        for ed_date in date_series:

            # Regression window: short right after a quarterly report,
            # longer as the report ages (capped by regression_len).
            ed_date = Date().get_trade_date_offset(ed_date, -0)
            quarter_date = Date().get_last_fund_quarter_date(ed_date)

            bg_date = Date().get_trade_date_offset(ed_date,
                                                   -(self.regression_len - 1))
            bg_date = max(bg_date, quarter_date)
            bg_date = Date().get_trade_date_offset(bg_date, -0)

            date_diff = Date().get_trade_date_diff(bg_date, ed_date)

            try:
                # Holdings of the last report.
                stock_weight = pd.DataFrame(quarter_weight[quarter_date])
                stock_weight = stock_weight.dropna()
                stock_weight.columns = ['Weight']

                # Return data of fund, stocks and bond index.
                data = pd.concat([fund_return, self.stock_pct, self.bold_pct],
                                 axis=1)
                data['885062.WI'] = data['885062.WI'].fillna(0.0)
                regress_date_series = Date().get_trade_date_series(
                    bg_date, ed_date)
                # reindex tolerates window dates that are missing from
                # ``data`` (they become NaN rows instead of a KeyError).
                data = data.reindex(regress_date_series)
                data = data.T.dropna(thresh=self.regression_min_len).T
                # Fill gaps with the mean of the same date across columns.
                # DataFrame.fillna aligns a Series on COLUMN labels, so the
                # frame is transposed to make the dates the columns.
                data = data.T.fillna(data.mean(axis=1)).T

                # Stock pool: reported stocks that have return data.
                stock_pool = list(stock_weight.index)
                stock_pool = list(set(stock_pool) & set(data.columns[1:]))
                stock_pool.sort()
                stock_pool.append("885062.WI")

                # Scale reported weights to the stock ratio, give the rest
                # to the bond index, then renormalise over the pool.
                stock_ratio = self.get_fund_stock_ratio(
                    fund_code, quarter_date)
                stock_weight['Weight'] /= stock_weight['Weight'].sum()
                stock_weight['Weight'] *= stock_ratio
                stock_weight.loc["885062.WI", "Weight"] = 100 - stock_ratio
                stock_weight /= 100.0
                stock_weight = stock_weight.loc[stock_pool, :]
                stock_weight['Weight'] /= stock_weight['Weight'].sum()

                print("## Cal Regress %s %s %s %s %s ##" %
                      (fund_code, quarter_date, bg_date, ed_date, len(data)))

                if (len(data) > self.regression_min_len) and (len(stock_pool) >
                                                              4):

                    # Fit the fund return with the stocks by minimising the
                    # squared tracking error: weights sum to 1, no shorting,
                    # limited turnover versus the reported weights.
                    y = data[fund_code].values / 100.0
                    x = data[stock_pool].values / 100.0
                    n = len(y)
                    k = x.shape[1]
                    weight_old = stock_weight.T.values[0]
                    turnover = date_diff * 0.8 / 100
                    print("TurnOver %s " % turnover)

                    # Optimisation
                    w = cvx.Variable(k)
                    sigma = y - x * w
                    prob = cvx.Problem(cvx.Minimize(cvx.sum_squares(sigma)), [
                        cvx.sum(w) == 1.0, w >= 0,
                        cvx.sum(cvx.abs(w - weight_old)) <= turnover
                    ])
                    prob.solve()

                    print('Solver Status : ', prob.status)
                    params_add = pd.DataFrame(w.value,
                                              columns=[ed_date],
                                              index=stock_pool)

                    # R2 of the regression (residual variance uses
                    # n - k - 1 degrees of freedom, total variance uses n).
                    tss = np.sum((y - np.mean(y))**2) / n
                    y_res = y - np.dot(x, w.value)
                    rss = np.sum(y_res**2) / (n - k - 1)
                    r2 = 1 - rss / tss
                    params_add.loc["R2", ed_date] = r2

                    print(params_add.T)

                else:
                    params_add = pd.DataFrame([],
                                              columns=[ed_date],
                                              index=stock_pool)
            except Exception as e:
                # Best effort: keep going with an empty column for this
                # date, but report why it failed.
                print("## Regress Failed %s %s : %s ##" %
                      (fund_code, ed_date, e))
                params_add = pd.DataFrame([], columns=[ed_date])

            params_list.append(params_add)

        # Merge the new rows into the existing file, if any.
        params_new = pd.concat(params_list, axis=1).T
        out_file = os.path.join(self.data_path_exposure, fund_code + '.csv')

        if os.path.exists(out_file):
            params_old = pd.read_csv(out_file, index_col=[0], encoding='gbk')
            params_old.index = params_old.index.map(str)
            params = FactorOperate().pandas_add_row(params_old, params_new)
        else:
            params = params_new

        params.to_csv(out_file)