Beispiel #1
0
    def cal_factor_barra_growth_5years_sales_growth(self, beg_date, end_date):

        """
        Five-year sales growth descriptor.

        Per-share operating income is summed over four consecutive report
        periods, sampled once a year (only the periods whose month characters
        ``label[4:6]`` match the latest period), and ``self.slope`` is applied
        to every rolling window of five annual values. The result is expanded
        to daily frequency by disclosure date and saved twice: raw, and after
        ``remove_extreme_value_mad`` plus ``standardization``.

        :param beg_date: start date forwarded to the quarter-to-daily conversion
        :param end_date: end date forwarded to the quarter-to-daily conversion
        """

        report_data = Stock().read_factor_h5("ReportDateDaily")
        total_income = Stock().read_factor_h5("OperatingIncomeTotal")
        total_share = Stock().read_factor_h5("TotalShare") / 100000000

        # Put the share count on the normal date grid. reindex() inserts NaN
        # columns for dates that are absent from the data, whereas .loc with a
        # list containing missing labels raises KeyError on pandas >= 1.0.
        normal_date_series = Date().get_normal_date_series(total_share.columns[0], total_share.columns[-1])
        total_share = total_share.reindex(columns=normal_date_series)
        # Forward-fill along the date axis, bridging at most 10 consecutive gaps.
        total_share = total_share.T.ffill(limit=10).T

        total_share, total_income = StockFactor().make_same_index_columns([total_share, total_income])
        income_pre_share = total_income.div(total_share).T

        # After the transpose the rows are report periods: trailing 4-period sum.
        ips_ttm_growth = income_pre_share.rolling(4).sum()

        # Keep one observation per year: the periods sharing the latest month.
        month = ips_ttm_growth.index[-1][4:6]
        ips_ttm_quarter = ips_ttm_growth.index
        ips_ttm_year = [label for label in ips_ttm_quarter if label[4:6] == month]
        ips_ttm_growth = ips_ttm_growth.loc[ips_ttm_year, :]

        ips_ttm_growth = ips_ttm_growth.rolling(5).apply(self.slope)
        # Return to the full period grid (periods dropped above become NaN rows)
        # and carry each annual value forward over at most the next 3 periods.
        ips_ttm_growth = ips_ttm_growth.reindex(ips_ttm_quarter)
        ips_ttm_growth = ips_ttm_growth.ffill(limit=3).T
        ips_ttm_growth = StockFactor().change_quarter_to_daily_with_disclosure_date(ips_ttm_growth, report_data,
                                                                                    beg_date, end_date)

        self.save_risk_factor_exposure(ips_ttm_growth, self.raw_factor_name_5y_sale)
        ips_ttm_growth = FactorPreProcess().remove_extreme_value_mad(ips_ttm_growth)
        ips_ttm_growth = FactorPreProcess().standardization(ips_ttm_growth)
        self.save_risk_factor_exposure(ips_ttm_growth, self.factor_name_5y_sale)
    def cal_factor_barra_leverage_market_leverage(self):
        """
        Market leverage = (market value of common equity + book value of
        preferred equity + book value of long-term debt) / market value of
        common equity.

        The raw ratio is saved under ``self.raw_factor_name_market_leverage``;
        the ratio after ``remove_extreme_value_mad`` and ``standardization`` is
        saved under ``self.factor_name_market_leverage``.
        """

        long_term_debt = Stock().read_factor_h5("LongTermLoanDaily")
        preferred = Stock().read_factor_h5("PreferredEquityDaily")
        shares = Stock().read_factor_h5("CommonShareDaily")
        price = Stock().read_factor_h5("Price_Unadjust")

        # Align share count and price before multiplying them into a market value.
        shares, price = FactorPreProcess().make_same_index_columns([shares, price])
        common_mv = shares.mul(price)

        # fill_value=0.0: an entry missing on one side is treated as zero; the
        # sum is still NaN where both sides are missing.
        numerator = common_mv.add(long_term_debt, fill_value=0.0)
        numerator = numerator.add(preferred, fill_value=0.0)
        leverage = numerator.div(common_mv)

        # Drop columns that are NaN for every row.
        leverage = leverage.T.dropna(how='all').T
        self.save_risk_factor_exposure(leverage, self.raw_factor_name_market_leverage)

        leverage = FactorPreProcess().remove_extreme_value_mad(leverage)
        leverage = FactorPreProcess().standardization(leverage)
        self.save_risk_factor_exposure(leverage, self.factor_name_market_leverage)
Beispiel #3
0
    def cal_factor_exposure(self, beg_date=None, end_date=None):
        """
        Compute the factor exposure as the part of cubed size not explained by size.

        For every trade date that is also a column of the size exposure, the
        cube of the size exposure is regressed (OLS with intercept) on the size
        exposure itself and the residual is kept. The residuals of all dates
        are then passed through ``remove_extreme_value_mad`` and
        ``standardization`` and saved under ``self.factor_name``.

        :param beg_date: start date forwarded to ``Date().get_trade_date_series``
        :param end_date: end date forwarded to ``Date().get_trade_date_series``
        """

        # read data
        size_data = self.get_risk_factor_exposure("cne5_normal_size")
        cubed_size_data = size_data ** 3

        date_series = Date().get_trade_date_series(beg_date, end_date)
        date_series = sorted(set(date_series) & set(size_data.columns))

        # Collect one residual column per date and concatenate once at the end;
        # concatenating inside the loop copies the growing frame on every pass.
        # The empty seed frame keeps the result an empty frame when there is
        # no date to process.
        residual_frames = [pd.DataFrame([])]

        # calculate everyday
        for date in date_series:

            print('Calculating Barra Risk factor %s at date %s' %
                  (self.factor_name, date))
            regression_data = pd.concat(
                [size_data[date], cubed_size_data[date]], axis=1)
            regression_data.columns = ['x', 'y']
            regression_data = regression_data.dropna()
            y = regression_data['y'].values
            x = regression_data['x'].values
            x_add = sm.add_constant(x)
            model = sm.OLS(y, x_add).fit()
            regression_data['res'] = regression_data['y'] - model.fittedvalues
            res_data_date = pd.DataFrame(regression_data['res'])
            res_data_date.columns = [date]
            residual_frames.append(res_data_date)

        res_data = pd.concat(residual_frames, axis=1)

        # Drop dates (columns) that are NaN for every row.
        res_data = res_data.T.dropna(how='all').T
        res_data = FactorPreProcess().remove_extreme_value_mad(res_data)
        res_data = FactorPreProcess().standardization(res_data)
        self.save_risk_factor_exposure(res_data, self.factor_name)
Beispiel #4
0
    def get_standard_alpha_factor(self, factor_name):
        """
        Load an alpha factor and pre-process it: extreme-value removal
        followed by standardization.

        :param factor_name: name passed to ``self.get_alpha_factor_exposure``
        :return: the exposure after ``remove_extreme_value_mad`` and
            ``standardization``
        """

        raw_exposure = self.get_alpha_factor_exposure(factor_name)
        trimmed = FactorPreProcess().remove_extreme_value_mad(raw_exposure)
        return FactorPreProcess().standardization(trimmed)
Beispiel #5
0
    def cal_factor_barra_growth_short_term_predicted_earnings_growth(self):

        """
        Short-term predicted earnings growth (expected growth over the next year).

        Reads the "FEGR_1" factor, saves it unchanged under
        ``self.raw_factor_name_short_term`` and, after
        ``remove_extreme_value_mad`` and ``standardization``, under
        ``self.factor_name_short_term``.
        """

        growth = Stock().read_factor_h5("FEGR_1")
        self.save_risk_factor_exposure(growth, self.raw_factor_name_short_term)

        growth = FactorPreProcess().remove_extreme_value_mad(growth)
        growth = FactorPreProcess().standardization(growth)
        self.save_risk_factor_exposure(growth, self.factor_name_short_term)
    def cal_trailing_earnings_to_price_ratio(self):
        """
        Trailing earnings-to-price ratio (TTM net profit attributable to the
        parent / total market value), obtained here as the reciprocal of the
        "PE_ttm" factor.

        Saves the raw ratio under ``self.raw_factor_name_trailing`` and the
        pre-processed ratio under ``self.factor_name_trailing``.
        """

        trailing_ep = 1.0 / Stock().read_factor_h5("PE_ttm")

        # Drop columns that are NaN for every row.
        trailing_ep = trailing_ep.T.dropna(how='all').T
        self.save_risk_factor_exposure(trailing_ep, self.raw_factor_name_trailing)

        trailing_ep = FactorPreProcess().remove_extreme_value_mad(trailing_ep)
        trailing_ep = FactorPreProcess().standardization(trailing_ep)
        self.save_risk_factor_exposure(trailing_ep, self.factor_name_trailing)
Beispiel #7
0
    def cal_factor_liquidity_yearly(self):

        """
        Yearly liquidity descriptor: natural log of the turnover summed over
        the latest 252 rows of the transposed "TurnOver_Daily" factor.

        The raw value is saved under ``self.raw_factor_name_yearly`` and the
        value after ``remove_extreme_value_mad`` and ``standardization`` under
        ``self.factor_name_yearly``.

        NOTE(review): the original docstring labelled this LIQUIDITY_STOM; a
        252-day window presumably corresponds to the annual term
        (LIQUIDITY_STOA) -- confirm.
        """

        P = 252
        turnover_daily = Stock().read_factor_h5("TurnOver_Daily").T
        # np.log is applied to the whole frame at once: same values as the
        # element-wise applymap(np.log), without the per-cell Python call
        # (DataFrame.applymap is also deprecated in recent pandas).
        turnover_period = np.log(turnover_daily.rolling(window=P).sum())
        # Back to the original orientation, dropping rows that are entirely NaN.
        turnover_period = turnover_period.T.dropna(how='all')

        self.save_risk_factor_exposure(turnover_period, self.raw_factor_name_yearly)
        turnover_period = FactorPreProcess().remove_extreme_value_mad(turnover_period)
        turnover_period = FactorPreProcess().standardization(turnover_period)
        self.save_risk_factor_exposure(turnover_period, self.factor_name_yearly)
Beispiel #8
0
    def cal_factor_exposure(self, beg_date, end_date):

        """
        Liquidity factor (LIQUIDITY).

        LIQUIDITY = 0.35 * monthly + 0.35 * quarterly + 0.30 * yearly
        liquidity descriptor. The combination is then regressed on the size
        exposure date by date (OLS with intercept) and the residual is kept,
        pre-processed and saved under ``self.factor_name``.

        :param beg_date: start date forwarded to ``Date().get_trade_date_series``
        :param end_date: end date forwarded to ``Date().get_trade_date_series``
        """

        # refresh the three descriptors
        self.cal_factor_liquidity_month()
        self.cal_factor_liquidity_quarter()
        self.cal_factor_liquidity_yearly()

        # weighted combination
        turnover_month = 0.35 * self.get_risk_factor_exposure(self.factor_name_month)
        turnover_quarter = 0.35 * self.get_risk_factor_exposure(self.factor_name_quarter)
        turnover_yearly = 0.30 * self.get_risk_factor_exposure(self.factor_name_yearly)

        # fill_value=0.0: a descriptor missing for a cell contributes zero
        # unless every descriptor is missing there.
        liquidity = turnover_month.add(turnover_quarter, fill_value=0.0)
        liquidity = liquidity.add(turnover_yearly, fill_value=0.0)
        liquidity = liquidity.T.dropna(how='all').T

        # residual of the regression on size
        size_data = self.get_risk_factor_exposure("cne5_normal_size")
        [size_data, liquidity] = FactorPreProcess().make_same_index_columns([size_data, liquidity])

        date_series = Date().get_trade_date_series(beg_date, end_date)
        date_series = sorted(set(date_series) & set(liquidity.columns))

        # Collect one residual column per date and concatenate once at the end;
        # concatenating inside the loop copies the growing frame on every pass.
        # The empty seed frame keeps the result an empty frame when there is
        # no date to process.
        residual_frames = [pd.DataFrame([])]

        for date in date_series:

            print('Calculating Barra Risk factor %s at date %s' % (self.factor_name, date))
            regression_data = pd.concat([size_data[date], liquidity[date]], axis=1)
            regression_data.columns = ['x', 'y']
            regression_data = regression_data.dropna()
            y = regression_data['y'].values
            x = regression_data['x'].values
            x_add = sm.add_constant(x)
            model = sm.OLS(y, x_add).fit()
            regression_data['res'] = regression_data['y'] - model.fittedvalues
            res_date = pd.DataFrame(regression_data['res'])
            res_date.columns = [date]
            residual_frames.append(res_date)

        turnover_res = pd.concat(residual_frames, axis=1)

        turnover_res = FactorPreProcess().remove_extreme_value_mad(turnover_res)
        turnover_res = FactorPreProcess().standardization(turnover_res)
        self.save_risk_factor_exposure(turnover_res, self.factor_name)
    def cal_factor_barra_leverage_debt_to_asset(self):
        """
        Debt-to-asset ratio = total liabilities / total assets.

        Saves the raw ratio under ``self.raw_factor_name_debt_to_asset`` and the
        ratio after ``remove_extreme_value_mad`` and ``standardization`` under
        ``self.factor_name_debt_to_asset``.
        """

        liabilities = Stock().read_factor_h5("TotalLiabilityDaily")
        assets = Stock().read_factor_h5('TotalAssetDaily')

        # Ratio, minus the columns that are NaN for every row.
        ratio = liabilities.div(assets).T.dropna(how='all').T
        self.save_risk_factor_exposure(ratio, self.raw_factor_name_debt_to_asset)

        ratio = FactorPreProcess().remove_extreme_value_mad(ratio)
        ratio = FactorPreProcess().standardization(ratio)
        self.save_risk_factor_exposure(ratio, self.factor_name_debt_to_asset)
    def cal_factor_exposure(self, beg_date, end_date):
        """
        Alpha factor: TTM net operating cash flow divided by total market value.

        :param beg_date: start date forwarded to the quarter-to-daily conversion
        :param end_date: end date forwarded to the quarter-to-daily conversion
        """

        # inputs
        quarterly_cfo = Stock().read_factor_h5("NetOperateCashFlow")
        cash_flow = Stock().change_single_quarter_to_ttm_quarter(quarterly_cfo)

        shares = Stock().read_factor_h5("TotalShare")
        price = Stock().read_factor_h5("Price_Unadjust")
        disclosure = Stock().read_factor_h5("ReportDateDaily")

        # TTM cash flow expanded to daily frequency by disclosure date
        cash_flow = Stock().change_quarter_to_daily_with_disclosure_date(
            cash_flow, disclosure, beg_date, end_date)

        # market value = shares * unadjusted price, scaled down by 1e8
        shares, price = FactorPreProcess().make_same_index_columns([shares, price])
        market_value = shares.mul(price) / 100000000

        cash_flow, market_value = Stock().make_same_index_columns([cash_flow, market_value])
        ratio = cash_flow.div(market_value)

        # store, without the columns that are NaN for every row
        ratio = ratio.T.dropna(how='all').T
        self.save_alpha_factor_exposure(ratio, self.raw_factor_name)
    def cal_factor_exposure(self, beg_date, end_date):
        """
        Exponentially weighted sum of daily log returns over a lagged window.

        For every trade date the window runs between the trade-date offsets
        ``-l - t + 2`` and ``-l + 1`` (l = 21, t = 504). Log returns (in
        percent) inside the window are multiplied by the weights from
        ``TimeSeriesWeight().exponential_weight(len(window), half_life)`` and
        summed per column. A column gets NaN when it has any NaN in the window
        (``skipna=False``) or fewer than ``min_period`` observations. Dates
        whose window does not hold more than ``min_period`` rows are skipped.

        The raw result is saved under ``self.raw_factor_name`` and the result
        after ``remove_extreme_value_mad`` and ``standardization`` under
        ``self.factor_name``.

        :param beg_date: start date forwarded to ``Date().get_trade_date_series``
        :param end_date: end date forwarded to ``Date().get_trade_date_series``
        """

        # params
        l, t, half_life, min_period = 21, 504, 126, 400

        # read data: percentage change -> log return, still in percent
        pct = Stock().read_factor_h5("Pct_chg").T
        pct = np.log(pct / 100.0 + 1.0) * 100

        # calculate data daily
        date_series = Date().get_trade_date_series(beg_date, end_date)

        # Collect one column per date and concatenate once at the end;
        # concatenating inside the loop copies the growing frame on every pass.
        # The empty seed frame keeps the result an empty frame when no date
        # qualifies.
        daily_frames = [pd.DataFrame([])]

        for current_date in date_series:

            data_end = Date().get_trade_date_offset(current_date, -l + 1)
            data_beg = Date().get_trade_date_offset(current_date, -l - t + 2)
            pct_period = pct.loc[data_beg:data_end, :]
            pct_period = pct_period.dropna(how='all')
            count = pct_period.count()

            if len(pct_period) > min_period:
                print('Calculating Barra Risk factor %s at date %s' %
                      (self.factor_name, current_date))
                weight = TimeSeriesWeight().exponential_weight(
                    len(pct_period), half_life)
                # np.vstack is what the deprecated alias np.row_stack resolved
                # to: the weights are stacked into one column and repeated for
                # every column of the window.
                weight_mat = np.tile(np.vstack(weight),
                                     (1, len(pct_period.columns)))
                weight_pd = pd.DataFrame(weight_mat,
                                         index=pct_period.index,
                                         columns=pct_period.columns)
                pct_weight = pct_period.mul(weight_pd)
                mon = pd.DataFrame(pct_weight.sum(skipna=False))
                # Blank out columns with too few observations in the window.
                mon[count < min_period] = np.nan
                mon.columns = [current_date]
                daily_frames.append(mon)
            else:
                print('Calculating Barra Risk factor %s at date %s is null' %
                      (self.factor_name, current_date))

        res_data = pd.concat(daily_frames, axis=1)

        res_data = res_data.T.dropna(how='all').T
        self.save_risk_factor_exposure(res_data, self.raw_factor_name)
        res_data = FactorPreProcess().remove_extreme_value_mad(res_data)
        res_data = FactorPreProcess().standardization(res_data)
        self.save_risk_factor_exposure(res_data, self.factor_name)
    def cal_predicted_earnings_to_price_ratio(self):
        """
        Predicted earnings-to-price ratio: the "FE_1" factor divided by the
        unadjusted price.

        Saves the raw ratio under ``self.raw_factor_name_predicted`` and the
        ratio after ``remove_extreme_value_mad`` and ``standardization`` under
        ``self.factor_name_predicted``.

        NOTE(review): the original docstring describes this as predicted
        earnings / total market value; dividing by price matches that only if
        "FE_1" is a per-share figure -- confirm.
        """

        forecast = Stock().read_factor_h5("FE_1")
        price = Stock().read_factor_h5("Price_Unadjust")

        forecast, price = FactorPreProcess().make_same_index_columns([forecast, price])

        # Ratio, minus the columns that are NaN for every row.
        predicted_ep = forecast.div(price).T.dropna(how='all').T
        self.save_risk_factor_exposure(predicted_ep, self.raw_factor_name_predicted)

        predicted_ep = FactorPreProcess().remove_extreme_value_mad(predicted_ep)
        predicted_ep = FactorPreProcess().standardization(predicted_ep)
        self.save_risk_factor_exposure(predicted_ep, self.factor_name_predicted)
    def cal_factor_exposure(self):
        """
        Size factor: natural log of total market value (unadjusted price *
        total shares, scaled down by 1e8).

        Saves the raw log value under ``self.raw_factor_name`` and the value
        after ``remove_extreme_value_mad`` and ``standardization`` under
        ``self.factor_name``.
        """

        # inputs
        price = Stock().read_factor_h5("Price_Unadjust")
        shares = Stock().read_factor_h5("TotalShare")

        # log market value
        price, shares = FactorPreProcess().make_same_index_columns([price, shares])
        log_size = np.log(price.mul(shares) / 100000000)

        # store raw, then pre-processed
        self.save_risk_factor_exposure(log_size, self.raw_factor_name)
        log_size = FactorPreProcess().remove_extreme_value_mad(log_size)
        log_size = FactorPreProcess().standardization(log_size)
        self.save_risk_factor_exposure(log_size, self.factor_name)
    def cal_factor_barra_leverage_book_leverage(self):
        """
        Book leverage = (book value of common equity + book value of preferred
        equity + long-term debt) / book value of common equity.

        Shareholder equity is used as "common + preferred"; common equity is
        shareholder equity minus preferred equity. Saves the raw ratio under
        ``self.raw_factor_name_book_leverage`` and the pre-processed ratio
        under ``self.factor_name_book_leverage``.
        """
        equity = Stock().read_factor_h5("TotalShareHoldeRequityDaily")
        preferred = Stock().read_factor_h5("PreferredEquityDaily")
        # NOTE(review): no fill_value here, so a missing preferred-equity entry
        # makes common equity (and the ratio) NaN -- confirm this is intended.
        common = equity.sub(preferred)
        long_term_debt = Stock().read_factor_h5("LongTermLoanDaily")

        # fill_value=0.0: an entry missing on one side is treated as zero.
        numerator = equity.add(long_term_debt, fill_value=0.0)

        # Ratio, minus the columns that are NaN for every row.
        leverage = numerator.div(common).T.dropna(how='all').T
        self.save_risk_factor_exposure(leverage, self.raw_factor_name_book_leverage)

        leverage = FactorPreProcess().remove_extreme_value_mad(leverage)
        leverage = FactorPreProcess().standardization(leverage)
        self.save_risk_factor_exposure(leverage, self.factor_name_book_leverage)
    def cal_factor_exposure(self):
        """
        Composite leverage factor.

        Recomputes the three leverage descriptors, then combines them as
        0.35 * debt-to-asset + 0.38 * market leverage + 0.27 * book leverage.
        The sum is passed through ``remove_extreme_value_mad`` and
        ``standardization`` and saved under ``self.factor_name``.
        """

        self.cal_factor_barra_leverage_debt_to_asset()
        self.cal_factor_barra_leverage_market_leverage()
        self.cal_factor_barra_leverage_book_leverage()

        weighted_parts = [
            0.35 * self.get_risk_factor_exposure(self.factor_name_debt_to_asset),
            0.38 * self.get_risk_factor_exposure(self.factor_name_market_leverage),
            0.27 * self.get_risk_factor_exposure(self.factor_name_book_leverage),
        ]

        # fill_value=0.0: a descriptor missing for a cell contributes zero
        # unless both operands are missing there.
        leverage = weighted_parts[0]
        for part in weighted_parts[1:]:
            leverage = leverage.add(part, fill_value=0.0)

        leverage = FactorPreProcess().remove_extreme_value_mad(leverage)
        leverage = FactorPreProcess().standardization(leverage)
        self.save_risk_factor_exposure(leverage, self.factor_name)
Beispiel #16
0
    def cal_factor_exposure(self, beg_date, end_date):

        """
        Composite growth factor.

        Recomputes the four growth descriptors, then combines them as
        0.18 * long-term predicted growth + 0.11 * short-term predicted growth
        + 0.24 * 5-year profit growth + 0.47 * 5-year sales growth. The sum is
        passed through ``remove_extreme_value_mad`` and ``standardization``
        and saved under ``self.factor_name``.

        :param beg_date: start date forwarded to the two 5-year descriptors
        :param end_date: end date forwarded to the two 5-year descriptors
        """

        self.cal_factor_barra_growth_long_term_predicted_earnings_growth()
        self.cal_factor_barra_growth_short_term_predicted_earnings_growth()
        self.cal_factor_barra_growth_5years_profit_growth(beg_date, end_date)
        self.cal_factor_barra_growth_5years_sales_growth(beg_date, end_date)

        weighted_parts = [
            0.18 * self.get_risk_factor_exposure(self.factor_name_long_term),
            0.11 * self.get_risk_factor_exposure(self.factor_name_short_term),
            0.24 * self.get_risk_factor_exposure(self.factor_name_5y_profit),
            0.47 * self.get_risk_factor_exposure(self.factor_name_5y_sale),
        ]

        # fill_value=0.0: a descriptor missing for a cell contributes zero
        # unless both operands are missing there.
        growth = weighted_parts[0]
        for part in weighted_parts[1:]:
            growth = growth.add(part, fill_value=0.0)

        growth = FactorPreProcess().remove_extreme_value_mad(growth)
        growth = FactorPreProcess().standardization(growth)
        self.save_risk_factor_exposure(growth, self.factor_name)
Beispiel #17
0
    def cal_stock_covariance(self, date):
        """
        Compute the stock covariance matrix of one date and write it to CSV.

        sigma = B F B' + S, where B is the factor exposure matrix
        (stocks x factors), F the factor covariance matrix and S the diagonal
        matrix of squared residual risks.

        The result is written to
        ``<self.data_path>/StockCovariance/StockCovariance_<date>.csv``.
        Nothing is written when no exposure is available for the date.

        :param date: date key used for the covariance, exposure and residual risk
        """

        factor_covariance = self.get_factor_covariance(date)
        exposure = self.get_factor_exposure_date(
            date, type_list=['COUNTRY', 'STYLE', 'INDUSTRY'])

        if exposure is None or len(exposure) == 0:
            print("Exposure is None %s" % date)
            return

        # Put the exposure columns in the same order as the factor covariance.
        exposure = exposure[factor_covariance.columns]

        # Specific part: squared residual risk on the diagonal.
        residual_risk = self.get_stock_residual_risk()
        residual_var_diag = np.diag((residual_risk[date] ** 2).values)
        code_list = residual_risk.index.values
        residual_var_diag = pd.DataFrame(residual_var_diag,
                                         index=code_list,
                                         columns=code_list)

        # Common part: B F B'.
        public_var = np.dot(
            np.dot(exposure.values, factor_covariance.values),
            exposure.T.values)
        code_list = exposure.index.values
        public_var = pd.DataFrame(public_var,
                                  index=code_list,
                                  columns=code_list)

        residual_var_diag, public_var = FactorPreProcess(
        ).make_same_index_columns([residual_var_diag, public_var])
        total_cov = residual_var_diag.add(public_var)

        # exist_ok avoids the race between an existence check and the creation.
        path = os.path.join(self.data_path, 'StockCovariance')
        os.makedirs(path, exist_ok=True)

        print("Cal Stock Covariance Daily is %s" % date)
        out_file = os.path.join(path, "StockCovariance_%s.csv" % date)
        total_cov.to_csv(out_file)
    def cal_cash_earnings_to_price_ratio(self, beg_date, end_date):
        """
        Cash earnings-to-price ratio: TTM net operating cash flow divided by
        total market value (total shares * unadjusted price, scaled down by 1e8).

        Saves the raw ratio under ``self.raw_factor_name_cash`` and the ratio
        after ``remove_extreme_value_mad`` and ``standardization`` under
        ``self.factor_name_cash``.

        :param beg_date: start date forwarded to the quarter-to-daily conversion
        :param end_date: end date forwarded to the quarter-to-daily conversion
        """

        cash_flow = Stock().read_factor_h5("NetOperateCashFlow")
        disclosure = Stock().read_factor_h5("ReportDateDaily")

        # Single-quarter values -> TTM -> daily frequency by disclosure date.
        cash_flow = Stock().change_single_quarter_to_ttm_quarter(cash_flow)
        cash_flow = Stock().change_quarter_to_daily_with_disclosure_date(
            cash_flow, disclosure, beg_date, end_date)

        shares = Stock().read_factor_h5("TotalShare")
        price = Stock().read_factor_h5("Price_Unadjust")

        cash_flow, shares, price = FactorPreProcess().make_same_index_columns(
            [cash_flow, shares, price])
        market_value = shares.mul(price) / 100000000

        # Ratio, minus the columns that are NaN for every row.
        cash_to_price = cash_flow.div(market_value).T.dropna(how='all').T
        self.save_risk_factor_exposure(cash_to_price, self.raw_factor_name_cash)

        cash_to_price = FactorPreProcess().remove_extreme_value_mad(cash_to_price)
        cash_to_price = FactorPreProcess().standardization(cash_to_price)
        self.save_risk_factor_exposure(cash_to_price, self.factor_name_cash)
Beispiel #19
0
    def cal_factor_barra_growth_5years_profit_growth(self, beg_date, end_date):

        """
        Five-year profit growth descriptor.

        Basic EPS is summed over four consecutive report periods, sampled once
        a year (only the periods whose month characters ``label[4:6]`` match
        the latest period), and ``self.slope`` is applied to every rolling
        window of five annual values. The result is expanded to daily frequency
        by disclosure date and saved twice: raw, and after
        ``remove_extreme_value_mad`` plus ``standardization``.

        :param beg_date: start date forwarded to the quarter-to-daily conversion
        :param end_date: end date forwarded to the quarter-to-daily conversion
        """

        report_data = Stock().read_factor_h5("ReportDateDaily")
        eps = Stock().read_factor_h5("EPS_basic").T

        # After the transpose the rows are report periods: trailing 4-period sum.
        eps_ttm = eps.rolling(4).sum()

        # Keep one observation per year: the periods sharing the latest month.
        month = eps_ttm.index[-1][4:6]
        eps_ttm_quarter = eps_ttm.index
        eps_ttm_year = [label for label in eps_ttm_quarter if label[4:6] == month]
        eps_ttm = eps_ttm.loc[eps_ttm_year, :]

        eps_ttm_growth = eps_ttm.rolling(5).apply(self.slope)
        # Return to the full period grid. reindex() creates NaN rows for the
        # periods dropped above, whereas .loc with missing labels raises
        # KeyError on pandas >= 1.0. Each annual value is then carried forward
        # over at most the next 3 periods.
        eps_ttm_growth = eps_ttm_growth.reindex(eps_ttm_quarter)
        eps_ttm_growth = eps_ttm_growth.ffill(limit=3).T
        eps_ttm_growth = StockFactor().change_quarter_to_daily_with_disclosure_date(eps_ttm_growth, report_data,
                                                                                    beg_date, end_date)
        self.save_risk_factor_exposure(eps_ttm_growth, self.raw_factor_name_5y_profit)
        eps_ttm_growth = FactorPreProcess().remove_extreme_value_mad(eps_ttm_growth)
        eps_ttm_growth = FactorPreProcess().standardization(eps_ttm_growth)
        self.save_risk_factor_exposure(eps_ttm_growth, self.factor_name_5y_profit)
    def cal_factor_exposure(self, beg_date, end_date):
        """
        Composite earnings-yield factor.

        Reference weights: 0.68 * predicted earnings / market value
        + 0.21 * TTM net operating cash flow / market value
        + 0.11 * TTM net profit attributable to the parent / market value.
        Because forecast data quality for A-shares is low, the weights used
        here are 0.50 / 0.30 / 0.20 instead.

        The sum is passed through ``remove_extreme_value_mad`` and
        ``standardization`` and saved under ``self.factor_name``.

        :param beg_date: start date forwarded to the cash earnings descriptor
        :param end_date: end date forwarded to the cash earnings descriptor
        """

        self.cal_predicted_earnings_to_price_ratio()
        self.cal_cash_earnings_to_price_ratio(beg_date, end_date)
        self.cal_trailing_earnings_to_price_ratio()

        weighted_parts = [
            0.50 * self.get_risk_factor_exposure(self.factor_name_predicted),
            0.30 * self.get_risk_factor_exposure(self.factor_name_cash),
            0.20 * self.get_risk_factor_exposure(self.factor_name_trailing),
        ]

        # fill_value=0.0: a descriptor missing for a cell contributes zero
        # unless both operands are missing there.
        earning_yield = weighted_parts[0]
        for part in weighted_parts[1:]:
            earning_yield = earning_yield.add(part, fill_value=0.0)

        earning_yield = FactorPreProcess().remove_extreme_value_mad(earning_yield)
        earning_yield = FactorPreProcess().standardization(earning_yield)
        self.save_risk_factor_exposure(earning_yield, self.factor_name)
Beispiel #21
0
    def get_data_real(self):
        """
        Build a cross-sectional data set for the fixed date "20171229".

        Columns: the return from that date to the trade date 40 offsets later
        ('pct'), the "ROEQuarterDaily" alpha, three risk exposures (size, beta,
        non-linear size) and one dummy column per industry code. Rows with any
        NaN are dropped; the alpha and the three risk columns are standardized.

        :return: tuple ``(data, style_columns, industry_columns)``
        """

        # parameters
        date = "20171229"
        factor_name = "ROEQuarterDaily"
        next_date = Date().get_trade_date_offset(date, 40)

        # read data
        price = Stock().read_factor_h5("PriceCloseAdjust")
        alpha_val = Stock().read_factor_h5(
            factor_name, Stock().get_h5_path("my_alpha"))
        size = Stock().read_factor_h5(
            "NORMAL_CNE5_SIZE", Stock().get_h5_path("my_barra_risk"))
        beta = Stock().read_factor_h5(
            "NORMAL_CNE5_BETA", Stock().get_h5_path("my_barra_risk"))
        nolin_size = Stock().read_factor_h5(
            "NORMAL_CNE5_NON_LINEAR_SIZE", Stock().get_h5_path("my_barra_risk"))
        industry = Stock().read_factor_h5("industry_citic1")

        # forward return between the two dates
        pct = pd.DataFrame(price[next_date] / price[date] - 1.0)

        # one dummy column per industry code, named 'industry_<code>'
        industry_dummy_date = pd.get_dummies(industry[date])
        industry_columns = ['industry_' + str(int(code))
                            for code in industry_dummy_date.columns]
        industry_dummy_date.columns = industry_columns

        # assemble the cross-section and keep complete rows only
        pieces = [pct, alpha_val[date], size[date], beta[date],
                  nolin_size[date], industry_dummy_date]
        data = pd.concat(pieces, axis=1).dropna()

        style_columns = ['size', 'beta', 'nolin_size']
        data.columns = ['pct', 'alpha', 'size', 'beta', 'nolin_size'] + industry_columns

        # standardize the alpha and the style exposures in place
        to_standardize = ['alpha', 'size', 'beta', 'nolin_size']
        data[to_standardize] = FactorPreProcess().standardization(data[to_standardize])

        return data, style_columns, industry_columns
Beispiel #22
0
    def cal_factor_exposure(self):
        """
        Book-to-price risk factor: shareholder equity divided by total market
        value (total shares * unadjusted price).

        Saves the raw ratio under ``self.raw_factor_name`` and the ratio after
        ``remove_extreme_value_mad`` and ``standardization`` under
        ``self.factor_name``.
        """

        # inputs
        equity = Stock().read_factor_h5("TotalShareHoldeRequityDaily")
        shares = Stock().read_factor_h5("TotalShare")
        price = Stock().read_factor_h5("Price_Unadjust")

        # market value, then alignment with equity
        shares, price = FactorPreProcess().make_same_index_columns([shares, price])
        market_value = shares.mul(price)
        equity, market_value = Stock().make_same_index_columns([equity, market_value])

        # ratio, minus the columns that are NaN for every row
        book_to_price = equity.div(market_value).T.dropna(how='all').T
        self.save_risk_factor_exposure(book_to_price, self.raw_factor_name)

        book_to_price = FactorPreProcess().remove_extreme_value_mad(book_to_price)
        book_to_price = FactorPreProcess().standardization(book_to_price)
        self.save_risk_factor_exposure(book_to_price, self.factor_name)
    def change_name(path, dsname, change_dsname):
        """
        Rename an HDF factor data set and re-save it in normalized form.

        Steps: rename ``<path>/<dsname>.h5`` to ``<path>/<change_dsname>.h5``,
        read the data back, write the correlation matrix of the monthly
        columns (20060101 - 20180609) to ``<change_dsname>_MonthCorr.csv``,
        apply ``inv_normalization`` then ``standardization``, and write the
        result to ``<change_dsname>.csv`` and back into the renamed HDF file.

        :param path: directory holding the .h5 files
        :param dsname: current data-set (and file) name
        :param change_dsname: new data-set (and file) name
        """

        from quant.utility.hdf_mfc import HdfMfc
        from quant.utility.factor_preprocess import FactorPreProcess
        import os

        old_file = os.path.join(path, dsname + '.h5')
        new_file = os.path.join(path, change_dsname + '.h5')

        HdfMfc(old_file, dsname).rename(dsname, new_file, change_dsname)
        data = HdfMfc(new_file, change_dsname).read_hdf_factor(change_dsname)

        # correlation between the monthly cross-sections
        month_dates = Date().get_trade_date_series("20060101", "20180609", "M")
        month_corr = data[month_dates].corr()
        month_corr.to_csv(os.path.join(path, change_dsname + '_MonthCorr.csv'))

        # inverse-normal transform followed by standardization
        transformed = FactorPreProcess().inv_normalization(data)
        data = FactorPreProcess().standardization(transformed)
        data.to_csv(os.path.join(path, change_dsname + '.csv'))

        # The writer is constructed with the old file/data-set names but is
        # told to write to the new ones.
        HdfMfc(old_file, dsname).write_hdf_factor(new_file, change_dsname, data)
Beispiel #24
0
    def cal_factor_exposure(self, beg_date, end_date):

        """
        Alpha factor: 4 * cash equivalents / total market value
        (total shares * unadjusted price).

        :param beg_date: start date forwarded to the quarter-to-daily conversion
        :param end_date: end date forwarded to the quarter-to-daily conversion
        """

        # inputs
        cash_equiv = Stock().read_factor_h5("CashEquivalents")
        shares = Stock().read_factor_h5("TotalShare")
        price = Stock().read_factor_h5("Price_Unadjust")
        disclosure = Stock().read_factor_h5("ReportDateDaily")

        # quarterly value expanded to daily frequency by disclosure date
        cash_equiv = Stock().change_quarter_to_daily_with_disclosure_date(
            cash_equiv, disclosure, beg_date, end_date)

        shares, price = FactorPreProcess().make_same_index_columns([shares, price])
        market_value = shares.mul(price)

        cash_equiv, market_value = Stock().make_same_index_columns([cash_equiv, market_value])
        cash_to_price = 4 * cash_equiv.div(market_value)

        # store, without the columns that are NaN for every row
        cash_to_price = cash_to_price.T.dropna(how='all').T
        self.save_alpha_factor_exposure(cash_to_price, self.raw_factor_name)
Beispiel #25
0
    def cal_factor_exposure(self, beg_date, end_date):

        """
        Alpha factor: shareholder equity / total market value
        (total shares * unadjusted price).

        :param beg_date: start date forwarded to the quarter-to-daily conversion
        :param end_date: end date forwarded to the quarter-to-daily conversion
        """

        # inputs
        equity = Stock().read_factor_h5("TotalShareHoldeRequity")
        shares = Stock().read_factor_h5("TotalShare")
        price = Stock().read_factor_h5("Price_Unadjust")
        disclosure = Stock().read_factor_h5("ReportDateDaily")

        # quarterly value expanded to daily frequency by disclosure date
        equity = Stock().change_quarter_to_daily_with_disclosure_date(
            equity, disclosure, beg_date, end_date)

        shares, price = FactorPreProcess().make_same_index_columns([shares, price])
        market_value = shares.mul(price)

        equity, market_value = Stock().make_same_index_columns([equity, market_value])
        book_to_price = equity.div(market_value)

        # store, without the columns that are NaN for every row
        book_to_price = book_to_price.T.dropna(how='all').T
        self.save_alpha_factor_exposure(book_to_price, self.raw_factor_name)
    def cal_factor_exposure(self, beg_date, end_date):

        """
        Alpha factor: TTM operating income / total market value
        (total shares * unadjusted price, scaled down by 1e8).

        :param beg_date: start date forwarded to the quarter-to-daily conversion
        :param end_date: end date forwarded to the quarter-to-daily conversion
        """

        # inputs
        shares = Stock().read_factor_h5("TotalShare")
        price = Stock().read_factor_h5("Price_Unadjust")
        revenue = Stock().read_factor_h5("OperatingIncome")

        # single-quarter values -> TTM -> daily frequency by disclosure date
        revenue = Stock().change_single_quarter_to_ttm_quarter(revenue)
        disclosure = Stock().read_factor_h5("ReportDateDaily")
        revenue = Stock().change_quarter_to_daily_with_disclosure_date(
            revenue, disclosure, beg_date, end_date)

        shares, price = FactorPreProcess().make_same_index_columns([shares, price])
        market_value = shares.mul(price) / 100000000

        revenue, market_value = Stock().make_same_index_columns([revenue, market_value])
        sales_to_price = revenue.div(market_value)

        # store, without the columns that are NaN for every row
        sales_to_price = sales_to_price.T.dropna(how='all').T
        self.save_alpha_factor_exposure(sales_to_price, self.raw_factor_name)
    def cal_factor_exposure(self, beg_date, end_date):

        """
        Alpha factor: 4 * (operating income - operating cost) / total market
        value (total shares * unadjusted price, scaled down by 1e8).

        :param beg_date: start date forwarded to the quarter-to-daily conversion
        :param end_date: end date forwarded to the quarter-to-daily conversion
        """

        # inputs
        revenue = Stock().read_factor_h5("OperatingIncome")
        cost = Stock().read_factor_h5("OperatingCost")
        shares = Stock().read_factor_h5("TotalShare")
        price = Stock().read_factor_h5("Price_Unadjust")
        disclosure = Stock().read_factor_h5("ReportDateDaily")
        gross_profit = revenue.sub(cost)

        # quarterly value expanded to daily frequency by disclosure date
        gross_profit = Stock().change_quarter_to_daily_with_disclosure_date(
            gross_profit, disclosure, beg_date, end_date)

        shares, price = FactorPreProcess().make_same_index_columns([shares, price])
        market_value = shares.mul(price) / 100000000

        gross_profit, market_value = Stock().make_same_index_columns([gross_profit, market_value])
        gross_to_price = 4 * gross_profit.div(market_value)

        # store, without the columns that are NaN for every row
        gross_to_price = gross_to_price.T.dropna(how='all').T
        self.save_alpha_factor_exposure(gross_to_price, self.raw_factor_name)
Beispiel #28
0
    def cal_factor_exposure(self, beg_date, end_date):
        """
        Alpha factor: 4 * retained earnings / total market value
        (total shares * unadjusted price, scaled down by 1e8).

        :param beg_date: start date forwarded to the quarter-to-daily conversion
        :param end_date: end date forwarded to the quarter-to-daily conversion
        """

        # inputs
        retained = Stock().read_factor_h5("RetainedEarnings")
        shares = Stock().read_factor_h5("TotalShare")
        price = Stock().read_factor_h5("Price_Unadjust")
        disclosure = Stock().read_factor_h5("ReportDateDaily")

        # quarterly value expanded to daily frequency by disclosure date
        retained = Stock().change_quarter_to_daily_with_disclosure_date(
            retained, disclosure, beg_date, end_date)

        shares, price = FactorPreProcess().make_same_index_columns([shares, price])
        market_value = shares.mul(price) / 100000000

        retained, market_value = Stock().make_same_index_columns([retained, market_value])
        retained_to_price = 4 * retained.div(market_value)

        # store, without the columns that are NaN for every row
        retained_to_price = retained_to_price.T.dropna(how='all').T
        self.save_alpha_factor_exposure(retained_to_price, self.raw_factor_name)
Beispiel #29
0
    def get_data_date(self, date):
        """
        Collect the cross-sectional inputs for ``date``.

        Returns a 4-tuple of DataFrames, in this order:

        1. the column of ``self.alpha_data`` with the largest label that is
           ``<= date``, renamed to ``self.alpha_factor_name`` (returned as-is,
           not standardized here);
        2. ``Barra().get_factor_exposure_date`` for ``type_list=['INDUSTRY']``;
        3. the same call for ``type_list=['STYLE']``, passed through
           ``FactorPreProcess().standardization``;
        4. ``self.free_mv_data[date]`` as a single column named ``'FreeMv'``.

        Side effects: sets ``self.industry_factor_name``,
        ``self.style_factor_name`` and ``self.risk_factor_name`` (industry
        column labels followed by style column labels).

        Raises ``ValueError`` (from ``max``) when no alpha column label is
        ``<= date``.
        """

        # ---- alpha: latest available column not after `date` -------------
        candidate_dates = [d for d in list(self.alpha_data.columns) if d <= date]
        latest_alpha_date = max(candidate_dates)
        alpha_frame = pd.DataFrame(self.alpha_data[latest_alpha_date])
        alpha_frame.columns = [self.alpha_factor_name]

        # ---- industry exposures ------------------------------------------
        risk_names = []
        industry_frame = Barra().get_factor_exposure_date(
            date=date, type_list=['INDUSTRY'])
        industry_names = industry_frame.columns
        risk_names.extend(industry_names)
        self.industry_factor_name = industry_names
        self.risk_factor_name = risk_names

        # ---- style exposures (standardized) ------------------------------
        style_frame = Barra().get_factor_exposure_date(
            date=date, type_list=['STYLE'])
        style_frame = FactorPreProcess().standardization(style_frame)
        style_names = style_frame.columns
        risk_names.extend(style_names)
        self.style_factor_name = style_names
        self.risk_factor_name = risk_names

        # ---- free-float market value for exactly `date` ------------------
        free_mv_frame = pd.DataFrame(self.free_mv_data[date])
        free_mv_frame.columns = ['FreeMv']

        return alpha_frame, industry_frame, style_frame, free_mv_frame
Beispiel #30
0
    def cal_fmp(self, fmp_name, type="Equal"):
        """
        Build factor-mimicking-portfolio (FMP) weights per rebalance date and
        write them to a CSV file.

        For every date in ``self.change_date_series`` except the last one, the
        alpha exposure returned by ``self.get_data_date`` is cleaned
        (``dropna`` -> ``remove_extreme_value_mad`` -> ``standardization``)
        and a quadratic program is solved with ``sol.qp`` (assumed to be
        ``cvxopt.solvers.qp``):

            minimize    1/2 * w' P w
            subject to  alpha' w = 1
                        sum(w)   = 0

        :param fmp_name: label embedded in the output file name.
        :param type: selects the matrix ``P`` (the name shadows the builtin
            but is kept for backward compatibility):
            ``'Equal'``         -> identity matrix;
            ``'BarraStockCov'`` -> ``Barra().get_stock_covariance(date)``,
            restricted to stocks present in both alpha and covariance data.
            ``'FreeMvSqrt'`` was documented historically but never
            implemented; it is rejected explicitly.
        :raises ValueError: if ``type`` is not one of the supported values.

        Output: ``<self.path>/fmp/<alpha_factor_name>_<fmp_name>_<type>.csv``
        with stocks as rows and dates as columns. A date with too few stocks
        (``len(alpha) <= self.min_stock_num``) or a failed QP yields an
        all-NaN column. Nothing is written when there is no date to process.
        """

        if type not in ('Equal', 'BarraStockCov'):
            # Previously an unsupported type crashed later with an unbound `P`.
            raise ValueError(
                "Unsupported FMP type %r; expected 'Equal' or 'BarraStockCov'"
                % (type,))

        fmp_rows = []

        # Positional indexing is kept on purpose: the container type of
        # change_date_series is not visible here.
        for i_date in range(len(self.change_date_series) - 1):

            # read alpha data
            ####################################################################################################
            date = self.change_date_series[i_date]
            alpha_date = self.get_data_date(date)[0]
            alpha_date = alpha_date.dropna()
            alpha_date = FactorPreProcess().remove_extreme_value_mad(
                alpha_date)
            alpha_date = FactorPreProcess().standardization(alpha_date)
            code_list = sorted(alpha_date.index)
            alpha_date = alpha_date.loc[code_list, :]

            # covariance data (only needed for 'BarraStockCov')
            ####################################################################################################
            stock_cov = None
            if type == 'BarraStockCov':
                stock_cov = Barra().get_stock_covariance(date)
                code_list = sorted(
                    set(alpha_date.index) & set(stock_cov.index))
                alpha_date = alpha_date.loc[code_list, :]
                stock_cov = stock_cov.loc[code_list, code_list]
                # re-clean after the universe has been narrowed
                alpha_date = FactorPreProcess().remove_extreme_value_mad(
                    alpha_date)
                alpha_date = FactorPreProcess().standardization(alpha_date)

            # Default result for this date: an all-NaN row. It is replaced
            # only when the QP succeeds, so a skipped/failed date can never
            # reuse the previous date's weights.
            fmp_raw_alpha = pd.DataFrame(index=[date],
                                         columns=code_list,
                                         dtype=float)

            if len(alpha_date) > self.min_stock_num:

                stock_num = len(alpha_date)
                if type == 'Equal':
                    P = np.eye(stock_num)
                else:
                    P = stock_cov.values

                Q = np.zeros(shape=(stock_num, 1))
                # Equality constraints: row(s) of alpha exposure, then a row
                # of ones; b pins them to 1 and 0 respectively.
                A = np.vstack((alpha_date.values.T,
                               np.ones(shape=(1, stock_num))))
                b = np.array([[1.0], [0.0]])

                try:
                    result = sol.qp(matrix(P), q=matrix(Q),
                                    A=matrix(A), b=matrix(b))
                    weights = np.array(result['x'][0:])
                except Exception as e:
                    # Best effort: keep the NaN row and report the cause.
                    print(
                        "########## Quadratic Programming FMP is InCorrect %s ##########"
                        % date)
                    print(repr(e))
                else:
                    fmp_raw_alpha = pd.DataFrame(weights,
                                                 columns=[date],
                                                 index=code_list).T
                    print(
                        "########## factor mimicking portfolio At %s ##########"
                        % date)
                    concat_data = pd.concat([fmp_raw_alpha.T, alpha_date],
                                            axis=1)
                    concat_data = concat_data.dropna()
                    # Off-diagonal entry: correlation between the weights and
                    # the alpha exposure ([0][0] is always 1.0).
                    print(concat_data.corr().values[0][1])
            else:
                print(
                    "########## Too few stocks for FMP At %s ##########"
                    % date)

            fmp_rows.append(fmp_raw_alpha)

        if not fmp_rows:
            print("########## No date available for FMP ##########")
            return

        # concat
        ####################################################################################################
        fmp_raw_alpha_all = pd.concat(fmp_rows, axis=0)

        # write data
        ####################################################################################################
        sub_path = os.path.join(self.path, 'fmp')
        file = os.path.join(
            sub_path,
            '%s_%s_%s.csv' % (self.alpha_factor_name, fmp_name, type))
        fmp_raw_alpha_all = fmp_raw_alpha_all.T
        fmp_raw_alpha_all.to_csv(file)